160 lines
6.2 KiB
Python
160 lines
6.2 KiB
Python
|
|
"""
|
|||
|
|
Database model definitions.
|
|||
|
|
"""
|
|||
|
|
from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean, ForeignKey, Enum
|
|||
|
|
from sqlalchemy.orm import relationship
|
|||
|
|
from datetime import datetime, timezone
|
|||
|
|
import enum
|
|||
|
|
|
|||
|
|
from app.database import Base
|
|||
|
|
|
|||
|
|
|
|||
|
|
def utc_now():
    """Return the current UTC time as a naive ``datetime``.

    The timestamp is taken in UTC and its ``tzinfo`` is then stripped, so the
    returned value carries no timezone information.

    Returns:
        A ``datetime`` holding the current UTC wall-clock time with
        ``tzinfo`` set to ``None``.
    """
    # asyncpg requires a naive datetime for TIMESTAMP WITHOUT TIME ZONE columns.
    return datetime.now(timezone.utc).replace(tzinfo=None)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class AnalysisStatus(enum.Enum):
    """Analysis status enumeration.

    Each member's value is the lowercase string form of its name.
    """

    PENDING = "pending"  # waiting to be analyzed
    EXTRACTING = "extracting"  # extraction in progress
    ANALYZING = "analyzing"  # analysis in progress
    SUMMARIZING = "summarizing"  # summarization in progress
    COMPLETED = "completed"  # finished
    FAILED = "failed"  # failed
|
|||
|
|
|
|||
|
|
|
|||
|
|
class Company(Base):
    """Listed company table.

    Mapped to the ``companies`` table. A company owns a collection of
    ``Report`` rows through ``reports``; that relationship is configured with
    ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)
    # Stock code: required, unique and indexed.
    stock_code = Column(String(10), unique=True, index=True, nullable=False, comment="股票代码")
    # Full company name: required.
    company_name = Column(String(100), nullable=False, comment="公司名称")
    # Company short name.
    short_name = Column(String(50), comment="公司简称")
    # Industry the company belongs to.
    industry = Column(String(50), comment="所属行业")
    # Juchao organisation ID.
    org_id = Column(String(50), comment="巨潮机构ID")
    # Whether monitoring is enabled; defaults to True.
    is_active = Column(Boolean, default=True, comment="是否启用监控")
    # Creation time, filled from utc_now on insert.
    created_at = Column(DateTime, default=utc_now, comment="创建时间")
    # Update time, filled from utc_now on insert and again on update.
    updated_at = Column(DateTime, default=utc_now, onupdate=utc_now, comment="更新时间")

    # Associated reports
    reports = relationship("Report", back_populates="company", cascade="all, delete-orphan")

    def __repr__(self):
        """Return a debug string containing the stock code and short name."""
        return f"<Company {self.stock_code} {self.short_name}>"
|
|||
|
|
|
|||
|
|
|
|||
|
|
class Report(Base):
    """Annual / semi-annual report table.

    Mapped to the ``reports`` table. Each row references one ``Company`` via
    ``company_id`` and owns collections of ``ExtractedContent`` and
    ``AnalysisResult`` rows, both configured with
    ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "reports"

    id = Column(Integer, primary_key=True, index=True)
    # Required foreign key to companies.id.
    company_id = Column(Integer, ForeignKey("companies.id"), nullable=False)

    # Basic report information
    title = Column(String(500), nullable=False, comment="报告标题")
    report_type = Column(String(50), comment="报告类型: 年度报告/半年度报告")
    report_year = Column(Integer, comment="报告年份")
    report_period = Column(String(20), comment="报告期间: 年报/半年报")

    # Juchao announcement information
    announcement_id = Column(String(100), unique=True, comment="公告ID")
    announcement_time = Column(DateTime, comment="公告时间")
    pdf_url = Column(String(500), comment="PDF下载链接")

    # Local storage
    local_path = Column(String(500), comment="本地存储路径")
    file_size = Column(Integer, comment="文件大小(字节)")

    # Status flags; each boolean defaults to False.
    is_downloaded = Column(Boolean, default=False, comment="是否已下载")
    is_extracted = Column(Boolean, default=False, comment="是否已提取内容")
    is_analyzed = Column(Boolean, default=False, comment="是否已分析")
    # Stored as the plain string value of an AnalysisStatus member, not as an Enum column.
    analysis_status = Column(String(20), default=AnalysisStatus.PENDING.value, comment="分析状态")

    # Creation time, filled from utc_now on insert.
    created_at = Column(DateTime, default=utc_now, comment="创建时间")
    # Update time, filled from utc_now on insert and again on update.
    updated_at = Column(DateTime, default=utc_now, onupdate=utc_now, comment="更新时间")

    # Relationships
    company = relationship("Company", back_populates="reports")
    extracted_contents = relationship("ExtractedContent", back_populates="report", cascade="all, delete-orphan")
    analysis_results = relationship("AnalysisResult", back_populates="report", cascade="all, delete-orphan")

    def __repr__(self):
        """Return a debug string containing the report title."""
        return f"<Report {self.title}>"
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ExtractedContent(Base):
    """Extracted content table.

    Mapped to the ``extracted_contents`` table. Each row references one
    ``Report`` via ``report_id``.
    """

    __tablename__ = "extracted_contents"

    id = Column(Integer, primary_key=True, index=True)
    # Required foreign key to reports.id.
    report_id = Column(Integer, ForeignKey("reports.id"), nullable=False)

    # Section name.
    section_name = Column(String(100), comment="章节名称")
    # Keyword that was matched.
    section_keyword = Column(String(50), comment="匹配的关键词")
    # Extracted content.
    content = Column(Text, comment="提取的内容")
    # Start page number.
    page_start = Column(Integer, comment="起始页码")
    # End page number.
    page_end = Column(Integer, comment="结束页码")
    # Character count.
    char_count = Column(Integer, comment="字符数")

    # Creation time, filled from utc_now on insert.
    created_at = Column(DateTime, default=utc_now, comment="创建时间")

    # Relationships
    report = relationship("Report", back_populates="extracted_contents")

    def __repr__(self):
        """Return a debug string containing the section name."""
        return f"<ExtractedContent {self.section_name}>"
|
|||
|
|
|
|||
|
|
|
|||
|
|
class AnalysisResult(Base):
    """AI analysis result table.

    Mapped to the ``analysis_results`` table. Each row references one
    ``Report`` via ``report_id``.
    """

    __tablename__ = "analysis_results"

    id = Column(Integer, primary_key=True, index=True)
    # Required foreign key to reports.id.
    report_id = Column(Integer, ForeignKey("reports.id"), nullable=False)

    # Analysis information
    analysis_type = Column(String(50), comment="分析类型: section/summary")
    # Name of the analyzed section (when this is a section analysis).
    section_name = Column(String(100), comment="分析的章节名称(如果是章节分析)")
    # AI model that was used.
    ai_model = Column(String(50), comment="使用的AI模型")

    # Analysis content
    prompt = Column(Text, comment="发送给AI的提示词")
    response = Column(Text, comment="AI的原始响应")
    summary = Column(Text, comment="提取的摘要")
    # Key points, kept in a Text column; the column comment says JSON format.
    key_points = Column(Text, comment="关键要点(JSON格式)")

    # Status
    # Whether this row is the final summary result; defaults to False.
    is_final = Column(Boolean, default=False, comment="是否为最终汇总结果")
    # Number of tokens consumed.
    token_count = Column(Integer, comment="消耗的token数")

    # Creation time, filled from utc_now on insert.
    created_at = Column(DateTime, default=utc_now, comment="创建时间")

    # Relationships
    report = relationship("Report", back_populates="analysis_results")

    def __repr__(self):
        """Return a debug string containing the analysis type and report id."""
        return f"<AnalysisResult {self.analysis_type} for Report {self.report_id}>"
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TaskLog(Base):
    """Task log table.

    Mapped to the ``task_logs`` table. It declares no foreign keys or
    relationships.
    """

    __tablename__ = "task_logs"

    id = Column(Integer, primary_key=True, index=True)

    # Task type: crawl/extract/analyze (per the column comment).
    task_type = Column(String(50), comment="任务类型: crawl/extract/analyze")
    # Task name.
    task_name = Column(String(200), comment="任务名称")
    # Status: started/running/completed/failed (per the column comment).
    status = Column(String(20), comment="状态: started/running/completed/failed")
    # Log message.
    message = Column(Text, comment="日志消息")
    # Error information.
    error = Column(Text, comment="错误信息")

    # Start time, filled from utc_now on insert.
    started_at = Column(DateTime, default=utc_now, comment="开始时间")
    # Completion time; no default is configured.
    completed_at = Column(DateTime, comment="完成时间")

    def __repr__(self):
        """Return a debug string containing the task type and status."""
        return f"<TaskLog {self.task_type} {self.status}>"
|