"""Database model definitions."""
|
||
from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean, ForeignKey, Enum
|
||
from sqlalchemy.orm import relationship
|
||
from datetime import datetime, timezone
|
||
import enum
|
||
|
||
from app.database import Base
|
||
|
||
|
||
def utc_now():
    """Return the current UTC time as a naive ``datetime`` (PostgreSQL-friendly).

    The value is computed in UTC and then has its ``tzinfo`` removed.
    """
    # asyncpg requires naive datetimes for TIMESTAMP WITHOUT TIME ZONE columns.
    aware_now = datetime.now(tz=timezone.utc)
    return aware_now.replace(tzinfo=None)
|
||
|
||
|
||
class AnalysisStatus(enum.Enum):
    """Enumeration of the analysis states, keyed by their string values."""

    # Waiting to be analyzed.
    PENDING = "pending"
    # Extraction in progress.
    EXTRACTING = "extracting"
    # Analysis in progress.
    ANALYZING = "analyzing"
    # Summarization in progress.
    SUMMARIZING = "summarizing"
    # Finished.
    COMPLETED = "completed"
    # Failed.
    FAILED = "failed"
|
||
|
||
|
||
class Company(Base):
    """Listed-company table (``companies``)."""

    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)
    # Stock code: required, unique and indexed.
    stock_code = Column(
        String(10),
        unique=True,
        index=True,
        nullable=False,
        comment="股票代码",
    )
    # Full company name (required).
    company_name = Column(
        String(100),
        nullable=False,
        comment="公司名称",
    )
    # Short company name.
    short_name = Column(String(50), comment="公司简称")
    # Industry the company belongs to.
    industry = Column(String(50), comment="所属行业")
    # Organization ID on the CNINFO (Juchao) side.
    org_id = Column(String(50), comment="巨潮机构ID")
    # Whether monitoring is enabled; defaults to True.
    is_active = Column(
        Boolean,
        default=True,
        comment="是否启用监控",
    )
    # Creation / last-update timestamps, filled in by ``utc_now``.
    created_at = Column(
        DateTime,
        default=utc_now,
        comment="创建时间",
    )
    updated_at = Column(
        DateTime,
        default=utc_now,
        onupdate=utc_now,
        comment="更新时间",
    )

    # Associated reports, configured with the "all, delete-orphan" ORM cascade.
    reports = relationship(
        "Report",
        back_populates="company",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """Return a short debug representation with stock code and short name."""
        return f"<Company {self.stock_code} {self.short_name}>"
|
||
|
||
|
||
class Report(Base):
    """Annual / semi-annual report table (``reports``)."""

    __tablename__ = "reports"

    id = Column(Integer, primary_key=True, index=True)
    # Owning company (required foreign key to ``companies.id``).
    company_id = Column(
        Integer,
        ForeignKey("companies.id"),
        nullable=False,
    )

    # --- Basic report information ---
    title = Column(
        String(500),
        nullable=False,
        comment="报告标题",
    )
    report_type = Column(
        String(50),
        comment="报告类型: 年度报告/半年度报告",
    )
    report_year = Column(Integer, comment="报告年份")
    report_period = Column(
        String(20),
        comment="报告期间: 年报/半年报",
    )

    # --- CNINFO (Juchao) information ---
    announcement_id = Column(
        String(100),
        unique=True,
        comment="公告ID",
    )
    announcement_time = Column(DateTime, comment="公告时间")
    pdf_url = Column(String(500), comment="PDF下载链接")

    # --- Local storage ---
    local_path = Column(String(500), comment="本地存储路径")
    file_size = Column(Integer, comment="文件大小(字节)")

    # --- Status flags (all default to False) and the analysis status ---
    is_downloaded = Column(
        Boolean,
        default=False,
        comment="是否已下载",
    )
    is_extracted = Column(
        Boolean,
        default=False,
        comment="是否已提取内容",
    )
    is_analyzed = Column(
        Boolean,
        default=False,
        comment="是否已分析",
    )
    # Stored as a plain string; defaults to the value of AnalysisStatus.PENDING.
    analysis_status = Column(
        String(20),
        default=AnalysisStatus.PENDING.value,
        comment="分析状态",
    )

    # Creation / last-update timestamps, filled in by ``utc_now``.
    created_at = Column(
        DateTime,
        default=utc_now,
        comment="创建时间",
    )
    updated_at = Column(
        DateTime,
        default=utc_now,
        onupdate=utc_now,
        comment="更新时间",
    )

    # --- Relationships ---
    company = relationship("Company", back_populates="reports")
    extracted_contents = relationship(
        "ExtractedContent",
        back_populates="report",
        cascade="all, delete-orphan",
    )
    analysis_results = relationship(
        "AnalysisResult",
        back_populates="report",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """Return a short debug representation containing the report title."""
        return f"<Report {self.title}>"
|
||
|
||
|
||
class ExtractedContent(Base):
    """Extracted-content table (``extracted_contents``)."""

    __tablename__ = "extracted_contents"

    id = Column(Integer, primary_key=True, index=True)
    # Report this content belongs to (required foreign key to ``reports.id``).
    report_id = Column(
        Integer,
        ForeignKey("reports.id"),
        nullable=False,
    )

    # Section name and the keyword that matched it.
    section_name = Column(String(100), comment="章节名称")
    section_keyword = Column(String(50), comment="匹配的关键词")
    # The extracted text itself.
    content = Column(Text, comment="提取的内容")
    # Start / end page numbers and the character count.
    page_start = Column(Integer, comment="起始页码")
    page_end = Column(Integer, comment="结束页码")
    char_count = Column(Integer, comment="字符数")

    # Creation timestamp, filled in by ``utc_now``.
    created_at = Column(
        DateTime,
        default=utc_now,
        comment="创建时间",
    )

    # Relationship back to the owning report.
    report = relationship(
        "Report",
        back_populates="extracted_contents",
    )

    def __repr__(self):
        """Return a short debug representation containing the section name."""
        return f"<ExtractedContent {self.section_name}>"
|
||
|
||
|
||
class AnalysisResult(Base):
    """AI analysis result table (``analysis_results``)."""

    __tablename__ = "analysis_results"

    id = Column(Integer, primary_key=True, index=True)
    # Report this result belongs to (required foreign key to ``reports.id``).
    report_id = Column(
        Integer,
        ForeignKey("reports.id"),
        nullable=False,
    )

    # --- Analysis metadata ---
    analysis_type = Column(
        String(50),
        comment="分析类型: section/summary",
    )
    # Name of the analyzed section (when this is a section analysis).
    section_name = Column(
        String(100),
        comment="分析的章节名称(如果是章节分析)",
    )
    ai_model = Column(String(50), comment="使用的AI模型")

    # --- Analysis content ---
    # Prompt sent to the AI and the raw response received.
    prompt = Column(Text, comment="发送给AI的提示词")
    response = Column(Text, comment="AI的原始响应")
    # Extracted summary and key points (key points are JSON text).
    summary = Column(Text, comment="提取的摘要")
    key_points = Column(Text, comment="关键要点(JSON格式)")

    # --- Status ---
    # Whether this row is the final aggregated result; defaults to False.
    is_final = Column(
        Boolean,
        default=False,
        comment="是否为最终汇总结果",
    )
    # Number of tokens consumed.
    token_count = Column(Integer, comment="消耗的token数")

    # Creation timestamp, filled in by ``utc_now``.
    created_at = Column(
        DateTime,
        default=utc_now,
        comment="创建时间",
    )

    # Relationship back to the owning report.
    report = relationship(
        "Report",
        back_populates="analysis_results",
    )

    def __repr__(self):
        """Return a short debug representation with analysis type and report id."""
        return f"<AnalysisResult {self.analysis_type} for Report {self.report_id}>"
|
||
|
||
|
||
class TaskLog(Base):
    """Task log table (``task_logs``)."""

    __tablename__ = "task_logs"

    id = Column(Integer, primary_key=True, index=True)

    # Task type: crawl / extract / analyze.
    task_type = Column(
        String(50),
        comment="任务类型: crawl/extract/analyze",
    )
    task_name = Column(String(200), comment="任务名称")
    # Status: started / running / completed / failed.
    status = Column(
        String(20),
        comment="状态: started/running/completed/failed",
    )
    # Log message and error details.
    message = Column(Text, comment="日志消息")
    error = Column(Text, comment="错误信息")

    # Start time defaults to ``utc_now``; completion time has no default.
    started_at = Column(
        DateTime,
        default=utc_now,
        comment="开始时间",
    )
    completed_at = Column(DateTime, comment="完成时间")

    def __repr__(self):
        """Return a short debug representation with task type and status."""
        return f"<TaskLog {self.task_type} {self.status}>"
|