# huibao/backend/app/models.py
#
# NOTE: the repository viewer flagged this file as containing ambiguous
# Unicode characters (characters that may be confused with others).

"""
数据库模型定义
"""
from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean, ForeignKey, Enum
from sqlalchemy.orm import relationship
from datetime import datetime, timezone
import enum
from app.database import Base
def utc_now() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    The value is computed from an aware UTC timestamp and then has its
    ``tzinfo`` stripped, so the wall-clock fields are UTC but the object
    carries no timezone.

    Returns:
        A ``datetime`` whose ``tzinfo`` is ``None``.
    """
    # asyncpg requires a naive datetime for TIMESTAMP WITHOUT TIME ZONE
    # columns, so the timezone is dropped after reading the clock in UTC.
    return datetime.now(timezone.utc).replace(tzinfo=None)
class AnalysisStatus(enum.Enum):
    """Enumeration of analysis status values.

    Each member's ``value`` is the lowercase string form of the status.
    """

    PENDING = "pending"          # waiting to be analyzed
    EXTRACTING = "extracting"    # extraction in progress
    ANALYZING = "analyzing"      # analysis in progress
    SUMMARIZING = "summarizing"  # summarization in progress
    COMPLETED = "completed"      # finished
    FAILED = "failed"            # failed
class Company(Base):
    """Listed company table (``companies``).

    ``stock_code`` is unique, indexed and required; ``company_name`` is
    required. The ``comment=`` strings are passed to SQLAlchemy as column
    comments and are runtime data, so they are kept in their original
    language.
    """

    __tablename__ = "companies"

    id = Column(Integer, primary_key=True, index=True)
    stock_code = Column(String(10), unique=True, index=True, nullable=False, comment="股票代码")  # stock code
    company_name = Column(String(100), nullable=False, comment="公司名称")  # full company name
    short_name = Column(String(50), comment="公司简称")  # short name
    industry = Column(String(50), comment="所属行业")  # industry
    org_id = Column(String(50), comment="巨潮机构ID")  # Juchao organisation ID
    is_active = Column(Boolean, default=True, comment="是否启用监控")  # monitoring enabled?
    created_at = Column(DateTime, default=utc_now, comment="创建时间")  # creation time
    # onupdate refreshes the timestamp whenever the row is updated via SQLAlchemy.
    updated_at = Column(DateTime, default=utc_now, onupdate=utc_now, comment="更新时间")  # last update time

    # Related reports; the cascade setting removes a company's reports along
    # with the company and deletes reports detached from it.
    reports = relationship("Report", back_populates="company", cascade="all, delete-orphan")

    def __repr__(self):
        """Return a short debug representation with stock code and short name."""
        return f"<Company {self.stock_code} {self.short_name}>"
class Report(Base):
    """Annual / semi-annual report table (``reports``).

    Each row belongs to one ``Company`` through ``company_id`` and owns its
    extracted contents and analysis results. The ``comment=`` strings are
    passed to SQLAlchemy as column comments and are runtime data, so they
    are kept in their original language.
    """

    __tablename__ = "reports"

    id = Column(Integer, primary_key=True, index=True)
    company_id = Column(Integer, ForeignKey("companies.id"), nullable=False)

    # Basic report information
    title = Column(String(500), nullable=False, comment="报告标题")  # report title
    report_type = Column(String(50), comment="报告类型: 年度报告/半年度报告")  # annual / semi-annual report
    report_year = Column(Integer, comment="报告年份")  # report year
    report_period = Column(String(20), comment="报告期间: 年报/半年报")  # reporting period

    # Juchao announcement information
    announcement_id = Column(String(100), unique=True, comment="公告ID")  # announcement ID
    announcement_time = Column(DateTime, comment="公告时间")  # announcement time
    pdf_url = Column(String(500), comment="PDF下载链接")  # PDF download link

    # Local storage
    local_path = Column(String(500), comment="本地存储路径")  # local storage path
    file_size = Column(Integer, comment="文件大小(字节)")  # file size in bytes

    # Processing state
    is_downloaded = Column(Boolean, default=False, comment="是否已下载")  # downloaded?
    is_extracted = Column(Boolean, default=False, comment="是否已提取内容")  # content extracted?
    is_analyzed = Column(Boolean, default=False, comment="是否已分析")  # analyzed?
    # Stored as a plain string column; the default is the string value of
    # AnalysisStatus.PENDING rather than the enum member itself.
    analysis_status = Column(String(20), default=AnalysisStatus.PENDING.value, comment="分析状态")

    created_at = Column(DateTime, default=utc_now, comment="创建时间")  # creation time
    # onupdate refreshes the timestamp whenever the row is updated via SQLAlchemy.
    updated_at = Column(DateTime, default=utc_now, onupdate=utc_now, comment="更新时间")  # last update time

    # Relationships; child rows are removed together with the report.
    company = relationship("Company", back_populates="reports")
    extracted_contents = relationship("ExtractedContent", back_populates="report", cascade="all, delete-orphan")
    analysis_results = relationship("AnalysisResult", back_populates="report", cascade="all, delete-orphan")

    def __repr__(self):
        """Return a short debug representation containing the report title."""
        return f"<Report {self.title}>"
class ExtractedContent(Base):
    """Extracted content table (``extracted_contents``).

    Each row holds one extracted section of a report, linked through
    ``report_id``. The ``comment=`` strings are passed to SQLAlchemy as
    column comments and are runtime data, so they are kept in their
    original language.
    """

    __tablename__ = "extracted_contents"

    id = Column(Integer, primary_key=True, index=True)
    report_id = Column(Integer, ForeignKey("reports.id"), nullable=False)

    section_name = Column(String(100), comment="章节名称")  # section name
    section_keyword = Column(String(50), comment="匹配的关键词")  # matched keyword
    content = Column(Text, comment="提取的内容")  # extracted content
    page_start = Column(Integer, comment="起始页码")  # first page number
    page_end = Column(Integer, comment="结束页码")  # last page number
    char_count = Column(Integer, comment="字符数")  # character count
    created_at = Column(DateTime, default=utc_now, comment="创建时间")  # creation time

    # Relationship back to the owning report
    report = relationship("Report", back_populates="extracted_contents")

    def __repr__(self):
        """Return a short debug representation containing the section name."""
        return f"<ExtractedContent {self.section_name}>"
class AnalysisResult(Base):
    """AI analysis result table (``analysis_results``).

    Each row stores one AI analysis of a report, linked through
    ``report_id``. The ``comment=`` strings are passed to SQLAlchemy as
    column comments and are runtime data, so they are kept in their
    original language.
    """

    __tablename__ = "analysis_results"

    id = Column(Integer, primary_key=True, index=True)
    report_id = Column(Integer, ForeignKey("reports.id"), nullable=False)

    # Analysis metadata
    analysis_type = Column(String(50), comment="分析类型: section/summary")  # analysis type
    section_name = Column(String(100), comment="分析的章节名称(如果是章节分析)")  # section analysed, for section analyses
    ai_model = Column(String(50), comment="使用的AI模型")  # AI model used

    # Analysis content
    prompt = Column(Text, comment="发送给AI的提示词")  # prompt sent to the AI
    response = Column(Text, comment="AI的原始响应")  # raw AI response
    summary = Column(Text, comment="提取的摘要")  # extracted summary
    key_points = Column(Text, comment="关键要点(JSON格式)")  # key points, JSON text per the column comment

    # Status
    is_final = Column(Boolean, default=False, comment="是否为最终汇总结果")  # final summary result?
    token_count = Column(Integer, comment="消耗的token数")  # tokens consumed
    created_at = Column(DateTime, default=utc_now, comment="创建时间")  # creation time

    # Relationship back to the owning report
    report = relationship("Report", back_populates="analysis_results")

    def __repr__(self):
        """Return a short debug representation with analysis type and report id."""
        return f"<AnalysisResult {self.analysis_type} for Report {self.report_id}>"
class TaskLog(Base):
    """Task log table (``task_logs``).

    The table has no foreign keys or relationships to the other models.
    The ``comment=`` strings are passed to SQLAlchemy as column comments
    and are runtime data, so they are kept in their original language.
    """

    __tablename__ = "task_logs"

    id = Column(Integer, primary_key=True, index=True)
    task_type = Column(String(50), comment="任务类型: crawl/extract/analyze")  # task type
    task_name = Column(String(200), comment="任务名称")  # task name
    status = Column(String(20), comment="状态: started/running/completed/failed")  # status
    message = Column(Text, comment="日志消息")  # log message
    error = Column(Text, comment="错误信息")  # error details
    started_at = Column(DateTime, default=utc_now, comment="开始时间")  # start time
    # No default: stays NULL until something sets it explicitly.
    completed_at = Column(DateTime, comment="完成时间")  # completion time

    def __repr__(self):
        """Return a short debug representation with task type and status."""
        return f"<TaskLog {self.task_type} {self.status}>"