huibao/backend/process_reports.py

64 lines
2.2 KiB
Python
Raw Normal View History

"""
处理已下载的报告提取内容 + AI分析
"""
import asyncio
import sys
sys.path.insert(0, ".")
from app.database import AsyncSessionLocal, init_db
from app.models import Report
from app.services.pdf_extractor import pdf_extractor
from app.services.ai_analyzer import ai_analyzer
from sqlalchemy import select
from sqlalchemy.orm import selectinload
async def process_reports():
    """Extract content from downloaded reports and run AI analysis on them.

    Initializes the database, selects every ``Report`` flagged as downloaded
    but not yet extracted (loading its ``company`` relationship via
    ``selectinload``), then for each report calls
    ``pdf_extractor.extract_and_save`` and, when that returns a non-empty
    result, ``ai_analyzer.analyze_report``. Progress is printed to stdout.

    An exception raised while handling one report is printed and the loop
    moves on to the next report; nothing is re-raised from the per-report
    work.
    """
    banner = "=" * 60
    print(banner)
    print("处理已下载的报告:提取内容 + AI分析")
    print(banner)

    await init_db()

    async with AsyncSessionLocal() as db:
        # Find reports that are downloaded but not yet extracted.
        # `== True` / `== False` are deliberate: on SQLAlchemy columns they
        # build SQL comparison expressions, so they must not be rewritten
        # as `is True` / `not ...`.
        stmt = select(Report).where(
            Report.is_downloaded == True,  # noqa: E712
            Report.is_extracted == False,  # noqa: E712
        ).options(selectinload(Report.company))
        result = await db.execute(stmt)
        reports = result.scalars().all()
        total = len(reports)
        print(f"\n找到 {total} 份待处理的报告")

        for i, report in enumerate(reports, 1):
            company_name = report.company.short_name if report.company else "未知"
            # This line runs outside the try block below, so slicing a None
            # title would raise TypeError and abort the whole batch; fall
            # back to an empty string instead.
            title = report.title or ""
            print(f"\n[{i}/{total}] 处理: {company_name} - {title[:30]}...")

            try:
                # 1. Extract the PDF content.
                print(" → 提取PDF内容...")
                contents = await pdf_extractor.extract_and_save(db, report)
                print(f" ✓ 提取了 {len(contents)} 个章节")

                # 2. Run AI analysis only when something was extracted.
                if contents:
                    print(" → 执行AI分析...")
                    success = await ai_analyzer.analyze_report(db, report)
                    if success:
                        print(" ✓ AI分析完成")
                    else:
                        print(" ✗ AI分析失败")
                else:
                    print(" ⚠ 无提取内容跳过AI分析")
            except Exception as e:
                # Best-effort batch: report the failure and keep going.
                # NOTE(review): if the failure left the session's transaction
                # in a failed state, later reports may fail as well — confirm
                # how extract_and_save / analyze_report manage transactions.
                print(f" ✗ 处理失败: {e}")

    print("\n" + banner)
    print("处理完成!")
    print(banner)
if __name__ == "__main__":
    # Script entry point: build the batch coroutine and drive it to
    # completion with asyncio.run.
    batch_job = process_reports()
    asyncio.run(batch_job)