""" 处理已下载的报告:提取内容 + AI分析 """ import asyncio import sys sys.path.insert(0, ".") from app.database import AsyncSessionLocal, init_db from app.models import Report from app.services.pdf_extractor import pdf_extractor from app.services.ai_analyzer import ai_analyzer from sqlalchemy import select from sqlalchemy.orm import selectinload async def process_reports(): print("=" * 60) print("处理已下载的报告:提取内容 + AI分析") print("=" * 60) await init_db() async with AsyncSessionLocal() as db: # 查找已下载但未提取的报告 stmt = select(Report).where( Report.is_downloaded == True, Report.is_extracted == False ).options(selectinload(Report.company)) result = await db.execute(stmt) reports = result.scalars().all() print(f"\n找到 {len(reports)} 份待处理的报告") for i, report in enumerate(reports, 1): company_name = report.company.short_name if report.company else "未知" print(f"\n[{i}/{len(reports)}] 处理: {company_name} - {report.title[:30]}...") try: # 1. 提取PDF内容 print(" → 提取PDF内容...") contents = await pdf_extractor.extract_and_save(db, report) print(f" ✓ 提取了 {len(contents)} 个章节") # 2. AI分析(如果有提取内容) if contents: print(" → 执行AI分析...") success = await ai_analyzer.analyze_report(db, report) if success: print(" ✓ AI分析完成") else: print(" ✗ AI分析失败") else: print(" ⚠ 无提取内容,跳过AI分析") except Exception as e: print(f" ✗ 处理失败: {e}") print("\n" + "=" * 60) print("处理完成!") print("=" * 60) if __name__ == "__main__": asyncio.run(process_reports())