64 lines
2.2 KiB
Python
64 lines
2.2 KiB
Python
"""
|
||
Process downloaded reports: extract their content, then run AI analysis.
|
||
"""
|
||
import asyncio
|
||
import sys
|
||
sys.path.insert(0, ".")
|
||
|
||
from app.database import AsyncSessionLocal, init_db
|
||
from app.models import Report
|
||
from app.services.pdf_extractor import pdf_extractor
|
||
from app.services.ai_analyzer import ai_analyzer
|
||
from sqlalchemy import select
|
||
from sqlalchemy.orm import selectinload
|
||
|
||
async def process_reports():
    """Extract content from downloaded reports and run AI analysis on each.

    Selects every ``Report`` whose ``is_downloaded`` flag is true and whose
    ``is_extracted`` flag is false (requesting ``Report.company`` through
    ``selectinload``), then for each report:

    1. calls ``pdf_extractor.extract_and_save(db, report)``;
    2. if that returned any content, calls
       ``ai_analyzer.analyze_report(db, report)`` and reports whether it
       succeeded.

    Progress is printed to stdout. An exception raised while processing one
    report is printed and the loop continues with the next report.
    """
    banner = "=" * 60
    print(banner)
    print("处理已下载的报告:提取内容 + AI分析")
    print(banner)

    await init_db()

    async with AsyncSessionLocal() as db:
        # Reports that are downloaded but not yet extracted.
        # ``== True`` / ``== False`` are deliberate here: on mapped columns
        # they build SQL expressions rather than Python booleans.
        stmt = (
            select(Report)
            .where(
                Report.is_downloaded == True,  # noqa: E712
                Report.is_extracted == False,  # noqa: E712
            )
            .options(selectinload(Report.company))
        )

        result = await db.execute(stmt)
        reports = result.scalars().all()
        total = len(reports)

        print(f"\n找到 {total} 份待处理的报告")

        for i, report in enumerate(reports, 1):
            company_name = report.company.short_name if report.company else "未知"
            # The header is printed outside the try block, so a report with
            # a missing title must not raise here and abort the whole batch.
            title = report.title or ""
            print(f"\n[{i}/{total}] 处理: {company_name} - {title[:30]}...")

            try:
                # 1. Extract the PDF content.
                print(" → 提取PDF内容...")
                contents = await pdf_extractor.extract_and_save(db, report)
                print(f" ✓ 提取了 {len(contents)} 个章节")

                # 2. AI analysis, only when something was extracted.
                if contents:
                    print(" → 执行AI分析...")
                    success = await ai_analyzer.analyze_report(db, report)
                    if success:
                        print(" ✓ AI分析完成")
                    else:
                        print(" ✗ AI分析失败")
                else:
                    print(" ⚠ 无提取内容,跳过AI分析")

            except Exception as e:
                # Best-effort batch: report the failure and move on.
                print(f" ✗ 处理失败: {e}")

    print("\n" + banner)
    print("处理完成!")
    print(banner)
|
||
|
||
if __name__ == "__main__":
    # Script entry point: build the coroutine and drive it to completion
    # on a fresh event loop. Nothing runs when the module is imported.
    pipeline = process_reports()
    asyncio.run(pipeline)
|