huibao/backend/process_reports.py

64 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
处理已下载的报告:提取内容 + AI分析
"""
import asyncio
import sys
sys.path.insert(0, ".")
from app.database import AsyncSessionLocal, init_db
from app.models import Report
from app.services.pdf_extractor import pdf_extractor
from app.services.ai_analyzer import ai_analyzer
from sqlalchemy import select
from sqlalchemy.orm import selectinload
async def process_reports():
    """Extract content from downloaded reports and run AI analysis on them.

    Workflow:
      1. Print a banner and await ``init_db()``.
      2. Open an ``AsyncSessionLocal`` session and select every ``Report``
         with ``is_downloaded`` true and ``is_extracted`` false, loading
         the related ``company`` via ``selectinload``.
      3. For each report, call ``pdf_extractor.extract_and_save(db, report)``.
         When that returns a non-empty result, call
         ``ai_analyzer.analyze_report(db, report)`` and print whether it
         reported success.

    Any exception raised while extracting or analysing a single report is
    caught, printed, and the loop moves on to the next report.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    banner = "=" * 60
    print(banner)
    print("处理已下载的报告:提取内容 + AI分析")
    print(banner)

    await init_db()

    async with AsyncSessionLocal() as db:
        # Reports that were downloaded but whose content is not yet extracted.
        # The `== True` / `== False` comparisons are intentional: they build
        # SQL expressions rather than evaluating Python booleans.
        pending_query = (
            select(Report)
            .where(
                Report.is_downloaded == True,  # noqa: E712
                Report.is_extracted == False,  # noqa: E712
            )
            .options(selectinload(Report.company))
        )
        query_result = await db.execute(pending_query)
        pending = query_result.scalars().all()
        total = len(pending)
        print(f"\n找到 {total} 份待处理的报告")

        for position, report in enumerate(pending, 1):
            # Fall back to a placeholder name when the report has no company.
            if report.company:
                company_name = report.company.short_name
            else:
                company_name = "未知"
            # NOTE: this header line is built outside the try block below, so
            # an error while formatting it is not caught per report.
            print(f"\n[{position}/{total}] 处理: {company_name} - {report.title[:30]}...")

            try:
                # Step 1: extract the PDF content.
                print(" → 提取PDF内容...")
                contents = await pdf_extractor.extract_and_save(db, report)
                print(f" ✓ 提取了 {len(contents)} 个章节")

                # Step 2: AI analysis only makes sense when something was extracted.
                if not contents:
                    print(" ⚠ 无提取内容跳过AI分析")
                    continue

                print(" → 执行AI分析...")
                if await ai_analyzer.analyze_report(db, report):
                    print(" ✓ AI分析完成")
                else:
                    print(" ✗ AI分析失败")
            except Exception as e:
                # Best-effort batch: report the failure and keep going.
                print(f" ✗ 处理失败: {e}")

    print("\n" + banner)
    print("处理完成!")
    print(banner)
if __name__ == "__main__":
    # Script entry point: run the whole batch on a fresh asyncio event loop.
    batch_job = process_reports()
    asyncio.run(batch_job)