64 lines
2.2 KiB
Python
64 lines
2.2 KiB
Python
|
|
"""
|
|||
|
|
处理已下载的报告:提取内容 + AI分析
|
|||
|
|
"""
|
|||
|
|
import asyncio
|
|||
|
|
import sys
|
|||
|
|
sys.path.insert(0, ".")
|
|||
|
|
|
|||
|
|
from app.database import AsyncSessionLocal, init_db
|
|||
|
|
from app.models import Report
|
|||
|
|
from app.services.pdf_extractor import pdf_extractor
|
|||
|
|
from app.services.ai_analyzer import ai_analyzer
|
|||
|
|
from sqlalchemy import select
|
|||
|
|
from sqlalchemy.orm import selectinload
|
|||
|
|
|
|||
|
|
async def process_reports():
    """Extract content from downloaded reports and run AI analysis on each.

    Initializes the database, selects every ``Report`` flagged as downloaded
    but not yet extracted (loading its ``company`` relationship alongside),
    and for each report calls ``pdf_extractor.extract_and_save`` followed,
    when that call returned any content, by ``ai_analyzer.analyze_report``.
    Progress is printed to stdout.

    An exception raised while processing one report is printed and the loop
    moves on to the next report; nothing is re-raised from the per-report
    step.
    """
    banner = "=" * 60
    print(banner)
    print("处理已下载的报告:提取内容 + AI分析")
    print(banner)

    await init_db()

    async with AsyncSessionLocal() as db:
        # Find reports that have been downloaded but not yet extracted.
        stmt = (
            select(Report)
            .where(
                # These comparisons build SQL expressions, so ``== True`` /
                # ``== False`` is intentional here.
                Report.is_downloaded == True,  # noqa: E712
                Report.is_extracted == False,  # noqa: E712
            )
            # Load each report's company together with the report.
            .options(selectinload(Report.company))
        )

        result = await db.execute(stmt)
        reports = result.scalars().all()
        total = len(reports)

        print(f"\n找到 {total} 份待处理的报告")

        for i, report in enumerate(reports, 1):
            company_name = report.company.short_name if report.company else "未知"
            # A missing title must not abort the whole batch: this line runs
            # outside the per-report try block, so fall back to "".
            title = report.title or ""
            print(f"\n[{i}/{total}] 处理: {company_name} - {title[:30]}...")

            try:
                # 1. Extract the PDF content.
                print(" → 提取PDF内容...")
                contents = await pdf_extractor.extract_and_save(db, report)
                print(f" ✓ 提取了 {len(contents)} 个章节")

                # 2. Run AI analysis, only if something was extracted.
                if contents:
                    print(" → 执行AI分析...")
                    success = await ai_analyzer.analyze_report(db, report)
                    if success:
                        print(" ✓ AI分析完成")
                    else:
                        print(" ✗ AI分析失败")
                else:
                    print(" ⚠ 无提取内容,跳过AI分析")

            except Exception as e:
                # Best-effort batch: report the failure and keep going.
                # NOTE(review): if the failure came from the database, the
                # session may need a rollback before it can be reused —
                # confirm whether the services handle that themselves.
                print(f" ✗ 处理失败: {e}")

    print("\n" + banner)
    print("处理完成!")
    print(banner)
|
|||
|
|
|
|||
|
|
# Run the report-processing coroutine when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(process_reports())
|