""" 清理旧数据 + 补充缺失的AI总结 """ import asyncio import sys sys.path.insert(0, ".") from app.database import AsyncSessionLocal, init_db from app.models import Report, AnalysisResult, ExtractedContent from app.services.ai_analyzer import ai_analyzer from sqlalchemy import select, delete from sqlalchemy.orm import selectinload async def cleanup_and_fix(): print("=" * 60) print("数据清理和修复") print("=" * 60) await init_db() async with AsyncSessionLocal() as db: # 1. 删除2024年之前的报告(保留2024和2025年) print("\n1. 清理2024年之前的旧数据...") stmt = select(Report).where(Report.report_year < 2024) result = await db.execute(stmt) old_reports = result.scalars().all() print(f" 找到 {len(old_reports)} 份2024年前的报告") for report in old_reports: # 删除关联数据 await db.execute(delete(ExtractedContent).where(ExtractedContent.report_id == report.id)) await db.execute(delete(AnalysisResult).where(AnalysisResult.report_id == report.id)) await db.delete(report) await db.commit() print(f" ✓ 已删除 {len(old_reports)} 份旧报告") # 2. 为缺失AI总结的报告补充汇总 print("\n2. 为缺失AI总结的报告生成汇总...") stmt = select(Report).where( Report.is_extracted == True, Report.report_year >= 2024 ).options( selectinload(Report.company), selectinload(Report.analysis_results) ) result = await db.execute(stmt) reports = result.scalars().all() missing_summary_count = 0 fixed_count = 0 for report in reports: # 检查是否缺少summary has_summary = any(r.analysis_type == "summary" for r in report.analysis_results) section_analyses = [r for r in report.analysis_results if r.analysis_type == "section"] if not has_summary and section_analyses: missing_summary_count += 1 print(f"\n [{missing_summary_count}] 修复: {report.company.short_name} - {report.title[:20]}...") # 生成汇总 valid_results = [{ "section_name": s.section_name, "analysis": s.summary, "success": True } for s in section_analyses] company_name = report.company.short_name or report.company.company_name summary = await ai_analyzer.summarize_analyses(valid_results, company_name, report.title) if summary.get("success"): final_analysis = AnalysisResult( report_id=report.id, analysis_type="summary", section_name="综合分析", ai_model=ai_analyzer.model, summary=summary["summary"], token_count=summary.get("tokens", 0), is_final=True ) db.add(final_analysis) await db.commit() fixed_count += 1 print(f" ✓ 已生成AI总结") else: print(f" ✗ 生成失败") print(f"\n修复完成: 共 {fixed_count}/{missing_summary_count} 份报告已补充AI总结") # 3. 统计当前数据 print("\n3. 当前数据统计:") stmt = select(Report).where(Report.report_year >= 2024) result = await db.execute(stmt) current_reports = result.scalars().all() print(f" 2024年及以后的报告: {len(current_reports)} 份") if __name__ == "__main__": asyncio.run(cleanup_and_fix())