""" 清理旧的空记录并重新同步 """ import asyncio import sys sys.path.insert(0, ".") from app.database import AsyncSessionLocal, init_db from app.models import Company, Report, ExtractedContent, AnalysisResult from app.services.cninfo_crawler import cninfo_service from sqlalchemy import select, delete async def cleanup_and_resync(): print("=" * 60) print("清理旧记录并重新同步") print("=" * 60) await init_db() async with AsyncSessionLocal() as db: # 1. 删除所有未成功下载的报告记录 print("\n1. 清理未下载成功的报告记录...") # 先删除关联的提取内容和分析结果 stmt = select(Report).where(Report.is_downloaded == False) result = await db.execute(stmt) failed_reports = result.scalars().all() print(f" 找到 {len(failed_reports)} 条未下载的记录") for report in failed_reports: # 删除关联的提取内容 await db.execute(delete(ExtractedContent).where(ExtractedContent.report_id == report.id)) # 删除关联的分析结果 await db.execute(delete(AnalysisResult).where(AnalysisResult.report_id == report.id)) # 删除报告 await db.delete(report) await db.commit() print(f" ✓ 已清理 {len(failed_reports)} 条空记录") # 2. 重新同步所有公司 print("\n2. 开始重新同步所有公司的报告...") stmt = select(Company).where(Company.is_active == True) result = await db.execute(stmt) companies = result.scalars().all() print(f" 共 {len(companies)} 家公司需要同步") total_new = 0 for company in companies: print(f"\n 同步: {company.stock_code} {company.short_name}") try: new_count = await cninfo_service.sync_company_reports(db, company) total_new += new_count print(f" ✓ 新增 {new_count} 份报告") except Exception as e: print(f" ✗ 同步失败: {e}") print("\n" + "=" * 60) print(f"同步完成!共新增 {total_new} 份报告") print("=" * 60) if __name__ == "__main__": asyncio.run(cleanup_and_resync())