huibao/backend/cleanup_and_resync.py

67 lines
2.3 KiB
Python

"""
清理旧的空记录并重新同步
"""
import asyncio
import sys
sys.path.insert(0, ".")
from app.database import AsyncSessionLocal, init_db
from app.models import Company, Report, ExtractedContent, AnalysisResult
from app.services.cninfo_crawler import cninfo_service
from sqlalchemy import select, delete
async def _purge_undownloaded_reports(db):
    """Delete every report flagged as not downloaded, plus its dependent rows.

    All deletions are committed together with a single ``commit()`` at the end.

    Args:
        db: An open async session created from ``AsyncSessionLocal``.

    Returns:
        The number of report records that were removed.
    """
    print("\n1. 清理未下载成功的报告记录...")

    # ``== False`` is intentional: it builds a SQL comparison, not a Python one.
    stmt = select(Report).where(Report.is_downloaded == False)  # noqa: E712
    result = await db.execute(stmt)
    failed_reports = result.scalars().all()
    print(f" 找到 {len(failed_reports)} 条未下载的记录")

    for report in failed_reports:
        # Remove the dependent rows first so that no extracted content or
        # analysis result is left pointing at a report that no longer exists.
        await db.execute(
            delete(ExtractedContent).where(ExtractedContent.report_id == report.id)
        )
        await db.execute(
            delete(AnalysisResult).where(AnalysisResult.report_id == report.id)
        )
        await db.delete(report)

    await db.commit()
    print(f" ✓ 已清理 {len(failed_reports)} 条空记录")
    return len(failed_reports)


async def _resync_active_companies(db):
    """Re-sync the reports of every active company, one company at a time.

    A failure for one company is reported and does not stop the loop. After a
    failure the session is rolled back so that the error cannot leave the
    shared session in a state where every following company fails as well.

    Args:
        db: An open async session created from ``AsyncSessionLocal``.

    Returns:
        The sum of the counts returned by ``sync_company_reports`` for the
        companies that synced successfully.

    Raises:
        Exception: whatever ``db.rollback()`` raises if the session cannot be
            recovered after a failed sync.
    """
    print("\n2. 开始重新同步所有公司的报告...")

    # ``== True`` is intentional: it builds a SQL comparison, not a Python one.
    stmt = select(Company).where(Company.is_active == True)  # noqa: E712
    result = await db.execute(stmt)
    companies = result.scalars().all()
    print(f"{len(companies)} 家公司需要同步")

    total_new = 0
    session_was_rolled_back = False
    for company in companies:
        try:
            if session_was_rolled_back:
                # A rollback expires loaded ORM objects; reload this one
                # explicitly, because reading an expired attribute under
                # asyncio would attempt implicit I/O and fail.
                await db.refresh(company)
            print(f"\n 同步: {company.stock_code} {company.short_name}")
            new_count = await cninfo_service.sync_company_reports(db, company)
            total_new += new_count
            print(f" ✓ 新增 {new_count} 份报告")
        except Exception as e:
            # Best-effort: report the failure and continue with the next company.
            print(f" ✗ 同步失败: {e}")
            # Discard the failed company's partial work and make the session
            # usable again for the remaining companies.
            await db.rollback()
            session_was_rolled_back = True
    return total_new


async def cleanup_and_resync():
    """Purge report records that were never downloaded, then re-sync reports.

    Steps:
        1. Initialise the database via ``init_db()``.
        2. Delete every report with ``is_downloaded == False`` together with
           its extracted-content and analysis-result rows.
        3. Call ``cninfo_service.sync_company_reports`` for each active
           company and print how many reports were added in total.

    Progress is written to stdout; nothing is returned.
    """
    print("=" * 60)
    print("清理旧记录并重新同步")
    print("=" * 60)

    await init_db()

    async with AsyncSessionLocal() as db:
        await _purge_undownloaded_reports(db)
        total_new = await _resync_active_companies(db)

    print("\n" + "=" * 60)
    print(f"同步完成!共新增 {total_new} 份报告")
    print("=" * 60)
if __name__ == "__main__":
    # Script entry point: build the coroutine and drive it to completion on a
    # fresh event loop.
    resync_job = cleanup_and_resync()
    asyncio.run(resync_job)