huibao/backend/test_pdf_url.py

79 lines
2.9 KiB
Python
Raw Normal View History

"""
检查PDF下载链接格式 - 详细版
"""
import asyncio
import httpx
async def check_pdf_url():
    """Probe cninfo announcements and test the PDF download URL format.

    Steps, all performed over a single shared HTTP client:

    1. Resolve the ``orgId`` of the hard-coded stock code through the
       top-search endpoint.
    2. Query the announcement list for that stock in the
       ``category_ndbg_szsh`` category.
    3. For the first two announcements, build a ``static.cninfo.com.cn`` URL
       from ``adjunctUrl`` and issue a HEAD request against it.

    Every finding is printed to stdout; the coroutine returns ``None``.
    Network errors raised by the two POST requests propagate to the caller;
    only the HEAD probe is treated as best-effort.
    """
    # Single source of truth for the stock code used by both requests.
    stock_code = "300622"

    print("=" * 60)
    print("检查PDF下载链接格式")
    print("=" * 60)

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "https://www.cninfo.com.cn",
        "Referer": "https://www.cninfo.com.cn/new/disclosure",
    }
    search_url = "https://www.cninfo.com.cn/new/information/topSearch/query"
    api_url = "https://www.cninfo.com.cn/new/hisAnnouncement/query"

    # NOTE(review): verify=False disables TLS certificate verification.
    # It is kept because the original script relied on it; confirm whether
    # it is still required before reusing this code outside a diagnostic.
    async with httpx.AsyncClient(timeout=30.0, verify=False) as client:
        # Step 1: resolve the orgId for the stock code.
        search_data = {"keyWord": stock_code, "maxNum": 5}
        response = await client.post(search_url, data=search_data, headers=headers)
        org_id = None
        if response.status_code == 200:
            results = response.json() or []
            org_id = next(
                (
                    item.get("orgId")
                    for item in results
                    if item.get("code") == stock_code
                ),
                None,
            )
        else:
            print(f"搜索请求失败，状态码: {response.status_code}")
        print(f"orgId: {org_id}")

        # Without an orgId the stock filter would be sent as "<code>,None",
        # which is not a meaningful query, so stop here.
        if not org_id:
            print(f"未找到 {stock_code} 的 orgId，终止查询")
            return

        # Step 2: query the announcement list.
        form_data = {
            "pageNum": "1",
            "pageSize": "5",
            "column": "szse",
            "tabName": "fulltext",
            "stock": f"{stock_code},{org_id}",
            "category": "category_ndbg_szsh",
            "seDate": "",
        }
        response = await client.post(api_url, data=form_data, headers=headers)
        if response.status_code != 200:
            print(f"公告查询失败，状态码: {response.status_code}")
            return

        data = response.json()
        # The key may be present with a null value, in which case a plain
        # .get(..., []) would hand back None and len() would fail.
        announcements = data.get("announcements") or []
        print(f"\n找到 {len(announcements)} 条公告")

        # Step 3: probe the download URL of the first two announcements.
        for ann in announcements[:2]:
            title = ann.get("announcementTitle", "")
            adjunct_url = ann.get("adjunctUrl", "")
            print(f"\n标题: {title}")
            print(f"adjunctUrl原始值: {adjunct_url}")
            if not adjunct_url:
                continue

            # The new-style download link is served from static.cninfo.com.cn.
            # Strip any leading slash so the joined URL has no "//" in its path.
            new_url = f"https://static.cninfo.com.cn/{adjunct_url.lstrip('/')}"
            print(f"新版链接: {new_url}")

            print("测试下载...")
            try:
                dl_response = await client.head(new_url, follow_redirects=True)
            except (httpx.HTTPError, httpx.InvalidURL) as e:
                # Best-effort probe: report the failure and move on.
                print(f"测试失败: {e}")
            else:
                print(f"状态码: {dl_response.status_code}")
                content_type = dl_response.headers.get("content-type", "")
                print(f"Content-Type: {content_type}")
if __name__ == "__main__":
    # Script entry point: drive the coroutine to completion on a new event loop.
    probe = check_pdf_url()
    asyncio.run(probe)