79 lines
2.9 KiB
Python
79 lines
2.9 KiB
Python
|
|
"""
|
||
|
|
检查PDF下载链接格式 - 详细版
|
||
|
|
"""
|
||
|
|
import asyncio
|
||
|
|
import httpx
|
||
|
|
|
||
|
|
async def check_pdf_url():
    """Probe cninfo.com.cn and report the PDF download-link format.

    The coroutine performs three steps and prints every finding to stdout:

    1. Look up the ``orgId`` of stock ``300622`` via the top-search endpoint.
    2. Query the announcement list for that stock (column ``szse``,
       category ``category_ndbg_szsh``).
    3. For the first two announcements, build a ``static.cninfo.com.cn`` URL
       from the ``adjunctUrl`` field and send a HEAD request to it, printing
       the status code and ``Content-Type`` of the response.

    Returns:
        None. The function returns early (after printing a message) when the
        ``orgId`` cannot be resolved or the announcement query does not
        answer with HTTP 200.
    """
    # Single source of truth for the stock code used in the search keyword,
    # the result filter and the announcement query.
    stock_code = "300622"

    print("=" * 60)
    print("检查PDF下载链接格式")
    print("=" * 60)

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "https://www.cninfo.com.cn",
        "Referer": "https://www.cninfo.com.cn/new/disclosure",
    }

    search_url = "https://www.cninfo.com.cn/new/information/topSearch/query"
    search_data = {"keyWord": stock_code, "maxNum": 5}
    api_url = "https://www.cninfo.com.cn/new/hisAnnouncement/query"

    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept because the original script relied on it; confirm whether it is
    # still required before reusing this code outside of a diagnostic script.
    # One client is shared by all requests so the connection pool is reused.
    async with httpx.AsyncClient(timeout=30.0, verify=False) as client:
        # First obtain the orgId.
        org_id = None
        response = await client.post(search_url, data=search_data, headers=headers)
        if response.status_code == 200:
            # `or []` tolerates a JSON `null` body instead of failing in the loop.
            results = response.json() or []
            org_id = next(
                (item.get("orgId") for item in results if item.get("code") == stock_code),
                None,
            )
        else:
            print(f"搜索请求失败，状态码: {response.status_code}")

        print(f"orgId: {org_id}")

        if not org_id:
            # Without an orgId the query below would be sent with the literal
            # text "None" in the `stock` field, so stop here instead.
            print("未获取到orgId，无法查询公告")
            return

        # Query announcements.
        form_data = {
            "pageNum": "1",
            "pageSize": "5",
            "column": "szse",
            "tabName": "fulltext",
            "stock": f"{stock_code},{org_id}",
            "category": "category_ndbg_szsh",
            "seDate": "",
        }

        response = await client.post(api_url, data=form_data, headers=headers)
        if response.status_code != 200:
            print(f"公告查询失败，状态码: {response.status_code}")
            return

        data = response.json() or {}
        # `.get(key, [])` would still yield None when the key is present with
        # a JSON null value, which would break len() and slicing below.
        announcements = data.get("announcements") or []

        print(f"\n找到 {len(announcements)} 条公告")

        # Only the first two announcements are probed.
        for ann in announcements[:2]:
            title = ann.get("announcementTitle", "")
            adjunct_url = ann.get("adjunctUrl", "")
            print(f"\n标题: {title}")
            print(f"adjunctUrl原始值: {adjunct_url}")

            if not adjunct_url:
                continue

            # The new-style download link should use static.cninfo.com.cn.
            # Strip any leading slash so the joined URL never contains "//".
            new_url = f"https://static.cninfo.com.cn/{adjunct_url.lstrip('/')}"
            print(f"新版链接: {new_url}")

            # Test the download with a HEAD request (no body is transferred).
            print("测试下载...")
            try:
                dl_response = await client.head(new_url, follow_redirects=True)
            except (httpx.HTTPError, httpx.InvalidURL) as e:
                print(f"测试失败: {e}")
            else:
                print(f"状态码: {dl_response.status_code}")
                content_type = dl_response.headers.get("content-type", "")
                print(f"Content-Type: {content_type}")
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Script entry point: build the coroutine, then drive it to completion
    # on a fresh event loop.
    probe = check_pdf_url()
    asyncio.run(probe)
|