huibao/backend/test_pdf_url.py

"""
检查PDF下载链接格式 - 详细版
"""
import asyncio
import httpx

async def check_pdf_url(stock_code: str = "300622") -> None:
    """Look up a stock on cninfo and probe the PDF links of its announcements.

    The coroutine performs three steps and prints every intermediate result:

    1. POST to the ``topSearch/query`` endpoint and pick the ``orgId`` of the
       entry whose ``code`` equals ``stock_code``.
    2. POST to the ``hisAnnouncement/query`` endpoint (first page, page size 5,
       category ``category_ndbg_szsh``) for that stock.
    3. For the first two announcements, build a ``static.cninfo.com.cn`` URL
       from ``adjunctUrl`` and send a HEAD request, printing the status code
       and ``Content-Type``.

    Args:
        stock_code: Stock code to inspect. Defaults to ``"300622"``, the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        None. All output goes to stdout.
    """
    print("=" * 60)
    print("检查PDF下载链接格式")
    print("=" * 60)

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "https://www.cninfo.com.cn",
        "Referer": "https://www.cninfo.com.cn/new/disclosure",
    }

    search_url = "https://www.cninfo.com.cn/new/information/topSearch/query"
    api_url = "https://www.cninfo.com.cn/new/hisAnnouncement/query"

    # NOTE(review): verify=False disables TLS certificate verification. It is
    # kept because the original script relied on it; confirm whether it is
    # still required before reusing this code outside of ad-hoc diagnostics.
    async with httpx.AsyncClient(timeout=30.0, verify=False) as client:
        # Step 1: resolve the orgId that the announcement query needs.
        org_id = None
        response = await client.post(
            search_url,
            data={"keyWord": stock_code, "maxNum": 5},
            headers=headers,
        )
        if response.status_code == 200:
            # "or []" guards against a JSON null body.
            results = response.json() or []
            org_id = next(
                (
                    item.get("orgId")
                    for item in results
                    if item.get("code") == stock_code
                ),
                None,
            )
        else:
            print(f"orgId查询失败, 状态码: {response.status_code}")

        print(f"orgId: {org_id}")

        if org_id is None:
            # Without an orgId the "stock" field would contain the literal
            # text "None", so the query below could not be meaningful.
            print("未获取到orgId, 跳过公告查询")
            return

        # Step 2: query the announcement list.
        form_data = {
            "pageNum": "1",
            "pageSize": "5",
            "column": "szse",
            "tabName": "fulltext",
            "stock": f"{stock_code},{org_id}",
            "category": "category_ndbg_szsh",
            "seDate": "",
        }
        response = await client.post(api_url, data=form_data, headers=headers)
        if response.status_code != 200:
            print(f"公告查询失败, 状态码: {response.status_code}")
            return

        data = response.json() or {}
        # dict.get's default only applies when the key is absent; if the key
        # is present with a null value we would get None and len() would
        # raise, hence the explicit "or []".
        announcements = data.get("announcements") or []

        print(f"\n找到 {len(announcements)} 条公告")

        # Step 3: probe the download link of the first two announcements.
        for ann in announcements[:2]:
            title = ann.get("announcementTitle", "")
            adjunct_url = ann.get("adjunctUrl", "")
            print(f"\n标题: {title}")
            print(f"adjunctUrl原始值: {adjunct_url}")

            if not adjunct_url:
                continue

            # Strip a leading slash so the joined URL never contains "//"
            # after the host name.
            new_url = f"https://static.cninfo.com.cn/{adjunct_url.lstrip('/')}"
            print(f"新版链接: {new_url}")

            print("测试下载...")
            try:
                dl_response = await client.head(new_url, follow_redirects=True)
            except (httpx.HTTPError, httpx.InvalidURL) as e:
                # Best-effort probe: report the failure and move on to the
                # next announcement instead of aborting the whole run.
                print(f"测试失败: {e}")
            else:
                print(f"状态码: {dl_response.status_code}")
                content_type = dl_response.headers.get("content-type", "")
                print(f"Content-Type: {content_type}")

if __name__ == "__main__":
    # Script entry point: build the coroutine, then drive it to completion on
    # a fresh event loop.
    main_coro = check_pdf_url()
    asyncio.run(main_coro)
feat: Implement core backend application, import financial report data, and add UI/UX assets. 2026-01-23 00:10:21 +08:00			`"""`
			`检查PDF下载链接格式 - 详细版`
			`"""`
			`import asyncio`
			`import httpx`

			`async def check_pdf_url():`
			`print("=" * 60)`
			`print("检查PDF下载链接格式")`
			`print("=" * 60)`

			`headers = {`
			`"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",`
			`"Accept": "application/json, text/javascript, */*; q=0.01",`
			`"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",`
			`"Origin": "https://www.cninfo.com.cn",`
			`"Referer": "https://www.cninfo.com.cn/new/disclosure",`
			`}`

			`# 先获取orgId`
			`search_url = "https://www.cninfo.com.cn/new/information/topSearch/query"`
			`search_data = {"keyWord": "300622", "maxNum": 5}`

			`org_id = None`
			`async with httpx.AsyncClient(timeout=30.0, verify=False) as client:`
			`response = await client.post(search_url, data=search_data, headers=headers)`
			`if response.status_code == 200:`
			`results = response.json()`
			`for item in results:`
			`if item.get("code") == "300622":`
			`org_id = item.get("orgId")`
			`break`

			`print(f"orgId: {org_id}")`

			`# 查询公告`
			`api_url = "https://www.cninfo.com.cn/new/hisAnnouncement/query"`
			`form_data = {`
			`"pageNum": "1",`
			`"pageSize": "5",`
			`"column": "szse",`
			`"tabName": "fulltext",`
			`"stock": f"300622,{org_id}",`
			`"category": "category_ndbg_szsh",`
			`"seDate": "",`
			`}`

			`async with httpx.AsyncClient(timeout=30.0, verify=False) as client:`
			`response = await client.post(api_url, data=form_data, headers=headers)`
			`if response.status_code == 200:`
			`data = response.json()`
			`announcements = data.get("announcements", [])`

			`print(f"\n找到 {len(announcements)} 条公告")`

			`for ann in announcements[:2]:`
			`title = ann.get("announcementTitle", "")`
			`adjunct_url = ann.get("adjunctUrl", "")`
			`print(f"\n标题: {title}")`
			`print(f"adjunctUrl原始值: {adjunct_url}")`

			`if adjunct_url:`
			`# 新版下载链接应该使用 static.cninfo.com.cn`
			`new_url = f"https://static.cninfo.com.cn/{adjunct_url}"`
			`print(f"新版链接: {new_url}")`

			`# 测试下载`
			`print("测试下载...")`
			`try:`
			`dl_response = await client.head(new_url, follow_redirects=True)`
			`print(f"状态码: {dl_response.status_code}")`
			`content_type = dl_response.headers.get("content-type", "")`
			`print(f"Content-Type: {content_type}")`
			`except Exception as e:`
			`print(f"测试失败: {e}")`

			`if __name__ == "__main__":`
			`asyncio.run(check_pdf_url())`