# geo/backend/test_bing.py
#
# 22 lines
# 839 B
# Python

import httpx
import re
from urllib.parse import quote
# Bing search URL for the URL-encoded query, with the market set to zh-CN.
url = 'https://www.bing.com/search?q=' + quote('华为手机推荐') + '&setmkt=zh-CN'
# Browser-like User-Agent header sent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}

# Compiled once; both patterns are applied to every anchor that is inspected.
ANCHOR_RE = re.compile(r'<a[^>]*href="https?://[^"]*"[^>]*>(.*?)</a>', re.DOTALL)
TAG_RE = re.compile(r'<[^>]+>')


def extract_anchor_html(html):
    """Return the inner HTML of each ``<a>`` whose href is an absolute http(s) URL.

    Args:
        html: Page markup to scan.

    Returns:
        A list of strings, one per matching anchor, in document order.
        Nested tags inside the anchor are left in place.
    """
    return ANCHOR_RE.findall(html)


def candidate_titles(anchors, limit=10):
    """Turn raw anchor bodies into printable title candidates.

    Only the first ``limit`` anchors are inspected (the filter runs after the
    slice, so fewer than ``limit`` titles may be returned).

    Args:
        anchors: Inner-HTML strings as returned by ``extract_anchor_html``.
        limit: How many leading anchors to inspect.

    Returns:
        A list of tag-stripped, whitespace-trimmed titles, each cut to at most
        80 characters. Entries of 5 characters or fewer, and entries containing
        '微软' or 'Bing', are dropped.
    """
    results = []
    for raw in anchors[:limit]:
        clean = TAG_RE.sub('', raw).strip()
        if len(clean) > 5 and '微软' not in clean and 'Bing' not in clean:
            results.append(clean[:80])
    return results


def main():
    """Fetch the Bing results page and print a summary plus candidate titles."""
    with httpx.Client(timeout=30, follow_redirects=True) as client:
        resp = client.get(url, headers=headers)
    # client.get() is non-streaming, so the body is already loaded here.
    html = resp.text
    print('Status:', resp.status_code)
    print('Size:', len(html))
    print('First 500 chars:', html[:500])
    # Try to find result titles
    titles = extract_anchor_html(html)
    print('\nPotential titles:', len(titles))
    for title in candidate_titles(titles):
        print(' -', title)


# Guarded so that importing this module (e.g. pytest collecting test_*.py)
# does not fire a live network request.
if __name__ == '__main__':
    main()