# Listing metadata from the code viewer: 22 lines, 839 B, Python
import html
import re
from urllib.parse import quote

import httpx


# Bing search URL for the (URL-encoded) query, pinned to the zh-CN market.
url = 'https://www.bing.com/search?q=' + quote('华为手机推荐') + '&setmkt=zh-CN'
# Browser-like User-Agent sent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}

# Inner markup of every <a> whose href is an absolute http(s) URL.
_ANCHOR_RE = re.compile(r'<a[^>]*href="https?://[^"]*"[^>]*>(.*?)</a>', re.DOTALL)
# Any HTML tag; used to strip nested markup from anchor contents.
_TAG_RE = re.compile(r'<[^>]+>')


def find_anchor_texts(page: str) -> list[str]:
    """Return the raw inner HTML of each absolute-URL anchor in *page*."""
    return _ANCHOR_RE.findall(page)


def clean_titles(fragments: list[str], limit: int = 10) -> list[str]:
    """Turn the first *limit* anchor fragments into plausible result titles.

    Each fragment has its tags removed, its HTML entities decoded and its
    surrounding whitespace stripped. A title is kept only when it is longer
    than 5 characters and mentions neither '微软' nor 'Bing' (which filters
    out the search engine's own navigation links).

    Note that the limit applies to the fragments *before* filtering, so
    fewer than *limit* titles may be returned.
    """
    cleaned = (
        # Strip tags first so that decoded '&lt;'/'&gt;' are not mistaken for markup.
        html.unescape(_TAG_RE.sub('', fragment)).strip()
        for fragment in fragments[:limit]
    )
    return [
        text
        for text in cleaned
        if len(text) > 5 and '微软' not in text and 'Bing' not in text
    ]


def main() -> None:
    """Fetch the search page, print basic diagnostics and candidate titles."""
    with httpx.Client(timeout=30, follow_redirects=True) as client:
        resp = client.get(url, headers=headers)
        page = resp.text

    print('Status:', resp.status_code)
    print('Size:', len(page))
    print('First 500 chars:', page[:500])

    # Try to find result titles
    titles = find_anchor_texts(page)
    print('\nPotential titles:', len(titles))
    for title in clean_titles(titles):
        print(' -', title[:80])


if __name__ == '__main__':
    main()