1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
| import requests from bs4 import BeautifulSoup import os from PIL import Image from io import BytesIO import re import json from urllib3.util.retry import Retry from requests.adapters import HTTPAdapter
# Make sure the output folder for downloaded pictures is present
# (exist_ok=True keeps this from failing when it already exists).
os.makedirs('images', exist_ok=True)

# Load the list of insect names: one list entry per line of insects.txt,
# each entry still carrying its trailing newline.
with open('insects.txt', mode='r', encoding='utf-8') as file:
    lines = list(file)
# Shared HTTP session used for every request the script makes.
http = requests.Session()

# Retry policy: at most 3 retries, spaced out by backoff_factor=1, triggered
# by the listed throttling/server-error status codes, and applied only to
# HEAD, GET and OPTIONS requests.
retry_strategy = Retry(
    total=3,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504],
    allowed_methods=["HEAD", "GET", "OPTIONS"],
)
adapter = HTTPAdapter(max_retries=retry_strategy)

# Route both secure and plain HTTP traffic through the retrying adapter.
for scheme in ("https://", "http://"):
    http.mount(scheme, adapter)
# (connect, read) timeouts in seconds, so that a single unresponsive host
# cannot stall the whole run; requests waits indefinitely without one.
REQUEST_TIMEOUT = (5, 30)

# Image modes that can be written as JPEG without conversion. Anything else
# (palette 'P', 'RGBA', 'LA', ...) is converted to RGB before saving.
JPEG_SAFE_MODES = ('L', 'RGB', 'CMYK')


def _extract_image_urls(html, limit=3):
    """Return candidate image URLs found in a Bing image-search response.

    Looks at the first *limit* ``<a class="iusc">`` tags, parses the JSON
    stored in each tag's ``m`` attribute and collects its ``murl`` entry
    (presumably the URL of the original image). Tags without an ``m``
    attribute, with invalid JSON, with a non-object payload, or whose URL is
    missing or a ``data:`` URI are skipped.
    """
    soup = BeautifulSoup(html, 'html.parser')
    urls = []
    for tag in soup.find_all('a', {'class': 'iusc'}, limit=limit):
        try:
            m_data = json.loads(tag['m'])
        except (KeyError, json.JSONDecodeError):
            continue
        if not isinstance(m_data, dict):
            # Valid JSON but not an object: there is no 'murl' to read.
            continue
        image_url = m_data.get('murl')
        if isinstance(image_url, str) and image_url and not image_url.startswith('data:'):
            urls.append(image_url)
    return urls


def _fetch_image(url):
    """Download *url* and return a fully decoded, JPEG-writable PIL image.

    Returns ``None`` (after printing a message) when the download fails or
    the payload cannot be decoded as an image.
    """
    try:
        img_response = http.get(url, timeout=REQUEST_TIMEOUT)
        img_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch image from URL: {url}: {e}")
        return None

    try:
        img = Image.open(BytesIO(img_response.content))
        # Image.open is lazy; force decoding here so truncated or corrupt
        # data is reported now rather than when the image is saved.
        img.load()
        if img.mode not in JPEG_SAFE_MODES:
            img = img.convert('RGB')
    except (OSError, Image.DecompressionBombError):
        # DecompressionBombError is not an OSError, so it is listed explicitly.
        print(f"Failed to open image from URL: {url}")
        return None
    return img


# For every entry of the name list: search Bing Images for the Chinese name,
# download the candidate pictures, and save the one with the most pixels as
# images/<stripped line>.jpg.
for line in lines:
    # Keep only characters in the CJK range U+4E00..U+9FFF as the search term.
    chinese_name = ''.join(ch for ch in line if '\u4e00' <= ch <= '\u9fff')
    if not chinese_name:
        # Nothing to search for; avoid an empty query and a file named ".jpg".
        if line.strip():
            print(f"Skipping line without a Chinese name: {line.strip()}")
        continue

    try:
        response = http.get(
            'https://cn.bing.com/images/async',
            params={'q': chinese_name},
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch search results for {chinese_name}: {e}")
        continue

    # Pick the candidate with the largest pixel count.
    best_img = None
    best_image_pixels = 0
    for selected_image_url in _extract_image_urls(response.text):
        img = _fetch_image(selected_image_url)
        if img is None:
            continue
        img_pixels = img.width * img.height
        if img_pixels > best_image_pixels:
            best_img = img
            best_image_pixels = img_pixels

    if best_img is None:
        print(f"No valid image URL found for {chinese_name}")
        continue

    img_name = line.strip() + '.jpg'
    try:
        best_img.save(os.path.join('images', img_name))
    except OSError as e:
        # One unsavable image (bad file name, full disk, ...) must not abort
        # the remaining downloads.
        print(f"Failed to save {img_name}: {e}")
        continue
    print(f"Saved {img_name}")
|