sreepathi-ravikumar committed on
Commit
8b0cadb
·
verified ·
1 Parent(s): 77b90e5

Create image_fetcher.py

Browse files
Files changed (1) hide show
  1. image_fetcher.py +71 -0
image_fetcher.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import asyncio
3
+ import aiohttp
4
+ import random
5
+ from PIL import Image
6
+ from io import BytesIO
7
+ from duckduckgo_search import DDGS
8
+ import time
9
+
10
+ # Setup /tmp directory for Hugging Face
11
# Destination folder for every downloaded image.
IMAGE_DIR = "/tmp/images"

# Create the folder at import time; exist_ok=True makes a repeated import
# (or an already existing directory) a no-op instead of an error.
os.makedirs(name=IMAGE_DIR, exist_ok=True)
13
+
14
+ # Headers
15
def get_headers():
    """Build request headers with a randomly chosen User-Agent.

    Returns:
        dict: A single-entry mapping ``{"User-Agent": <string>}`` whose value
        is drawn at random, on every call, from a fixed pool of three
        browser identification strings.
    """
    agent_pool = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:90.0)",
    )
    chosen_agent = random.choice(agent_pool)
    return {"User-Agent": chosen_agent}
22
+
23
+ # Image validation
24
def is_valid_image(image, min_width=854, min_height=480,
                   target_ratio=16 / 9, tolerance=0.2):
    """Return True when *image* is large enough and roughly widescreen.

    Args:
        image: Any object exposing a ``size`` attribute that unpacks to
            ``(width, height)``.
        min_width: Smallest accepted width in pixels (default 854).
        min_height: Smallest accepted height in pixels (default 480).
        target_ratio: Desired width/height ratio (default 16:9).
        tolerance: Largest accepted absolute difference between the image's
            ratio (rounded to two decimals) and ``target_ratio``.

    Returns:
        bool: True only if both size minimums are met and the aspect ratio
        is within ``tolerance`` of ``target_ratio``.
    """
    width, height = image.size

    # Reject on size before touching the ratio: a zero-height image would
    # otherwise raise ZeroDivisionError in the division below.
    if height <= 0 or width < min_width or height < min_height:
        return False

    aspect_ratio = round(width / height, 2)
    return abs(aspect_ratio - target_ratio) <= tolerance
28
+
29
+ # Async fetch image
30
async def fetch_image(session, url, name):
    """Download *url*, validate the image, and save it under IMAGE_DIR.

    Args:
        session: An open ``aiohttp.ClientSession`` used for the request.
        url: Address of the candidate image.
        name: Base name for the saved file; a millisecond timestamp and a
            ``.jpg`` suffix are appended to it.

    Returns:
        str: A status line starting with ``"Saved: "``,
        ``"Skipped (invalid): "`` or ``"Error: "``. Failures are reported
        through the return value rather than raised, so a caller can simply
        move on to the next candidate.
    """
    try:
        # Explicit ClientTimeout: a bare number for ``timeout`` is the
        # deprecated spelling in aiohttp.
        request_timeout = aiohttp.ClientTimeout(total=10)
        async with session.get(url, timeout=request_timeout) as response:
            # Fail fast on 4xx/5xx instead of feeding an error page to PIL.
            response.raise_for_status()
            content = await response.read()

        # Decode after the connection has been released back to the pool.
        image = Image.open(BytesIO(content)).convert("RGB")

        if not is_valid_image(image):
            return f"Skipped (invalid): {name}"

        # A path separator in the name (e.g. from a prompt like "cats/dogs")
        # would point the save at a non-existent subdirectory.
        safe_name = name
        for separator in (os.sep, os.altsep):
            if separator:
                safe_name = safe_name.replace(separator, "_")

        unique_name = f"{safe_name}_{int(time.time() * 1000)}.jpg"
        path = os.path.join(IMAGE_DIR, unique_name)
        image.save(path)
        return f"Saved: {unique_name}"

    except Exception as e:
        # Deliberate best-effort: any failure becomes a status string.
        return f"Error: {name} | {e}"
46
+
47
+ # Async search + download with DDGS inside thread
48
async def search_and_download(session, prompt, sem):
    """Search images for *prompt* and save the first one that validates.

    Args:
        session: An open ``aiohttp.ClientSession`` passed on to
            ``fetch_image``.
        prompt: Search text; also used (lower-cased, spaces replaced by
            underscores) as the base file name.
        sem: An ``asyncio.Semaphore`` held for the whole search-and-download
            so the caller can cap concurrency.

    Returns:
        str: The ``"Saved: ..."`` line from ``fetch_image`` on success,
        ``"No valid image for: <prompt>"`` when no candidate was saved, or
        ``"Search failed for <prompt>: <error>"`` if the search raised.
    """
    async with sem:
        name = prompt.replace(" ", "_").lower()
        try:
            # get_running_loop() is the supported way to obtain the loop
            # from inside a coroutine.
            loop = asyncio.get_running_loop()
            # The DDGS search call is synchronous, so it is run in the
            # default executor to keep the event loop responsive.
            results = await loop.run_in_executor(
                None, lambda: list(DDGS().images(prompt, max_results=15))
            )
            for item in results:
                url = item.get("image")
                if not url:
                    # Result carries no image URL; nothing to download.
                    continue
                result = await fetch_image(session, url, name)
                # Match the status prefix, not a substring, so an error
                # message that happens to contain "Saved" is not a success.
                if result.startswith("Saved: "):
                    return result
            return f"No valid image for: {prompt}"
        except Exception as e:
            return f"Search failed for {prompt}: {e}"
62
+
63
+ # Main runner
64
async def main(prompts):
    """Run a search-and-download job for every prompt and print each outcome.

    Args:
        prompts: Iterable of search strings; one job is started per entry.

    All jobs share one ``aiohttp.ClientSession`` (with headers from
    ``get_headers()``) and one semaphore that allows five jobs at a time.
    Status lines are printed in the same order as *prompts*.
    """
    limiter = asyncio.Semaphore(5)  # at most five jobs run concurrently
    async with aiohttp.ClientSession(headers=get_headers()) as session:
        outcomes = await asyncio.gather(
            *(search_and_download(session, text, limiter) for text in prompts)
        )
        for status_line in outcomes:
            print(status_line)
71
+