Spaces:
Sleeping
Sleeping
from flask import Flask, jsonify, request | |
import requests | |
from bs4 import BeautifulSoup | |
import os | |
import re | |
import urllib.parse | |
import time | |
import random | |
import base64 | |
from io import BytesIO | |
app = Flask(__name__) | |
# Matches http(s) URLs ending in a real image extension. The extension must
# follow a literal dot and must not be followed by further word characters,
# so "https://host/pngs/..." is no longer cut off at "https://host/png".
# Whitespace and angle brackets are excluded so a match cannot run across
# surrounding HTML.
_IMAGE_URL_RE = re.compile(
    r'https?://[^"\'\s<>]+?\.(?:jpe?g|png|gif)(?![A-Za-z0-9_])',
    re.IGNORECASE,
)

# Hosts whose images are skipped (Google-served thumbnails and icons).
_SKIPPED_DOMAINS = ('gstatic.com', 'google.com')


def _extract_image_urls(html):
    """Return the unique image URLs found in *html*, in first-seen order."""
    return list(dict.fromkeys(_IMAGE_URL_RE.findall(html)))


def _is_skipped_host(url):
    """Return True when *url* is served from a skipped domain or a subdomain of one.

    Raises ValueError if *url* cannot be parsed.
    """
    host = urllib.parse.urlsplit(url).hostname or ''
    return any(
        host == domain or host.endswith('.' + domain)
        for domain in _SKIPPED_DOMAINS
    )


def search_images(query: str, num_images: int = 5) -> list:
    """Search Google Images for *query* and return downloaded images as data URIs.

    Args:
        query: Free-text search term; it is URL-quoted before being sent.
        num_images: Maximum number of images to download and return.

    Returns:
        A list of at most ``num_images`` dicts, each with the keys
        ``'image_url'`` (the source URL) and ``'base64_data'`` (a
        ``data:<content-type>;base64,...`` URI). The list is empty when the
        search request itself fails; individual download failures are logged
        with ``print`` and skipped.
    """
    # Headers to mimic a browser request
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'DNT': '1',
        'Connection': 'keep-alive',
    }
    formatted_query = urllib.parse.quote(query)
    url = f"https://www.google.com/search?q={formatted_query}&tbm=isch&safe=active"

    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        # Best-effort API: a failed search yields no results, not an error.
        print(f"An error occurred: {str(e)}")
        return []

    results = []
    # Set after each successful download so the politeness delay runs only
    # *between* downloads, never after the last one.
    pause_before_next = False
    for img_url in _extract_image_urls(response.text):
        if len(results) >= num_images:
            break
        try:
            # Skip small thumbnails and icons served by Google itself. The
            # check is on the hostname, so third-party URLs that merely
            # mention "google.com" in their path are kept.
            if _is_skipped_host(img_url):
                continue

            if pause_before_next:
                time.sleep(random.uniform(0.5, 1))
                pause_before_next = False

            img_response = requests.get(img_url, headers=headers, timeout=10)
            img_response.raise_for_status()

            # Check that the response is actually an image
            content_type = img_response.headers.get('Content-Type', '')
            if not content_type.startswith('image/'):
                continue

            image_base64 = base64.b64encode(img_response.content).decode('utf-8')
            results.append({
                'image_url': img_url,
                'base64_data': f"data:{content_type};base64,{image_base64}"
            })
            pause_before_next = True
        except (requests.RequestException, ValueError) as e:
            # ValueError covers URLs that cannot be parsed.
            print(f"Error downloading image: {str(e)}")
            continue
    return results
def api_search_images():
    """Handle an image-search request and return the results as JSON.

    Query parameters:
        query: Required, non-empty search term.
        num_images: Optional integer from 1 to 20 (default 5).

    Returns:
        A JSON response with ``success``, ``query`` and ``results`` on
        success; a JSON error body with status 400 for invalid parameters,
        or status 500 if the search raises unexpectedly.

    NOTE(review): no ``@app.route`` decorator is visible on this function in
    this view of the file - confirm that it is registered as a route.
    """
    query = request.args.get('query', '')
    if not query:
        return jsonify({'success': False, 'error': 'Query parameter is required'}), 400

    # A non-numeric value is a client error, so report it as 400 rather than
    # letting the ValueError surface as a 500.
    try:
        num_images = int(request.args.get('num_images', 5))
    except ValueError:
        return jsonify({'success': False, 'error': 'Number of images must be an integer'}), 400

    if num_images < 1 or num_images > 20:
        return jsonify({'success': False, 'error': 'Number of images must be between 1 and 20'}), 400

    try:
        results = search_images(query, num_images)
    except Exception as e:
        # Top-level boundary: convert any unexpected failure into a JSON 500.
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500

    return jsonify({
        'success': True,
        'query': query,
        'results': results
    })
# Script entry point: when this file is executed directly, start the Flask
# server listening on all network interfaces on port 5000.
if __name__ == '__main__':
    app.run(port=5000, host='0.0.0.0')