# Spaces: Running | File size: 5,921 Bytes | 2dd1349
import os
import shutil
import time
from urllib.parse import quote_plus

import requests
from bing_image_downloader import downloader
from chromedriver_py import binary_path
from google_images_download import google_images_download
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class GoogleSearch:
    """Class-level helpers for web/image search.

    ``search_result`` and ``search_image_org`` drive a headless Chrome session
    through Selenium; ``search_image_prev`` and ``search_image`` delegate the
    download to ``bing_image_downloader``. All methods are classmethods, so no
    instance is required.
    """

    # Filename suffixes treated as images when scanning a download directory.
    # Matching is done on the lower-cased name so ``.JPG`` / ``.PNG`` count too.
    _IMAGE_SUFFIXES = ('jpg', 'jpeg', 'png')

    @classmethod
    def _init_driver(cls):
        """Start a headless Chrome driver.

        Returns:
            tuple: ``(driver, wait)`` where ``wait`` is a ``WebDriverWait``
            bound to ``driver`` with a 100 second timeout.
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        service = Service(binary_path)
        driver = webdriver.Chrome(service=service, options=chrome_options)
        wait = WebDriverWait(driver, 100)
        return driver, wait

    @classmethod
    def _list_images(cls, image_dir):
        """Return the sorted paths of image files directly inside *image_dir*.

        Sorting makes "the first image" deterministic, since ``os.listdir``
        returns entries in arbitrary order.
        """
        return sorted(
            os.path.join(image_dir, name)
            for name in os.listdir(image_dir)
            if name.lower().endswith(cls._IMAGE_SUFFIXES)
        )

    @classmethod
    def search_result(cls, question: str, screenshot_path: str = "screenshot.png") -> str:
        """Search a question on Google and save a screenshot of the result page.

        Args:
            question: Text to search for.
            screenshot_path: Where the screenshot is written.

        Returns:
            ``screenshot_path``.

        Raises:
            ValueError: If ``question`` is empty.
        """
        # Validate before launching Chrome so a bad call cannot leak a browser.
        if not question:
            raise ValueError("Please provide a question")

        driver, _ = cls._init_driver()
        try:
            # Escape the query so characters such as '&' or '#' stay part of it.
            driver.get(f"https://www.google.com/search?q={quote_plus(question)}")
            # Give some time for the page to load
            time.sleep(3)
            driver.save_screenshot(screenshot_path)
        finally:
            # Always release the browser, even if navigation or capture fails.
            driver.quit()
        return screenshot_path

    @classmethod
    def search_image_org(cls, query: str, download_path: str = 'top_image.png') -> str:
        """Search Google Images and download the first usable result.

        Every ``<img>`` on the result page is tried in order, skipping those
        without a ``src`` or whose ``src`` contains ``"googlelogo"``. The first
        one that downloads successfully is written to ``download_path``.

        Args:
            query: Text to search for.
            download_path: Where the downloaded image is written.

        Returns:
            ``download_path``.

        Raises:
            ValueError: If ``query`` is empty.
            Exception: If no image on the page could be downloaded.
        """
        if not query:
            raise ValueError("Please provide a query")

        driver, _ = cls._init_driver()
        try:
            driver.get(f"https://www.google.com/search?tbm=isch&q={quote_plus(query)}")
            for img in driver.find_elements(By.CSS_SELECTOR, "img"):
                image_url = img.get_attribute("src")
                if not image_url or "googlelogo" in image_url:
                    continue
                try:
                    # Bounded wait; a stalled host must not hang the search.
                    response = requests.get(image_url, timeout=30)
                    # Do not save an HTTP error page as if it were the image.
                    response.raise_for_status()
                    with open(download_path, 'wb') as file:
                        file.write(response.content)
                except (requests.RequestException, OSError):
                    # Best effort: unsupported schemes (e.g. data: URIs),
                    # network errors and write failures move on to the next one.
                    print("Error downloading image, skipping.")
                    continue
                print(image_url)
                return download_path
        finally:
            driver.quit()
        raise Exception("No valid image found")

    @classmethod
    def search_image_prev(cls, query, output_dir='./downloads', limit=10):
        """Download up to ``limit`` images for ``query`` and return the first path.

        Images are fetched with ``bing_image_downloader`` and are expected
        under ``<output_dir>/<query>``.

        Args:
            query: Text to search for.
            output_dir: Root directory for downloads.
            limit: Maximum number of images to download.

        Returns:
            Path of the first (alphabetically) downloaded jpg/jpeg/png file.

        Raises:
            FileNotFoundError: If the query directory is missing or contains
                no matching image files.
        """
        # Download images using Bing Image Downloader
        downloader.download(query, limit=limit, output_dir=output_dir, adult_filter_off=True, force_replace=False, timeout=60)

        image_dir = os.path.join(output_dir, query)
        if not os.path.exists(image_dir):
            raise FileNotFoundError(f"No images found for query '{query}' in directory '{output_dir}'")

        image_paths = cls._list_images(image_dir)
        # An existing but empty directory used to surface as a bare IndexError.
        if not image_paths:
            raise FileNotFoundError(f"No images found for query '{query}' in directory '{output_dir}'")
        return image_paths[0]

    @classmethod
    def search_image(cls, query, save_path):
        """
        Search for an image based on the query and save the result to the specified path.

        Args:
            query (str): The query to search for.
            save_path (str): The path to save the downloaded image.

        Returns:
            str: The path where the image was saved.

        Raises:
            ValueError: If ``query`` is empty.
            FileNotFoundError: If no image was downloaded for ``query``.
        """
        if not query:
            raise ValueError("Please provide a query")

        # Create a temporary directory for storing downloaded images
        temp_dir = "./temp_download"
        os.makedirs(temp_dir, exist_ok=True)
        try:
            # Download only the top image result
            downloader.download(query, limit=1, output_dir=temp_dir, adult_filter_off=True, force_replace=True, timeout=60)

            # The downloader is expected to place files under <temp_dir>/<query>
            image_dir = os.path.join(temp_dir, query)
            image_files = cls._list_images(image_dir) if os.path.isdir(image_dir) else []
            if not image_files:
                raise FileNotFoundError(f"No images found for query '{query}'.")

            # Move the top image to the desired save path
            shutil.move(image_files[0], save_path)
        finally:
            # Clean up the temporary directory on success and on failure alike.
            shutil.rmtree(temp_dir, ignore_errors=True)
        return save_path