Bing_image_downloader

Running

App Files Files Community

davidegato1 committed on Feb 18

Commit

5c74dbf

verified ·

1 Parent(s): f702eb1

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -78

app.py CHANGED Viewed

@@ -5,91 +5,134 @@ import requests
 from bs4 import BeautifulSoup
 from urllib.parse import quote
 import urllib.request
-# Function to download images from Bing
-def downloader_images_bing(search_query, limit, adult_filter_off, timeout=20):
-    # Download images from Bing
-    adult_filter = adult_filter_off == "True"  # the flag arrives as the string "True" or "False"
-    downloader.download(
-        search_query,
-        limit=limit,
-        adult_filter_off=adult_filter,
-        force_replace=False,
-        timeout=timeout
-    )
-    # Get the paths of the downloaded images
-    output_dir = f'dataset/{search_query}'
-    if os.path.exists(output_dir):
-        image_paths = [
-            os.path.join(output_dir, img)
-            for img in os.listdir(output_dir)
-            if os.path.isfile(os.path.join(output_dir, img))
-        ]
-    else:
-        image_paths = []
-    return image_paths
-# Function to download images from Google
-def downloader_images_google(search_query, limit):
-    query = quote(search_query)
-    url = f"https://www.google.com/search?q={query}&tbm=isch"
-    headers = {
-        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
-    }
-    response = requests.get(url, headers=headers)
-    soup = BeautifulSoup(response.text, 'html.parser')
-    # Extract the image URLs
-    image_tags = soup.find_all('img')
-    image_urls = [img['src'] for img in image_tags if 'src' in img.attrs]
-    # Limit the number of images to download
-    image_urls = image_urls[:limit]
-    # Create a folder to store the images
-    output_dir = f'dataset/google_{search_query}'
-    os.makedirs(output_dir, exist_ok=True)
-    # Download the images
-    image_paths = []
-    for i, img_url in enumerate(image_urls):
-        try:
-            img_path = os.path.join(output_dir, f'image_{i+1}.jpg')
-            urllib.request.urlretrieve(img_url, img_path)
-            image_paths.append(img_path)
-        except Exception as e:  # a failed download is reported and skipped
-            print(f"Erreur lors du téléchargement de l'image {img_url}: {e}")
-    return image_paths
-# Main function that dispatches to one of the two image sources
-def download_images(source, search_query, limit, adult_filter_off="True"):
-    if source == "Bing":
-        return downloader_images_bing(search_query, limit, adult_filter_off)
-    elif source == "Google":  # any other source falls through and returns None
-        return downloader_images_google(search_query, limit)
-# Create a Gradio interface
-interface = gr.Interface(
-    fn=download_images,
-    inputs=[
-        gr.Radio(["Bing", "Google"], label="Source d'images"),
-        gr.Textbox(label="Mot de recherche"),
-        gr.Slider(1, 100, step=5, label="Nombre d'images"),
-        gr.Radio(["True", "False"], label="Mode protégé (Bing uniquement)", value="True")
-    ],
-    outputs=gr.Gallery(label="Images téléchargées"),
-    title="Télécharger des images avec Bing ou Google",
-    description="Sélectionnez la source, entrez votre mot de recherche, choisissez le nombre d'images à télécharger et activez/désactivez le mode protégé pour Bing.",
-    examples=[
-        ["Bing", "chat", 10, "True"],
-        ["Google", "chien", 10, "True"]
-    ]
-)
-# Start the interface
-interface.launch(share=True)

 from bs4 import BeautifulSoup
 from urllib.parse import quote
 import urllib.request
+import re
+import time
+# Shared HTTP request headers (browser-like User-Agent and Accept-Language)
+HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
+    'Accept-Language': 'en-US,en;q=0.5'
+}
+# Fonction pour Bing
+def download_bing_images(search_query, limit, adult_filter_off):
+    try:
+        adult_filter = not adult_filter_off
+        downloader.download(
+            search_query,
+            limit=limit,
+            adult_filter_off=adult_filter,
+            force_replace=False,
+            timeout=60,
+            filter_type='photo'
+        )
+        output_dir = os.path.join('dataset', search_query)
+        return get_image_paths(output_dir)
+    except Exception as e:
+        print(f"Erreur Bing : {str(e)}")
+        return []
+# Fonction pour Google (méthode sécurisée)
+def download_google_images(search_query, limit):
+    try:
+        output_dir = os.path.join('dataset', f'google_{search_query}')
+        os.makedirs(output_dir, exist_ok=True)
+        url = f"https://www.google.com/search?q={quote(search_query)}&tbm=isch"
+        response = requests.get(url, headers=HEADERS)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+        scripts = soup.find_all('script')
+        image_urls = []
+        pattern = r'\[\"(https?://[^\]\"]*\.(?:jpg|jpeg|png))\"'
+        for script in scripts:
+            if 'AF_initDataCallback' in script.text:
+                matches = re.findall(pattern, script.text)
+                image_urls.extend(matches)
+        image_urls = list(set(image_urls))[:limit]
+        return download_and_save(image_urls, output_dir)
+    except Exception as e:
+        print(f"Erreur Google : {str(e)}")
+        return []
+# Téléchargement et sauvegarde des images
+def download_and_save(urls, output_dir):
+    saved_paths = []
+    for idx, url in enumerate(urls):
+        try:
+            filename = f"image_{idx+1}_{int(time.time())}.jpg"
+            full_path = os.path.join(output_dir, filename)
+            req = urllib.request.Request(url, headers=HEADERS)
+            with urllib.request.urlopen(req, timeout=10) as response:
+                with open(full_path, 'wb') as f:
+                    f.write(response.read())
+                    saved_paths.append(full_path)
+        except Exception as e:
+            print(f"Erreur téléchargement {url} : {str(e)}")
+    return saved_paths
+def get_image_paths(directory):
+    if os.path.exists(directory):
+        return [os.path.join(directory, f) for f in os.listdir(directory)
+                if f.lower().endswith(('png', 'jpg', 'jpeg'))]
+    return []
+# Fonction principale
+def download_handler(source, query, limit, safe_mode):
+    limit = max(1, min(limit, 100))  # Limite entre 1-100
+    try:
+        if source == "Bing":
+            return download_bing_images(query, limit, safe_mode == "Off")
+        elif source == "Google":
+            return download_google_images(query, limit)
+    except Exception as e:
+        print(f"Erreur globale : {str(e)}")
+        return []
+# Improved Gradio interface
+with gr.Blocks(theme=gr.themes.Soft(), title="Image Downloader") as app:
+    gr.Markdown("# 📸 Téléchargeur d'Images Multi-Sources")
+    gr.Markdown("Téléchargez des images depuis Bing ou Google (max 100)")
+    with gr.Row():  # the four input controls
+        source = gr.Radio(["Bing", "Google"], label="Source", value="Bing")
+        query = gr.Textbox(label="Recherche", placeholder="Entrez votre recherche...")
+        limit = gr.Slider(1, 100, value=20, step=1, label="Nombre d'images")
+        safe_mode = gr.Radio(["On", "Off"], label="Filtre de sécurité (Bing)", value="On")
+    submit_btn = gr.Button("🚀 Lancer le téléchargement", variant="primary")
+    gallery = gr.Gallery(
+        label="Résultats",
+        columns=5,
+        object_fit="contain",
+        height="auto"
+    )
+    status = gr.Textbox(label="Statut", interactive=False)
+    submit_btn.click(  # run download_handler when the button is clicked
+        fn=download_handler,
+        inputs=[source, query, limit, safe_mode],  # same order as the handler's parameters
+        outputs=[gallery, status],
+        api_name="download"
+    )
+    gr.Examples(
+        examples=[
+            ["Bing", "chatons mignons", 10, "On"],
+            ["Google", "paysages montagneux", 15, "On"]
+        ],
+        inputs=[source, query, limit, safe_mode]
+    )
+if __name__ == "__main__":  # launch only when executed as a script
+    app.launch(server_port=7860, show_error=True)