davidegato1 committed on
Commit
5c74dbf
·
verified ·
1 Parent(s): f702eb1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +121 -78
app.py CHANGED
@@ -5,91 +5,134 @@ import requests
5
  from bs4 import BeautifulSoup
6
  from urllib.parse import quote
7
  import urllib.request
 
 
8
 
9
# Download images from Bing.
def downloader_images_bing(search_query, limit, adult_filter_off, timeout=20):
    """Run the Bing download for ``search_query`` and list the resulting files.

    Args:
        search_query: Search text; also used as the sub-folder name under
            ``dataset/``.
        limit: Forwarded to ``downloader.download`` as ``limit``.
        adult_filter_off: Text flag; only the exact string ``"True"`` is
            treated as true.
        timeout: Forwarded to ``downloader.download`` as ``timeout``.

    Returns:
        Paths of the regular files found in ``dataset/<search_query>``, or an
        empty list when that folder does not exist.
    """
    # The UI hands the flag over as text, so compare against the literal.
    disable_filter = adult_filter_off == "True"
    downloader.download(
        search_query,
        limit=limit,
        adult_filter_off=disable_filter,
        force_replace=False,
        timeout=timeout,
    )

    # Collect whatever ended up in the per-query folder.
    target_dir = f'dataset/{search_query}'
    if not os.path.exists(target_dir):
        return []

    collected = []
    for entry in os.listdir(target_dir):
        candidate = os.path.join(target_dir, entry)
        if os.path.isfile(candidate):
            collected.append(candidate)
    return collected
33
 
34
# Download images from Google.
def downloader_images_google(search_query, limit, timeout=20):
    """Scrape a Google Images results page and save its first ``limit`` images.

    Args:
        search_query: Search text; URL-quoted before it is placed in the
            query string, and used verbatim in the output folder name.
        limit: Maximum number of ``<img>`` sources to download.
        timeout: Seconds to wait for the results page. Added with a default
            so existing two-argument callers keep working.

    Returns:
        Paths of the files written to ``dataset/google_<search_query>``.
        A failing image download is printed and skipped.
    """
    query = quote(search_query)
    url = f"https://www.google.com/search?q={query}&tbm=isch"

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection and the UI callback would never return.
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Collect the source of every <img> tag that has one, then cap the count.
    image_urls = [img['src'] for img in soup.find_all('img') if 'src' in img.attrs]
    image_urls = image_urls[:limit]

    # Folder that receives the downloaded files.
    output_dir = f'dataset/google_{search_query}'
    os.makedirs(output_dir, exist_ok=True)

    image_paths = []
    for i, img_url in enumerate(image_urls):
        try:
            img_path = os.path.join(output_dir, f'image_{i+1}.jpg')
            urllib.request.urlretrieve(img_url, img_path)
            image_paths.append(img_path)
        except Exception as e:
            # Best effort: report the failure and continue with the next URL.
            print(f"Erreur lors du téléchargement de l'image {img_url}: {e}")

    return image_paths
68
 
69
# Entry point that dispatches to one of the two image sources.
def download_images(source, search_query, limit, adult_filter_off="True"):
    """Route the request to the Bing or Google downloader.

    Args:
        source: ``"Bing"`` or ``"Google"``.
        search_query: Search text forwarded to the chosen downloader.
        limit: Number of images forwarded to the chosen downloader.
        adult_filter_off: Text flag forwarded to the Bing downloader only.

    Returns:
        Whatever the chosen downloader returns; ``None`` for any other source.
    """
    if source == "Google":
        return downloader_images_google(search_query, limit)
    if source == "Bing":
        return downloader_images_bing(search_query, limit, adult_filter_off)
    return None
75
-
76
# Build the Gradio interface: four inputs feeding download_images, one gallery out.
interface = gr.Interface(
    title="Télécharger des images avec Bing ou Google",
    description="Sélectionnez la source, entrez votre mot de recherche, choisissez le nombre d'images à télécharger et activez/désactivez le mode protégé pour Bing.",
    fn=download_images,
    inputs=[
        gr.Radio(["Bing", "Google"], label="Source d'images"),
        gr.Textbox(label="Mot de recherche"),
        gr.Slider(1, 100, step=5, label="Nombre d'images"),
        gr.Radio(
            ["True", "False"],
            label="Mode protégé (Bing uniquement)",
            value="True",
        ),
    ],
    outputs=gr.Gallery(label="Images téléchargées"),
    # One sample row per source; values follow the order of `inputs`.
    examples=[
        ["Bing", "chat", 10, "True"],
        ["Google", "chien", 10, "True"],
    ],
)

# Start the interface.
interface.launch(share=True)
 
5
  from bs4 import BeautifulSoup
6
  from urllib.parse import quote
7
  import urllib.request
8
+ import re
9
+ import time
10
 
11
# Request settings: HTTP headers sent with the search request and with each
# image download.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/118.0.0.0 Safari/537.36'
    ),
    'Accept-Language': 'en-US,en;q=0.5',
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
# Bing download helper.
def download_bing_images(search_query, limit, adult_filter_off):
    """Download images for ``search_query`` from Bing and return their paths.

    Args:
        search_query: Search text; also the sub-folder name under ``dataset``.
        limit: Maximum number of images to request.
        adult_filter_off: Truthy to disable the adult-content filter.

    Returns:
        Image paths found in ``dataset/<search_query>``; an empty list when
        anything fails (the error is printed, not raised).
    """
    try:
        downloader.download(
            search_query,
            limit=limit,
            # Pass the flag through unchanged: negating it first sent the
            # opposite of what the caller asked for.
            adult_filter_off=bool(adult_filter_off),
            force_replace=False,
            timeout=60,
            # NOTE(review): assumes `downloader` is
            # bing_image_downloader.downloader, whose keyword is `filter`;
            # `filter_type` raised a TypeError that the handler below swallowed.
            filter='photo',
        )

        output_dir = os.path.join('dataset', search_query)
        return get_image_paths(output_dir)

    except Exception as e:
        # Best effort for the UI: report the failure and return nothing.
        print(f"Erreur Bing : {str(e)}")
        return []
36
+
37
# Google download helper.
def download_google_images(search_query, limit):
    """Scrape image URLs from a Google Images results page and download them.

    Args:
        search_query: Search text; URL-quoted for the request and used in the
            output folder name ``dataset/google_<search_query>``.
        limit: Maximum number of distinct image URLs to download.

    Returns:
        Paths of the saved files; an empty list when the search request or
        parsing fails (the error is printed, not raised).
    """
    try:
        output_dir = os.path.join('dataset', f'google_{search_query}')
        os.makedirs(output_dir, exist_ok=True)

        url = f"https://www.google.com/search?q={quote(search_query)}&tbm=isch"
        # Bound the wait so a stalled connection cannot hang the UI callback.
        response = requests.get(url, headers=HEADERS, timeout=20)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        scripts = soup.find_all('script')

        # Quoted http(s) URLs ending in jpg/jpeg/png that open a JSON array.
        pattern = r'\[\"(https?://[^\]\"]*\.(?:jpg|jpeg|png))\"'

        image_urls = []
        for script in scripts:
            # Only the data-callback scripts are scanned for image URLs.
            if 'AF_initDataCallback' in script.text:
                image_urls.extend(re.findall(pattern, script.text))

        # De-duplicate while keeping page order; a set would make the subset
        # kept under `limit` arbitrary from run to run.
        image_urls = list(dict.fromkeys(image_urls))[:int(limit)]
        return download_and_save(image_urls, output_dir)

    except Exception as e:
        print(f"Erreur Google : {str(e)}")
        return []
64
+
65
# Download each URL and save it to disk.
def download_and_save(urls, output_dir):
    """Fetch every URL in ``urls`` and write it into ``output_dir``.

    Files are named ``image_<position>_<unix time><ext>``, where ``<ext>`` is
    taken from the URL path when it is ``.jpg``, ``.jpeg`` or ``.png`` and
    falls back to ``.jpg`` otherwise.

    Args:
        urls: Iterable of image URLs.
        output_dir: Existing directory that receives the files.

    Returns:
        Paths of the files that were written. A failing URL is printed and
        skipped without leaving a file behind.
    """
    from urllib.parse import urlparse

    saved_paths = []
    for idx, url in enumerate(urls):
        try:
            # Keep the real extension so PNG data is not stored as ".jpg".
            ext = os.path.splitext(urlparse(url).path)[1].lower()
            if ext not in ('.jpg', '.jpeg', '.png'):
                ext = '.jpg'
            filename = f"image_{idx+1}_{int(time.time())}{ext}"
            full_path = os.path.join(output_dir, filename)

            req = urllib.request.Request(url, headers=HEADERS)
            # Read the whole body before opening the target, so a failed
            # download does not leave an empty file on disk.
            with urllib.request.urlopen(req, timeout=10) as response:
                payload = response.read()
            with open(full_path, 'wb') as f:
                f.write(payload)
            saved_paths.append(full_path)
        except Exception as e:
            print(f"Erreur téléchargement {url} : {str(e)}")
    return saved_paths
81
+
82
def get_image_paths(directory):
    """Return the sorted paths of the PNG/JPEG files directly inside ``directory``.

    Only regular files whose name ends in ``.png``, ``.jpg`` or ``.jpeg``
    (case-insensitive) are returned; sub-directories are not descended into.

    Args:
        directory: Folder to scan.

    Returns:
        Sorted list of ``directory``-joined paths; an empty list when
        ``directory`` is missing or is not a directory.
    """
    if not os.path.isdir(directory):
        return []
    # Dotted suffixes, so a name such as "notapng" is not mistaken for an image.
    image_suffixes = ('.png', '.jpg', '.jpeg')
    return [
        os.path.join(directory, name)
        # listdir order is arbitrary; sort for a stable result.
        for name in sorted(os.listdir(directory))
        if name.lower().endswith(image_suffixes)
        and os.path.isfile(os.path.join(directory, name))
    ]
87
+
88
# Main entry point, wired to the download button.
def download_handler(source, query, limit, safe_mode):
    """Dispatch a download request and report the outcome.

    The click event that calls this function declares two outputs (gallery
    and status box), so the result is always a ``(paths, message)`` pair.

    Args:
        source: ``"Bing"`` or ``"Google"``.
        query: Search text; surrounding whitespace is ignored.
        limit: Requested number of images; clamped to 1-100.
        safe_mode: ``"On"`` or ``"Off"``; ``"Off"`` disables the Bing filter.

    Returns:
        Tuple ``(paths, message)``: the list of image paths (possibly empty)
        and a status message for the user.
    """
    limit = max(1, min(int(limit), 100))  # the slider may deliver a float

    query = (query or "").strip()
    if not query:
        return [], "Veuillez saisir une recherche."

    try:
        if source == "Bing":
            paths = download_bing_images(query, limit, safe_mode == "Off")
        elif source == "Google":
            paths = download_google_images(query, limit)
        else:
            # Previously fell through and returned None.
            return [], f"Source inconnue : {source}"
    except Exception as e:
        print(f"Erreur globale : {str(e)}")
        return [], f"Erreur : {e}"

    if not paths:
        return [], "Aucune image téléchargée."
    return paths, f"{len(paths)} image(s) téléchargée(s) depuis {source}."
99
+
100
# Gradio Blocks UI.
# NOTE(review): the nesting below is reconstructed — the diff view lost the
# indentation; the four input widgets are assumed to sit inside the Row.
with gr.Blocks(title="Image Downloader", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 📸 Téléchargeur d'Images Multi-Sources")
    gr.Markdown("Téléchargez des images depuis Bing ou Google (max 100)")

    # Input widgets.
    with gr.Row():
        source = gr.Radio(["Bing", "Google"], value="Bing", label="Source")
        query = gr.Textbox(
            placeholder="Entrez votre recherche...",
            label="Recherche",
        )
        limit = gr.Slider(1, 100, step=1, value=20, label="Nombre d'images")
        safe_mode = gr.Radio(
            ["On", "Off"],
            value="On",
            label="Filtre de sécurité (Bing)",
        )

    submit_btn = gr.Button("🚀 Lancer le téléchargement", variant="primary")

    # Output widgets.
    gallery = gr.Gallery(
        label="Résultats",
        height="auto",
        object_fit="contain",
        columns=5,
    )

    status = gr.Textbox(interactive=False, label="Statut")

    # Run the handler on click; it is also exposed under the API name "download".
    submit_btn.click(
        download_handler,
        inputs=[source, query, limit, safe_mode],
        outputs=[gallery, status],
        api_name="download",
    )

    # Sample rows; values follow the order of `inputs`.
    gr.Examples(
        inputs=[source, query, limit, safe_mode],
        examples=[
            ["Bing", "chatons mignons", 10, "On"],
            ["Google", "paysages montagneux", 15, "On"],
        ],
    )

# Launch only when the file is executed directly.
if __name__ == "__main__":
    app.launch(show_error=True, server_port=7860)