rapacious committed on
Commit
b0327be
·
verified ·
1 Parent(s): 7157fb7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -0
app.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import queue
import random
import shutil
import threading
import time
import tkinter as tk
import urllib.request
from tkinter import ttk, messagebox, filedialog

import requests
from bs4 import BeautifulSoup
12
+
13
# Fetch search suggestions from Google
def get_google_suggestions(query):
    """Return autocomplete suggestions for ``query`` from Google's suggest endpoint.

    Args:
        query: Search text to complete. It is sent as a request parameter so
            that spaces, ``&``, ``#`` and non-ASCII characters are URL-encoded
            instead of corrupting the query string.

    Returns:
        Element ``[1]`` of the JSON payload returned by the endpoint
        (presumably the list of suggestion strings -- callers iterate over
        it), or an empty list when the request fails or the payload cannot
        be parsed.
    """
    url = "http://suggestqueries.google.com/complete/search"
    params = {"client": "firefox", "q": query}
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    try:
        response = requests.get(url, params=params, headers=headers, timeout=5)
        return json.loads(response.text)[1]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Best effort: network errors, invalid JSON (ValueError) and an
        # unexpected payload shape all mean "no suggestions available".
        return []
22
+
23
# Download images
def download_images(search_term, num_images_per_term, save_folder, status_callback):
    """Download images found on a Google image-search page for ``search_term``.

    The result page for ``"<search_term> free"`` is fetched, every ``<img>``
    tag whose ``src`` starts with ``http`` is considered, and images are saved
    as ``<save_folder>/<term>_<n>.jpg`` until ``num_images_per_term`` files
    have been written or the tags run out.

    Args:
        search_term: Text to search for.
        num_images_per_term: Maximum number of images to save.
        save_folder: Destination directory; created when missing.
        status_callback: Callable taking one string; receives progress and
            error messages.

    Returns:
        The number of images saved (0 when the search page cannot be fetched).
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_folder, exist_ok=True)

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    # Let requests URL-encode the term so '&', '#', '+' or non-ASCII text
    # inside it cannot corrupt the query string.
    params = {"q": f"{search_term} free", "tbm": "isch"}

    try:
        response = requests.get(
            "https://www.google.com/search",
            params=params,
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        status_callback(f"Lỗi truy cập {search_term}")
        return 0

    soup = BeautifulSoup(response.text, 'html.parser')

    # Spaces become underscores as before; characters that are illegal or
    # act as separators in file names are replaced too, so a term such as
    # "cats/dogs?" no longer makes every save fail.
    safe_term = "".join(
        "_" if ch in ' \\/:*?"<>|' else ch for ch in search_term
    )

    count = 0
    for i, img in enumerate(soup.find_all('img')):
        if count >= num_images_per_term:
            break

        img_url = img.get('src')
        if not img_url or not img_url.startswith('http'):
            continue

        file_name = f"{save_folder}/{safe_term}_{count}.jpg"
        try:
            urllib.request.urlretrieve(img_url, file_name)
        except Exception as e:
            # Deliberately broad: a single bad image must not abort the
            # batch; the failure is reported and the next tag is tried.
            status_callback(f"Lỗi ảnh {i} ({search_term}): {str(e)}")
            continue

        status_callback(f"Đã tải: {file_name}")
        count += 1
        # Random pause between downloads.
        time.sleep(random.uniform(1, 3))

    return count
58
+
59
# Compress a folder into a zip file
def zip_folder(folder_path, output_zip):
    """Archive the contents of ``folder_path`` as ``<output_zip>.zip``.

    Args:
        folder_path: Directory whose contents are archived.
        output_zip: Archive path without the ``.zip`` extension.

    Returns:
        ``True`` on success, otherwise the error message as a string.
    """
    try:
        shutil.make_archive(output_zip, format='zip', root_dir=folder_path)
    except Exception as err:
        return str(err)
    else:
        return True
66
+
67
# Main download routine
def start_download(initial_query, target_images, save_folder, max_per_term, status_text):
    """Start a background thread that downloads images and then zips them.

    The worker repeatedly asks for suggestions for the current query, picks
    a random suggestion that has not been used yet, downloads up to
    ``max_per_term`` images for it and continues from that suggestion until
    ``target_images`` images were saved or no new suggestion is available.
    Afterwards ``save_folder`` is archived as ``downloaded_images.zip`` next
    to it.

    Args:
        initial_query: Seed text used for the first suggestion lookup.
        target_images: Total number of images to collect.
        save_folder: Directory the images are written to.
        max_per_term: Maximum number of images downloaded per suggestion.
        status_text: Callable taking one string; receives progress messages.
            It is invoked from the worker thread.

    Returns:
        The started ``threading.Thread`` so callers can poll or join it
        (callers that ignore the return value are unaffected).
    """
    def download_thread():
        total_downloaded = 0
        current_query = initial_query
        used_queries = set()

        while total_downloaded < target_images:
            suggestions = get_google_suggestions(current_query)
            if not suggestions:
                status_text("Hết gợi ý, dừng lại.")
                break

            available_suggestions = [s for s in suggestions if s not in used_queries]
            if not available_suggestions:
                status_text("Hết gợi ý mới, dừng lại.")
                break

            current_query = random.choice(available_suggestions)
            used_queries.add(current_query)

            remaining = target_images - total_downloaded
            images_to_download = min(max_per_term, remaining)

            status_text(f"Tìm kiếm: {current_query}")
            downloaded = download_images(current_query, images_to_download, save_folder, status_text)
            total_downloaded += downloaded

            status_text(f"Tổng: {total_downloaded}/{target_images}")
            # Random pause between search terms.
            time.sleep(random.uniform(2, 5))

        status_text(f"Hoàn tất! Đã tải {total_downloaded} ảnh.")

        # Zip the folder
        status_text("Đang nén thư mục...")
        # abspath() drops a trailing separator first; without it
        # dirname("images/") is "images" and the archive would be written
        # inside the very folder that is being archived.
        parent_dir = os.path.dirname(os.path.abspath(save_folder))
        zip_result = zip_folder(save_folder, os.path.join(parent_dir, "downloaded_images"))
        if zip_result is True:
            status_text("Đã nén thành công: downloaded_images.zip")
        else:
            status_text(f"Lỗi khi nén: {zip_result}")

    thread = threading.Thread(target=download_thread)
    thread.start()
    return thread
110
+
111
# GUI
class ImageDownloaderApp:
    """Tkinter front end for the image downloader.

    Status messages may be reported from a worker thread, so they are put on
    a queue and written to the text widget by a periodic callback running in
    the Tk main loop; widgets are only touched from that thread.
    """

    # Interval (milliseconds) at which queued status messages are flushed
    # and the worker thread is checked for completion.
    _POLL_INTERVAL_MS = 100

    def __init__(self, root):
        """Build the widgets on ``root`` and start the periodic poll."""
        self.root = root
        self.root.title("Image Downloader")
        self.root.geometry("600x500")

        # Messages waiting to be shown; filled by update_status().
        self._status_queue = queue.Queue()
        # Worker thread of the running download, or None when idle/unknown.
        self._worker = None

        # Label and entry for the initial keyword
        tk.Label(root, text="Từ khóa ban đầu:").pack(pady=5)
        self.query_entry = tk.Entry(root, width=40)
        self.query_entry.insert(0, "free images")
        self.query_entry.pack()

        # Target number of images
        tk.Label(root, text="Số lượng ảnh cần tải:").pack(pady=5)
        self.target_entry = tk.Entry(root, width=10)
        self.target_entry.insert(0, "10000")
        self.target_entry.pack()

        # Maximum number of images per keyword
        tk.Label(root, text="Số ảnh tối đa mỗi từ khóa:").pack(pady=5)
        self.max_per_term_entry = tk.Entry(root, width=10)
        self.max_per_term_entry.insert(0, "20")
        self.max_per_term_entry.pack()

        # Destination folder
        tk.Label(root, text="Thư mục lưu:").pack(pady=5)
        self.folder_var = tk.StringVar(value="free_images")
        folder_frame = tk.Frame(root)
        folder_frame.pack()
        tk.Entry(folder_frame, textvariable=self.folder_var, width=30).pack(side=tk.LEFT)
        tk.Button(folder_frame, text="Chọn", command=self.choose_folder).pack(side=tk.LEFT, padx=5)

        # Start button
        self.start_button = tk.Button(root, text="Bắt đầu tải", command=self.start_download)
        self.start_button.pack(pady=10)

        # Status display area
        tk.Label(root, text="Trạng thái:").pack(pady=5)
        self.status_text = tk.Text(root, height=15, width=70)
        self.status_text.pack()

        self.root.after(self._POLL_INTERVAL_MS, self._poll_worker)

    def choose_folder(self):
        """Let the user pick the destination folder via a directory dialog."""
        folder = filedialog.askdirectory()
        if folder:
            self.folder_var.set(folder)

    def update_status(self, message):
        """Queue ``message`` for display; may be called from any thread."""
        self._status_queue.put(message)

    def _flush_status(self):
        """Append all queued messages to the status widget (main thread only)."""
        wrote = False
        while True:
            try:
                message = self._status_queue.get_nowait()
            except queue.Empty:
                break
            self.status_text.insert(tk.END, f"{message}\n")
            wrote = True
        if wrote:
            self.status_text.see(tk.END)

    def _poll_worker(self):
        """Flush status messages and re-enable the button once the worker ends."""
        # Sample liveness before flushing so that every message the worker
        # queued before finishing is shown before the button is restored.
        finished = self._worker is not None and not self._worker.is_alive()
        self._flush_status()
        if finished:
            self._worker = None
            self.start_button.config(state="normal")
        self.root.after(self._POLL_INTERVAL_MS, self._poll_worker)

    def start_download(self):
        """Validate the form and launch the download."""
        try:
            target_images = int(self.target_entry.get())
            max_per_term = int(self.max_per_term_entry.get())
        except ValueError:
            messagebox.showerror("Lỗi", "Số lượng phải là số nguyên!")
            return

        # strip() so a whitespace-only keyword or folder counts as missing.
        initial_query = self.query_entry.get().strip()
        save_folder = self.folder_var.get().strip()

        if not initial_query or target_images <= 0 or max_per_term <= 0 or not save_folder:
            messagebox.showerror("Lỗi", "Vui lòng nhập đầy đủ và hợp lệ các thông tin!")
            return

        self.start_button.config(state="disabled")
        self.status_text.delete(1.0, tk.END)
        try:
            # Module-level start_download(); it launches the worker thread.
            worker = start_download(initial_query, target_images, save_folder, max_per_term, self.update_status)
        except Exception:
            self.start_button.config(state="normal")
            raise

        if isinstance(worker, threading.Thread):
            # Keep the button disabled until _poll_worker sees the thread end,
            # which prevents overlapping downloads into the same folder.
            self._worker = worker
        else:
            # No thread handle was returned, so completion cannot be tracked;
            # restore the button right away.
            self.start_button.config(state="normal")
182
+
183
# Run the application
if __name__ == "__main__":
    # Create the Tk root window, attach the GUI to it and enter the event loop.
    root = tk.Tk()
    app = ImageDownloaderApp(root)
    root.mainloop()