Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
import os
|
4 |
+
import urllib.request
|
5 |
+
import json
|
6 |
+
import time
|
7 |
+
import random
|
8 |
+
import tkinter as tk
|
9 |
+
from tkinter import ttk, messagebox, filedialog
|
10 |
+
import shutil
|
11 |
+
import threading
|
12 |
+
|
13 |
+
# Hàm lấy gợi ý tìm kiếm từ Google
|
14 |
+
def get_google_suggestions(query):
    """Return Google autocomplete suggestions for ``query``.

    Args:
        query: The search text to get suggestions for.

    Returns:
        A list of suggestions taken from the second element of the JSON
        response, or an empty list when the request fails, the body is not
        valid JSON, or the payload does not have that shape.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    try:
        # Passing the query through ``params`` lets requests URL-encode it, so
        # spaces, '&', '#' and non-ASCII text cannot corrupt the query string.
        response = requests.get(
            "https://suggestqueries.google.com/complete/search",
            params={"client": "firefox", "q": query},
            headers=headers,
            timeout=5,
        )
        response.raise_for_status()
        payload = json.loads(response.text)
    except (requests.RequestException, ValueError):
        # Network failures, HTTP errors and malformed JSON all mean
        # "no suggestions"; anything else is a programming error and should
        # surface instead of being swallowed.
        return []

    # The code relies on the suggestions being at index 1 of a JSON array;
    # guard that shape rather than raising on an unexpected payload.
    if isinstance(payload, list) and len(payload) > 1 and isinstance(payload[1], list):
        return payload[1]
    return []
|
22 |
+
|
23 |
+
# Hàm tải ảnh
|
24 |
+
def download_images(search_term, num_images_per_term, save_folder, status_callback):
    """Download images found on a Google Images results page.

    Args:
        search_term: Text to search for; the word "free" is appended to it.
        num_images_per_term: Maximum number of images to save for this term.
        save_folder: Directory the images are written to (created if missing).
        status_callback: Callable taking one string, used to report progress
            and per-image errors.

    Returns:
        The number of images saved, or 0 when the results page could not be
        fetched.
    """
    os.makedirs(save_folder, exist_ok=True)

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    try:
        # ``params`` makes requests URL-encode the search term.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": f"{search_term} free", "tbm": "isch"},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        status_callback(f"Lỗi truy cập {search_term}")
        return 0

    soup = BeautifulSoup(response.text, 'html.parser')
    img_tags = soup.find_all('img')

    # Suggestions are arbitrary text: replace whitespace, path separators and
    # characters that are invalid in file names so the target path stays
    # inside ``save_folder`` and is writable on every platform.
    unsafe_chars = '<>:"/\\|?*'
    safe_term = "".join(
        "_" if ch.isspace() or ch in unsafe_chars else ch for ch in search_term
    )

    count = 0
    for i, img in enumerate(img_tags):
        if count >= num_images_per_term:
            break

        img_url = img.get('src')
        if not img_url or not img_url.startswith('http'):
            # Skip inline (data:) and relative sources.
            continue

        file_name = os.path.join(save_folder, f"{safe_term}_{count}.jpg")
        try:
            # Fetch with an explicit timeout so one stalled server cannot
            # hang the whole download; the file is only created once the
            # response has been received successfully.
            img_response = requests.get(img_url, headers=headers, timeout=10)
            img_response.raise_for_status()
            with open(file_name, "wb") as img_file:
                img_file.write(img_response.content)
        except (requests.RequestException, OSError) as e:
            status_callback(f"Lỗi ảnh {i} ({search_term}): {str(e)}")
            continue

        status_callback(f"Đã tải: {file_name}")
        count += 1
        # Random pause between downloads to avoid hammering the server.
        time.sleep(random.uniform(1, 3))

    return count
|
58 |
+
|
59 |
+
# Hàm nén thư mục thành file zip
|
60 |
+
def zip_folder(folder_path, output_zip):
    """Compress ``folder_path`` into ``<output_zip>.zip``.

    Args:
        folder_path: Directory whose contents are archived.
        output_zip: Path of the archive to create, without the ``.zip``
            extension (``shutil.make_archive`` appends it).

    Returns:
        ``True`` on success, otherwise a string describing the failure.
    """
    # Without this check a missing folder can produce an empty archive that
    # is reported as a success.
    if not os.path.isdir(folder_path):
        return f"Không tìm thấy thư mục: {folder_path}"

    try:
        shutil.make_archive(output_zip, 'zip', folder_path)
        return True
    except Exception as e:
        # Deliberate boundary: callers expect failures as a return value,
        # not as an exception.
        return str(e)
|
66 |
+
|
67 |
+
# Hàm chính tải ảnh
|
68 |
+
def start_download(initial_query, target_images, save_folder, max_per_term,
                   status_text, on_complete=None):
    """Download images on a background thread, then zip the result folder.

    Starting from ``initial_query``, the worker repeatedly asks
    ``get_google_suggestions`` for related queries, picks an unused one at
    random and calls ``download_images`` for it until ``target_images`` images
    have been saved or no new suggestion is available. Afterwards
    ``save_folder`` is archived as ``downloaded_images.zip`` next to it.

    Args:
        initial_query: Seed text used for the first suggestion lookup.
        target_images: Total number of images to download.
        save_folder: Directory the images are saved to.
        max_per_term: Maximum number of images to take from one query.
        status_text: Callable taking one string; it is invoked from the
            worker thread with progress messages.
        on_complete: Optional zero-argument callable invoked from the worker
            thread when it finishes, whether it succeeded or failed.

    Returns:
        The started ``threading.Thread``.
    """

    def download_thread():
        total_downloaded = 0
        try:
            current_query = initial_query
            used_queries = set()

            while total_downloaded < target_images:
                suggestions = get_google_suggestions(current_query)
                if not suggestions:
                    status_text("Hết gợi ý, dừng lại.")
                    break

                available_suggestions = [s for s in suggestions if s not in used_queries]
                if not available_suggestions:
                    status_text("Hết gợi ý mới, dừng lại.")
                    break

                current_query = random.choice(available_suggestions)
                used_queries.add(current_query)

                remaining = target_images - total_downloaded
                images_to_download = min(max_per_term, remaining)

                status_text(f"Tìm kiếm: {current_query}")
                downloaded = download_images(current_query, images_to_download, save_folder, status_text)
                total_downloaded += downloaded

                status_text(f"Tổng: {total_downloaded}/{target_images}")
                if total_downloaded < target_images:
                    # Pause only when another query will follow.
                    time.sleep(random.uniform(2, 5))

            status_text(f"Hoàn tất! Đã tải {total_downloaded} ảnh.")

            # Archive the folder. ``abspath`` strips any trailing separator so
            # the archive is always created beside the folder, never inside
            # the directory being zipped.
            status_text("Đang nén thư mục...")
            parent_dir = os.path.dirname(os.path.abspath(save_folder))
            zip_result = zip_folder(save_folder, os.path.join(parent_dir, "downloaded_images"))
            if zip_result is True:
                status_text("Đã nén thành công: downloaded_images.zip")
            else:
                status_text(f"Lỗi khi nén: {zip_result}")
        except Exception as exc:
            # Top-level boundary of the worker: report the failure instead of
            # letting the thread die silently.
            status_text(f"Lỗi không mong muốn: {exc}")
        finally:
            if on_complete is not None:
                on_complete()

    thread = threading.Thread(target=download_thread)
    thread.start()
    return thread


# GUI
class ImageDownloaderApp:
    """Tkinter window that collects download settings and shows progress.

    The download runs on a worker thread. Tk widgets are only touched from
    the thread that built the UI: messages reported by the worker are queued
    and flushed by a periodic ``after`` callback, which also re-enables the
    start button once the worker signals completion.
    """

    # Delay, in milliseconds, between two flushes of the worker's messages.
    _POLL_INTERVAL_MS = 100

    def __init__(self, root):
        """Build all widgets inside ``root``."""
        self.root = root
        self.root.title("Image Downloader")
        self.root.geometry("600x500")

        # State shared with the worker thread.
        self._ui_thread = threading.current_thread()
        self._pending_messages = []
        self._pending_lock = threading.Lock()
        self._download_finished = threading.Event()

        # Seed query label and entry
        tk.Label(root, text="Từ khóa ban đầu:").pack(pady=5)
        self.query_entry = tk.Entry(root, width=40)
        self.query_entry.insert(0, "free images")
        self.query_entry.pack()

        # Target number of images
        tk.Label(root, text="Số lượng ảnh cần tải:").pack(pady=5)
        self.target_entry = tk.Entry(root, width=10)
        self.target_entry.insert(0, "10000")
        self.target_entry.pack()

        # Maximum number of images per query
        tk.Label(root, text="Số ảnh tối đa mỗi từ khóa:").pack(pady=5)
        self.max_per_term_entry = tk.Entry(root, width=10)
        self.max_per_term_entry.insert(0, "20")
        self.max_per_term_entry.pack()

        # Destination folder
        tk.Label(root, text="Thư mục lưu:").pack(pady=5)
        self.folder_var = tk.StringVar(value="free_images")
        folder_frame = tk.Frame(root)
        folder_frame.pack()
        tk.Entry(folder_frame, textvariable=self.folder_var, width=30).pack(side=tk.LEFT)
        tk.Button(folder_frame, text="Chọn", command=self.choose_folder).pack(side=tk.LEFT, padx=5)

        # Start button
        self.start_button = tk.Button(root, text="Bắt đầu tải", command=self.start_download)
        self.start_button.pack(pady=10)

        # Status area
        tk.Label(root, text="Trạng thái:").pack(pady=5)
        self.status_text = tk.Text(root, height=15, width=70)
        self.status_text.pack()

    def choose_folder(self):
        """Let the user pick the destination folder with a directory dialog."""
        folder = filedialog.askdirectory()
        if folder:
            self.folder_var.set(folder)

    def update_status(self, message):
        """Append ``message`` to the status area.

        When called from the thread that built the UI the text is written
        immediately; from any other thread it is queued and written by the
        next poll on the UI thread.
        """
        if threading.current_thread() is self._ui_thread:
            self._append_status(message)
        else:
            with self._pending_lock:
                self._pending_messages.append(message)

    def _append_status(self, message):
        """Write one line to the status widget and scroll to it (UI thread only)."""
        self.status_text.insert(tk.END, f"{message}\n")
        self.status_text.see(tk.END)
        self.root.update_idletasks()

    def _poll_worker(self):
        """Flush queued worker messages; re-enable the button when it is done."""
        # Read the flag before draining: the worker sets it after its last
        # message, so a set flag guarantees the drain below sees everything.
        finished = self._download_finished.is_set()

        with self._pending_lock:
            messages = self._pending_messages
            self._pending_messages = []
        for message in messages:
            self._append_status(message)

        if finished:
            self.start_button.config(state="normal")
        else:
            self.root.after(self._POLL_INTERVAL_MS, self._poll_worker)

    def start_download(self):
        """Validate the form and launch the background download."""
        try:
            target_images = int(self.target_entry.get())
            max_per_term = int(self.max_per_term_entry.get())
        except ValueError:
            messagebox.showerror("Lỗi", "Số lượng phải là số nguyên!")
            return

        initial_query = self.query_entry.get().strip()
        save_folder = self.folder_var.get().strip()

        if not initial_query or target_images <= 0 or max_per_term <= 0 or not save_folder:
            messagebox.showerror("Lỗi", "Vui lòng nhập đầy đủ và hợp lệ các thông tin!")
            return

        # Keep the button disabled until the worker reports completion so a
        # second download cannot be started while one is running.
        self.start_button.config(state="disabled")
        self.status_text.delete("1.0", tk.END)
        self._download_finished.clear()

        # Inside a method this name resolves to the module-level function.
        start_download(
            initial_query,
            target_images,
            save_folder,
            max_per_term,
            self.update_status,
            on_complete=self._download_finished.set,
        )
        self.root.after(self._POLL_INTERVAL_MS, self._poll_worker)
|
182 |
+
|
183 |
+
# Chạy ứng dụng
|
184 |
+
if __name__ == "__main__":
    # Build the main window and hand control to the Tk event loop.
    root = tk.Tk()
    app = ImageDownloaderApp(root)
    root.mainloop()
|