File size: 7,242 Bytes
b0327be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import requests
from bs4 import BeautifulSoup
import os
import urllib.request
import json
import time
import random
import tkinter as tk
from tkinter import ttk, messagebox, filedialog
import shutil
import threading

# Fetch search suggestions from Google
def get_google_suggestions(query):
    """Return Google autocomplete suggestions for ``query``.

    The query is sent as a request parameter so that characters such as
    ``&``, ``#`` or spaces are URL-encoded instead of corrupting the URL.

    Args:
        query: Search text to get suggestions for.

    Returns:
        The list found at index 1 of the JSON response, or an empty list
        when the query is blank, the request fails, or the response does
        not have the expected shape.
    """
    # A blank query cannot yield suggestions; skip the network round trip.
    if not query or not query.strip():
        return []

    url = "https://suggestqueries.google.com/complete/search"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    try:
        response = requests.get(
            url,
            params={"client": "firefox", "q": query},
            headers=headers,
            timeout=5,
        )
        response.raise_for_status()
        suggestions = json.loads(response.text)[1]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Network failure, invalid JSON (JSONDecodeError is a ValueError),
        # or a payload without an element at index 1.
        return []

    # Callers iterate over the result, so never hand back a non-list.
    return suggestions if isinstance(suggestions, list) else []

# Download images
def download_images(search_term, num_images_per_term, save_folder, status_callback):
    """Download up to ``num_images_per_term`` images for ``search_term``.

    Fetches the Google Images result page for ``"<search_term> free"``,
    collects every ``<img>`` tag whose ``src`` starts with ``http`` and saves
    each one as ``<term>_<n>.jpg`` inside ``save_folder``. A random pause of
    1-3 seconds follows every successful download.

    Args:
        search_term: Text to search for.
        num_images_per_term: Maximum number of images to save.
        save_folder: Destination directory; created if it does not exist.
        status_callback: Callable receiving one progress/error message string.

    Returns:
        The number of images actually saved (0 when the search page could
        not be fetched).
    """
    os.makedirs(save_folder, exist_ok=True)

    # Nothing requested: avoid hitting the search page at all.
    if num_images_per_term <= 0:
        return 0

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}

    try:
        # Passing the query via ``params`` URL-encodes the search term.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": f"{search_term} free", "tbm": "isch"},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        status_callback(f"Lỗi truy cập {search_term}")
        return 0

    soup = BeautifulSoup(response.text, 'html.parser')

    # Suggestions may contain characters that are illegal in file names
    # (or path separators); keep letters, digits, '-' and '_' only.
    safe_stem = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in search_term
    )

    count = 0
    for i, img in enumerate(soup.find_all('img')):
        if count >= num_images_per_term:
            break

        img_url = img.get('src')
        if not img_url or not img_url.startswith('http'):
            continue

        file_name = os.path.join(save_folder, f"{safe_stem}_{count}.jpg")
        try:
            # A timeout keeps one stalled server from hanging the caller.
            img_response = requests.get(img_url, headers=headers, timeout=10)
            img_response.raise_for_status()
            with open(file_name, "wb") as image_file:
                image_file.write(img_response.content)
        except (requests.RequestException, OSError) as e:
            # Best effort: report the failure and move on to the next image.
            status_callback(f"Lỗi ảnh {i} ({search_term}): {str(e)}")
            continue

        status_callback(f"Đã tải: {file_name}")
        count += 1
        time.sleep(random.uniform(1, 3))

    return count

# Compress a folder into a zip file
def zip_folder(folder_path, output_zip):
    """Archive ``folder_path`` into ``<output_zip>.zip``.

    Args:
        folder_path: Directory whose contents are archived.
        output_zip: Archive path without the ``.zip`` extension.

    Returns:
        ``True`` on success; otherwise the text of the exception raised
        while creating the archive.
    """
    try:
        shutil.make_archive(output_zip, 'zip', folder_path)
    except Exception as err:
        return str(err)
    else:
        return True

# Main download driver
def start_download(initial_query, target_images, save_folder, max_per_term, status_text):
    """Start a background thread that downloads images and zips the folder.

    The worker repeatedly asks ``get_google_suggestions`` for suggestions of
    the current query, picks a random suggestion that has not been used yet,
    downloads up to ``max_per_term`` images for it with ``download_images``
    and continues from that suggestion until ``target_images`` images were
    saved or no unused suggestion remains. Afterwards ``save_folder`` is
    archived as ``downloaded_images.zip`` next to the folder.

    Args:
        initial_query: Seed text for the first suggestion lookup.
        target_images: Total number of images to download.
        save_folder: Directory receiving the images.
        max_per_term: Upper bound of images downloaded per search term.
        status_text: Callable receiving progress messages; it is invoked
            from the worker thread.

    Returns:
        The started ``threading.Thread`` so callers can join or poll it.
    """
    def download_thread():
        total_downloaded = 0
        current_query = initial_query
        used_queries = set()

        try:
            while total_downloaded < target_images:
                suggestions = get_google_suggestions(current_query)
                if not suggestions:
                    status_text("Hết gợi ý, dừng lại.")
                    break

                available_suggestions = [s for s in suggestions if s not in used_queries]
                if not available_suggestions:
                    status_text("Hết gợi ý mới, dừng lại.")
                    break

                current_query = random.choice(available_suggestions)
                used_queries.add(current_query)

                remaining = target_images - total_downloaded
                images_to_download = min(max_per_term, remaining)

                status_text(f"Tìm kiếm: {current_query}")
                downloaded = download_images(current_query, images_to_download, save_folder, status_text)
                total_downloaded += downloaded

                status_text(f"Tổng: {total_downloaded}/{target_images}")
                time.sleep(random.uniform(2, 5))

            status_text(f"Hoàn tất! Đã tải {total_downloaded} ảnh.")

            # The folder is only created by download_images; if the loop
            # stopped before any search ran there is nothing to archive.
            if not os.path.isdir(save_folder):
                status_text("Không có thư mục ảnh để nén.")
                return

            # Compress the folder
            status_text("Đang nén thư mục...")
            # abspath() drops a trailing separator, so the archive always
            # lands beside the folder and never inside the tree being zipped.
            parent_dir = os.path.dirname(os.path.abspath(save_folder))
            zip_result = zip_folder(save_folder, os.path.join(parent_dir, "downloaded_images"))
            if zip_result is True:
                status_text("Đã nén thành công: downloaded_images.zip")
            else:
                status_text(f"Lỗi khi nén: {zip_result}")
        except Exception as exc:
            # Boundary of the worker thread: surface the failure to the user
            # instead of letting the thread die with only a stderr traceback.
            status_text(f"Lỗi không mong muốn: {exc}")

    thread = threading.Thread(target=download_thread)
    thread.start()
    return thread

# GUI
class ImageDownloaderApp:
    """Tkinter window that collects download settings and shows progress.

    Status messages may be reported from a worker thread, so
    ``update_status`` only enqueues them; a periodic callback running in the
    Tk event loop writes them to the text widget and re-enables the start
    button once the worker thread has finished.
    """

    # Interval, in milliseconds, between two polls of the status queue.
    _POLL_INTERVAL_MS = 100

    def __init__(self, root):
        """Build all widgets inside ``root`` and start the status poller."""
        import queue  # local import: only this class needs the module

        self.root = root
        self.root.title("Image Downloader")
        self.root.geometry("600x500")

        # Messages waiting to be shown, filled by update_status().
        self._status_queue = queue.Queue()
        # Thread of the running download (None when unknown/finished).
        self._worker = None
        # True between a successful start and the end of the worker.
        self._busy = False

        # Label and entry for the initial keyword
        tk.Label(root, text="Từ khóa ban đầu:").pack(pady=5)
        self.query_entry = tk.Entry(root, width=40)
        self.query_entry.insert(0, "free images")
        self.query_entry.pack()

        # Target number of images
        tk.Label(root, text="Số lượng ảnh cần tải:").pack(pady=5)
        self.target_entry = tk.Entry(root, width=10)
        self.target_entry.insert(0, "10000")
        self.target_entry.pack()

        # Maximum number of images per keyword
        tk.Label(root, text="Số ảnh tối đa mỗi từ khóa:").pack(pady=5)
        self.max_per_term_entry = tk.Entry(root, width=10)
        self.max_per_term_entry.insert(0, "20")
        self.max_per_term_entry.pack()

        # Destination folder
        tk.Label(root, text="Thư mục lưu:").pack(pady=5)
        self.folder_var = tk.StringVar(value="free_images")
        folder_frame = tk.Frame(root)
        folder_frame.pack()
        tk.Entry(folder_frame, textvariable=self.folder_var, width=30).pack(side=tk.LEFT)
        tk.Button(folder_frame, text="Chọn", command=self.choose_folder).pack(side=tk.LEFT, padx=5)

        # Start button
        self.start_button = tk.Button(root, text="Bắt đầu tải", command=self.start_download)
        self.start_button.pack(pady=10)

        # Status display area
        tk.Label(root, text="Trạng thái:").pack(pady=5)
        self.status_text = tk.Text(root, height=15, width=70)
        self.status_text.pack()

        self.root.after(self._POLL_INTERVAL_MS, self._poll_worker)

    def choose_folder(self):
        """Let the user pick the destination folder with a directory dialog."""
        folder = filedialog.askdirectory()
        if folder:
            self.folder_var.set(folder)

    def update_status(self, message):
        """Queue ``message`` for display in the status area.

        Safe to call from any thread: the widget itself is only touched by
        ``_poll_worker`` in the Tk event loop.
        """
        self._status_queue.put(message)

    def _poll_worker(self):
        """Flush queued messages and re-enable the button when work is done."""
        pending = self._status_queue
        # Single consumer, so empty() followed by get_nowait() cannot race.
        while not pending.empty():
            self.status_text.insert(tk.END, f"{pending.get_nowait()}\n")
            self.status_text.see(tk.END)

        # Messages are drained before this check; anything the worker queued
        # in between is picked up by the next poll.
        if self._busy and (self._worker is None or not self._worker.is_alive()):
            self._busy = False
            self._worker = None
            self.start_button.config(state="normal")

        self.root.after(self._POLL_INTERVAL_MS, self._poll_worker)

    def start_download(self):
        """Validate the form and launch the background download."""
        initial_query = self.query_entry.get().strip()
        save_folder = self.folder_var.get().strip()
        try:
            target_images = int(self.target_entry.get())
            max_per_term = int(self.max_per_term_entry.get())
        except ValueError:
            messagebox.showerror("Lỗi", "Số lượng phải là số nguyên!")
            return

        if not initial_query or target_images <= 0 or max_per_term <= 0 or not save_folder:
            messagebox.showerror("Lỗi", "Vui lòng nhập đầy đủ và hợp lệ các thông tin!")
            return

        # Stays disabled until _poll_worker sees the worker thread finish,
        # which prevents several downloads from running at the same time.
        self.start_button.config(state="disabled")
        self.status_text.delete("1.0", tk.END)

        # The module-level start_download() spawns its worker internally;
        # identify it as the thread that appeared during the call.
        threads_before = set(threading.enumerate())
        try:
            start_download(initial_query, target_images, save_folder, max_per_term, self.update_status)
        except Exception:
            self.start_button.config(state="normal")
            raise
        self._worker = next(
            (t for t in threading.enumerate() if t not in threads_before),
            None,
        )
        self._busy = True

# Run the application: create the Tk root window, build the downloader GUI
# inside it and hand control to the Tk event loop (only when this file is
# executed as a script, not when it is imported).
if __name__ == "__main__":
    root = tk.Tk()
    app = ImageDownloaderApp(root)
    root.mainloop()