"""Tkinter tool that downloads images found via Google image search.

Starting from an initial keyword, the tool repeatedly asks the Google
suggestion endpoint for related queries, picks an unused one at random,
downloads the ``<img>`` sources from the image-search result page for it,
and finally zips the download folder.
"""

import json
import os
import queue
import random
import shutil
import threading
import time
import tkinter as tk
import urllib.request
from tkinter import ttk, messagebox, filedialog
from typing import Callable, List, Optional, Union

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every search/suggestion request.
REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/58.0.3029.110 Safari/537.3"
    )
}

# Characters that cannot appear in a file name on at least one major OS.
_INVALID_FILENAME_CHARS = frozenset('<>:"/\\|?*')


def get_google_suggestions(query: str) -> List[str]:
    """Return Google's search suggestions for *query*.

    Any network failure or unexpected response shape yields an empty list,
    so callers can treat "no suggestions" and "lookup failed" the same way.
    """
    try:
        response = requests.get(
            "http://suggestqueries.google.com/complete/search",
            # Let requests URL-encode the query (spaces, '&', '#', ...).
            params={"client": "firefox", "q": query},
            headers=REQUEST_HEADERS,
            timeout=5,
        )
        suggestions = json.loads(response.text)[1]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        return []
    return suggestions if isinstance(suggestions, list) else []


def _safe_file_stem(search_term: str) -> str:
    """Turn a search term into a string that is safe to use in a file name."""
    stem = search_term.replace(" ", "_")
    return "".join(
        "_" if ch in _INVALID_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in stem
    )


def download_images(
    search_term: str,
    num_images_per_term: int,
    save_folder: str,
    status_callback: Callable[[str], None],
) -> int:
    """Download up to *num_images_per_term* images for *search_term*.

    The Google image-search result page is fetched and every ``<img>`` whose
    ``src`` starts with ``http`` is saved as
    ``<save_folder>/<term>_<index>.jpg``.

    Args:
        search_term: Query to search for (" free" is appended to it).
        num_images_per_term: Maximum number of images to save.
        save_folder: Destination directory; created if missing.
        status_callback: Called with a human-readable progress message.

    Returns:
        The number of images actually saved (0 if the search page could not
        be fetched).
    """
    os.makedirs(save_folder, exist_ok=True)

    try:
        response = requests.get(
            "https://www.google.com/search",
            # params= URL-encodes the term; the space before "free" is sent
            # as '+', matching the original hand-built URL.
            params={"q": f"{search_term} free", "tbm": "isch"},
            headers=REQUEST_HEADERS,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        status_callback(f"Lỗi truy cập {search_term}")
        return 0

    soup = BeautifulSoup(response.text, 'html.parser')
    file_stem = _safe_file_stem(search_term)

    count = 0
    for i, img in enumerate(soup.find_all('img')):
        if count >= num_images_per_term:
            break

        img_url = img.get('src')
        if not img_url or not img_url.startswith('http'):
            continue

        file_name = f"{save_folder}/{file_stem}_{count}.jpg"
        try:
            # urlopen with a timeout so one stalled server cannot hang the
            # whole worker; read fully before writing so a failed transfer
            # does not leave a truncated file behind.
            with urllib.request.urlopen(img_url, timeout=10) as remote:
                data = remote.read()
            with open(file_name, "wb") as out:
                out.write(data)
        except Exception as e:  # best effort: report and move on
            status_callback(f"Lỗi ảnh {i} ({search_term}): {str(e)}")
            continue

        status_callback(f"Đã tải: {file_name}")
        count += 1
        # Random pause between downloads to avoid hammering the server.
        time.sleep(random.uniform(1, 3))

    return count


def zip_folder(folder_path: str, output_zip: str) -> Union[bool, str]:
    """Compress *folder_path* into ``<output_zip>.zip``.

    Returns:
        ``True`` on success, otherwise the error message as a string.
    """
    try:
        shutil.make_archive(output_zip, 'zip', folder_path)
        return True
    except Exception as e:
        return str(e)


def start_download(
    initial_query: str,
    target_images: int,
    save_folder: str,
    max_per_term: int,
    status_text: Callable[[str], None],
    on_complete: Optional[Callable[[], None]] = None,
) -> threading.Thread:
    """Run the whole download-and-zip job on a background thread.

    Args:
        initial_query: Seed keyword used to obtain the first suggestions.
        target_images: Total number of images to aim for.
        save_folder: Directory the images are written to.
        max_per_term: Upper bound of images downloaded per suggested query.
        status_text: Progress callback. It is invoked from the worker
            thread, so it must be safe to call from a non-GUI thread.
        on_complete: Optional callback invoked (also from the worker thread)
            once the job has finished, whether it succeeded or failed.

    Returns:
        The started worker thread.
    """

    def download_thread():
        try:
            total_downloaded = 0
            current_query = initial_query
            used_queries = set()

            while total_downloaded < target_images:
                suggestions = get_google_suggestions(current_query)
                if not suggestions:
                    status_text("Hết gợi ý, dừng lại.")
                    break

                available_suggestions = [
                    s for s in suggestions if s not in used_queries
                ]
                if not available_suggestions:
                    status_text("Hết gợi ý mới, dừng lại.")
                    break

                # Follow a random unused suggestion; it also becomes the
                # seed for the next round of suggestions.
                current_query = random.choice(available_suggestions)
                used_queries.add(current_query)

                remaining = target_images - total_downloaded
                images_to_download = min(max_per_term, remaining)

                status_text(f"Tìm kiếm: {current_query}")
                downloaded = download_images(
                    current_query, images_to_download, save_folder, status_text
                )
                total_downloaded += downloaded
                status_text(f"Tổng: {total_downloaded}/{target_images}")
                time.sleep(random.uniform(2, 5))

            status_text(f"Hoàn tất! Đã tải {total_downloaded} ảnh.")

            # Zip the folder next to it. abspath() strips a trailing slash
            # so the archive never ends up inside the folder being archived.
            status_text("Đang nén thư mục...")
            parent_dir = os.path.dirname(os.path.abspath(save_folder))
            zip_result = zip_folder(
                save_folder, os.path.join(parent_dir, "downloaded_images")
            )
            if zip_result is True:
                status_text("Đã nén thành công: downloaded_images.zip")
            else:
                status_text(f"Lỗi khi nén: {zip_result}")
        finally:
            if on_complete is not None:
                on_complete()

    # Daemon thread: closing the window must not leave the process running
    # for the remainder of a potentially very long download.
    thread = threading.Thread(target=download_thread, daemon=True)
    thread.start()
    return thread


class ImageDownloaderApp:
    """Main window: collects the job parameters and shows progress."""

    # How often (ms) the GUI thread drains work posted by the worker thread.
    _POLL_INTERVAL_MS = 100

    def __init__(self, root):
        self.root = root
        self.root.title("Image Downloader")
        self.root.geometry("600x500")

        # Callables posted by the worker thread, executed on the GUI thread.
        self._ui_queue = queue.Queue()

        # Initial keyword label and entry
        tk.Label(root, text="Từ khóa ban đầu:").pack(pady=5)
        self.query_entry = tk.Entry(root, width=40)
        self.query_entry.insert(0, "free images")
        self.query_entry.pack()

        # Target number of images
        tk.Label(root, text="Số lượng ảnh cần tải:").pack(pady=5)
        self.target_entry = tk.Entry(root, width=10)
        self.target_entry.insert(0, "10000")
        self.target_entry.pack()

        # Maximum number of images per keyword
        tk.Label(root, text="Số ảnh tối đa mỗi từ khóa:").pack(pady=5)
        self.max_per_term_entry = tk.Entry(root, width=10)
        self.max_per_term_entry.insert(0, "20")
        self.max_per_term_entry.pack()

        # Destination folder
        tk.Label(root, text="Thư mục lưu:").pack(pady=5)
        self.folder_var = tk.StringVar(value="free_images")
        folder_frame = tk.Frame(root)
        folder_frame.pack()
        tk.Entry(folder_frame, textvariable=self.folder_var, width=30).pack(side=tk.LEFT)
        tk.Button(folder_frame, text="Chọn", command=self.choose_folder).pack(side=tk.LEFT, padx=5)

        # Start button
        self.start_button = tk.Button(root, text="Bắt đầu tải", command=self.start_download)
        self.start_button.pack(pady=10)

        # Status display area
        tk.Label(root, text="Trạng thái:").pack(pady=5)
        self.status_text = tk.Text(root, height=15, width=70)
        self.status_text.pack()

        self.root.after(self._POLL_INTERVAL_MS, self._process_ui_queue)

    def choose_folder(self):
        """Let the user pick the destination folder via a directory dialog."""
        folder = filedialog.askdirectory()
        if folder:
            self.folder_var.set(folder)

    def _append_status(self, message):
        """Append *message* to the status box. GUI thread only."""
        self.status_text.insert(tk.END, f"{message}\n")
        self.status_text.see(tk.END)
        self.root.update_idletasks()

    def _enable_start_button(self):
        """Re-enable the start button once a job has finished."""
        self.start_button.config(state="normal")

    def _process_ui_queue(self):
        """Run all pending callables posted by worker threads, then reschedule."""
        try:
            while True:
                self._ui_queue.get_nowait()()
        except queue.Empty:
            pass
        finally:
            self.root.after(self._POLL_INTERVAL_MS, self._process_ui_queue)

    def update_status(self, message):
        """Show *message* in the status box; safe to call from any thread.

        Tk widgets may only be touched from the thread running the main
        loop (assumed here to be the process's main thread), so calls from
        other threads are queued and applied by ``_process_ui_queue``.
        """
        if threading.current_thread() is threading.main_thread():
            self._append_status(message)
        else:
            self._ui_queue.put(lambda: self._append_status(message))

    def start_download(self):
        """Validate the form and launch the background download job."""
        try:
            target_images = int(self.target_entry.get())
            max_per_term = int(self.max_per_term_entry.get())
        except ValueError:
            messagebox.showerror("Lỗi", "Số lượng phải là số nguyên!")
            return

        initial_query = self.query_entry.get()
        save_folder = self.folder_var.get()

        if not initial_query or target_images <= 0 or max_per_term <= 0 or not save_folder:
            messagebox.showerror("Lỗi", "Vui lòng nhập đầy đủ và hợp lệ các thông tin!")
            return

        # Keep the button disabled for the whole job; it is re-enabled by
        # the completion callback, not when the thread has merely started.
        self.start_button.config(state="disabled")
        self.status_text.delete("1.0", tk.END)
        try:
            start_download(
                initial_query,
                target_images,
                save_folder,
                max_per_term,
                self.update_status,
                on_complete=lambda: self._ui_queue.put(self._enable_start_button),
            )
        except Exception:
            # The worker never started, so nothing will re-enable the button.
            self._enable_start_button()
            raise


def main():
    """Create the main window and run the Tk event loop."""
    root = tk.Tk()
    ImageDownloaderApp(root)
    root.mainloop()


# Run the application
if __name__ == "__main__":
    main()