"""Gradio app that downloads images found via Google Images and zips them.

Starting from an initial keyword, the app repeatedly asks the Google
suggestion endpoint for related queries, downloads the ``<img>`` sources found
on the Google Images result page of a randomly chosen suggestion, and finally
compresses the download folder into ``downloaded_images.zip``.
"""

import json
import os
import random
import shutil
import threading
import time
import urllib.request
from typing import Callable, List

import gradio as gr
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every outgoing HTTP request.
_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
}

_SUGGEST_URL = "http://suggestqueries.google.com/complete/search"
_IMAGE_SEARCH_URL = "https://www.google.com/search"


def _safe_file_stem(search_term: str) -> str:
    """Return ``search_term`` with path-unsafe characters replaced by ``_``."""
    return "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in search_term
    )


# Fetch search suggestions from Google.
def get_google_suggestions(query: str) -> List[str]:
    """Return Google's search suggestions for ``query``.

    Args:
        query: Search text to get suggestions for.

    Returns:
        The second element of the JSON array returned by the suggestion
        endpoint, or an empty list when the request or parsing fails.
    """
    try:
        # ``params`` URL-encodes the query, so spaces, ``&`` and non-ASCII
        # characters cannot corrupt the request.
        response = requests.get(
            _SUGGEST_URL,
            params={"client": "firefox", "q": query},
            headers=_HEADERS,
            timeout=5,
        )
        return json.loads(response.text)[1]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Network failure, invalid JSON, or an unexpected payload shape.
        return []


# Download images for one search term.
def download_images(
    search_term: str,
    num_images_per_term: int,
    save_folder: str,
    status_callback: Callable[[str], None],
) -> int:
    """Download up to ``num_images_per_term`` images for ``search_term``.

    Args:
        search_term: Query sent to Google Images (with " free" appended).
        num_images_per_term: Maximum number of images to save.
        save_folder: Directory the images are written to; created if missing.
        status_callback: Called with a progress or error message per event.

    Returns:
        The number of images actually saved.
    """
    os.makedirs(save_folder, exist_ok=True)
    if num_images_per_term <= 0:
        # Nothing to download, so skip the network round trip.
        return 0

    try:
        response = requests.get(
            _IMAGE_SEARCH_URL,
            params={"q": f"{search_term} free", "tbm": "isch"},
            headers=_HEADERS,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        status_callback(f"Lỗi truy cập {search_term}")
        return 0

    soup = BeautifulSoup(response.text, 'html.parser')
    file_stem = _safe_file_stem(search_term)
    count = 0
    for i, img in enumerate(soup.find_all('img')):
        if count >= num_images_per_term:
            break
        img_url = img.get('src')
        if not img_url or not img_url.startswith('http'):
            continue
        file_name = f"{save_folder}/{file_stem}_{count}.jpg"
        try:
            # A timeout keeps one stalled image from blocking the whole run.
            img_response = requests.get(img_url, headers=_HEADERS, timeout=10)
            img_response.raise_for_status()
            with open(file_name, "wb") as image_file:
                image_file.write(img_response.content)
        except (requests.RequestException, OSError) as exc:
            status_callback(f"Lỗi ảnh {i} ({search_term}): {str(exc)}")
            continue
        status_callback(f"Đã tải: {file_name}")
        count += 1
        # Random pause between downloads.
        time.sleep(random.uniform(1, 3))
    return count


# Compress the download folder.
def zip_folder(folder_path: str) -> str:
    """Zip ``folder_path`` into ``downloaded_images.zip`` next to it.

    Args:
        folder_path: Directory whose contents are archived.

    Returns:
        A status message describing success or the error that occurred.
    """
    # normpath drops a trailing separator; without it dirname() would return
    # the folder itself and the archive would be created inside its own source.
    parent_dir = os.path.dirname(os.path.normpath(folder_path))
    output_zip = os.path.join(parent_dir, "downloaded_images")
    try:
        shutil.make_archive(output_zip, 'zip', folder_path)
        return f"Đã nén thành công: {output_zip}.zip"
    except Exception as exc:
        # Best-effort step: report the failure to the UI instead of raising.
        return f"Lỗi khi nén: {str(exc)}"


# Main download routine.
def start_download(
    initial_query: str,
    target_images: int,
    max_per_term: int,
    save_folder: str,
) -> str:
    """Download images across related queries, then zip the result folder.

    Each round picks a random, not-yet-used suggestion for the current query,
    downloads images for it, and uses it as the query for the next round.

    Args:
        initial_query: Keyword used to request the first suggestions.
        target_images: Total number of images to collect.
        max_per_term: Maximum number of images to download per query.
        save_folder: Directory the images are written to.

    Returns:
        All status messages joined with newlines.
    """
    status_log = []

    def status_callback(message):
        status_log.append(message)

    if target_images > 0 and max_per_term <= 0:
        # With a non-positive per-term limit no round can ever make progress.
        status_callback("Số ảnh tối đa mỗi từ khóa phải lớn hơn 0.")
        return "\n".join(status_log)

    total_downloaded = 0
    current_query = initial_query
    used_queries = set()

    while total_downloaded < target_images:
        suggestions = get_google_suggestions(current_query)
        if not suggestions:
            status_callback("Hết gợi ý, dừng lại.")
            break

        available_suggestions = [s for s in suggestions if s not in used_queries]
        if not available_suggestions:
            status_callback("Hết gợi ý mới, dừng lại.")
            break

        current_query = random.choice(available_suggestions)
        used_queries.add(current_query)

        remaining = target_images - total_downloaded
        images_to_download = min(max_per_term, remaining)

        status_callback(f"Tìm kiếm: {current_query}")
        downloaded = download_images(
            current_query, images_to_download, save_folder, status_callback
        )
        total_downloaded += downloaded
        status_callback(f"Tổng: {total_downloaded}/{target_images}")

        if total_downloaded < target_images:
            # Pause only when another round follows.
            time.sleep(random.uniform(2, 5))

    status_callback(f"Hoàn tất! Đã tải {total_downloaded} ảnh.")
    status_callback("Đang nén thư mục...")
    zip_result = zip_folder(save_folder)
    status_callback(zip_result)
    return "\n".join(status_log)


# Gradio interface.
def create_interface():
    """Build and return the Gradio Blocks UI for the downloader."""
    with gr.Blocks(title="Image Downloader") as demo:
        gr.Markdown("# Image Downloader")
        gr.Markdown("Tải ảnh từ Google Images và nén thành file zip.")

        with gr.Row():
            with gr.Column():
                initial_query = gr.Textbox(label="Từ khóa ban đầu", value="free images")
                target_images = gr.Number(label="Số lượng ảnh cần tải", value=10000, precision=0)
                max_per_term = gr.Number(label="Số ảnh tối đa mỗi từ khóa", value=20, precision=0)
                save_folder = gr.Textbox(label="Thư mục lưu", value="free_images")
            with gr.Column():
                output = gr.Textbox(label="Trạng thái", lines=20, interactive=False)

        submit_btn = gr.Button("Bắt đầu tải")

        def run_download(query, target, max_term, folder):
            # The numeric inputs are converted to int before use.
            return start_download(query, int(target), int(max_term), folder)

        submit_btn.click(
            fn=run_download,
            inputs=[initial_query, target_images, max_per_term, save_folder],
            outputs=output,
        )
    return demo


# Run the application.
if __name__ == "__main__":
    interface = create_interface()
    interface.launch()