# Spaces: Running
import requests | |
from bs4 import BeautifulSoup | |
import os | |
import urllib.request | |
import json | |
import time | |
import random | |
import gradio as gr | |
import shutil | |
# Helper functions (kept unchanged)
def get_google_suggestions(query):
    """Return Google autocomplete suggestions for *query*.

    The request goes to the ``suggestqueries.google.com`` endpoint with
    ``client=firefox``; the second element of the decoded JSON payload is
    returned.

    Args:
        query: Search text to get suggestions for.

    Returns:
        The list of suggestions, or an empty list when the request fails or
        the response cannot be decoded into the expected structure.
    """
    url = "http://suggestqueries.google.com/complete/search"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    try:
        # Passing the query through ``params`` lets requests percent-encode
        # it, so characters such as '&', '#' or '+' cannot corrupt the URL.
        response = requests.get(
            url,
            params={"client": "firefox", "q": query},
            headers=headers,
            timeout=5,
        )
        suggestions = json.loads(response.text)[1]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Best effort: network errors, invalid JSON, or an unexpected payload
        # shape all mean "no suggestions" for the caller.
        return []
    # Callers iterate and filter the result, so anything but a list is useless.
    return suggestions if isinstance(suggestions, list) else []
def _safe_file_stem(search_term):
    """Return *search_term* with spaces and path-hostile characters replaced by '_'."""
    unsafe = '\\/:*?"<>|'
    return "".join(
        "_" if ch == " " or ch in unsafe or ord(ch) < 32 else ch
        for ch in search_term
    )


def download_images(search_term, num_images_per_term, save_folder, status_callback):
    """Download up to *num_images_per_term* images for *search_term*.

    A Google Images result page for "<search_term> free" is fetched and every
    ``<img>`` tag whose ``src`` starts with ``http`` is saved into
    *save_folder* as ``<term>_<n>.jpg``.

    Args:
        search_term: Text to search for.
        num_images_per_term: Maximum number of images to save.
        save_folder: Destination directory; created when missing.
        status_callback: Callable taking one string, used for progress and
            error messages.

    Returns:
        The number of images saved (0 when the search page cannot be fetched).
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_folder, exist_ok=True)

    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}
    try:
        # ``params`` percent-encodes the term; the space before "free" is
        # sent as '+', matching the hand-built URL this replaces.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": f"{search_term} free", "tbm": "isch"},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        status_callback(f"Lỗi truy cập {search_term}")
        return 0

    soup = BeautifulSoup(response.text, 'html.parser')
    # Suggestions may contain '/' or other characters that are not valid in
    # a file name, so the stem is sanitised once up front.
    file_stem = _safe_file_stem(search_term)

    count = 0
    for i, img in enumerate(soup.find_all('img')):
        if count >= num_images_per_term:
            break
        try:
            img_url = img.get('src')
            if not (img_url and img_url.startswith('http')):
                continue
            file_name = os.path.join(save_folder, f"{file_stem}_{count}.jpg")
            # A timeout keeps one stalled image host from hanging the run.
            img_response = requests.get(img_url, headers=headers, timeout=10)
            img_response.raise_for_status()
            with open(file_name, "wb") as fh:
                fh.write(img_response.content)
            status_callback(f"Đã tải: {file_name}")
            count += 1
            # Random pause between downloads.
            time.sleep(random.uniform(1, 3))
        except Exception as e:
            # Best effort: report the failed image and move on to the next.
            status_callback(f"Lỗi ảnh {i} ({search_term}): {str(e)}")
            continue
    return count
def zip_folder(folder_path):
    """Zip the contents of *folder_path* into ``downloaded_images.zip``.

    The archive is written next to the folder (in its parent directory).

    Args:
        folder_path: Directory whose contents should be archived.

    Returns:
        A ``(zip_path, message)`` tuple. ``zip_path`` is the path of the
        created archive, or ``None`` when the folder is missing or archiving
        fails; ``message`` describes the outcome.
    """
    if not folder_path or not os.path.isdir(folder_path):
        return None, f"Lỗi khi nén: thư mục không tồn tại: {folder_path}"

    # abspath() drops any trailing separator. Without that, dirname() of
    # "dir/" is "dir" itself and the archive would be created inside the
    # very folder that is being archived.
    folder_path = os.path.abspath(folder_path)
    output_zip = os.path.join(os.path.dirname(folder_path), "downloaded_images")
    try:
        shutil.make_archive(output_zip, 'zip', folder_path)
    except Exception as e:
        return None, f"Lỗi khi nén: {str(e)}"
    return output_zip + ".zip", "Đã nén thành công"
def start_download(initial_query, target_images, max_per_term, save_folder, zip_files):
    """Download images by walking Google search suggestions.

    Starting from *initial_query*, a random not-yet-used suggestion is picked
    each round, images are downloaded for it, and that suggestion becomes the
    seed for the next round. The walk stops once *target_images* images have
    been downloaded or no new suggestion is available.

    Args:
        initial_query: Seed text for the first suggestion lookup.
        target_images: Total number of images to aim for.
        max_per_term: Upper bound on images downloaded per search term.
        save_folder: Directory that receives the images.
        zip_files: When truthy, the folder is zipped after downloading.

    Returns:
        A ``(log_text, zip_path)`` tuple: the newline-joined status log and
        the archive path (``None`` when zipping was skipped or failed).
    """
    messages = []
    # The bound ``append`` doubles as the status callback for the helpers.
    log = messages.append

    fetched_total = 0
    seen_terms = set()
    term = initial_query

    while fetched_total < target_images:
        candidates = get_google_suggestions(term)
        if not candidates:
            log("Hết gợi ý, dừng lại.")
            break

        fresh = [candidate for candidate in candidates if candidate not in seen_terms]
        if not fresh:
            log("Hết gợi ý mới, dừng lại.")
            break

        term = random.choice(fresh)
        seen_terms.add(term)

        # Never request more than what is still missing from the target.
        quota = min(max_per_term, target_images - fetched_total)
        log(f"Tìm kiếm: {term}")
        fetched_total += download_images(term, quota, save_folder, log)
        log(f"Tổng: {fetched_total}/{target_images}")
        time.sleep(random.uniform(2, 5))

    log(f"Hoàn tất! Đã tải {fetched_total} ảnh.")

    archive_path = None
    if zip_files:
        log("Đang nén thư mục...")
        archive_path, archive_note = zip_folder(save_folder)
        log(archive_note)

    return "\n".join(messages), archive_path
# Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI for the image downloader.

    The layout has a settings column (query, image counts, save folder, ZIP
    option, start button) and a status column (download log plus a file
    component that is revealed once a ZIP archive is available).

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    css = """
    body {
        background: #2b2b2b;
        font-family: 'Segoe UI', sans-serif;
    }
    .container {
        max-width: 1000px;
        margin: 20px auto;
        background: #36393f;
        padding: 25px;
        border-radius: 10px;
        box-shadow: 0 2px 10px rgba(0,0,0,0.3);
    }
    h1 {
        color: #7289da;
        text-align: center;
        margin-bottom: 15px;
    }
    .description {
        color: #b9bbbe;
        text-align: center;
        font-size: 14px;
        margin-bottom: 20px;
    }
    .input-group {
        background: #40444b;
        padding: 15px;
        border-radius: 8px;
        margin-bottom: 15px;
    }
    .status-box {
        background: #2f3136;
        border: 1px solid #202225;
        border-radius: 8px;
        padding: 15px;
        height: 350px;
        color: #dcddde;
        font-family: 'Courier New', monospace;
        font-size: 14px;
    }
    .button-primary {
        background: #7289da !important;
        color: white !important;
        border: none !important;
        border-radius: 5px !important;
        padding: 10px 20px !important;
        font-weight: bold !important;
        transition: all 0.3s !important;
    }
    .button-primary:hover {
        background: #677bc4 !important;
    }
    .button-secondary {
        background: #4f545c !important;
        color: #dcddde !important;
        border-radius: 5px !important;
        padding: 8px 15px !important;
    }
    .button-secondary:hover {
        background: #5c6169 !important;
    }
    .footer {
        text-align: center;
        color: #72767d;
        font-size: 12px;
        margin-top: 20px;
    }
    """
    with gr.Blocks(css=css, title="Image Downloader Pro") as demo:
        gr.Markdown("<h1>📷 Image Downloader Pro</h1>")
        gr.Markdown("<p class='description'>Tải ảnh từ Google Images với giao diện tối giản và thân thiện</p>")
        with gr.Row(elem_classes="container"):
            with gr.Column(scale=1, min_width=300):
                with gr.Group(elem_classes="input-group"):
                    gr.Markdown("#### Cài đặt tải")
                    initial_query = gr.Textbox(
                        label="Từ khóa ban đầu",
                        value="free images",
                        placeholder="Nhập từ khóa tìm kiếm..."
                    )
                    target_images = gr.Slider(
                        label="Số lượng ảnh",
                        minimum=1,
                        maximum=10000,
                        value=100,
                        step=1
                    )
                    max_per_term = gr.Slider(
                        label="Ảnh tối đa mỗi từ khóa",
                        minimum=1,
                        maximum=50,
                        value=20,
                        step=1
                    )
                    # NOTE(review): this path comes straight from the UI and
                    # is used as the write target on the machine running the
                    # app - confirm that is acceptable for the deployment.
                    save_folder = gr.Textbox(
                        label="Thư mục lưu",
                        value=os.path.join(os.getcwd(), "free_images"),
                        placeholder="Đường dẫn thư mục..."
                    )
                    zip_files = gr.Checkbox(label="Nén thành ZIP sau khi tải", value=True)
                submit_btn = gr.Button("Bắt đầu tải", elem_classes="button-primary")
            with gr.Column(scale=2):
                with gr.Group(elem_classes="input-group"):
                    gr.Markdown("#### Trạng thái")
                    output_status = gr.Textbox(
                        label="Nhật ký tải",
                        lines=15,
                        interactive=False,
                        elem_classes="status-box"
                    )
                # Hidden until a ZIP archive is actually produced.
                output_file = gr.File(label="File ZIP (nếu có)", visible=False)
        gr.Markdown("<p class='footer'>Powered by Gradio & xAI</p>")

        # Download handler
        def run_download(query, target, max_term, folder, zip_opt):
            """Run the download and map its result onto the two outputs."""
            # gr.update() is used instead of gr.File.update(): the
            # per-component ``update`` classmethods no longer exist in
            # Gradio 4+, while gr.update() works across versions.
            if not folder or not folder.strip():
                return "Vui lòng nhập thư mục lưu!", gr.update(visible=False)
            # Slider values may arrive as floats; the downloader wants ints.
            status, zip_path = start_download(query, int(target), int(max_term), folder, zip_opt)
            if zip_path:
                return status, gr.update(value=zip_path, visible=True)
            return status, gr.update(visible=False)

        submit_btn.click(
            fn=run_download,
            inputs=[initial_query, target_images, max_per_term, save_folder, zip_files],
            outputs=[output_status, output_file]
        )
    return demo
if __name__ == "__main__":
    # Build the Gradio app and start serving it only when run as a script.
    interface = create_interface()
    interface.launch()