# Spaces: Sleeping  (appears to be page-status text captured with the file; not part of the program)
from pydrive2.auth import GoogleAuth | |
from pydrive2.drive import GoogleDrive | |
import os | |
import gradio as gr | |
from datasets import load_dataset, Dataset | |
import pandas as pd | |
from PIL import Image | |
import shutil | |
from tqdm import tqdm | |
import logging | |
# Set up logging: configure the root logger at INFO level and create the
# module-level logger used by the rest of this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class DatasetManager:
    """Download images from a Google Drive folder and publish them to the Hub.

    Every public operation reports its outcome as a tuple whose first element
    is a success flag and whose second element is a human-readable message, so
    callers can show the message to the user without handling exceptions.
    """

    def __init__(self, dataset_name=None, local_images_dir="downloaded_cards"):
        """Store the configuration and make sure the download directory exists.

        Args:
            dataset_name: Default dataset name, used by
                ``update_huggingface_dataset`` when it is called without one.
            local_images_dir: Directory the downloaded images are written to.
        """
        self.dataset_name = dataset_name
        self.local_images_dir = local_images_dir
        self.drive = None  # populated by authenticate_drive()
        os.makedirs(local_images_dir, exist_ok=True)

    def authenticate_drive(self):
        """Authenticate with Google Drive and keep the client on ``self.drive``.

        Returns:
            ``(success, message)``.
        """
        try:
            gauth = GoogleAuth()
            # NOTE(review): LocalWebserverAuth is PyDrive2's browser-based
            # flow; confirm it is usable in the environment this runs in.
            gauth.LocalWebserverAuth()
            self.drive = GoogleDrive(gauth)
        except Exception as e:
            logger.exception("Google Drive authentication failed")
            return False, f"Authentication failed: {str(e)}"
        return True, "Successfully authenticated with Google Drive"

    def download_and_rename_files(self, drive_folder_id, naming_convention):
        """Download the images of a Drive folder under sequential new names.

        Files whose MIME type does not start with ``image/`` are skipped. Each
        image is saved as ``{naming_convention}_{n}.jpg`` where ``n`` is the
        1-based position of the file in the folder listing (so skipped files
        leave gaps in the numbering). A file that fails to download or fails
        image verification is logged, removed from disk, and skipped; it does
        not abort the rest of the batch.

        Args:
            drive_folder_id: ID of the Google Drive folder to read.
            naming_convention: Filename prefix; must not contain path
                separators.

        Returns:
            ``(success, message, renamed_files)`` where ``renamed_files`` is a
            list of dicts with the keys ``file_path``, ``original_name`` and
            ``new_name``.
        """
        if not self.drive:
            return False, "Google Drive not authenticated", []

        folder_id = (drive_folder_id or "").strip()
        if not folder_id:
            return False, "No Google Drive folder ID provided", []

        # The prefix becomes part of a filesystem path; refuse anything that
        # could point outside the download directory.
        prefix = str(naming_convention)
        if "/" in prefix or "\\" in prefix:
            return False, "Naming convention must not contain path separators", []

        try:
            # Escape backslashes and quotes so the ID cannot break out of the
            # quoted literal in the Drive query string.
            escaped_id = folder_id.replace("\\", "\\\\").replace("'", "\\'")
            query = f"'{escaped_id}' in parents and trashed=false"
            file_list = self.drive.ListFile({'q': query}).GetList()
            if not file_list:
                return False, "No files found in the specified folder", []

            renamed_files = []
            progress = tqdm(file_list, desc="Downloading files")
            for position, drive_file in enumerate(progress, start=1):
                if not drive_file['mimeType'].startswith('image/'):
                    continue

                # NOTE(review): the ".jpg" suffix is applied regardless of the
                # source MIME type; the bytes are stored as downloaded.
                new_filename = f"{prefix}_{position}.jpg"
                file_path = os.path.join(self.local_images_dir, new_filename)

                try:
                    drive_file.GetContentFile(file_path)
                    # Verify the download is a readable image before keeping it.
                    with Image.open(file_path) as img:
                        img.verify()
                except Exception as e:
                    logger.error(
                        "Error processing image %s: %s", drive_file['title'], e
                    )
                    if os.path.exists(file_path):
                        os.remove(file_path)
                    continue

                renamed_files.append({
                    'file_path': file_path,
                    'original_name': drive_file['title'],
                    'new_name': new_filename,
                })

            return (
                True,
                f"Successfully processed {len(renamed_files)} images",
                renamed_files,
            )
        except Exception as e:
            logger.exception("Error downloading files from Google Drive")
            return False, f"Error downloading files: {str(e)}", []

    def update_huggingface_dataset(self, dataset_name, renamed_files):
        """Build a dataset from the file records and push it to the Hub.

        Args:
            dataset_name: Target dataset name; falls back to the name given to
                the constructor when empty.
            renamed_files: Records as returned by
                ``download_and_rename_files``.

        Returns:
            ``(success, message)``. Nothing is pushed when no dataset name is
            available or when ``renamed_files`` is empty.
        """
        target = dataset_name or self.dataset_name
        if not target:
            return False, "No Hugging Face dataset name provided"
        if not renamed_files:
            # Pushing an empty table would publish a dataset with no rows.
            return False, "No images to upload; dataset was not updated"

        try:
            # NOTE(review): the pushed table holds only the path/name columns
            # of each record, not the image bytes - confirm this is intended.
            df = pd.DataFrame(renamed_files)
            dataset = Dataset.from_pandas(df)
            dataset.push_to_hub(target)
        except Exception as e:
            logger.exception("Error updating Hugging Face dataset %s", target)
            return False, f"Error updating Hugging Face dataset: {str(e)}"
        return (
            True,
            f"Successfully updated dataset '{target}' with {len(renamed_files)} images",
        )
def process_pipeline(folder_id, naming_convention, dataset_name):
    """Authenticate, download and rename the images, then optionally publish.

    Args:
        folder_id: Google Drive folder ID to read images from.
        naming_convention: Filename prefix for the downloaded images.
        dataset_name: Hugging Face dataset name; when blank the upload step
            is skipped.

    Returns:
        A status message for display. When an upload is attempted, the
        download message and the upload message are joined with a newline.
    """
    folder_id = (folder_id or "").strip()
    dataset_name = (dataset_name or "").strip()

    # Validate first so a missing ID does not start the authentication flow.
    if not folder_id:
        return "Please provide a Google Drive folder ID"

    manager = DatasetManager()

    # Step 1: Authenticate
    auth_success, auth_message = manager.authenticate_drive()
    if not auth_success:
        return auth_message

    # Step 2: Download and rename files
    success, message, renamed_files = manager.download_and_rename_files(
        folder_id, naming_convention
    )
    if not success:
        return message

    # Step 3: Update the Hugging Face dataset, only when a name was given and
    # there is at least one image to publish.
    if not dataset_name or not renamed_files:
        return message
    _, hf_message = manager.update_huggingface_dataset(dataset_name, renamed_files)
    return f"{message}\n{hf_message}"
# Gradio interface: three text inputs feeding process_pipeline, one status
# text output.
demo = gr.Interface(
    fn=process_pipeline,
    inputs=[
        gr.Textbox(
            label="Google Drive Folder ID",
            placeholder="Enter the folder ID from your Google Drive URL",
        ),
        gr.Textbox(
            label="Naming Convention",
            placeholder="e.g., card",
            value="card",
        ),
        # No `required` keyword here: gr.Textbox does not define one, and an
        # empty value already makes process_pipeline skip the upload step.
        gr.Textbox(
            label="Hugging Face Dataset Name",
            placeholder="username/dataset-name (optional)",
        ),
    ],
    outputs=gr.Textbox(label="Status"),
    title="Card Image Processor",
    description="Download card images from Google Drive and add them to your Hugging Face dataset",
)

if __name__ == "__main__":
    demo.launch()