# ai-card-hub / app.py
# Commit 85ad390 (verified), "Update app.py", uploaded by GotThatData; file size 5.08 kB.
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
import os
import gradio as gr
from datasets import load_dataset, Dataset
import pandas as pd
from PIL import Image
import shutil
from tqdm import tqdm
import logging
# Set up logging: configure the root logger at INFO level and create the
# module-level logger used by the rest of this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class DatasetManager:
    """Download images from a Google Drive folder and publish them to the Hub.

    Attributes:
        dataset_name: Default Hugging Face dataset name, used by
            ``update_huggingface_dataset`` when no name is passed explicitly.
        local_images_dir: Directory that downloaded images are written to.
        drive: Authenticated ``GoogleDrive`` client, or ``None`` until
            ``authenticate_drive`` succeeds.
    """

    def __init__(self, dataset_name=None, local_images_dir="downloaded_cards"):
        """Store the configuration and make sure the download directory exists."""
        self.dataset_name = dataset_name
        self.local_images_dir = local_images_dir
        self.drive = None
        # Create local directory if it doesn't exist
        os.makedirs(local_images_dir, exist_ok=True)

    def authenticate_drive(self):
        """Authenticate with Google Drive.

        Runs ``GoogleAuth.LocalWebserverAuth()`` and stores the resulting
        ``GoogleDrive`` client on ``self.drive``.

        Returns:
            A ``(success, message)`` tuple.
        """
        try:
            gauth = GoogleAuth()
            gauth.LocalWebserverAuth()
            self.drive = GoogleDrive(gauth)
        except Exception as e:
            # Boundary handler: the UI only shows the message, so keep the
            # traceback in the log.
            logger.exception("Google Drive authentication failed")
            return False, f"Authentication failed: {str(e)}"
        return True, "Successfully authenticated with Google Drive"

    @staticmethod
    def _remove_if_exists(path):
        """Delete *path* if it is present (used to drop partial/corrupt files)."""
        if os.path.exists(path):
            os.remove(path)

    def download_and_rename_files(self, drive_folder_id, naming_convention):
        """Download image files from a Drive folder and rename them.

        Every entry whose ``mimeType`` starts with ``image/`` is saved as
        ``<naming_convention>_<position>.jpg`` inside ``local_images_dir``,
        where ``position`` is the 1-based position of the entry in the folder
        listing. Files that cannot be opened and verified by PIL are logged,
        deleted and skipped.

        Args:
            drive_folder_id: ID of the Google Drive folder to list.
            naming_convention: Filename prefix for the downloaded images. It
                must not contain path separators.

        Returns:
            A ``(success, message, renamed_files)`` tuple. ``renamed_files``
            is a list of dicts with the keys ``file_path``, ``original_name``
            and ``new_name``; it is empty whenever ``success`` is ``False``.
        """
        if not self.drive:
            return False, "Google Drive not authenticated", []

        # The prefix is user supplied and ends up in a filesystem path; refuse
        # anything that could make the download land outside local_images_dir.
        prefix = str(naming_convention)
        if os.path.basename(prefix) != prefix:
            return False, "Naming convention must not contain path separators", []

        try:
            # List files in the folder. Backslashes and single quotes are
            # escaped so the ID cannot terminate the quoted query literal.
            safe_folder_id = (
                str(drive_folder_id).replace("\\", "\\\\").replace("'", "\\'")
            )
            query = f"'{safe_folder_id}' in parents and trashed=false"
            file_list = self.drive.ListFile({'q': query}).GetList()
            if not file_list:
                return False, "No files found in the specified folder", []

            renamed_files = []
            progress = tqdm(file_list, desc="Downloading files")
            for position, drive_file in enumerate(progress, start=1):
                if not drive_file['mimeType'].startswith('image/'):
                    continue

                # NOTE(review): the ".jpg" suffix is applied to every image
                # MIME type, so e.g. PNG content is stored under a .jpg name.
                new_filename = f"{prefix}_{position}.jpg"
                file_path = os.path.join(self.local_images_dir, new_filename)

                # Download file; a failed download still aborts the batch, but
                # the partially written file is removed first.
                try:
                    drive_file.GetContentFile(file_path)
                except Exception:
                    self._remove_if_exists(file_path)
                    raise

                # Verify the image can be opened
                try:
                    with Image.open(file_path) as img:
                        img.verify()
                    renamed_files.append({
                        'file_path': file_path,
                        'original_name': drive_file['title'],
                        'new_name': new_filename
                    })
                except Exception as e:
                    logger.error(f"Error processing image {drive_file['title']}: {str(e)}")
                    self._remove_if_exists(file_path)

            return True, f"Successfully processed {len(renamed_files)} images", renamed_files
        except Exception as e:
            logger.exception("Downloading files from Google Drive failed")
            return False, f"Error downloading files: {str(e)}", []

    def update_huggingface_dataset(self, dataset_name, renamed_files):
        """Push the downloaded-file records to a Hugging Face dataset.

        Args:
            dataset_name: Hub repository name. When empty or ``None``, the
                ``dataset_name`` given to the constructor is used instead.
            renamed_files: Records as returned by
                ``download_and_rename_files``.

        Returns:
            A ``(success, message)`` tuple.
        """
        target_name = dataset_name or self.dataset_name
        if not target_name:
            return False, "No Hugging Face dataset name provided"
        if not renamed_files:
            # Avoid pushing an empty dataset to the Hub.
            return False, "No images to upload to the Hugging Face dataset"

        try:
            # Create a DataFrame with the file information
            df = pd.DataFrame(renamed_files)
            # Create a Hugging Face Dataset
            # NOTE(review): the rows hold only the record fields (paths and
            # names as built by download_and_rename_files); the image bytes do
            # not appear to be embedded - confirm this is intended.
            dataset = Dataset.from_pandas(df)
            # Push to Hugging Face Hub
            dataset.push_to_hub(target_name)
        except Exception as e:
            logger.exception("Updating Hugging Face dataset failed")
            return False, f"Error updating Hugging Face dataset: {str(e)}"
        return True, f"Successfully updated dataset '{target_name}' with {len(renamed_files)} images"
def process_pipeline(folder_id, naming_convention, dataset_name):
    """Main pipeline to process images and update dataset.

    Authenticates with Google Drive, downloads and renames the images in the
    given folder and, when a dataset name is supplied, pushes the resulting
    records to the Hugging Face Hub.

    Args:
        folder_id: Google Drive folder ID. Surrounding whitespace is ignored.
        naming_convention: Filename prefix passed through to the downloader.
        dataset_name: Hugging Face dataset name; blank or ``None`` skips the
            upload step.

    Returns:
        A status message for display: the first failure message, or the
        download summary (followed by the upload result when an upload was
        requested).
    """
    folder_id = (folder_id or "").strip()
    dataset_name = (dataset_name or "").strip()

    # Fail fast: without a folder ID the Drive query cannot succeed, so do not
    # start the authentication flow at all.
    if not folder_id:
        return "Please provide a Google Drive folder ID"

    manager = DatasetManager()

    # Step 1: Authenticate
    auth_success, auth_message = manager.authenticate_drive()
    if not auth_success:
        return auth_message

    # Step 2: Download and rename files
    success, message, renamed_files = manager.download_and_rename_files(folder_id, naming_convention)
    if not success:
        return message

    # Step 3: Update Hugging Face dataset (optional)
    if not dataset_name:
        return message
    # The success flag is not needed here; the message already describes the
    # outcome of the upload.
    _, hf_message = manager.update_huggingface_dataset(dataset_name, renamed_files)
    return f"{message}\n{hf_message}"
# Gradio interface: three text inputs are passed positionally to
# process_pipeline and its returned status string is shown in one textbox.
demo = gr.Interface(
    fn=process_pipeline,
    inputs=[
        gr.Textbox(
            label="Google Drive Folder ID",
            placeholder="Enter the folder ID from your Google Drive URL",
        ),
        gr.Textbox(
            label="Naming Convention",
            placeholder="e.g., card",
            value="card",
        ),
        # Optional field. `required` is not a Textbox parameter (it raises a
        # TypeError on Gradio releases with explicit signatures), so the
        # optionality is conveyed by the placeholder text only.
        gr.Textbox(
            label="Hugging Face Dataset Name",
            placeholder="username/dataset-name (optional)",
        ),
    ],
    outputs=gr.Textbox(label="Status"),
    title="Card Image Processor",
    description="Download card images from Google Drive and add them to your Hugging Face dataset",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()