ai-card-hub / app.py
GotThatData's picture
yeah
71ac033 verified
raw
history blame
2.82 kB
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
import os
import gradio as gr
from datasets import load_dataset, Dataset, concatenate_datasets
import pandas as pd
from PIL import Image
from tqdm import tqdm
import logging
import yaml
# Set up logging
# Root configuration: INFO and above, each record rendered as
# "<timestamp> - <LEVEL> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Load settings
# settings.yaml is required at import time. Open it directly instead of
# checking for existence first: that avoids a check-then-use race and still
# raises the same FileNotFoundError with the same guidance when it is absent.
try:
    # Explicit UTF-8 so parsing does not depend on the platform locale.
    with open("settings.yaml", "r", encoding="utf-8") as file:
        # safe_load builds plain Python objects only (no arbitrary tags).
        settings = yaml.safe_load(file)
except FileNotFoundError:
    raise FileNotFoundError(
        "settings.yaml file is missing. Please add it with 'client_secrets_file'."
    ) from None
# NOTE: the utility functions and the DatasetManager class belong here, unchanged (elided in this listing).
def process_pipeline(folder_id, naming_convention):
    """Run the full pipeline: authenticate, download/rename, update dataset.

    Args:
        folder_id: Google Drive folder ID to process. When it is a string,
            surrounding whitespace is ignored.
        naming_convention: Naming convention passed to the rename step.
            When it is a string, surrounding whitespace is ignored.

    Returns:
        A ``(status_message, renamed_files)`` tuple. ``renamed_files`` is an
        empty list when validation, authentication, or the download/rename
        step fails; otherwise it is the list returned by
        ``DatasetManager.download_and_rename_files``.
    """
    # Normalise before validating: whitespace-only input would otherwise pass
    # the emptiness check and be sent on as a bogus folder ID / name prefix.
    if isinstance(folder_id, str):
        folder_id = folder_id.strip()
    if isinstance(naming_convention, str):
        naming_convention = naming_convention.strip()

    # Validate input
    if not folder_id or not naming_convention:
        return "Please provide both folder ID and naming convention", []

    manager = DatasetManager()

    # Step 1: Authenticate Google Drive
    auth_success, auth_message = manager.authenticate_drive()
    if not auth_success:
        return auth_message, []

    # Step 2: Download and rename files
    success, message, renamed_files = manager.download_and_rename_files(
        folder_id, naming_convention
    )
    if not success:
        return message, []

    # Step 3: Update Hugging Face dataset
    # The success flag is deliberately unused: the outcome is reported through
    # hf_message, and the renamed files are returned either way.
    _upload_ok, hf_message = manager.update_huggingface_dataset(renamed_files)
    return f"{message}\n{hf_message}", renamed_files
def process_ui(folder_id, naming_convention):
    """Run the pipeline and shape its result for the UI outputs.

    Returns:
        ``(status, rows)`` where ``rows`` holds one
        ``[original_name, new_name, file_path]`` list per renamed file, or
        an empty list when there are none.
    """
    status, renamed_files = process_pipeline(folder_id, naming_convention)

    rows = []
    if renamed_files:
        for entry in renamed_files:
            rows.append(
                [entry['original_name'], entry['new_name'], entry['file_path']]
            )
    return status, rows
# Simplified Gradio interface
def _build_demo():
    """Create the gr.Interface that wires process_ui to its widgets."""
    # Inputs first, then outputs, so components are created in the same
    # order as when they were declared inline.
    folder_id_box = gr.Textbox(
        label="Google Drive Folder ID",
        placeholder="Enter the folder ID from the URL",
    )
    naming_box = gr.Textbox(
        label="Naming Convention",
        placeholder="e.g., sports_card",
        value="sports_card",
    )
    status_box = gr.Textbox(label="Status")
    results_table = gr.Dataframe(
        headers=["Original Name", "New Name", "File Path"],
    )

    # Spelled out line by line; the leading "\n" is part of the text.
    usage_notes = (
        "\n"
        "Instructions:\n"
        "1. Enter the Google Drive folder ID (found in the folder's URL)\n"
        "2. Specify a naming convention for the files (e.g., 'sports_card')\n"
        "3. Click submit to start processing\n"
        "Note: Only image files will be processed. Invalid images will be skipped.\n"
    )

    return gr.Interface(
        fn=process_ui,
        inputs=[folder_id_box, naming_box],
        outputs=[status_box, results_table],
        title="Sports Cards Dataset Processor",
        description=usage_notes,
    )


# Built at import time under the module-level name `demo`.
demo = _build_demo()

if __name__ == "__main__":
    demo.launch()