# Page-header residue from the capture (not part of the program): "Spaces: Sleeping"
from pydrive2.auth import GoogleAuth | |
from pydrive2.drive import GoogleDrive | |
import os | |
import gradio as gr | |
from datasets import load_dataset, Dataset, concatenate_datasets | |
import pandas as pd | |
from PIL import Image | |
from tqdm import tqdm | |
import logging | |
import yaml | |
# Set up logging: timestamped "time - LEVEL - message" records at INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Load settings
# settings.yaml is read from the current working directory at import time.
# Opening directly (instead of checking os.path.exists first) avoids a
# check-then-open race; a missing file still surfaces as FileNotFoundError
# with the same operator-facing message.
try:
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # locale-default encoding.
    with open('settings.yaml', 'r', encoding='utf-8') as file:
        settings = yaml.safe_load(file)
except FileNotFoundError as err:
    raise FileNotFoundError(
        "settings.yaml file is missing. Please add it with 'client_secrets_file'."
    ) from err
# [... keep all the utility functions and DatasetManager class the same ...]
def process_pipeline(folder_id, naming_convention):
    """Main pipeline for processing images and updating dataset.

    Runs three steps on a fresh ``DatasetManager``: authenticate with Google
    Drive, download and rename the files of the given folder, then update the
    Hugging Face dataset with the renamed files.

    Args:
        folder_id: Google Drive folder ID. Surrounding whitespace is ignored.
        naming_convention: Naming convention passed to the download/rename
            step. Surrounding whitespace is ignored.

    Returns:
        A ``(status_message, renamed_files)`` tuple. ``renamed_files`` is an
        empty list when validation, authentication, or the download step
        fails; otherwise it is whatever the download/rename step returned.
    """
    # Normalize text inputs first: a value pasted with stray spaces/newlines
    # (or consisting only of whitespace) must not slip past validation.
    if isinstance(folder_id, str):
        folder_id = folder_id.strip()
    if isinstance(naming_convention, str):
        naming_convention = naming_convention.strip()

    # Validate input
    if not folder_id or not naming_convention:
        return "Please provide both folder ID and naming convention", []

    manager = DatasetManager()

    # Step 1: Authenticate Google Drive
    auth_success, auth_message = manager.authenticate_drive()
    if not auth_success:
        return auth_message, []

    # Step 2: Download and rename files
    success, message, renamed_files = manager.download_and_rename_files(
        folder_id, naming_convention
    )
    if not success:
        return message, []

    # Step 3: Update Hugging Face dataset
    # The success flag of this step is intentionally not branched on: its
    # message is appended to the status and the renamed files are returned
    # either way, so the caller can still see what was downloaded.
    _hf_success, hf_message = manager.update_huggingface_dataset(renamed_files)
    return f"{message}\n{hf_message}", renamed_files
def process_ui(folder_id, naming_convention):
    """UI handler for the process pipeline.

    Runs ``process_pipeline`` and converts its renamed-file records into table
    rows of ``[original_name, new_name, file_path]``.

    Returns:
        A ``(status, rows)`` tuple; ``rows`` is an empty list when the
        pipeline produced no renamed files.
    """
    status, renamed_files = process_pipeline(folder_id, naming_convention)

    rows = []
    if renamed_files:
        for entry in renamed_files:
            rows.append(
                [entry['original_name'], entry['new_name'], entry['file_path']]
            )
    return status, rows
# Simplified Gradio interface
# Two text inputs are passed positionally to process_ui(folder_id,
# naming_convention); its (status, table_data) return value fills the two
# outputs in order.
demo = gr.Interface(
    fn=process_ui,
    inputs=[
        # First positional argument of process_ui: the Drive folder ID.
        gr.Textbox(
            label="Google Drive Folder ID",
            placeholder="Enter the folder ID from the URL"
        ),
        # Second positional argument: naming convention, pre-filled with a default.
        gr.Textbox(
            label="Naming Convention",
            placeholder="e.g., sports_card",
            value="sports_card"
        )
    ],
    outputs=[
        # Status message returned by process_ui.
        gr.Textbox(label="Status"),
        # One row per renamed file; column order matches the rows process_ui builds.
        gr.Dataframe(
            headers=["Original Name", "New Name", "File Path"]
        )
    ],
    title="Sports Cards Dataset Processor",
    description="""
Instructions:
1. Enter the Google Drive folder ID (found in the folder's URL)
2. Specify a naming convention for the files (e.g., 'sports_card')
3. Click submit to start processing
Note: Only image files will be processed. Invalid images will be skipped.
"""
)
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch a server.
if __name__ == "__main__":
    demo.launch()