# Hugging Face Space: Sports Cards Dataset Processor
# (Web-page scrape residue — blob hashes / line-number gutter — removed.)
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
import os
import gradio as gr
from datasets import load_dataset, Dataset, concatenate_datasets
import pandas as pd
from PIL import Image
from tqdm import tqdm
import logging
import yaml
# Set up logging: timestamped INFO-level messages to the root handler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Load settings for PyDrive2 Google Drive auth.
# settings.yaml is expected to define 'client_secrets_file' (per pydrive2 convention);
# fail fast with a clear message if the file is absent.
if not os.path.exists("settings.yaml"):
    raise FileNotFoundError("settings.yaml file is missing. Please add it with 'client_secrets_file'.")
with open('settings.yaml', 'r') as file:
    settings = yaml.safe_load(file)
# [Utility functions and DatasetManager class omitted here — unchanged from the previous revision.]
def process_pipeline(folder_id, naming_convention):
    """Main pipeline for processing images and updating the dataset.

    Authenticates against Google Drive, downloads and renames the files in
    the given folder, then pushes the renamed files to the Hugging Face
    dataset via DatasetManager.

    Args:
        folder_id: Google Drive folder ID (from the folder's URL).
        naming_convention: Filename prefix/pattern for renamed files.

    Returns:
        Tuple of (status_message, renamed_files). renamed_files is a list
        of per-file dicts on success, or an empty list on any failure.
    """
    # Validate input up front so we never hit the Drive API with blanks.
    if not folder_id or not naming_convention:
        return "Please provide both folder ID and naming convention", []

    manager = DatasetManager()

    # Step 1: Authenticate Google Drive
    auth_success, auth_message = manager.authenticate_drive()
    if not auth_success:
        return auth_message, []

    # Step 2: Download and rename files
    success, message, renamed_files = manager.download_and_rename_files(folder_id, naming_convention)
    if not success:
        return message, []

    # Step 3: Update Hugging Face dataset
    success, hf_message = manager.update_huggingface_dataset(renamed_files)
    return f"{message}\n{hf_message}", renamed_files
def process_ui(folder_id, naming_convention):
    """UI handler for the process pipeline.

    Runs process_pipeline and reshapes its result into the row format
    expected by the Gradio Dataframe output.

    Returns:
        Tuple of (status_message, table_rows) where each row is
        [original_name, new_name, file_path].
    """
    status, renamed_files = process_pipeline(folder_id, naming_convention)
    # 'entry' avoids shadowing the builtin 'file' name used in the original.
    table_data = [[entry['original_name'], entry['new_name'], entry['file_path']]
                  for entry in renamed_files] if renamed_files else []
    return status, table_data
# Simplified Gradio interface: two text inputs (folder ID, naming convention),
# a status textbox, and a table of the renamed files.
demo = gr.Interface(
    fn=process_ui,
    inputs=[
        gr.Textbox(
            label="Google Drive Folder ID",
            placeholder="Enter the folder ID from the URL"
        ),
        gr.Textbox(
            label="Naming Convention",
            placeholder="e.g., sports_card",
            value="sports_card"
        )
    ],
    outputs=[
        gr.Textbox(label="Status"),
        gr.Dataframe(
            headers=["Original Name", "New Name", "File Path"]
        )
    ],
    title="Sports Cards Dataset Processor",
    description="""
    Instructions:
    1. Enter the Google Drive folder ID (found in the folder's URL)
    2. Specify a naming convention for the files (e.g., 'sports_card')
    3. Click submit to start processing
    Note: Only image files will be processed. Invalid images will be skipped.
    """
)
# Launch the Gradio app only when run as a script (stray scrape artifact '|' removed).
if __name__ == "__main__":
    demo.launch()