from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
import os
import gradio as gr
from datasets import load_dataset, Dataset, concatenate_datasets
import pandas as pd
from PIL import Image
from tqdm import tqdm
import logging
import yaml

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Load settings
if not os.path.exists("settings.yaml"):
    raise FileNotFoundError("settings.yaml is missing. Please create it with a 'client_secrets_file' entry.")

with open('settings.yaml', 'r') as file:
    settings = yaml.safe_load(file)
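
# For reference, a minimal settings.yaml might look like the following
# (illustrative; the 'client_secrets_file' key mirrors the error message
# above -- adjust the keys to match your actual PyDrive2 configuration):
#
#   client_secrets_file: client_secrets.json
#   save_credentials: true
#   save_credentials_file: credentials.json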

[... keep all the utility functions and DatasetManager class the same ...]
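
# ---------------------------------------------------------------------------
# Illustrative sketch only: the actual utility functions and DatasetManager
# class are elided above. Method names and return shapes are inferred from
# their call sites in process_pipeline() below; everything else (download
# directory, dataset repo id, renaming scheme) is an assumption.
# ---------------------------------------------------------------------------
class DatasetManager:
    def __init__(self, download_dir="downloads"):  # assumed staging directory
        self.download_dir = download_dir
        self.drive = None

    def authenticate_drive(self):
        """Authenticate against Google Drive. Returns (success, message)."""
        try:
            gauth = GoogleAuth(settings_file="settings.yaml")
            gauth.LocalWebserverAuth()  # opens a browser for OAuth consent
            self.drive = GoogleDrive(gauth)
            return True, "Authenticated with Google Drive"
        except Exception as e:
            logger.error(f"Drive authentication failed: {e}")
            return False, f"Authentication failed: {e}"

    def download_and_rename_files(self, folder_id, naming_convention):
        """Download images from a Drive folder and rename them sequentially.

        Returns (success, message, renamed_files), where renamed_files is a
        list of dicts with 'original_name', 'new_name', and 'file_path' keys,
        matching what process_ui() reads below.
        """
        try:
            os.makedirs(self.download_dir, exist_ok=True)
            query = f"'{folder_id}' in parents and trashed=false"
            drive_files = self.drive.ListFile({'q': query}).GetList()
            renamed_files = []
            for i, f in enumerate(tqdm(drive_files, desc="Downloading")):
                if not f['mimeType'].startswith('image/'):
                    continue  # only image files are processed
                ext = os.path.splitext(f['title'])[1] or '.jpg'
                new_name = f"{naming_convention}_{i + 1:04d}{ext}"
                file_path = os.path.join(self.download_dir, new_name)
                f.GetContentFile(file_path)
                try:
                    Image.open(file_path).verify()  # invalid images are skipped
                except Exception:
                    os.remove(file_path)
                    continue
                renamed_files.append({'original_name': f['title'],
                                      'new_name': new_name,
                                      'file_path': file_path})
            return True, f"Downloaded {len(renamed_files)} image(s)", renamed_files
        except Exception as e:
            return False, f"Download failed: {e}", []

    def update_huggingface_dataset(self, renamed_files):
        """Append new rows to the Hub dataset. Returns (success, message)."""
        try:
            new_rows = Dataset.from_pandas(pd.DataFrame(renamed_files))
            # 'user/sports-cards' is a placeholder repo id, not the author's.
            try:
                existing = load_dataset("user/sports-cards", split="train")
                combined = concatenate_datasets([existing, new_rows])
            except Exception:
                combined = new_rows  # first push: no existing dataset yet
            combined.push_to_hub("user/sports-cards")
            return True, f"Dataset updated with {len(renamed_files)} new row(s)"
        except Exception as e:
            return False, f"Dataset update failed: {e}"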

def process_pipeline(folder_id, naming_convention):
    """Main pipeline for processing images and updating dataset."""
    # Validate input
    if not folder_id or not naming_convention:
        return "Please provide both folder ID and naming convention", []

    manager = DatasetManager()

    # Step 1: Authenticate Google Drive
    auth_success, auth_message = manager.authenticate_drive()
    if not auth_success:
        return auth_message, []

    # Step 2: Download and rename files
    success, message, renamed_files = manager.download_and_rename_files(folder_id, naming_convention)
    if not success:
        return message, []

    # Step 3: Update Hugging Face dataset
    success, hf_message = manager.update_huggingface_dataset(renamed_files)
    return f"{message}\n{hf_message}", renamed_files

def process_ui(folder_id, naming_convention):
    """UI handler for the process pipeline."""
    status, renamed_files = process_pipeline(folder_id, naming_convention)
    table_data = [[f['original_name'], f['new_name'], f['file_path']]
                  for f in renamed_files] if renamed_files else []
    return status, table_data

# Simplified Gradio interface
demo = gr.Interface(
    fn=process_ui,
    inputs=[
        gr.Textbox(
            label="Google Drive Folder ID",
            placeholder="Enter the folder ID from the URL"
        ),
        gr.Textbox(
            label="Naming Convention",
            placeholder="e.g., sports_card",
            value="sports_card"
        )
    ],
    outputs=[
        gr.Textbox(label="Status"),
        gr.Dataframe(
            headers=["Original Name", "New Name", "File Path"]
        )
    ],
    title="Sports Cards Dataset Processor",
    description="""
    Instructions:
    1. Enter the Google Drive folder ID (found in the folder's URL)
    2. Specify a naming convention for the files (e.g., 'sports_card')
    3. Click submit to start processing
    
    Note: Only image files will be processed. Invalid images will be skipped.
    """
)

if __name__ == "__main__":
    demo.launch()
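
# Optional: from a notebook or a machine without a reachable local port,
# Gradio can serve a temporary public URL via demo.launch(share=True).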