GotThatData committed on
Commit
85ad390
·
verified ·
1 Parent(s): bcde0da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -6
app.py CHANGED
@@ -1,9 +1,137 @@
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
2
 
3
- # Simple card greeting function
4
- def greet(name):
5
- return "Hello " + name + "!!"
6
 
7
- # Gradio Interface
8
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
9
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydrive2.auth import GoogleAuth
2
+ from pydrive2.drive import GoogleDrive
3
+ import os
4
  import gradio as gr
5
+ from datasets import load_dataset, Dataset
6
+ import pandas as pd
7
+ from PIL import Image
8
+ import shutil
9
+ from tqdm import tqdm
10
+ import logging
11
 
12
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class DatasetManager:
    """Download images from a Google Drive folder and publish their records to the Hugging Face Hub.

    Typical use is ``authenticate_drive()`` followed by
    ``download_and_rename_files()`` and, optionally,
    ``update_huggingface_dataset()``. Every public method reports its outcome
    as a tuple whose first element is a success flag and whose second element
    is a human-readable status message; none of them raise on failure.
    """

    def __init__(self, dataset_name=None, local_images_dir="downloaded_cards"):
        """Store the configuration and make sure the download directory exists.

        Args:
            dataset_name: Optional dataset identifier kept on the instance.
            local_images_dir: Directory that downloaded images are written to.
                It is created if it does not exist.
        """
        self.dataset_name = dataset_name
        self.local_images_dir = local_images_dir
        self.drive = None  # populated by authenticate_drive()

        # Create local directory if it doesn't exist
        os.makedirs(local_images_dir, exist_ok=True)

    @staticmethod
    def _safe_prefix(naming_convention):
        """Reduce a user-supplied filename prefix to its last path component.

        The prefix ends up inside ``os.path.join(local_images_dir, ...)``, so
        any directory part ("../", "a/b", "C:\\x") is dropped to keep every
        download inside ``local_images_dir``.
        """
        text = str(naming_convention).replace("\\", "/")
        return text.rstrip("/").rsplit("/", 1)[-1]

    def authenticate_drive(self):
        """Authenticate with Google Drive via PyDrive2's local web-server flow.

        Returns:
            ``(True, message)`` on success (``self.drive`` is then set), or
            ``(False, message)`` carrying the error text on failure.
        """
        try:
            gauth = GoogleAuth()
            gauth.LocalWebserverAuth()
            self.drive = GoogleDrive(gauth)
            return True, "Successfully authenticated with Google Drive"
        except Exception as e:
            # Boundary handler: the UI only shows the returned string, so keep
            # the traceback in the log for debugging.
            logger.exception("Google Drive authentication failed")
            return False, f"Authentication failed: {str(e)}"

    def download_and_rename_files(self, drive_folder_id, naming_convention):
        """Download the images in a Drive folder and save them under new names.

        Files whose ``mimeType`` does not start with ``image/`` are skipped.
        Each image is saved as ``<prefix>_<n>.jpg`` where ``n`` is the file's
        1-based position in the folder listing (so skipped files leave gaps),
        then opened with PIL to verify it; files that fail verification are
        logged and deleted.

        Args:
            drive_folder_id: ID of the Google Drive folder to list.
            naming_convention: Filename prefix; only its last path component
                is used.

        Returns:
            ``(success, message, renamed_files)`` where ``renamed_files`` is a
            list of dicts with ``file_path``, ``original_name`` and
            ``new_name`` keys. The list is empty whenever ``success`` is False.
        """
        if not self.drive:
            return False, "Google Drive not authenticated", []

        try:
            # Escape backslashes and single quotes so the ID cannot terminate
            # the quoted literal and alter the query.
            folder_id = str(drive_folder_id).replace("\\", "\\\\").replace("'", "\\'")
            query = f"'{folder_id}' in parents and trashed=false"
            file_list = self.drive.ListFile({'q': query}).GetList()

            if not file_list:
                return False, "No files found in the specified folder", []

            prefix = self._safe_prefix(naming_convention)
            renamed_files = []
            for i, file in enumerate(tqdm(file_list, desc="Downloading files")):
                if not file['mimeType'].startswith('image/'):
                    continue

                # NOTE: the ".jpg" suffix is applied regardless of mimeType;
                # the bytes are stored exactly as downloaded.
                new_filename = f"{prefix}_{i+1}.jpg"
                file_path = os.path.join(self.local_images_dir, new_filename)

                # Download file (a failure here aborts the whole batch via the
                # outer handler).
                file.GetContentFile(file_path)

                # Verify the image can be opened
                try:
                    with Image.open(file_path) as img:
                        img.verify()
                    renamed_files.append({
                        'file_path': file_path,
                        'original_name': file['title'],
                        'new_name': new_filename,
                    })
                except Exception as e:
                    logger.error("Error processing image %s: %s", file['title'], e)
                    if os.path.exists(file_path):
                        os.remove(file_path)

            return True, f"Successfully processed {len(renamed_files)} images", renamed_files
        except Exception as e:
            return False, f"Error downloading files: {str(e)}", []

    def update_huggingface_dataset(self, dataset_name, renamed_files):
        """Build a dataset from the file records and push it to the Hugging Face Hub.

        The dataset is created from ``renamed_files`` as a DataFrame, so its
        columns are the keys of those dicts.

        Args:
            dataset_name: Target repository, e.g. ``"username/dataset-name"``.
            renamed_files: Records as returned by ``download_and_rename_files``.

        Returns:
            ``(True, message)`` after a successful push, otherwise
            ``(False, message)``. An empty ``renamed_files`` is reported as a
            failure without contacting the Hub.
        """
        if not renamed_files:
            # Pushing an empty, column-less dataset would only clobber the
            # target repository.
            return False, f"No images to upload; dataset '{dataset_name}' was not updated"

        try:
            # Create a DataFrame with the file information
            df = pd.DataFrame(renamed_files)

            # Create a Hugging Face Dataset
            dataset = Dataset.from_pandas(df)

            # Push to Hugging Face Hub
            dataset.push_to_hub(dataset_name)

            return True, f"Successfully updated dataset '{dataset_name}' with {len(renamed_files)} images"
        except Exception as e:
            return False, f"Error updating Hugging Face dataset: {str(e)}"
90
+
91
def process_pipeline(folder_id, naming_convention, dataset_name):
    """Run the download-and-publish pipeline and return a status string for the UI.

    Steps: authenticate with Google Drive, download and rename the images in
    the folder, then (only when a dataset name is given) push the resulting
    records to the Hugging Face Hub.

    Args:
        folder_id: Google Drive folder ID. Blank or ``None`` is rejected
            before any authentication is attempted.
        naming_convention: Filename prefix for downloaded images; falls back
            to ``"card"`` when blank.
        dataset_name: Hugging Face dataset repository; blank or ``None`` skips
            the upload step.

    Returns:
        A human-readable status message. When the upload step runs, the
        download message and the upload message are joined by a newline.
    """
    # Text boxes can deliver None or whitespace-padded values; normalise once.
    folder_id = (folder_id or "").strip()
    naming_convention = (naming_convention or "").strip() or "card"
    dataset_name = (dataset_name or "").strip()

    # Fail fast: without a folder ID there is nothing to list, so do not start
    # an authentication flow.
    if not folder_id:
        return "Please provide a Google Drive folder ID"

    manager = DatasetManager()

    # Step 1: Authenticate
    auth_success, auth_message = manager.authenticate_drive()
    if not auth_success:
        return auth_message

    # Step 2: Download and rename files
    success, message, renamed_files = manager.download_and_rename_files(folder_id, naming_convention)
    if not success:
        return message

    # Step 3: Update Hugging Face dataset (optional)
    if not dataset_name:
        return message

    # The success flag is not needed here: the message already says whether
    # the upload worked.
    _, hf_message = manager.update_huggingface_dataset(dataset_name, renamed_files)
    return f"{message}\n{hf_message}"
111
+
112
# Gradio interface: three text inputs feed process_pipeline and its status
# string is shown in a single output box.
demo = gr.Interface(
    fn=process_pipeline,
    inputs=[
        gr.Textbox(
            label="Google Drive Folder ID",
            placeholder="Enter the folder ID from your Google Drive URL",
        ),
        gr.Textbox(
            label="Naming Convention",
            placeholder="e.g., card",
            value="card",
        ),
        # No `required=False` here: it is not a gr.Textbox parameter, and an
        # unknown keyword fails at construction time on Gradio versions that
        # reject extra kwargs. The field is optional simply because
        # process_pipeline skips the upload when it is left empty.
        gr.Textbox(
            label="Hugging Face Dataset Name",
            placeholder="username/dataset-name (optional)",
        ),
    ],
    outputs=gr.Textbox(label="Status"),
    title="Card Image Processor",
    description="Download card images from Google Drive and add them to your Hugging Face dataset",
)

if __name__ == "__main__":
    demo.launch()