nikkmitra committed on
Commit
85c5439
·
verified ·
1 Parent(s): 3fb02e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -364
app.py CHANGED
@@ -1,21 +1,24 @@
1
  import os
2
  import streamlit as st
3
- import pandas as pd
4
  from dotenv import load_dotenv
5
  from pymongo import MongoClient
 
6
  from bson import ObjectId
7
- from huggingface_hub import HfApi, HfFolder, delete_file
8
- import tempfile
9
- import base64
10
- import requests
11
- import urllib.parse # For URL encoding
12
 
13
- # Load environment variables
14
  load_dotenv()
15
 
16
- # Get MongoDB URI and Hugging Face token from .env file
17
  mongodb_uri = os.getenv('MONGODB_URI')
18
- hf_token = os.getenv('HF_TOKEN')
 
 
 
 
 
 
19
 
20
  # Connect to MongoDB
21
  @st.cache_resource
@@ -23,368 +26,144 @@ def init_connection():
23
  return MongoClient(mongodb_uri)
24
 
25
  client = init_connection()
 
 
26
  db = client['mitra']
27
- voices_collection = db['voices']
28
 
# Upload a voice sample to the Hugging Face Space that hosts the audio files
def upload_to_huggingface(audio_file, voice_name):
    """Upload an MP3 to the ``nikkmitra/clone`` Space as ``voices/<name>.mp3``.

    Args:
        audio_file: Uploaded file object exposing ``getvalue()`` (bytes).
        voice_name: Display name of the voice. Surrounding whitespace is
            ignored when building the repository path.

    Returns:
        True if the upload succeeded, False otherwise (the error is shown
        in the Streamlit UI).
    """
    # add_voice() stores the stripped name, so the repository path has to use
    # the same normalised form or later lookups by name would miss the file.
    clean_name = voice_name.strip()

    api = HfApi()

    # Set the Hugging Face token
    HfFolder.save_token(hf_token)

    temp_file_path = None
    try:
        # Stage the upload in a temporary file. The path is recorded before
        # writing so the file is removed even if the write itself fails.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(audio_file.getvalue())

        # Upload the file to Hugging Face
        api.upload_file(
            path_or_fileobj=temp_file_path,
            path_in_repo=f"voices/{clean_name}.mp3",
            repo_id="nikkmitra/clone",
            repo_type="space",
            token=hf_token,  # Use the token from .env
        )
        return True
    except Exception as e:
        st.error(f"Error uploading file: {str(e)}")
        return False
    finally:
        # Clean up the temporary file
        if temp_file_path is not None:
            os.unlink(temp_file_path)
57
-
# Function to add a new voice
def add_voice(category, name, is_free):
    """Append a voice entry to ``category`` in the voices collection.

    Args:
        category: Category name the voice belongs to. The category document
            is created if it does not exist yet (upsert).
        name: Voice name. Leading/trailing whitespace is removed.
        is_free: Stored as the voice's ``free`` flag.

    Returns:
        True if the voice was added, False if the name is empty or already
        present in the category (an error is shown in the UI).
    """
    # Strip leading/trailing spaces from the voice name
    name = name.strip()

    # A blank name would create an entry that cannot be selected or removed
    if not name:
        st.error("Voice name cannot be empty.")
        return False

    # Check for duplicate voice name within the same category
    if voices_collection.find_one({"category": category, "voices.name": name}):
        st.error(f"Voice '{name}' already exists in category '{category}'.")
        return False

    voices_collection.update_one(
        {"category": category},
        {"$push": {"voices": {"name": name, "free": is_free}}},
        upsert=True,  # Ensure the category exists
    )
    return True
73
-
# Function to add a new voice with base64
def add_voice_base64(category, name, is_free, audio_file):
    """Append a voice entry whose audio is embedded as base64 text.

    Args:
        category: Category name the voice belongs to. The category document
            is created if it does not exist yet (upsert).
        name: Voice name. Leading/trailing whitespace is removed.
        is_free: Stored as the voice's ``free`` flag.
        audio_file: Uploaded file object exposing ``getvalue()`` (bytes).

    Returns:
        True if the voice was stored, False if the name is empty or already
        present in the category (an error is shown in the UI).
    """
    # Strip leading/trailing spaces from the voice name
    name = name.strip()

    # A blank name would create an entry that cannot be selected or removed
    if not name:
        st.error("Voice name cannot be empty.")
        return False

    # Check for duplicate voice name within the same category
    if voices_collection.find_one({"category": category, "voices.name": name}):
        st.error(f"Voice '{name}' already exists in category '{category}'.")
        return False

    # Encode the audio file to base64
    audio_base64 = base64.b64encode(audio_file.getvalue()).decode('utf-8')

    # Update the database with the new voice
    voices_collection.update_one(
        {"category": category},
        {"$push": {"voices": {"name": name, "free": is_free, "base64": audio_base64}}},
        upsert=True,  # Ensure the category exists
    )

    return True
95
-
# Function to remove a voice
def remove_voice(category, name):
    """Remove a voice from ``category`` and, if needed, its hosted audio file.

    Voices without a ``base64`` field are treated as stored on Hugging Face,
    and their ``voices/<name>.mp3`` file is deleted from the Space as well.
    A failed remote delete only produces a warning.

    Args:
        category: Category name holding the voice.
        name: Voice name exactly as stored in the database.

    Returns:
        True if the database entry was removed, False otherwise.
    """
    not_found_message = f"Voice '{name}' not found in category '{category}'."

    # Find the voice to determine storage type
    voice_doc = voices_collection.find_one({"category": category, "voices.name": name})
    if not voice_doc:
        st.error(not_found_message)
        return False
    voice_data = next((v for v in voice_doc['voices'] if v['name'] == name), None)
    if not voice_data:
        st.error(not_found_message)
        return False

    # Remove from MongoDB
    result = voices_collection.update_one(
        {"category": category},
        {"$pull": {"voices": {"name": name}}},
    )
    removed = result.modified_count > 0
    if not removed:
        return False

    if 'base64' in voice_data:
        # Voice was stored as base64 in MongoDB
        st.success(f"Removed {name} from the '{category}' category in the database")
        return True

    # Voice is stored on Hugging Face. The rest of this file builds the
    # repository path from the stripped name, so do the same here.
    try:
        delete_file(
            path_in_repo=f"voices/{name.strip()}.mp3",
            repo_id="nikkmitra/clone",
            repo_type="space",
            token=hf_token,
        )
        st.success(f"Deleted {name}'s voice file from Hugging Face repository")
    except Exception as e:
        st.warning(f"Failed to delete {name}'s voice file from Hugging Face: {str(e)}")
    return True
133
-
# Function to update all voices by adding base64 field
def update_all_voices():
    """Back-fill a ``base64`` field on every voice that does not have one.

    For each such voice the MP3 is downloaded from the ``nikkmitra/clone``
    Space, base64-encoded and written onto the matching array element.
    Progress, failures and a final summary are reported in the Streamlit UI.
    """
    st.info("Starting the bulk update process. This may take a while depending on the number of voices.")

    repo_id = "nikkmitra/clone"  # Update this if different
    auth_headers = {"Authorization": f"Bearer {hf_token}"}

    total_voices = 0
    updated_voices = 0
    failed_voices = []

    # Fetch all categories and their voices
    for category in voices_collection.find():
        category_name = category['category']

        for voice in category.get('voices', []):
            total_voices += 1

            # Check if 'base64' field already exists
            if 'base64' in voice:
                continue  # Skip voices already updated

            stored_name = voice['name']
            voice_name = stored_name.strip()  # Strip leading/trailing spaces

            # Percent-encode the whole name (spaces become %20, and other
            # reserved characters such as '#' or '?' are escaped too).
            encoded_file_path = f"voices/{urllib.parse.quote(voice_name)}.mp3"

            # Construct the URL
            url = f"https://huggingface.co/spaces/{repo_id}/resolve/main/{encoded_file_path}"

            st.write(f"Attempting to download: {url}")  # Log the URL

            try:
                # Download the audio file from Hugging Face. The timeout keeps
                # one unresponsive request from hanging the whole bulk run.
                response = requests.get(url, headers=auth_headers, timeout=30)
                if response.status_code != 200:
                    failed_voices.append((category_name, voice_name, f"HTTP {response.status_code}"))
                    st.error(f"Failed to download {voice_name}: HTTP {response.status_code}")
                    continue

                # Encode to base64
                audio_base64 = base64.b64encode(response.content).decode('utf-8')

                # Match on the name exactly as stored. Filtering on the
                # stripped name would silently miss unsanitised entries.
                result = voices_collection.update_one(
                    {"category": category_name, "voices.name": stored_name},
                    {"$set": {"voices.$.base64": audio_base64}},
                )
                if result.matched_count:
                    updated_voices += 1
                else:
                    failed_voices.append((category_name, voice_name, "voice not found in database"))
                    st.error(f"Failed to update {voice_name}: voice not found in database")
            except Exception as e:
                failed_voices.append((category_name, voice_name, str(e)))
                st.error(f"Exception for {voice_name}: {str(e)}")

    # Summary of the update process
    st.success(f"Bulk update completed. Total voices processed: {total_voices}")
    st.success(f"Voices successfully updated with base64: {updated_voices}")

    if failed_voices:
        st.error(f"Failed to update {len(failed_voices)} voices:")
        for fail in failed_voices:
            st.error(f"Category: {fail[0]}, Voice: {fail[1]}, Reason: {fail[2]}")
    else:
        st.success("All voices updated successfully!")
210
-
# Function to sanitize existing voice names by removing leading/trailing spaces
def sanitize_voice_names():
    """Trim surrounding whitespace from every stored voice name.

    Each renamed voice is logged in the UI, followed by a final count.
    """
    renamed = 0

    for doc in voices_collection.find():
        owner = doc['category']

        for entry in doc.get('voices', []):
            stored = entry['name']
            trimmed = stored.strip()

            # Nothing to do for names that are already clean
            if stored == trimmed:
                continue

            # Rewrite the matching array element in place
            voices_collection.update_one(
                {"category": owner, "voices.name": stored},
                {"$set": {"voices.$.name": trimmed}}
            )
            renamed += 1
            st.write(f"Sanitized voice name from '{stored}' to '{trimmed}' in category '{owner}'")

    st.success(f"Sanitization complete. Total voice names sanitized: {renamed}")
233
-
# Streamlit app
st.title("Mitra Voices")

# Fetch existing categories from the database
categories_cursor = voices_collection.find({}, {"category": 1, "_id": 0})
categories = [cat['category'] for cat in categories_cursor]

# Upload Voice Section
st.header("Upload Voice")

if categories:
    # Select category from dropdown
    selected_category = st.selectbox(
        "Select Category",
        categories,
        help="Choose the category to which you want to add the voice"
    )

    # Input for voice name
    voice_name = st.text_input(
        "Voice Name",
        placeholder="Enter the name of the voice"
    )

    # File uploader for MP3 file
    st.write("Please upload an MP3 file of the voice:")
    audio_file = st.file_uploader(
        "Choose an MP3 file",
        type="mp3",
        help="Select an MP3 file of the voice"
    )

    # Storage method selection
    storage_method = st.radio(
        "Select Storage Method",
        ("Upload to Hugging Face", "Store as Base64 in MongoDB"),
        help="Choose how you want to store the voice file."
    )

    # Normalise once: a whitespace-only entry is not a usable name, and the
    # same trimmed value must reach both the uploader and the database.
    clean_voice_name = voice_name.strip()

    if selected_category and clean_voice_name and audio_file:
        # Caption first, so it sits above the player it describes
        st.write("Preview the uploaded audio:")
        st.audio(audio_file, format='audio/mp3')

        if storage_method == "Upload to Hugging Face":
            if st.button("Upload Voice", type="primary"):
                if upload_to_huggingface(audio_file, clean_voice_name):
                    st.success(f"✅ Uploaded {clean_voice_name}'s voice to Hugging Face repository")

                    # Add the new voice to the selected category in the database
                    if add_voice(selected_category, clean_voice_name, False):  # Assuming new uploads are not free by default
                        st.success(f"✅ Added {clean_voice_name} to the '{selected_category}' category in the database")

                        # Clear the form by rerunning the app. This happens only
                        # on success, so a duplicate-name error stays visible.
                        st.rerun()
                else:
                    st.error("❌ Failed to upload the voice file")
        elif storage_method == "Store as Base64 in MongoDB":
            if st.button("Store Voice", type="primary"):
                if add_voice_base64(selected_category, clean_voice_name, False, audio_file):
                    st.success(f"✅ Stored {clean_voice_name}'s voice in the '{selected_category}' category in the database as base64")
                    st.rerun()
                else:
                    st.error("❌ Failed to store the voice file as base64")
    elif selected_category and (voice_name or audio_file):
        # Partially filled form: tell the user which input is still missing
        if not clean_voice_name:
            st.info("Please enter the voice name to proceed")
        if not audio_file:
            st.info("Please upload an MP3 file of the voice to proceed")
else:
    st.info("No categories available. Please add a new category first.")
304
-
# Add a separator
st.markdown("---")


# Button to clear the database
def clear_database():
    """Delete every document in the voices collection."""
    voices_collection.delete_many({})


# Wiping the collection cannot be undone, so the button stays disabled until
# the user explicitly acknowledges the consequence.
confirm_clear = st.checkbox("I understand this permanently deletes all categories and voices")
if st.button("Clear Database", disabled=not confirm_clear):
    clear_database()
    st.success("Database cleared successfully.")
    st.rerun()

# Bulk Update Section
st.markdown("---")
st.header("Bulk Update Voices")

if st.button("Update All Voices"):
    update_all_voices()

# Sanitize Voice Names Section
st.markdown("---")
st.header("Sanitize Voice Names")

if st.button("Sanitize All Voice Names"):
    sanitize_voice_names()
330
-
# Form for creating a new, initially empty voice category
st.header("Add New Voice Category")
new_category_name = st.text_input(
    "New Category Name",
    placeholder="Enter new category name"
)

if st.button("Add Category"):
    # Trim once and reuse the result for validation, lookup and insert
    trimmed_category = new_category_name.strip()

    if not trimmed_category:
        st.error("Please enter a category name")
    elif voices_collection.find_one({"category": trimmed_category}):
        st.warning(f"Category '{trimmed_category}' already exists.")
    else:
        voices_collection.insert_one({"category": trimmed_category, "voices": []})
        st.success(f"Added new category: {trimmed_category}")
        st.rerun()
348
-
# Display and manage voice data
st.header("Voice Categories")

all_voices = voices_collection.find()
for category in all_voices:
    category_name = category['category']
    st.subheader(category_name)

    # Tolerate category documents that have no 'voices' array yet
    for i, voice in enumerate(category.get('voices', [])):
        voice_name = voice['name']
        # A voice without the flag is shown (and toggled) as not free
        is_free = voice.get('free', False)

        col1, col2, col3 = st.columns([3, 1, 1])
        with col1:
            st.write(f"{voice_name} ({'Free' if is_free else 'Premium'})")
            if 'base64' in voice:
                try:
                    audio_data = base64.b64decode(voice['base64'])
                    st.audio(audio_data, format='audio/mp3')
                except Exception as e:
                    st.error(f"Failed to load audio for {voice_name}: {str(e)}")
            else:
                # If stored on Hugging Face, provide a link
                repo_id = "nikkmitra/clone"  # Update if different
                # Percent-encode the whole name so spaces and other reserved
                # characters all yield a valid URL.
                encoded_voice_name = urllib.parse.quote(voice_name.strip())
                encoded_file_path = f"voices/{encoded_voice_name}.mp3"
                url = f"https://huggingface.co/spaces/{repo_id}/resolve/main/{encoded_file_path}"
                st.markdown(f"[🔗 Listen on Hugging Face]({url})")
        with col2:
            # The index keeps widget keys unique even for duplicate names
            if st.button("Remove", key=f"remove_{category_name}_{voice_name}_{i}"):
                if remove_voice(category_name, voice_name):
                    st.success(f"Removed {voice_name} from {category_name}")
                    st.rerun()
                else:
                    st.error(f"Failed to remove {voice_name}")
        with col3:
            if st.button("Toggle Free", key=f"toggle_{category_name}_{voice_name}_{i}"):
                voices_collection.update_one(
                    {"category": category_name, "voices.name": voice_name},
                    {"$set": {"voices.$.free": not is_free}}
                )
                st.rerun()
 
1
  import os
2
  import streamlit as st
 
3
  from dotenv import load_dotenv
4
  from pymongo import MongoClient
5
+ import pandas as pd
6
  from bson import ObjectId
7
+ import cloudinary
8
+ import cloudinary.uploader
 
 
 
9
 
# Pull settings from the local .env file into the process environment
load_dotenv()

# MongoDB connection string, read from the environment
mongodb_uri = os.environ.get('MONGODB_URI')

# Cloudinary credentials, one environment variable per config keyword
_cloudinary_settings = {
    'cloud_name': os.environ.get('CLOUDINARY_CLOUD_NAME'),
    'api_key': os.environ.get('CLOUDINARY_API_KEY'),
    'api_secret': os.environ.get('CLOUDINARY_API_SECRET'),
}
cloudinary.config(**_cloudinary_settings)
22
 
23
  # Connect to MongoDB
24
  @st.cache_resource
 
26
  return MongoClient(mongodb_uri)
27
 
28
  client = init_connection()
29
+
30
+ # Access the 'mitra' database
31
  db = client['mitra']
 
32
 
33
+ # Access the 'base-voices' collection
34
+ collection = db['base-voices']
 
 
 
 
 
 
 
 
 
35
 
# Streamlit app
st.title('Base Voices Data Management')

# Sidebar for actions
st.sidebar.header('Actions')
action = st.sidebar.radio(
    'Choose an action:',
    ['View Data', 'Add Category', 'Remove Category', 'Add Voice', 'Remove Voice'],
)


@st.cache_data
def get_base_voices():
    """Return all category documents (``_id`` excluded) as a list of dicts.

    The result is memoised by Streamlit. The write branches below call
    ``get_base_voices.clear()`` so the next view reflects their change.
    """
    return list(collection.find({}, {'_id': 0}))


def _category_names():
    """Return the ``category`` value of every document that has one."""
    return [
        doc['category']
        for doc in collection.find({}, {'category': 1})
        if 'category' in doc
    ]


if action == 'View Data':
    data = get_base_voices()

    if data:
        # Flatten category -> voices into one row per voice. ``.get`` keeps
        # the page usable when a document is missing an expected field.
        normalized_data = [
            {
                'Category': category.get('category', ''),
                'Voice Name': voice.get('name', ''),
                'Is Free': 'Yes' if voice.get('is_free') else 'No',
                'File URL': voice.get('file_url', ''),
            }
            for category in data
            for voice in category.get('voices', [])
        ]

        # Explicit columns keep the table and CSV headers present even when
        # no category contains a voice yet.
        df = pd.DataFrame(
            normalized_data,
            columns=['Category', 'Voice Name', 'Is Free', 'File URL'],
        )

        # Display the table
        st.subheader("Voice Data Table")
        st.dataframe(df, use_container_width=True)

        # Display audio players for each voice
        st.subheader("Audio Samples")
        for category in data:
            st.write(f"**{category.get('category', '')}**")
            for voice in category.get('voices', []):
                file_url = voice.get('file_url')
                col1, col2 = st.columns([3, 1])
                with col1:
                    st.write(f"{voice.get('name', '')} ({'Free' if voice.get('is_free') else 'Paid'})")
                    if file_url:
                        st.audio(file_url)
                    else:
                        st.caption('No audio file recorded for this voice.')
                with col2:
                    if file_url:
                        st.markdown(f"[Download]({file_url})")
            st.write("---")

        # Optional: Add a download button
        csv = df.to_csv(index=False)
        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name="base_voices.csv",
            mime="text/csv",
        )
    else:
        st.write("No data found in the 'base-voices' collection.")

elif action == 'Add Category':
    st.header('Add New Category')
    new_category = st.text_input('Enter new category name:')
    if st.button('Add Category'):
        category_name = new_category.strip()
        if not category_name:
            st.error('Please enter a category name.')
        elif collection.find_one({'category': category_name}):
            # Two documents with one name would make the later
            # select-by-name actions ambiguous.
            st.warning(f'Category "{category_name}" already exists.')
        else:
            collection.insert_one({'category': category_name, 'voices': []})
            get_base_voices.clear()
            st.success(f'Category "{category_name}" added successfully!')

elif action == 'Remove Category':
    st.header('Remove Category')
    categories = _category_names()
    if not categories:
        st.info('No categories available to remove.')
    else:
        category_to_remove = st.selectbox('Select category to remove:', categories)
        if st.button('Remove Category'):
            # NOTE: only the MongoDB document is deleted here. Nothing in
            # this script removes the files previously uploaded to Cloudinary.
            result = collection.delete_one({'category': category_to_remove})
            if result.deleted_count > 0:
                get_base_voices.clear()
                st.success(f'Category "{category_to_remove}" removed successfully!')
            else:
                st.error('Failed to remove category. Please try again.')

elif action == 'Add Voice':
    st.header('Add Voice to Category')
    categories = _category_names()
    if not categories:
        st.info('No categories available. Please add a category first.')
    else:
        selected_category = st.selectbox('Select category:', categories)
        voice_name = st.text_input('Enter voice name:')
        voice_file = st.file_uploader("Upload voice file", type=['mp3', 'wav'])
        is_free = st.checkbox('Is this voice free?')

        if st.button('Add Voice'):
            clean_name = voice_name.strip()
            if not (clean_name and voice_file):
                st.error('Please enter a voice name and upload a file.')
            elif collection.find_one({'category': selected_category, 'voices.name': clean_name}):
                # Checked before uploading so a rejected duplicate does not
                # leave an orphaned file behind. 'Remove Voice' pulls by
                # name, so names must be unique within a category.
                st.error(f'Voice "{clean_name}" already exists in category "{selected_category}".')
            else:
                try:
                    # Upload file to Cloudinary
                    upload_result = cloudinary.uploader.upload(voice_file, resource_type="auto")
                    voice_url = upload_result['secure_url']
                except Exception as exc:
                    # UI boundary: report the failure instead of crashing the page
                    st.error(f'Failed to upload voice file: {exc}')
                else:
                    new_voice = {
                        'name': clean_name,
                        'file_url': voice_url,
                        'is_free': is_free,
                    }
                    result = collection.update_one(
                        {'category': selected_category},
                        {'$push': {'voices': new_voice}},
                    )
                    if result.modified_count > 0:
                        get_base_voices.clear()
                        st.success(f'Voice "{clean_name}" added to category "{selected_category}" successfully!')
                    else:
                        st.error('Failed to add voice. Please try again.')

elif action == 'Remove Voice':
    st.header('Remove Voice from Category')
    categories = _category_names()
    if not categories:
        st.info('No categories available.')
    else:
        selected_category = st.selectbox('Select category:', categories)
        category_doc = collection.find_one({'category': selected_category})
        # Treat a missing document, a missing key and an empty list alike
        stored_voices = (category_doc or {}).get('voices') or []
        if stored_voices:
            voice_names = [voice['name'] for voice in stored_voices if 'name' in voice]
            voice_to_remove = st.selectbox('Select voice to remove:', voice_names)
            if st.button('Remove Voice'):
                result = collection.update_one(
                    {'category': selected_category},
                    {'$pull': {'voices': {'name': voice_to_remove}}},
                )
                if result.modified_count > 0:
                    get_base_voices.clear()
                    st.success(f'Voice "{voice_to_remove}" removed from category "{selected_category}" successfully!')
                else:
                    st.error('Failed to remove voice. Please try again.')
        else:
            st.warning(f'No voices found in category "{selected_category}".')
 
 
 
166
 
# Refresh data after actions
if st.button('Refresh Data'):
    # Drop memoised query results first. Otherwise the rerun would simply
    # redisplay whatever @st.cache_data stored on an earlier run.
    st.cache_data.clear()
    # st.experimental_rerun is deprecated in favour of st.rerun. Fall back to
    # the old name only on Streamlit versions that predate st.rerun.
    _rerun = getattr(st, 'rerun', None) or getattr(st, 'experimental_rerun')
    _rerun()