Spaces:

nikkmitra
/

mitra-voices

Runtime error

App Files Files Community

nikkmitra committed on Oct 18, 2024

Commit

85c5439

verified ·

1 Parent(s): 3fb02e0

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -364

app.py CHANGED Viewed

@@ -1,21 +1,24 @@
 import os
 import streamlit as st
-import pandas as pd
 from dotenv import load_dotenv
 from pymongo import MongoClient
 from bson import ObjectId
-from huggingface_hub import HfApi, HfFolder, delete_file
-import tempfile
-import base64
-import requests
-import urllib.parse  # For URL encoding
-# Load environment variables
 load_dotenv()
-# Get MongoDB URI and Hugging Face token from .env file
 mongodb_uri = os.getenv('MONGODB_URI')
-hf_token = os.getenv('HF_TOKEN')
 # Connect to MongoDB
 @st.cache_resource
@@ -23,368 +26,144 @@ def init_connection():
     return MongoClient(mongodb_uri)
 client = init_connection()
 db = client['mitra']
-voices_collection = db['voices']
-# Define the upload_to_huggingface function here
-def upload_to_huggingface(audio_file, voice_name):
-    api = HfApi()
-    # Set the Hugging Face token
-    HfFolder.save_token(hf_token)
-    # Create a temporary file to store the uploaded audio
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
-        temp_file.write(audio_file.getvalue())
-        temp_file_path = temp_file.name
-    try:
-        # Upload the file to Hugging Face
-        api.upload_file(
-            path_or_fileobj=temp_file_path,
-            path_in_repo=f"voices/{voice_name}.mp3",
-            repo_id="nikkmitra/clone",
-            repo_type="space",
-            token=hf_token  # Use the token from .env
         )
-        return True
-    except Exception as e:
-        st.error(f"Error uploading file: {str(e)}")
-        return False
-    finally:
-        # Clean up the temporary file
-        os.unlink(temp_file_path)
-# Function to add a new voice
-def add_voice(category, name, is_free):
-    # Strip leading/trailing spaces from the voice name
-    name = name.strip()
-    # Check for duplicate voice name within the same category
-    if voices_collection.find_one({"category": category, "voices.name": name}):
-        st.error(f"Voice '{name}' already exists in category '{category}'.")
-        return False
-    voices_collection.update_one(
-        {"category": category},
-        {"$push": {"voices": {"name": name, "free": is_free}}},
-        upsert=True  # Ensure the category exists
-    )
-    return True
-# Function to add a new voice with base64
-def add_voice_base64(category, name, is_free, audio_file):
-    # Strip leading/trailing spaces from the voice name
-    name = name.strip()
-    # Check for duplicate voice name within the same category
-    if voices_collection.find_one({"category": category, "voices.name": name}):
-        st.error(f"Voice '{name}' already exists in category '{category}'.")
-        return False
-    # Encode the audio file to base64
-    audio_base64 = base64.b64encode(audio_file.getvalue()).decode('utf-8')
-    # Update the database with the new voice
-    voices_collection.update_one(
-        {"category": category},
-        {"$push": {"voices": {"name": name, "free": is_free, "base64": audio_base64}}},
-        upsert=True  # Ensure the category exists
-    )
-    return True
-# Function to remove a voice
-def remove_voice(category, name):
-    # Find the voice to determine storage type
-    voice_doc = voices_collection.find_one({"category": category, "voices.name": name})
-    if not voice_doc:
-        st.error(f"Voice '{name}' not found in category '{category}'.")
-        return False
-    voice_data = next((v for v in voice_doc['voices'] if v['name'] == name), None)
-    if not voice_data:
-        st.error(f"Voice '{name}' not found in category '{category}'.")
-        return False
-    # Remove from MongoDB
-    result = voices_collection.update_one(
-        {"category": category},
-        {"$pull": {"voices": {"name": name}}}
-    )
-    # If removal was successful, handle deletion from Hugging Face if applicable
-    if result.modified_count > 0:
-        if 'base64' not in voice_data:
-            # Voice is stored on Hugging Face
-            try:
-                api = HfApi()
-                delete_file(
-                    path_in_repo=f"voices/{name}.mp3",
-                    repo_id="nikkmitra/clone",
-                    repo_type="space",
-                    token=hf_token
-                )
-                st.success(f"Deleted {name}'s voice file from Hugging Face repository")
-            except Exception as e:
-                st.warning(f"Failed to delete {name}'s voice file from Hugging Face: {str(e)}")
         else:
-            # Voice was stored as base64 in MongoDB
-            st.success(f"Removed {name} from the '{category}' category in the database")
-    return result.modified_count > 0
-# Function to update all voices by adding base64 field
-def update_all_voices():
-    st.info("Starting the bulk update process. This may take a while depending on the number of voices.")
-    # Fetch all categories and their voices
-    all_categories = voices_collection.find()
-    total_voices = 0
-    updated_voices = 0
-    failed_voices = []
-    for category in all_categories:
-        category_name = category['category']
-        voices = category.get('voices', [])
-        for voice in voices:
-            total_voices += 1
-            voice_name = voice['name'].strip()  # Strip leading/trailing spaces
-            # Check if 'base64' field already exists
-            if 'base64' in voice:
-                continue  # Skip voices already updated
-            # Construct the file path
-            file_path = f"voices/{voice_name}.mp3"
-            repo_id = "nikkmitra/clone"  # Update this if different
-            # Conditional space encoding
-            if ' ' in voice_name:
-                # Replace spaces with %20
-                encoded_voice_name = voice_name.replace(' ', '%20')
             else:
-                # No spaces, use the name as-is
-                encoded_voice_name = voice_name
-            # Reconstruct the file path with or without %20
-            encoded_file_path = f"voices/{encoded_voice_name}.mp3"
-            # Construct the URL
-            url = f"https://huggingface.co/spaces/{repo_id}/resolve/main/{encoded_file_path}"
-            st.write(f"Attempting to download: {url}")  # Log the URL
-            try:
-                # Download the audio file from Hugging Face
-                response = requests.get(url, headers={"Authorization": f"Bearer {hf_token}"})
-                if response.status_code == 200:
-                    audio_data = response.content
-                    # Encode to base64
-                    audio_base64 = base64.b64encode(audio_data).decode('utf-8')
-                    # Update the MongoDB document
-                    voices_collection.update_one(
-                        {"category": category_name, "voices.name": voice_name},
-                        {"$set": {"voices.$.base64": audio_base64}}
-                    )
-                    updated_voices += 1
-                else:
-                    failed_voices.append((category_name, voice_name, f"HTTP {response.status_code}"))
-                    st.error(f"Failed to download {voice_name}: HTTP {response.status_code}")
-            except Exception as e:
-                failed_voices.append((category_name, voice_name, str(e)))
-                st.error(f"Exception for {voice_name}: {str(e)}")
-    # Summary of the update process
-    st.success(f"Bulk update completed. Total voices processed: {total_voices}")
-    st.success(f"Voices successfully updated with base64: {updated_voices}")
-    if failed_voices:
-        st.error(f"Failed to update {len(failed_voices)} voices:")
-        for fail in failed_voices:
-            st.error(f"Category: {fail[0]}, Voice: {fail[1]}, Reason: {fail[2]}")
-    else:
-        st.success("All voices updated successfully!")
-# Function to sanitize existing voice names by removing leading/trailing spaces
-def sanitize_voice_names():
-    all_categories = voices_collection.find()
-    sanitized_count = 0
-    for category in all_categories:
-        category_name = category['category']
-        voices = category.get('voices', [])
-        for voice in voices:
-            original_name = voice['name']
-            sanitized_name = original_name.strip()
-            if original_name != sanitized_name:
-                # Update the voice name in MongoDB
-                voices_collection.update_one(
-                    {"category": category_name, "voices.name": original_name},
-                    {"$set": {"voices.$.name": sanitized_name}}
-                )
-                sanitized_count += 1
-                st.write(f"Sanitized voice name from '{original_name}' to '{sanitized_name}' in category '{category_name}'")
-    st.success(f"Sanitization complete. Total voice names sanitized: {sanitized_count}")
-# Streamlit app
-st.title("Mitra Voices")
-# Fetch existing categories from the database
-categories_cursor = voices_collection.find({}, {"category": 1, "_id": 0})
-categories = [cat['category'] for cat in categories_cursor]
-# Upload Voice Section
-st.header("Upload Voice")
-if categories:
-    # Select category from dropdown
-    selected_category = st.selectbox(
-        "Select Category",
-        categories,
-        help="Choose the category to which you want to add the voice"
-    )
-    # Input for voice name
-    voice_name = st.text_input(
-        "Voice Name",
-        placeholder="Enter the name of the voice"
-    )
-    # File uploader for MP3 file
-    st.write("Please upload an MP3 file of the voice:")
-    audio_file = st.file_uploader(
-        "Choose an MP3 file",
-        type="mp3",
-        help="Select an MP3 file of the voice"
-    )
-    # Storage method selection
-    storage_method = st.radio(
-        "Select Storage Method",
-        ("Upload to Hugging Face", "Store as Base64 in MongoDB"),
-        help="Choose how you want to store the voice file."
-    )
-    if selected_category and voice_name and audio_file:
-        st.audio(audio_file, format='audio/mp3')
-        st.write("Preview the uploaded audio:")
-        if storage_method == "Upload to Hugging Face":
-            if st.button("Upload Voice", type="primary"):
-                if upload_to_huggingface(audio_file, voice_name):
-                    st.success(f"✅ Uploaded {voice_name.strip()}'s voice to Hugging Face repository")
-                    # Add the new voice to the selected category in the database
-                    if add_voice(selected_category, voice_name, False):  # Assuming new uploads are not free by default
-                        st.success(f"✅ Added {voice_name.strip()} to the '{selected_category}' category in the database")
-                    # Clear the form by rerunning the app
-                    st.rerun()
-                else:
-                    st.error("❌ Failed to upload the voice file")
-        elif storage_method == "Store as Base64 in MongoDB":
-            if st.button("Store Voice", type="primary"):
-                if add_voice_base64(selected_category, voice_name, False, audio_file):
-                    st.success(f"✅ Stored {voice_name.strip()}'s voice in the '{selected_category}' category in the database as base64")
-                    st.rerun()
-                else:
-                    st.error("❌ Failed to store the voice file as base64")
-    elif selected_category and (voice_name or audio_file):
-        if not voice_name.strip():
-            st.info("Please enter the voice name to proceed")
-        if not audio_file:
-            st.info("Please upload an MP3 file of the voice to proceed")
-else:
-    st.info("No categories available. Please add a new category first.")
-# Add a separator
-st.markdown("---")
-# Button to clear the database
-def clear_database():
-    voices_collection.delete_many({})
-if st.button("Clear Database"):
-    clear_database()
-    st.success("Database cleared successfully.")
-    st.rerun()
-# Bulk Update Section
-st.markdown("---")
-st.header("Bulk Update Voices")
-if st.button("Update All Voices"):
-    update_all_voices()
-# Sanitize Voice Names Section
-st.markdown("---")
-st.header("Sanitize Voice Names")
-if st.button("Sanitize All Voice Names"):
-    sanitize_voice_names()
-# Add new voice category
-st.header("Add New Voice Category")
-new_category_name = st.text_input(
-    "New Category Name",
-    placeholder="Enter new category name"
-)
-if st.button("Add Category"):
-    if new_category_name.strip():
-        if voices_collection.find_one({"category": new_category_name.strip()}):
-            st.warning(f"Category '{new_category_name.strip()}' already exists.")
         else:
-            voices_collection.insert_one({"category": new_category_name.strip(), "voices": []})
-            st.success(f"Added new category: {new_category_name.strip()}")
-            st.rerun()
     else:
-        st.error("Please enter a category name")
-# Display and manage voice data
-st.header("Voice Categories")
-all_voices = voices_collection.find()
-for category in all_voices:
-    st.subheader(category['category'])
-    for i, voice in enumerate(category['voices']):
-        col1, col2, col3 = st.columns([3, 1, 1])
-        with col1:
-            st.write(f"{voice['name']} ({'Free' if voice['free'] else 'Premium'})")
-            if 'base64' in voice:
-                try:
-                    audio_data = base64.b64decode(voice['base64'])
-                    st.audio(audio_data, format='audio/mp3')
-                except Exception as e:
-                    st.error(f"Failed to load audio for {voice['name']}: {str(e)}")
-            else:
-                # If stored on Hugging Face, provide a link
-                repo_id = "nikkmitra/clone"  # Update if different
-                file_path = f"voices/{voice['name'].strip()}.mp3"
-                if ' ' in voice['name']:
-                    # Replace spaces with %20
-                    encoded_voice_name = voice['name'].strip().replace(' ', '%20')
-                else:
-                    encoded_voice_name = voice['name'].strip()
-                encoded_file_path = f"voices/{encoded_voice_name}.mp3"
-                url = f"https://huggingface.co/spaces/{repo_id}/resolve/main/{encoded_file_path}"
-                st.markdown(f"[🔗 Listen on Hugging Face]({url})")
-        with col2:
-            if st.button("Remove", key=f"remove_{category['category']}_{voice['name']}_{i}"):
-                if remove_voice(category['category'], voice['name']):
-                    st.success(f"Removed {voice['name']} from {category['category']}")
-                    st.rerun()
-                else:
-                    st.error(f"Failed to remove {voice['name']}")
-        with col3:
-            if st.button("Toggle Free", key=f"toggle_{category['category']}_{voice['name']}_{i}"):
-                voices_collection.update_one(
-                    {"category": category['category'], "voices.name": voice['name']},
-                    {"$set": {"voices.$.free": not voice['free']}}
-                )
-                st.rerun()

 import os
 import streamlit as st
 from dotenv import load_dotenv
 from pymongo import MongoClient
+import pandas as pd
 from bson import ObjectId
+import cloudinary
+import cloudinary.uploader
+# Load environment variables from .env file
 load_dotenv()
+# Get MongoDB URI from .env file
 mongodb_uri = os.getenv('MONGODB_URI')
+# Configure Cloudinary
+cloudinary.config(
+    cloud_name = os.getenv('CLOUDINARY_CLOUD_NAME'),
+    api_key = os.getenv('CLOUDINARY_API_KEY'),
+    api_secret = os.getenv('CLOUDINARY_API_SECRET')
+)
 # Connect to MongoDB
 @st.cache_resource
 def init_connection():
+    # st.cache_resource makes Streamlit build the client once and hand the same
+    # instance back on later reruns instead of reconnecting every time.
     return MongoClient(mongodb_uri)
 client = init_connection()
+# Access the 'mitra' database
 db = client['mitra']
+# Access the 'base-voices' collection
+collection = db['base-voices']
+# Streamlit app
+st.title('Base Voices Data Management')
+# Sidebar for actions
+st.sidebar.header('Actions')
+action = st.sidebar.radio('Choose an action:', ['View Data', 'Add Category', 'Remove Category', 'Add Voice', 'Remove Voice'])
+if action == 'View Data':
+    # Retrieve all documents from the collection.
+    # NOTE: st.cache_data keeps the first result for later reruns, so changes made
+    # through the other actions may not show up until the cache is cleared.
+    @st.cache_data
+    def get_base_voices():
+        """Return every category document as a list of dicts, without '_id'."""
+        return list(collection.find({}, {'_id': 0}))  # Exclude the '_id' field
+    data = get_base_voices()
+    # Display the data
+    if data:
+        # Normalize the data for better display: one table row per voice.
+        normalized_data = []
+        for category in data:
+            # A category document may lack 'voices'; treat it as empty instead
+            # of raising KeyError.
+            for voice in category.get('voices', []):
+                normalized_data.append({
+                    'Category': category['category'],
+                    'Voice Name': voice['name'],
+                    'Is Free': 'Yes' if voice['is_free'] else 'No',
+                    'File URL': voice['file_url']
+                })
+        df = pd.DataFrame(normalized_data)
+        # Display the table
+        st.subheader("Voice Data Table")
+        st.dataframe(df, use_container_width=True)
+        # Display audio players for each voice, grouped under the category name
+        st.subheader("Audio Samples")
+        for category in data:
+            st.write(f"**{category['category']}**")
+            for voice in category.get('voices', []):
+                # Wide column: label and player; narrow column: download link.
+                col1, col2 = st.columns([3, 1])
+                with col1:
+                    st.write(f"{voice['name']} ({'Free' if voice['is_free'] else 'Paid'})")
+                    st.audio(voice['file_url'])
+                with col2:
+                    st.markdown(f"[Download]({voice['file_url']})")
+            st.write("---")
+        # Optional: Add a download button for the flattened table
+        csv = df.to_csv(index=False)
+        st.download_button(
+            label="Download data as CSV",
+            data=csv,
+            file_name="base_voices.csv",
+            mime="text/csv",
         )
+    else:
+        st.write("No data found in the 'base-voices' collection.")
+elif action == 'Add Category':
+    st.header('Add New Category')
+    new_category = st.text_input('Enter new category name:')
+    if st.button('Add Category'):
+        # Ignore surrounding whitespace so ' Foo ' and 'Foo' name the same category.
+        category_name = new_category.strip()
+        if category_name:
+            # The other actions look categories up by name, so a second document
+            # with the same name would make those updates/deletes ambiguous.
+            if collection.find_one({'category': category_name}, {'_id': 1}):
+                st.warning(f'Category "{category_name}" already exists.')
+            else:
+                collection.insert_one({'category': category_name, 'voices': []})
+                st.success(f'Category "{category_name}" added successfully!')
         else:
+            st.error('Please enter a category name.')
+elif action == 'Remove Category':
+    st.header('Remove Category')
+    categories = [doc['category'] for doc in collection.find({}, {'category': 1})]
+    if not categories:
+        # Without options the selectbox has no selection, so the delete below
+        # could never match and would report a misleading failure.
+        st.info('No categories available to remove.')
+    else:
+        category_to_remove = st.selectbox('Select category to remove:', categories)
+        if st.button('Remove Category'):
+            result = collection.delete_one({'category': category_to_remove})
+            # deleted_count is 0 when no document matched the selected name.
+            if result.deleted_count > 0:
+                st.success(f'Category "{category_to_remove}" removed successfully!')
+            else:
+                st.error('Failed to remove category. Please try again.')
+elif action == 'Add Voice':
+    st.header('Add Voice to Category')
+    categories = [doc['category'] for doc in collection.find({}, {'category': 1})]
+    selected_category = st.selectbox('Select category:', categories)
+    voice_name = st.text_input('Enter voice name:')
+    voice_file = st.file_uploader("Upload voice file", type=['mp3', 'wav'])
+    is_free = st.checkbox('Is this voice free?')
+    if st.button('Add Voice'):
+        # Ignore surrounding whitespace so ' Bob ' and 'Bob' are the same voice.
+        clean_name = voice_name.strip()
+        # Run every cheap check before uploading, so a rejected request never
+        # leaves an orphaned file on Cloudinary.
+        if not (clean_name and voice_file):
+            st.error('Please enter a voice name and upload a file.')
+        elif selected_category is None:
+            st.error('Please add a category before adding a voice.')
+        elif collection.find_one(
+            {'category': selected_category, 'voices.name': clean_name}, {'_id': 1}
+        ):
+            # Voices are removed by name, so names must be unique per category.
+            st.error(f'Voice "{clean_name}" already exists in category "{selected_category}".')
+        else:
+            try:
+                # Upload file to Cloudinary
+                upload_result = cloudinary.uploader.upload(voice_file, resource_type="auto")
+                voice_url = upload_result['secure_url']
+            except Exception as exc:  # UI boundary: report the failure instead of crashing the app
+                st.error(f'Error uploading file: {exc}')
+            else:
+                new_voice = {
+                    'name': clean_name,
+                    'file_url': voice_url,
+                    'is_free': is_free
+                }
+                result = collection.update_one(
+                    {'category': selected_category},
+                    {'$push': {'voices': new_voice}}
+                )
+                # modified_count is 0 when no category document matched.
+                if result.modified_count > 0:
+                    st.success(f'Voice "{clean_name}" added to category "{selected_category}" successfully!')
+                else:
+                    st.error('Failed to add voice. Please try again.')
+elif action == 'Remove Voice':
+    st.header('Remove Voice from Category')
+    categories = [doc['category'] for doc in collection.find({}, {'category': 1})]
+    selected_category = st.selectbox('Select category:', categories)
+    category_doc = collection.find_one({'category': selected_category})
+    # Truthiness covers three cases at once: no matching document, a document
+    # without a 'voices' field, and an empty 'voices' list.
+    voices = category_doc.get('voices') if category_doc else None
+    if voices:
+        voice_names = [voice['name'] for voice in voices]
+        voice_to_remove = st.selectbox('Select voice to remove:', voice_names)
+        if st.button('Remove Voice'):
+            # $pull drops every array element whose 'name' equals the selection.
+            result = collection.update_one(
+                {'category': selected_category},
+                {'$pull': {'voices': {'name': voice_to_remove}}}
+            )
+            if result.modified_count > 0:
+                st.success(f'Voice "{voice_to_remove}" removed from category "{selected_category}" successfully!')
+            else:
+                st.error('Failed to remove voice. Please try again.')
     else:
+        st.warning(f'No voices found in category "{selected_category}".')
+# Refresh data after actions
+if st.button('Refresh Data'):
+    # Drop cached query results first; otherwise the rerun would redisplay the
+    # same memoized data. st.rerun() replaces the deprecated
+    # st.experimental_rerun().
+    st.cache_data.clear()
+    st.rerun()