Update app.py
app.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import sys
 import gradio as gr
 import torch
 import torchaudio
@@ -7,8 +8,14 @@ import numpy as np
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from outetts.wav_tokenizer.decoder import WavTokenizer
 
+# Clone and install YarnGPT at startup
+if not os.path.exists("yarngpt"):
+    print("Cloning YarnGPT repository...")
+    os.system("git clone https://github.com/saheedniyi02/yarngpt.git")
+# Add the repository to Python path
+sys.path.append("yarngpt")
+
 # Import the YarnGPT AudioTokenizer
-# Assuming the git repository is cloned in the same directory
 from yarngpt.audiotokenizer import AudioTokenizerV2
 
 # Constants and paths
@@ -17,17 +24,24 @@ WAV_TOKENIZER_CONFIG_PATH = "wavtokenizer_config.yaml"
 WAV_TOKENIZER_MODEL_PATH = "wavtokenizer_model.ckpt"
 
 # Download the model files at startup
-os.
-
-os.system("
+if not os.path.exists(WAV_TOKENIZER_CONFIG_PATH):
+    print("Downloading WavTokenizer config...")
+    os.system(f"wget -O {WAV_TOKENIZER_CONFIG_PATH} https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
+
+if not os.path.exists(WAV_TOKENIZER_MODEL_PATH):
+    print("Downloading WavTokenizer model...")
+    os.system(f"wget -O {WAV_TOKENIZER_MODEL_PATH} https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt")
 
 # Initialize the model and tokenizer
 def initialize_model():
+    print("Initializing AudioTokenizer and model...")
     audio_tokenizer = AudioTokenizerV2(
         MODEL_PATH,
         WAV_TOKENIZER_MODEL_PATH,
         WAV_TOKENIZER_CONFIG_PATH
     )
+
+    print("Loading YarnGPT model...")
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_PATH,
         torch_dtype="auto"
@@ -36,7 +50,9 @@ def initialize_model():
     return model, audio_tokenizer
 
 # Initialize the model and tokenizer
+print("Starting model initialization...")
 model, audio_tokenizer = initialize_model()
+print("Model initialization complete!")
 
 # Available voices and languages
 VOICES = ["idera", "jude", "kemi", "tunde", "funmi"]
@@ -75,6 +91,13 @@ def generate_speech(text, language, voice, temperature=0.1, rep_penalty=1.1):
     except Exception as e:
         return None, f"Error generating speech: {str(e)}"
 
+# Example text for demonstration
+examples = [
+    ["Hello, my name is Claude. I am an AI assistant created by Anthropic.", "english", "idera"],
+    ["Báwo ni o ṣe wà? Mo ń gbádùn ọjọ́ mi.", "yoruba", "kemi"],
+    ["I don dey come house now, make you prepare food.", "pidgin", "jude"]
+]
+
 # Create the Gradio interface
 with gr.Blocks(title="YarnGPT - Nigerian Accented Text-to-Speech") as demo:
     gr.Markdown("# YarnGPT - Nigerian Accented Text-to-Speech")
@@ -118,6 +141,14 @@ with gr.Blocks(title="YarnGPT - Nigerian Accented Text-to-Speech") as demo:
     audio_output = gr.Audio(label="Generated Speech")
     status_output = gr.Textbox(label="Status")
 
+    gr.Examples(
+        examples=examples,
+        inputs=[text_input, language, voice],
+        outputs=[audio_output, status_output],
+        fn=generate_speech,
+        cache_examples=False
+    )
+
     generate_btn.click(
         generate_speech,
         inputs=[text_input, language, voice, temperature, rep_penalty],
@@ -134,4 +165,5 @@ with gr.Blocks(title="YarnGPT - Nigerian Accented Text-to-Speech") as demo:
     """)
 
 # Launch the app
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
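A note on the bootstrap this commit adds: os.system swallows failures, so a missing git or wget binary would only surface later as an import or file-not-found error. The sketch below is one defensive alternative, not the app's actual code. It assumes the same GitHub repository and the two checkpoint files referenced by the wget URLs in the diff, swaps in subprocess.run and huggingface_hub.hf_hub_download for the shell calls, and the fetch_assets helper name is invented for illustration.

import os
import subprocess
import sys

from huggingface_hub import hf_hub_download

def fetch_assets():
    # Clone YarnGPT once; check=True raises CalledProcessError on failure
    # instead of continuing with a broken checkout.
    if not os.path.exists("yarngpt"):
        subprocess.run(
            ["git", "clone", "https://github.com/saheedniyi02/yarngpt.git"],
            check=True,
        )
    sys.path.append("yarngpt")

    # hf_hub_download fetches the same files as the wget calls in the diff,
    # but caches them and skips the network round-trip on warm restarts.
    config_path = hf_hub_download(
        repo_id="novateur/WavTokenizer-medium-speech-75token",
        filename="wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
    )
    model_path = hf_hub_download(
        repo_id="novateur/WavTokenizer-large-speech-75token",
        filename="wavtokenizer_large_speech_320_24k.ckpt",
    )
    return config_path, model_path

One wrinkle with this variant: hf_hub_download returns paths inside its local cache, so WAV_TOKENIZER_CONFIG_PATH and WAV_TOKENIZER_MODEL_PATH would have to be set from its return values rather than hard-coded, and the os.path.exists guards become unnecessary.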