Update app.py
app.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import sys
 import gradio as gr
 import torch
 import torchaudio
@@ -7,8 +8,14 @@ import numpy as np
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from outetts.wav_tokenizer.decoder import WavTokenizer
 
+# Clone and install YarnGPT at startup
+if not os.path.exists("yarngpt"):
+    print("Cloning YarnGPT repository...")
+    os.system("git clone https://github.com/saheedniyi02/yarngpt.git")
+# Add the repository to Python path
+sys.path.append("yarngpt")
+
 # Import the YarnGPT AudioTokenizer
-# Assuming the git repository is cloned in the same directory
 from yarngpt.audiotokenizer import AudioTokenizerV2
 
 # Constants and paths
@@ -17,17 +24,24 @@ WAV_TOKENIZER_CONFIG_PATH = "wavtokenizer_config.yaml"
 WAV_TOKENIZER_MODEL_PATH = "wavtokenizer_model.ckpt"
 
 # Download the model files at startup
-os.
-
-os.system("
+if not os.path.exists(WAV_TOKENIZER_CONFIG_PATH):
+    print("Downloading WavTokenizer config...")
+    os.system(f"wget -O {WAV_TOKENIZER_CONFIG_PATH} https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
+
+if not os.path.exists(WAV_TOKENIZER_MODEL_PATH):
+    print("Downloading WavTokenizer model...")
+    os.system(f"wget -O {WAV_TOKENIZER_MODEL_PATH} https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt")
 
 # Initialize the model and tokenizer
 def initialize_model():
+    print("Initializing AudioTokenizer and model...")
     audio_tokenizer = AudioTokenizerV2(
         MODEL_PATH,
         WAV_TOKENIZER_MODEL_PATH,
         WAV_TOKENIZER_CONFIG_PATH
     )
+
+    print("Loading YarnGPT model...")
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_PATH,
         torch_dtype="auto"
@@ -36,7 +50,9 @@ def initialize_model():
     return model, audio_tokenizer
 
 # Initialize the model and tokenizer
+print("Starting model initialization...")
 model, audio_tokenizer = initialize_model()
+print("Model initialization complete!")
 
 # Available voices and languages
 VOICES = ["idera", "jude", "kemi", "tunde", "funmi"]
@@ -75,6 +91,13 @@ def generate_speech(text, language, voice, temperature=0.1, rep_penalty=1.1):
     except Exception as e:
         return None, f"Error generating speech: {str(e)}"
 
+# Example text for demonstration
+examples = [
+    ["Hello, my name is Claude. I am an AI assistant created by Anthropic.", "english", "idera"],
+    ["Báwo ni o ṣe wà? Mo ń gbádùn ọjọ́ mi.", "yoruba", "kemi"],
+    ["I don dey come house now, make you prepare food.", "pidgin", "jude"]
+]
+
 # Create the Gradio interface
 with gr.Blocks(title="YarnGPT - Nigerian Accented Text-to-Speech") as demo:
     gr.Markdown("# YarnGPT - Nigerian Accented Text-to-Speech")
@@ -118,6 +141,14 @@ with gr.Blocks(title="YarnGPT - Nigerian Accented Text-to-Speech") as demo:
     audio_output = gr.Audio(label="Generated Speech")
     status_output = gr.Textbox(label="Status")
 
+    gr.Examples(
+        examples=examples,
+        inputs=[text_input, language, voice],
+        outputs=[audio_output, status_output],
+        fn=generate_speech,
+        cache_examples=False
+    )
+
     generate_btn.click(
         generate_speech,
         inputs=[text_input, language, voice, temperature, rep_penalty],
@@ -134,4 +165,5 @@ with gr.Blocks(title="YarnGPT - Nigerian Accented Text-to-Speech") as demo:
     """)
 
 # Launch the app
-demo.launch()
+if __name__ == "__main__":
+    demo.launch()
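A note on the bootstrap this commit adds: os.system swallows failures, so a missing git or wget binary would only surface later as an import or file-not-found error. The sketch below is one defensive alternative, not the app's actual code. It assumes the same GitHub repository and the two checkpoint files referenced by the wget URLs in the diff, swaps in subprocess.run and huggingface_hub.hf_hub_download for the shell calls, and the fetch_assets helper name is invented for illustration.

import os
import subprocess
import sys

from huggingface_hub import hf_hub_download

def fetch_assets():
    # Clone YarnGPT once; check=True raises CalledProcessError on failure
    # instead of continuing with a broken checkout.
    if not os.path.exists("yarngpt"):
        subprocess.run(
            ["git", "clone", "https://github.com/saheedniyi02/yarngpt.git"],
            check=True,
        )
    sys.path.append("yarngpt")

    # hf_hub_download fetches the same files as the wget calls in the diff,
    # but caches them and skips the network round-trip on warm restarts.
    config_path = hf_hub_download(
        repo_id="novateur/WavTokenizer-medium-speech-75token",
        filename="wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
    )
    model_path = hf_hub_download(
        repo_id="novateur/WavTokenizer-large-speech-75token",
        filename="wavtokenizer_large_speech_320_24k.ckpt",
    )
    return config_path, model_path

One wrinkle with this variant: hf_hub_download returns paths inside its local cache, so WAV_TOKENIZER_CONFIG_PATH and WAV_TOKENIZER_MODEL_PATH would have to be set from its return values rather than hard-coded, and the os.path.exists guards become unnecessary.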