okewunmi committed on
Commit
9147378
·
verified ·
1 Parent(s): 03d09ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -5
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import gradio as gr
3
  import torch
4
  import torchaudio
@@ -7,8 +8,14 @@ import numpy as np
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
  from outetts.wav_tokenizer.decoder import WavTokenizer
9
 
 
 
 
 
 
 
 
10
  # Import the YarnGPT AudioTokenizer
11
- # Assuming the git repository is cloned in the same directory
12
  from yarngpt.audiotokenizer import AudioTokenizerV2
13
 
14
  # Constants and paths
@@ -17,17 +24,24 @@ WAV_TOKENIZER_CONFIG_PATH = "wavtokenizer_config.yaml"
17
  WAV_TOKENIZER_MODEL_PATH = "wavtokenizer_model.ckpt"
18
 
19
  # Download the model files at startup
20
- os.system(f"wget -O {WAV_TOKENIZER_CONFIG_PATH} https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
21
- os.system(f"wget -O {WAV_TOKENIZER_MODEL_PATH} https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt")
22
- os.system("git clone https://github.com/saheedniyi02/yarngpt.git")
 
 
 
 
23
 
24
  # Initialize the model and tokenizer
25
  def initialize_model():
 
26
  audio_tokenizer = AudioTokenizerV2(
27
  MODEL_PATH,
28
  WAV_TOKENIZER_MODEL_PATH,
29
  WAV_TOKENIZER_CONFIG_PATH
30
  )
 
 
31
  model = AutoModelForCausalLM.from_pretrained(
32
  MODEL_PATH,
33
  torch_dtype="auto"
@@ -36,7 +50,9 @@ def initialize_model():
36
  return model, audio_tokenizer
37
 
38
  # Initialize the model and tokenizer
 
39
  model, audio_tokenizer = initialize_model()
 
40
 
41
  # Available voices and languages
42
  VOICES = ["idera", "jude", "kemi", "tunde", "funmi"]
@@ -75,6 +91,13 @@ def generate_speech(text, language, voice, temperature=0.1, rep_penalty=1.1):
75
  except Exception as e:
76
  return None, f"Error generating speech: {str(e)}"
77
 
 
 
 
 
 
 
 
78
  # Create the Gradio interface
79
  with gr.Blocks(title="YarnGPT - Nigerian Accented Text-to-Speech") as demo:
80
  gr.Markdown("# YarnGPT - Nigerian Accented Text-to-Speech")
@@ -118,6 +141,14 @@ with gr.Blocks(title="YarnGPT - Nigerian Accented Text-to-Speech") as demo:
118
  audio_output = gr.Audio(label="Generated Speech")
119
  status_output = gr.Textbox(label="Status")
120
 
 
 
 
 
 
 
 
 
121
  generate_btn.click(
122
  generate_speech,
123
  inputs=[text_input, language, voice, temperature, rep_penalty],
@@ -134,4 +165,5 @@ with gr.Blocks(title="YarnGPT - Nigerian Accented Text-to-Speech") as demo:
134
  """)
135
 
136
  # Launch the app
137
- demo.launch()
 
 
1
  import os
2
+ import sys
3
  import gradio as gr
4
  import torch
5
  import torchaudio
 
8
  from transformers import AutoModelForCausalLM, AutoTokenizer
9
  from outetts.wav_tokenizer.decoder import WavTokenizer
10
 
11
+ # Clone YarnGPT at startup if it is not already present (no install step; the checkout is added to sys.path below)
12
+ if not os.path.exists("yarngpt"):
13
+ print("Cloning YarnGPT repository...")
14
+ os.system("git clone https://github.com/saheedniyi02/yarngpt.git")
15
+ # Add the repository to Python path
16
+ sys.path.append("yarngpt")
17
+
18
  # Import the YarnGPT AudioTokenizer
 
19
  from yarngpt.audiotokenizer import AudioTokenizerV2
20
 
21
  # Constants and paths
 
24
  WAV_TOKENIZER_MODEL_PATH = "wavtokenizer_model.ckpt"
25
 
26
  # Download the model files at startup
27
+ if not os.path.exists(WAV_TOKENIZER_CONFIG_PATH):
28
+ print("Downloading WavTokenizer config...")
29
+ os.system(f"wget -O {WAV_TOKENIZER_CONFIG_PATH} https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
30
+
31
+ if not os.path.exists(WAV_TOKENIZER_MODEL_PATH):
32
+ print("Downloading WavTokenizer model...")
33
+ os.system(f"wget -O {WAV_TOKENIZER_MODEL_PATH} https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt")
34
 
35
  # Initialize the model and tokenizer
36
  def initialize_model():
37
+ print("Initializing AudioTokenizer and model...")
38
  audio_tokenizer = AudioTokenizerV2(
39
  MODEL_PATH,
40
  WAV_TOKENIZER_MODEL_PATH,
41
  WAV_TOKENIZER_CONFIG_PATH
42
  )
43
+
44
+ print("Loading YarnGPT model...")
45
  model = AutoModelForCausalLM.from_pretrained(
46
  MODEL_PATH,
47
  torch_dtype="auto"
 
50
  return model, audio_tokenizer
51
 
52
  # Initialize the model and tokenizer
53
+ print("Starting model initialization...")
54
  model, audio_tokenizer = initialize_model()
55
+ print("Model initialization complete!")
56
 
57
  # Available voices and languages
58
  VOICES = ["idera", "jude", "kemi", "tunde", "funmi"]
 
91
  except Exception as e:
92
  return None, f"Error generating speech: {str(e)}"
93
 
94
+ # Example text for demonstration
95
+ examples = [
96
+ ["Hello, my name is Claude. I am an AI assistant created by Anthropic.", "english", "idera"],
97
+ ["Báwo ni o ṣe wà? Mo ń gbádùn ọjọ́ mi.", "yoruba", "kemi"],
98
+ ["I don dey come house now, make you prepare food.", "pidgin", "jude"]
99
+ ]
100
+
101
  # Create the Gradio interface
102
  with gr.Blocks(title="YarnGPT - Nigerian Accented Text-to-Speech") as demo:
103
  gr.Markdown("# YarnGPT - Nigerian Accented Text-to-Speech")
 
141
  audio_output = gr.Audio(label="Generated Speech")
142
  status_output = gr.Textbox(label="Status")
143
 
144
+ gr.Examples(
145
+ examples=examples,
146
+ inputs=[text_input, language, voice],
147
+ outputs=[audio_output, status_output],
148
+ fn=generate_speech,
149
+ cache_examples=False
150
+ )
151
+
152
  generate_btn.click(
153
  generate_speech,
154
  inputs=[text_input, language, voice, temperature, rep_penalty],
 
165
  """)
166
 
167
  # Launch the app
168
+ if __name__ == "__main__":
169
+ demo.launch()