Update app.py
Browse files
app.py
CHANGED
@@ -2,14 +2,31 @@ import gradio as gr
|
|
2 |
import torch
|
3 |
import torchaudio
|
4 |
import os
|
|
|
5 |
import subprocess
|
6 |
from transformers import AutoModelForCausalLM
|
7 |
-
from
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
# Initialize the model (this runs when the app starts)
|
11 |
def initialize_model():
|
12 |
-
#
|
13 |
hf_path = "saheedniyi/YarnGPT"
|
14 |
wav_tokenizer_config_path = "wavtokenizer_config.yaml"
|
15 |
wav_tokenizer_model_path = "wavtokenizer_model.ckpt"
|
@@ -24,22 +41,6 @@ def initialize_model():
|
|
24 |
|
25 |
return model, audio_tokenizer
|
26 |
|
27 |
-
def download_if_not_exists(url, filename):
|
28 |
-
if not os.path.exists(filename):
|
29 |
-
print(f"Downloading {filename}...")
|
30 |
-
subprocess.run(["wget", url, "-O", filename])
|
31 |
-
print(f"Downloaded {filename}")
|
32 |
-
|
33 |
-
# Download the model files if they're not already present
|
34 |
-
download_if_not_exists(
|
35 |
-
"https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
|
36 |
-
"wavtokenizer_config.yaml"
|
37 |
-
)
|
38 |
-
download_if_not_exists(
|
39 |
-
"https://huggingface.co/novateur/WavTokenizer-large-speech-75token/blob/main/wavtokenizer_large_speech_320_v2.ckpt",
|
40 |
-
"wavtokenizer_model.ckpt"
|
41 |
-
)
|
42 |
-
|
43 |
# Generate audio from text
|
44 |
def generate_speech(text, speaker_name):
|
45 |
# Create prompt
|
|
|
2 |
import torch
|
3 |
import torchaudio
|
4 |
import os
|
5 |
+
import re
|
6 |
import subprocess
|
7 |
from transformers import AutoModelForCausalLM
|
8 |
+
from yarngpt_utils import AudioTokenizer
|
9 |
+
|
10 |
+
# Download model files if they don't exist
|
11 |
+
def download_if_not_exists(url, filename):
    """Download ``url`` to ``filename`` with ``wget`` unless the file exists.

    The data is first written to ``<filename>.part`` and only moved to
    ``filename`` after ``wget`` exits successfully. ``wget -O`` creates its
    output file before any bytes arrive, so writing straight to ``filename``
    would leave an empty or truncated file behind on failure, and the
    existence check below would then skip the download on every later run.

    Args:
        url: Address of the file to fetch.
        filename: Local path the file is stored at.

    Raises:
        subprocess.CalledProcessError: If ``wget`` exits with a non-zero
            status.
        FileNotFoundError: If the ``wget`` executable is not installed.
    """
    if os.path.exists(filename):
        return

    print(f"Downloading {filename}...")
    partial_path = f"{filename}.part"
    try:
        # check=True turns a failed transfer into an exception instead of
        # silently reporting success.
        subprocess.run(["wget", url, "-O", partial_path], check=True)
        os.replace(partial_path, filename)
    finally:
        # After a successful os.replace the partial file is gone, so this
        # only cleans up after a failed or interrupted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print(f"Downloaded {filename}")
|
16 |
+
|
17 |
+
# Download necessary files
|
18 |
+
# WavTokenizer config (YAML) for the audio tokenizer.
download_if_not_exists(
    "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
    "wavtokenizer_config.yaml",
)
# WavTokenizer checkpoint. The URL must use "/resolve/main/" (raw file);
# "/blob/main/" is the Hub's HTML file-viewer page, so wget would save an
# HTML document under the .ckpt name.
download_if_not_exists(
    "https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_v2.ckpt",
    "wavtokenizer_model.ckpt",
)
|
26 |
|
27 |
# Initialize the model (this runs when the app starts)
|
28 |
def initialize_model():
|
29 |
+
# Set paths
|
30 |
hf_path = "saheedniyi/YarnGPT"
|
31 |
wav_tokenizer_config_path = "wavtokenizer_config.yaml"
|
32 |
wav_tokenizer_model_path = "wavtokenizer_model.ckpt"
|
|
|
41 |
|
42 |
return model, audio_tokenizer
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
# Generate audio from text
|
45 |
def generate_speech(text, speaker_name):
|
46 |
# Create prompt
|