okewunmi committed on
Commit
d4a2e16
·
verified ·
1 Parent(s): c816d1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -19
app.py CHANGED
@@ -2,14 +2,31 @@ import gradio as gr
2
  import torch
3
  import torchaudio
4
  import os
 
5
  import subprocess
6
  from transformers import AutoModelForCausalLM
7
- from outetts.wav_tokenizer.decoder import WavTokenizer
8
- from yarngpt.audiotokenizer import AudioTokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # Initialize the model (this runs when the app starts)
11
  def initialize_model():
12
- # Download model and tokenizer
13
  hf_path = "saheedniyi/YarnGPT"
14
  wav_tokenizer_config_path = "wavtokenizer_config.yaml"
15
  wav_tokenizer_model_path = "wavtokenizer_model.ckpt"
@@ -24,22 +41,6 @@ def initialize_model():
24
 
25
  return model, audio_tokenizer
26
 
27
- def download_if_not_exists(url, filename):
28
- if not os.path.exists(filename):
29
- print(f"Downloading {filename}...")
30
- subprocess.run(["wget", url, "-O", filename])
31
- print(f"Downloaded {filename}")
32
-
33
- # Download the model files if they're not already present
34
- download_if_not_exists(
35
- "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
36
- "wavtokenizer_config.yaml"
37
- )
38
- download_if_not_exists(
39
- "https://huggingface.co/novateur/WavTokenizer-large-speech-75token/blob/main/wavtokenizer_large_speech_320_v2.ckpt",
40
- "wavtokenizer_model.ckpt"
41
- )
42
-
43
  # Generate audio from text
44
  def generate_speech(text, speaker_name):
45
  # Create prompt
 
2
  import torch
3
  import torchaudio
4
  import os
5
+ import re
6
  import subprocess
7
  from transformers import AutoModelForCausalLM
8
+ from yarngpt_utils import AudioTokenizer
9
+
10
+ # Download model files if they don't exist
11
+ def download_if_not_exists(url, filename):
12
+ if not os.path.exists(filename):
13
+ print(f"Downloading {filename}...")
14
+ subprocess.run(["wget", url, "-O", filename])
15
+ print(f"Downloaded {filename}")
16
+
17
+ # Download necessary files
18
+ download_if_not_exists(
19
+ "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
20
+ "wavtokenizer_config.yaml"
21
+ )
22
+ download_if_not_exists(
23
+ "https://huggingface.co/novateur/WavTokenizer-large-speech-75token/blob/main/wavtokenizer_large_speech_320_v2.ckpt",
24
+ "wavtokenizer_model.ckpt"
25
+ )
26
 
27
  # Initialize the model (this runs when the app starts)
28
  def initialize_model():
29
+ # Set paths
30
  hf_path = "saheedniyi/YarnGPT"
31
  wav_tokenizer_config_path = "wavtokenizer_config.yaml"
32
  wav_tokenizer_model_path = "wavtokenizer_model.ckpt"
 
41
 
42
  return model, audio_tokenizer
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  # Generate audio from text
45
  def generate_speech(text, speaker_name):
46
  # Create prompt