okewunmi committed on
Commit
78b0078
·
verified ·
1 Parent(s): c129794

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -0
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import gradio as gr
2
  import torch
3
  import torchaudio
 
 
4
  from transformers import AutoModelForCausalLM
5
  from outetts.wav_tokenizer.decoder import WavTokenizer
6
  from yarngpt.audiotokenizer import AudioTokenizer
@@ -22,6 +24,22 @@ def initialize_model():
22
 
23
  return model, audio_tokenizer
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # Generate audio from text
26
  def generate_speech(text, speaker_name):
27
  # Create prompt
 
1
  import gradio as gr
2
  import torch
3
  import torchaudio
4
+ import os
5
+ import subprocess
6
  from transformers import AutoModelForCausalLM
7
  from outetts.wav_tokenizer.decoder import WavTokenizer
8
  from yarngpt.audiotokenizer import AudioTokenizer
 
24
 
25
  return model, audio_tokenizer
26
 
27
def download_if_not_exists(url, filename):
    """Fetch ``url`` into ``filename`` with ``wget`` unless the file already exists.

    The download is written to ``<filename>.part`` first and only renamed to
    ``filename`` once ``wget`` exits successfully. ``wget -O`` creates its
    output file even when the transfer fails, so writing straight to
    ``filename`` would leave an empty/partial file that the existence check
    would then treat as a finished download on every later run.

    Args:
        url: Direct download URL of the file.
        filename: Local path the file should be stored at.

    Raises:
        subprocess.CalledProcessError: If ``wget`` exits with a non-zero status.
        FileNotFoundError: If the ``wget`` executable is not available.
    """
    if os.path.exists(filename):
        return

    partial = f"{filename}.part"
    print(f"Downloading {filename}...")
    try:
        # check=True turns a failed transfer into an exception instead of
        # silently continuing with a broken file.
        subprocess.run(["wget", url, "-O", partial], check=True)
        os.replace(partial, filename)
    finally:
        # After a successful rename the partial file is gone and this is a
        # no-op; after a failure it removes whatever wget left behind.
        if os.path.exists(partial):
            os.remove(partial)
    print(f"Downloaded {filename}")
32
+
33
# Download the WavTokenizer config and checkpoint if they're not already present.
# Hugging Face serves raw file contents from ".../resolve/<revision>/<path>";
# ".../blob/<revision>/<path>" is the HTML file-viewer page, so fetching it
# would save a web page under the checkpoint's name.
download_if_not_exists(
    "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml",
    "wavtokenizer_config.yaml",
)
download_if_not_exists(
    "https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_v2.ckpt",
    "wavtokenizer_model.ckpt",
)
42
+
43
  # Generate audio from text
44
  def generate_speech(text, speaker_name):
45
  # Create prompt