okewunmi committed
Commit 6c2dbc0 · verified · 1 Parent(s): b3a5955

Update app.py

Files changed (1)
  1. app.py +104 -26
app.py CHANGED
@@ -5,54 +5,128 @@ import torch
 import torchaudio
 import uroman
 import numpy as np
+import requests
+import hashlib
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from outetts.wav_tokenizer.decoder import WavTokenizer
 
-# Clone and install YarnGPT at startup
+# Set up logging
+import logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+# Clone YarnGPT at startup
 if not os.path.exists("yarngpt"):
-    print("Cloning YarnGPT repository...")
+    logger.info("Cloning YarnGPT repository...")
     os.system("git clone https://github.com/saheedniyi02/yarngpt.git")
     # Add the repository to Python path
    sys.path.append("yarngpt")
+else:
+    sys.path.append("yarngpt")
 
 # Import the YarnGPT AudioTokenizer
 from yarngpt.audiotokenizer import AudioTokenizerV2
 
 # Constants and paths
 MODEL_PATH = "saheedniyi/YarnGPT2b"
+WAV_TOKENIZER_CONFIG_URL = "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
+WAV_TOKENIZER_MODEL_URL = "https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt"
 WAV_TOKENIZER_CONFIG_PATH = "wavtokenizer_config.yaml"
 WAV_TOKENIZER_MODEL_PATH = "wavtokenizer_model.ckpt"
 
-# Download the model files at startup
-if not os.path.exists(WAV_TOKENIZER_CONFIG_PATH):
-    print("Downloading WavTokenizer config...")
-    os.system(f"wget -O {WAV_TOKENIZER_CONFIG_PATH} https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
+# Function to download files with verification
+def download_file(url, output_path):
+    """Download a file with progress tracking and verification"""
+    logger.info(f"Downloading {url} to {output_path}")
+
+    # Stream the file download
+    with requests.get(url, stream=True) as response:
+        response.raise_for_status()
+        total_size = int(response.headers.get('content-length', 0))
+
+        with open(output_path, 'wb') as f:
+            downloaded = 0
+            for chunk in response.iter_content(chunk_size=8192):
+                if chunk:
+                    f.write(chunk)
+                    downloaded += len(chunk)
+                    percent = int(100 * downloaded / total_size) if total_size > 0 else 0
+                    if percent % 10 == 0:
+                        logger.info(f"Download progress: {percent}%")
+
+    # Verify the file exists and has content
+    if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
+        logger.info(f"Successfully downloaded {output_path}")
+        return True
+    else:
+        logger.error(f"Failed to download {output_path}")
+        return False
 
-if not os.path.exists(WAV_TOKENIZER_MODEL_PATH):
-    print("Downloading WavTokenizer model...")
-    os.system(f"wget -O {WAV_TOKENIZER_MODEL_PATH} https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt")
+# Download the required files
+def download_required_files():
+    # Download config file
+    if not os.path.exists(WAV_TOKENIZER_CONFIG_PATH) or os.path.getsize(WAV_TOKENIZER_CONFIG_PATH) == 0:
+        logger.info("Downloading WavTokenizer config...")
+        if not download_file(WAV_TOKENIZER_CONFIG_URL, WAV_TOKENIZER_CONFIG_PATH):
+            raise RuntimeError("Failed to download WavTokenizer config")
+
+    # Download model file
+    if not os.path.exists(WAV_TOKENIZER_MODEL_PATH) or os.path.getsize(WAV_TOKENIZER_MODEL_PATH) == 0:
+        logger.info("Downloading WavTokenizer model...")
+        if not download_file(WAV_TOKENIZER_MODEL_URL, WAV_TOKENIZER_MODEL_PATH):
+            raise RuntimeError("Failed to download WavTokenizer model")
+
+    # Verify files exist
+    if not os.path.exists(WAV_TOKENIZER_CONFIG_PATH) or not os.path.exists(WAV_TOKENIZER_MODEL_PATH):
+        raise RuntimeError("Required files not found")
+
+    # Verify files have content
+    if os.path.getsize(WAV_TOKENIZER_CONFIG_PATH) == 0 or os.path.getsize(WAV_TOKENIZER_MODEL_PATH) == 0:
+        raise RuntimeError("Downloaded files are empty")
+
+    logger.info("All required files are downloaded and verified")
 
 # Initialize the model and tokenizer
 def initialize_model():
-    print("Initializing AudioTokenizer and model...")
-    audio_tokenizer = AudioTokenizerV2(
-        MODEL_PATH,
-        WAV_TOKENIZER_MODEL_PATH,
-        WAV_TOKENIZER_CONFIG_PATH
-    )
-
-    print("Loading YarnGPT model...")
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_PATH,
-        torch_dtype="auto"
-    ).to(audio_tokenizer.device)
-
-    return model, audio_tokenizer
+    try:
+        # Download required files
+        download_required_files()
+
+        logger.info("Initializing AudioTokenizer...")
+        audio_tokenizer = AudioTokenizerV2(
+            MODEL_PATH,
+            WAV_TOKENIZER_MODEL_PATH,
+            WAV_TOKENIZER_CONFIG_PATH
+        )
+
+        logger.info("Loading YarnGPT model...")
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_PATH,
+            torch_dtype="auto"
+        ).to(audio_tokenizer.device)
+
+        logger.info("Model initialization complete!")
+        return model, audio_tokenizer
+    except Exception as e:
+        logger.error(f"Failed to initialize model: {str(e)}")
+        raise
 
 # Initialize the model and tokenizer
-print("Starting model initialization...")
-model, audio_tokenizer = initialize_model()
-print("Model initialization complete!")
+logger.info("Starting model initialization...")
+try:
+    model, audio_tokenizer = initialize_model()
+except Exception as e:
+    logger.error(f"Error initializing model: {str(e)}")
+    # Provide a basic interface to show the error
+    demo = gr.Interface(
+        fn=lambda x: f"Model initialization failed: {str(e)}. Please check the space logs for more details.",
+        inputs=gr.Textbox(label="Error occurred during initialization"),
+        outputs=gr.Textbox(),
+        title="YarnGPT - Initialization Error"
+    )
+    demo.launch()
+    # Exit the script
+    sys.exit(1)
 
 # Available voices and languages
 VOICES = ["idera", "jude", "kemi", "tunde", "funmi"]
@@ -64,6 +138,8 @@ def generate_speech(text, language, voice, temperature=0.1, rep_penalty=1.1):
         return None, "Please enter some text to convert to speech."
 
     try:
+        logger.info(f"Generating speech for text: {text[:50]}...")
+
         # Create prompt
         prompt = audio_tokenizer.create_prompt(text, lang=language, speaker_name=voice)
 
@@ -86,9 +162,11 @@ def generate_speech(text, language, voice, temperature=0.1, rep_penalty=1.1):
         temp_audio_path = "output.wav"
         torchaudio.save(temp_audio_path, audio, sample_rate=24000)
 
+        logger.info("Speech generation complete")
         return temp_audio_path, f"Successfully generated speech for: {text[:50]}..."
 
     except Exception as e:
+        logger.error(f"Error generating speech: {str(e)}")
         return None, f"Error generating speech: {str(e)}"
 
 # Example text for demonstration
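
A note on the download path: the commit swaps the wget os.system calls for a streamed requests download plus a file-size check, wrapped in download_file() / download_required_files(). Below is a minimal standalone sketch of that pattern (not part of the commit; the timeout value and the __main__ scaffolding are assumptions), using the config URL taken from the diff:

# Standalone sketch of the streamed download-and-verify pattern introduced above.
# It deliberately does not import app.py, since importing it starts the whole Space.
import os
import requests

CONFIG_URL = "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
CONFIG_PATH = "wavtokenizer_config.yaml"

def fetch(url, output_path, chunk_size=8192, timeout=60):
    """Stream url to output_path in small chunks and confirm a non-empty file landed."""
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()
        with open(output_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
    return os.path.exists(output_path) and os.path.getsize(output_path) > 0

if __name__ == "__main__":
    ok = fetch(CONFIG_URL, CONFIG_PATH)
    print("config downloaded:", ok, os.path.getsize(CONFIG_PATH) if ok else 0)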
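
The diff also adds import hashlib, but download_file() only checks that the downloaded file exists and is non-empty. A sketch of the checksum verification that import suggests (hypothetical; the expected digest below is a placeholder, not a value from the commit):

# Hash a downloaded checkpoint incrementally so large files never sit fully in memory.
import hashlib

def sha256_of(path, chunk_size=8192):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

EXPECTED_SHA256 = "..."  # placeholder: the published digest for wavtokenizer_large_speech_320_24k.ckpt

if sha256_of("wavtokenizer_model.ckpt") != EXPECTED_SHA256:
    raise RuntimeError("wavtokenizer_model.ckpt failed checksum verification")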