Spaces:

okewunmi
/

tts

Running

App Files Files Community

okewunmi committed on Mar 13

Commit

6c2dbc0

verified ·

1 Parent(s): b3a5955

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -26

app.py CHANGED Viewed

@@ -5,54 +5,128 @@ import torch
 import torchaudio
 import uroman
 import numpy as np
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from outetts.wav_tokenizer.decoder import WavTokenizer
-# Clone and install YarnGPT at startup
 if not os.path.exists("yarngpt"):
-    print("Cloning YarnGPT repository...")
     os.system("git clone https://github.com/saheedniyi02/yarngpt.git")
     # Add the repository to Python path
     sys.path.append("yarngpt")
 # Import the YarnGPT AudioTokenizer
 from yarngpt.audiotokenizer import AudioTokenizerV2
 # Constants and paths
 MODEL_PATH = "saheedniyi/YarnGPT2b"
 WAV_TOKENIZER_CONFIG_PATH = "wavtokenizer_config.yaml"
 WAV_TOKENIZER_MODEL_PATH = "wavtokenizer_model.ckpt"
-# Download the model files at startup
-if not os.path.exists(WAV_TOKENIZER_CONFIG_PATH):
-    print("Downloading WavTokenizer config...")
-    os.system(f"wget -O {WAV_TOKENIZER_CONFIG_PATH} https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml")
-if not os.path.exists(WAV_TOKENIZER_MODEL_PATH):
-    print("Downloading WavTokenizer model...")
-    os.system(f"wget -O {WAV_TOKENIZER_MODEL_PATH} https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt")
 # Initialize the model and tokenizer
 def initialize_model():
-    print("Initializing AudioTokenizer and model...")
-    audio_tokenizer = AudioTokenizerV2(
-        MODEL_PATH,
-        WAV_TOKENIZER_MODEL_PATH,
-        WAV_TOKENIZER_CONFIG_PATH
-    )
-    print("Loading YarnGPT model...")
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_PATH,
-        torch_dtype="auto"
-    ).to(audio_tokenizer.device)
-    return model, audio_tokenizer
 # Initialize the model and tokenizer
-print("Starting model initialization...")
-model, audio_tokenizer = initialize_model()
-print("Model initialization complete!")
 # Available voices and languages
 VOICES = ["idera", "jude", "kemi", "tunde", "funmi"]
@@ -64,6 +138,8 @@ def generate_speech(text, language, voice, temperature=0.1, rep_penalty=1.1):
         return None, "Please enter some text to convert to speech."
     try:
         # Create prompt
         prompt = audio_tokenizer.create_prompt(text, lang=language, speaker_name=voice)
@@ -86,9 +162,11 @@ def generate_speech(text, language, voice, temperature=0.1, rep_penalty=1.1):
         temp_audio_path = "output.wav"
         torchaudio.save(temp_audio_path, audio, sample_rate=24000)
         return temp_audio_path, f"Successfully generated speech for: {text[:50]}..."
     except Exception as e:
         return None, f"Error generating speech: {str(e)}"
 # Example text for demonstration

 import torchaudio
 import uroman
 import numpy as np
+import requests
+import hashlib
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from outetts.wav_tokenizer.decoder import WavTokenizer
+# Set up logging
+import logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+# Clone YarnGPT at startup
 if not os.path.exists("yarngpt"):
+    logger.info("Cloning YarnGPT repository...")
     os.system("git clone https://github.com/saheedniyi02/yarngpt.git")
     # Add the repository to Python path
     sys.path.append("yarngpt")
+else:
+    sys.path.append("yarngpt")
 # Import the YarnGPT AudioTokenizer
 from yarngpt.audiotokenizer import AudioTokenizerV2
 # Constants and paths
 MODEL_PATH = "saheedniyi/YarnGPT2b"
+WAV_TOKENIZER_CONFIG_URL = "https://huggingface.co/novateur/WavTokenizer-medium-speech-75token/resolve/main/wavtokenizer_mediumdata_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml"
+WAV_TOKENIZER_MODEL_URL = "https://huggingface.co/novateur/WavTokenizer-large-speech-75token/resolve/main/wavtokenizer_large_speech_320_24k.ckpt"
 WAV_TOKENIZER_CONFIG_PATH = "wavtokenizer_config.yaml"
 WAV_TOKENIZER_MODEL_PATH = "wavtokenizer_model.ckpt"
+# Function to download files with verification
+def download_file(url, output_path, timeout=30):
+    """Stream url to output_path and verify the result before publishing it.
+    Args:
+        url: HTTP(S) address of the file to fetch.
+        output_path: Destination path; only replaced after a verified download.
+        timeout: Seconds passed to requests as the connect/read timeout.
+    Returns:
+        True when the file was downloaded and verified, False when the
+        response body was empty or shorter/longer than Content-Length.
+    Raises:
+        requests.RequestException: on connection errors, timeouts, or a
+            non-2xx status (via raise_for_status).
+    """
+    logger.info(f"Downloading {url} to {output_path}")
+    # Write to a sibling temp file so an interrupted download never leaves a
+    # partial file at output_path; a partial file would pass a non-empty check.
+    partial_path = f"{output_path}.part"
+    try:
+        with requests.get(url, stream=True, timeout=timeout) as response:
+            response.raise_for_status()
+            total_size = int(response.headers.get('content-length', 0))
+            # Content-Length counts encoded bytes while iter_content yields
+            # decoded ones, so the sizes are only comparable without encoding.
+            size_is_exact = total_size > 0 and 'content-encoding' not in response.headers
+            downloaded = 0
+            next_report = 10
+            with open(partial_path, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if not chunk:
+                        continue
+                    f.write(chunk)
+                    downloaded += len(chunk)
+                    if total_size > 0:
+                        percent = min(100, 100 * downloaded // total_size)
+                        # Log once per 10% step instead of once per chunk
+                        if percent >= next_report:
+                            logger.info(f"Download progress: {percent}%")
+                            next_report = percent - percent % 10 + 10
+        if downloaded == 0 or (size_is_exact and downloaded != total_size):
+            logger.error(f"Failed to download {output_path}")
+            return False
+        # Atomic rename: output_path only ever holds a complete file
+        os.replace(partial_path, output_path)
+    finally:
+        # No-op after a successful os.replace; cleans up after any failure
+        if os.path.exists(partial_path):
+            os.remove(partial_path)
+    logger.info(f"Successfully downloaded {output_path}")
+    return True
+# Download the required files
+def download_required_files():
+    """Ensure the WavTokenizer config and checkpoint exist locally and are non-empty.
+    Each file is fetched with download_file when it is missing or zero bytes.
+    Raises:
+        RuntimeError: if a download reports failure, or if either file is
+            missing or empty after the download step.
+    """
+    # (start message, failure message, source URL, local path), config first
+    required = (
+        (
+            "Downloading WavTokenizer config...",
+            "Failed to download WavTokenizer config",
+            WAV_TOKENIZER_CONFIG_URL,
+            WAV_TOKENIZER_CONFIG_PATH,
+        ),
+        (
+            "Downloading WavTokenizer model...",
+            "Failed to download WavTokenizer model",
+            WAV_TOKENIZER_MODEL_URL,
+            WAV_TOKENIZER_MODEL_PATH,
+        ),
+    )
+    for start_msg, failure_msg, source_url, local_path in required:
+        already_present = os.path.exists(local_path) and os.path.getsize(local_path) != 0
+        if already_present:
+            continue
+        logger.info(start_msg)
+        if not download_file(source_url, local_path):
+            raise RuntimeError(failure_msg)
+    local_paths = [entry[3] for entry in required]
+    # Final check covers both files, whether just downloaded or already present
+    if not all(os.path.exists(local_path) for local_path in local_paths):
+        raise RuntimeError("Required files not found")
+    if any(os.path.getsize(local_path) == 0 for local_path in local_paths):
+        raise RuntimeError("Downloaded files are empty")
+    logger.info("All required files are downloaded and verified")
 # Initialize the model and tokenizer
 def initialize_model():
+    """Download the WavTokenizer files, then build the audio tokenizer and model.
+    Returns:
+        Tuple (model, audio_tokenizer); the model is moved to
+        audio_tokenizer.device after loading.
+    Raises:
+        Exception: any failure is logged with its traceback and re-raised
+            unchanged for the caller to handle.
+    """
+    try:
+        # Download required files
+        download_required_files()
+        logger.info("Initializing AudioTokenizer...")
+        audio_tokenizer = AudioTokenizerV2(
+            MODEL_PATH,
+            WAV_TOKENIZER_MODEL_PATH,
+            WAV_TOKENIZER_CONFIG_PATH
+        )
+        logger.info("Loading YarnGPT model...")
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_PATH,
+            torch_dtype="auto"
+        ).to(audio_tokenizer.device)
+        logger.info("Model initialization complete!")
+        return model, audio_tokenizer
+    except Exception as e:
+        # logger.exception records the traceback; a caller that catches the
+        # re-raised error and exits would otherwise leave none in the logs.
+        logger.exception(f"Failed to initialize model: {str(e)}")
+        raise
 # Initialize the model and tokenizer
+# Build the model at startup; on failure, serve an error-only UI and exit with status 1
+logger.info("Starting model initialization...")
+try:
+    model, audio_tokenizer = initialize_model()
+except Exception as e:
+    logger.error(f"Error initializing model: {str(e)}")
+    # Provide a basic interface to show the error
+    # NOTE(review): `gr` is not imported in the visible part of this diff;
+    # presumably gradio is imported above the hunk. Confirm.
+    demo = gr.Interface(
+        # The callback ignores its input and always returns the captured error text
+        fn=lambda x: f"Model initialization failed: {str(e)}. Please check the space logs for more details.",
+        inputs=gr.Textbox(label="Error occurred during initialization"),
+        outputs=gr.Textbox(),
+        title="YarnGPT - Initialization Error"
+    )
+    demo.launch()
+    # Exit the script
+    # Reached only once demo.launch() returns
+    sys.exit(1)
 # Available voices and languages
 VOICES = ["idera", "jude", "kemi", "tunde", "funmi"]
         return None, "Please enter some text to convert to speech."
     try:
+        logger.info(f"Generating speech for text: {text[:50]}...")
         # Create prompt
         prompt = audio_tokenizer.create_prompt(text, lang=language, speaker_name=voice)
         temp_audio_path = "output.wav"
         torchaudio.save(temp_audio_path, audio, sample_rate=24000)
+        logger.info("Speech generation complete")
         return temp_audio_path, f"Successfully generated speech for: {text[:50]}..."
     except Exception as e:
+        logger.error(f"Error generating speech: {str(e)}")
         return None, f"Error generating speech: {str(e)}"
 # Example text for demonstration