Update app.py
app.py CHANGED
@@ -4,11 +4,11 @@ import numpy as np
 import spaces
 import torch
 import torchaudio
-from generator import Segment, load_csm_1b
-from huggingface_hub import hf_hub_download, login
+from generator import Segment, load_csm_1b # We'll use load_csm_1b *later*
+from huggingface_hub import hf_hub_download, login, HfApi
 from watermarking import watermark
-import whisper
-from transformers import AutoTokenizer, AutoModelForCausalLM
+import whisper # We'll use whisper.load_model *later*
+from transformers import AutoTokenizer, AutoModelForCausalLM # We'll use these *later*
 import logging
 from transformers import GenerationConfig
 
@@ -47,14 +47,14 @@ MAX_GEMMA_LENGTH = 128
 # --- Global Conversation History ---
 conversation_history = []
 
-# --- Model Downloading (PRE-DOWNLOAD) ---
+# --- Model Downloading (PRE-DOWNLOAD, NO LOADING) ---
 
-# Download Sesame CSM 1B
-csm_1b_model_path = "csm_1b_ckpt.pt" # Local path
+# 1. Download Sesame CSM 1B
+csm_1b_model_path = "csm_1b_ckpt.pt" # Local path for the downloaded model
 try:
     if not os.path.exists(csm_1b_model_path):
         hf_hub_download(repo_id="sesame/csm-1b", filename="ckpt.pt", local_dir=".", local_dir_use_symlinks=False)
-        os.rename("ckpt.pt", csm_1b_model_path)
+        os.rename("ckpt.pt", csm_1b_model_path)
         logging.info("Sesame CSM 1B model downloaded.")
     else:
         logging.info("Sesame CSM 1B model already downloaded.")
@@ -62,25 +62,40 @@ except Exception as e:
     logging.error(f"Error downloading Sesame CSM 1B: {e}")
     raise
 
-# Download Whisper (using
+# 2. Download Whisper (using hf_hub_download for consistency)
 whisper_model_name = "small.en"
+whisper_local_dir = "whisper_model" # Local directory for Whisper
 try:
-
-
+    if not os.path.exists(whisper_local_dir):
+        os.makedirs(whisper_local_dir, exist_ok=True) # Create if not exist
+        # Whisper uses a specific download method. This command should pre download everything needed
+        whisper.load_model(whisper_model_name, download_root=whisper_local_dir)
+    else:
+        logging.info("Whisper model already downloaded.")
 except Exception as e:
-
-    raise
+    logging.error(f"Whisper model download failed with exception: {e}")
 
-# Download Gemma 3 1B (
+# 3. Download Gemma 3 1B (using hf_hub_download, individual files)
 gemma_repo_id = "google/gemma-3-1b-it"
-gemma_local_path = "gemma_model"
+gemma_local_path = os.path.abspath("gemma_model") # Absolute path
 try:
     if not os.path.exists(gemma_local_path):
-
-
-
+        os.makedirs(gemma_local_path, exist_ok=True) # Create the directory
+        api = HfApi()
+        # List all files in the repository
+        repo_files = api.list_repo_files(gemma_repo_id)
+
+        # Download each file individually
+        for file in repo_files:
+            hf_hub_download(
+                repo_id=gemma_repo_id,
+                filename=file,
+                local_dir=gemma_local_path,
+                local_dir_use_symlinks=False, # Ensure files are copied, not linked
+            )
+        logging.info("Gemma 3 1B model and tokenizer files downloaded.")
     else:
-        logging.info("Gemma 3 1B model and tokenizer already downloaded.")
+        logging.info("Gemma 3 1B model and tokenizer files already downloaded.")
 except Exception as e:
     logging.error(f"Error downloading Gemma 3 1B: {e}")
     raise
@@ -88,7 +103,7 @@ except Exception as e:
 
 # --- Helper Functions ---
 
-def transcribe_audio(audio_path: str, whisper_model) -> str: # Pass whisper_mod
+def transcribe_audio(audio_path: str, whisper_model) -> str:
     try:
         audio = whisper.load_audio(audio_path)
         audio = whisper.pad_or_trim(audio)
@@ -98,23 +113,19 @@ def transcribe_audio(audio_path: str, whisper_model) -> str: # Pass whisper_mod
         logging.error(f"Whisper transcription error: {e}")
         return "Error: Could not transcribe audio."
 
-def generate_response(text: str, model_gemma, tokenizer_gemma, device) -> str: #
+def generate_response(text: str, model_gemma, tokenizer_gemma, device) -> str:
     try:
-        # Gemma 3 chat template format
         messages = [{"role": "user", "content": text}]
         input = tokenizer_gemma.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(device)
         generation_config = GenerationConfig(
             max_new_tokens=MAX_GEMMA_LENGTH,
             early_stopping=True,
         )
-
         generated_output = model_gemma.generate(input, generation_config=generation_config)
         decoded_output = tokenizer_gemma.decode(generated_output[0], skip_special_tokens=False)
 
-        # Extract the assistant's response (Gemma specific)
         start_token = "<start_of_turn>model"
         end_token = "<end_of_turn>"
-
         start_index = decoded_output.find(start_token)
         if start_index != -1:
             start_index += len(start_token)
@@ -122,11 +133,12 @@ def generate_response(text: str, model_gemma, tokenizer_gemma, device) -> str: #
             assistant_response = decoded_output[start_index:].strip()
             return assistant_response
         return decoded_output
+
     except Exception as e:
         logging.error(f"Gemma response generation error: {e}")
         return "I'm sorry, I encountered an error generating a response."
 
-def load_audio(audio_path: str, generator) -> torch.Tensor:
+def load_audio(audio_path: str, generator) -> torch.Tensor:
     try:
         audio_tensor, sample_rate = torchaudio.load(audio_path)
         audio_tensor = audio_tensor.mean(dim=0)
@@ -145,35 +157,31 @@ def clear_history():
 
 # --- Main Inference Function ---
 
-@spaces.GPU(duration=gpu_timeout) #
+@spaces.GPU(duration=gpu_timeout) # GPU decorator
 def infer(user_audio) -> tuple[int, np.ndarray]:
-    # --- CUDA Availability Check (INSIDE infer) ---
     if torch.cuda.is_available():
         device = "cuda"
         logging.info(f"CUDA is available! Using device: {torch.cuda.get_device_name(0)}")
     else:
         device = "cpu"
-        logging.info("CUDA is NOT available.
-
+        logging.info("CUDA is NOT available. Using CPU.")
 
     try:
-
-        # Load Sesame CSM 1B (from local file)
+        # --- Model Loading (ONLY inside infer, after GPU is available) ---
        generator = load_csm_1b(csm_1b_model_path, device)
         logging.info("Sesame CSM 1B loaded successfully.")
 
-
-        whisper_model = whisper.load_model(whisper_model_name, device=device)
+        whisper_model = whisper.load_model(whisper_model_name, device=device, download_root=whisper_local_dir)
         logging.info(f"Whisper model '{whisper_model_name}' loaded successfully.")
 
-        # Load Gemma (from local cache)
         tokenizer_gemma = AutoTokenizer.from_pretrained(gemma_local_path)
         model_gemma = AutoModelForCausalLM.from_pretrained(gemma_local_path).to(device)
         logging.info("Gemma 3 1B pt model loaded successfully.")
 
         if not user_audio:
             raise ValueError("No audio input received.")
-        return _infer(user_audio, generator, whisper_model, tokenizer_gemma, model_gemma, device)
+        return _infer(user_audio, generator, whisper_model, tokenizer_gemma, model_gemma, device)
+
     except Exception as e:
         logging.exception(f"Inference error: {e}")
         raise gr.Error(f"An error occurred during processing: {e}")
@@ -182,10 +190,10 @@ def _infer(user_audio, generator, whisper_model, tokenizer_gemma, model_gemma, d
     global conversation_history
 
     try:
-        user_text = transcribe_audio(user_audio, whisper_model)
+        user_text = transcribe_audio(user_audio, whisper_model)
         logging.info(f"User: {user_text}")
 
-        ai_text = generate_response(user_text, model_gemma, tokenizer_gemma, device)
+        ai_text = generate_response(user_text, model_gemma, tokenizer_gemma, device)
         logging.info(f"AI: {ai_text}")
 
         try:
@@ -201,7 +209,7 @@ def _infer(user_audio, generator, whisper_model, tokenizer_gemma, model_gemma, d
             raise gr.Error(f"Sesame response generation error: {e}")
 
 
-        user_segment = Segment(speaker = 1, text = user_text, audio = load_audio(user_audio, generator))
+        user_segment = Segment(speaker = 1, text = user_text, audio = load_audio(user_audio, generator))
         ai_segment = Segment(speaker = SPEAKER_ID, text = ai_text, audio = ai_audio)
         conversation_history.append(user_segment)
         conversation_history.append(ai_segment)
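The shape of this change is the usual ZeroGPU split: model weights are fetched to local disk at import time, and everything that needs CUDA runs inside the `@spaces.GPU`-decorated `infer`. As written, all three models are reloaded on every request. A common alternative on ZeroGPU Spaces is to instantiate models on CPU at import time and move them to the GPU inside the decorated function; below is a minimal sketch of that pattern, assuming the `gemma_model` directory downloaded above (the `generate` helper is hypothetical, not part of this commit).

```python
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load once at import time, on CPU: under ZeroGPU, CUDA is only
# guaranteed to exist inside a @spaces.GPU-decorated function.
tokenizer = AutoTokenizer.from_pretrained("gemma_model")
model = AutoModelForCausalLM.from_pretrained("gemma_model")

@spaces.GPU(duration=60)
def generate(prompt: str) -> str:
    model.to("cuda")  # move weights onto the GPU granted for this call
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    output_ids = model.generate(**inputs, max_new_tokens=128)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
```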
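The per-file Gemma loop (`HfApi().list_repo_files` plus one `hf_hub_download` per file) reproduces what `huggingface_hub.snapshot_download` does in a single call, and that helper also spares the file-listing round trip; note too that recent `huggingface_hub` releases deprecate `local_dir_use_symlinks` and ignore it. A sketch of the same pre-download step using `snapshot_download`, with the same repo id and target directory as above:

```python
from huggingface_hub import snapshot_download

# Mirror the whole google/gemma-3-1b-it repo into gemma_model/ in one call;
# files that are already present and up to date should be skipped.
snapshot_download(repo_id="google/gemma-3-1b-it", local_dir="gemma_model")
```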
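`generate_response` recovers the reply by searching the decoded string for `<start_of_turn>model`, which is fragile if the chat template or special-token rendering ever changes. Slicing off the prompt tokens by position before decoding avoids the string parsing entirely; a sketch reusing the variable names from the function above:

```python
# Decode only the tokens generated after the prompt, instead of
# searching the decoded string for Gemma's turn markers.
prompt_len = input.shape[-1]  # number of prompt tokens fed to generate()
new_tokens = generated_output[0][prompt_len:]
assistant_response = tokenizer_gemma.decode(new_tokens, skip_special_tokens=True).strip()
```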