Somnath3570 committed on
Commit
3e435ed
·
verified ·
1 Parent(s): f5d5522

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -52
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # app.py
2
  import gradio as gr
3
  import torch
4
  import transformers
@@ -8,51 +7,42 @@ import os
8
 
9
  class UltravoxInterface:
10
  def __init__(self):
11
- """Initialize the Ultravox model and settings"""
12
- print("Loading Ultravox model... This may take a few minutes...")
 
 
 
13
  self.pipe = transformers.pipeline(
14
- model='fixie-ai/ultravox-v0_4',
15
- trust_remote_code=True,
 
16
  device=0 if torch.cuda.is_available() else -1
17
  )
18
- print("Model loaded successfully!")
19
 
20
- # Default system prompt
21
- self.default_prompt = "You are a friendly and helpful character. You love to answer questions for people."
22
 
23
  def process_audio(self, audio_path, custom_prompt=None):
24
- """Process audio input and return model response"""
25
  try:
26
- # Load and preprocess audio
27
- audio, sr = librosa.load(audio_path, sr=16000)
28
-
29
- # Prepare conversation turns
30
- turns = [
31
- {
32
- "role": "system",
33
- "content": custom_prompt if custom_prompt else self.default_prompt
34
- }
35
- ]
36
 
37
- # Get model response
38
- result = self.pipe(
39
- {
40
- 'audio': audio,
41
- 'turns': turns,
42
- 'sampling_rate': sr
43
- },
44
- max_new_tokens=30
45
- )
46
 
47
- # Handle different response formats
48
- if isinstance(result, str):
49
- return result
50
- elif isinstance(result, list):
51
- return result[0] if result else "No response generated"
52
- elif isinstance(result, dict):
53
- return result.get('generated_text', "No response generated")
54
- else:
55
- return str(result)
56
 
57
  except Exception as e:
58
  return f"Error processing audio: {str(e)}"
@@ -60,12 +50,12 @@ class UltravoxInterface:
60
  def create_interface(self):
61
  """Create and configure the Gradio interface"""
62
 
63
- with gr.Blocks(title="Ultravox Voice Assistant", theme=gr.themes.Soft(
64
  primary_hue="orange",
65
  secondary_hue="gray",
66
  )) as interface:
67
- gr.Markdown("# 🎙️ Ultravox Voice Assistant")
68
- gr.Markdown("Speak into the microphone and get AI-generated responses!")
69
 
70
  with gr.Row():
71
  with gr.Column():
@@ -75,12 +65,6 @@ class UltravoxInterface:
75
  type="filepath"
76
  )
77
 
78
- system_prompt = gr.Textbox(
79
- label="System Prompt (Optional)",
80
- placeholder="Enter custom system prompt or leave empty for default",
81
- value=self.default_prompt
82
- )
83
-
84
  submit_btn = gr.Button(
85
  "Process Audio",
86
  variant="primary"
@@ -88,26 +72,26 @@ class UltravoxInterface:
88
 
89
  with gr.Column():
90
  output_text = gr.Textbox(
91
- label="AI Response",
92
  lines=5,
93
- placeholder="AI response will appear here..."
94
  )
95
 
96
  submit_btn.click(
97
  fn=self.process_audio,
98
- inputs=[audio_input, system_prompt],
99
  outputs=output_text
100
  )
101
 
102
  gr.Markdown("""
103
  ## How to use:
104
  1. Click the microphone icon and allow browser access
105
- 2. Speak your question or prompt
106
  3. Click 'Stop' when finished
107
- 4. Click 'Process Audio' to get the AI response
108
 
109
  ## Note:
110
- First-time loading may take a few minutes as the model is downloaded.
111
  """)
112
 
113
  return interface
 
 
1
  import gradio as gr
2
  import torch
3
  import transformers
 
7
 
8
class UltravoxInterface:
    """Gradio front-end around a small Whisper speech-recognition pipeline."""

    def __init__(self):
        """Initialize the ASR pipeline with a small memory footprint.

        Loads ``openai/whisper-small`` instead of the full Ultravox model to
        keep memory usage low. Half precision is requested only when a CUDA
        device is available: the original code always passed
        ``torch.float16``, which breaks or degrades CPU inference when the
        ``device`` fallback (-1) is taken.
        """
        print("Initializing voice interface...")

        # Smaller Whisper checkpoint instead of the full Ultravox model.
        self.model_name = "openai/whisper-small"
        use_cuda = torch.cuda.is_available()
        self.pipe = transformers.pipeline(
            "automatic-speech-recognition",
            model=self.model_name,
            # BUG FIX: use half precision only on GPU; fp16 on CPU is not
            # supported by the Whisper pipeline.
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            device=0 if use_cuda else -1,
        )
        print("Model loaded successfully!")
 
23
 
24
  def process_audio(self, audio_path, custom_prompt=None):
25
+ """Process audio with optimized memory usage"""
26
  try:
27
+ if audio_path is None:
28
+ return "Please provide an audio input."
29
+
30
+ # Load audio in chunks to save memory
31
+ audio, sr = librosa.load(audio_path, sr=16000, mono=True)
 
 
 
 
 
32
 
33
+ # Process audio in smaller segments if needed
34
+ max_length = 30 * sr # 30 seconds chunks
35
+ if len(audio) > max_length:
36
+ segments = []
37
+ for i in range(0, len(audio), max_length):
38
+ segment = audio[i:i + max_length]
39
+ result = self.pipe(segment, batch_size=1)
40
+ segments.append(result["text"])
41
+ return " ".join(segments)
42
 
43
+ # Process shorter audio directly
44
+ result = self.pipe(audio, batch_size=1)
45
+ return result["text"]
 
 
 
 
 
 
46
 
47
  except Exception as e:
48
  return f"Error processing audio: {str(e)}"
 
50
  def create_interface(self):
51
  """Create and configure the Gradio interface"""
52
 
53
+ with gr.Blocks(title="Voice Assistant", theme=gr.themes.Soft(
54
  primary_hue="orange",
55
  secondary_hue="gray",
56
  )) as interface:
57
+ gr.Markdown("# 🎙️ Voice Assistant")
58
+ gr.Markdown("Speak into the microphone and get text transcription!")
59
 
60
  with gr.Row():
61
  with gr.Column():
 
65
  type="filepath"
66
  )
67
 
 
 
 
 
 
 
68
  submit_btn = gr.Button(
69
  "Process Audio",
70
  variant="primary"
 
72
 
73
  with gr.Column():
74
  output_text = gr.Textbox(
75
+ label="Transcription",
76
  lines=5,
77
+ placeholder="Transcription will appear here..."
78
  )
79
 
80
  submit_btn.click(
81
  fn=self.process_audio,
82
+ inputs=[audio_input],
83
  outputs=output_text
84
  )
85
 
86
  gr.Markdown("""
87
  ## How to use:
88
  1. Click the microphone icon and allow browser access
89
+ 2. Speak your message
90
  3. Click 'Stop' when finished
91
+ 4. Click 'Process Audio' to get the transcription
92
 
93
  ## Note:
94
+ Optimized for short audio clips (up to 30 seconds).
95
  """)
96
 
97
  return interface