Somnath3570 committed on
Commit
2504ba2
·
verified ·
1 Parent(s): c397b91

Create ultravox_app.py

Browse files
Files changed (1) hide show
  1. ultravox_app.py +138 -0
ultravox_app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import transformers
4
+ import librosa
5
+ import numpy as np
6
+ import tempfile
7
+ import os
8
+
9
class UltravoxInterface:
    """Gradio front end around the Ultravox audio-in / text-out pipeline.

    The constructor loads the ``fixie-ai/ultravox-v0_4`` pipeline once;
    ``process_audio`` runs a single recording through it and
    ``create_interface`` builds the Gradio Blocks UI wired to that method.
    """

    def __init__(self):
        """Initialize the Ultravox model and settings"""
        print("Loading Ultravox model... This may take a few minutes...")
        self.pipe = transformers.pipeline(
            model='fixie-ai/ultravox-v0_4',
            trust_remote_code=True
        )
        print("Model loaded successfully!")

        # Default system prompt, used whenever the caller supplies none
        self.default_prompt = "You are a friendly and helpful character. You love to answer questions for people."

    @staticmethod
    def _extract_text(result):
        """Normalize a pipeline result (str, list, dict or other) to text.

        Lists are unwrapped to their first element, recursively, so a list
        of dicts yields the inner ``generated_text`` rather than a raw dict.
        """
        if isinstance(result, str):
            return result
        if isinstance(result, list):
            if not result:
                return "No response generated"
            return UltravoxInterface._extract_text(result[0])
        if isinstance(result, dict):
            return result.get('generated_text', "No response generated")
        return str(result)

    def process_audio(self, audio_path, custom_prompt=None, max_new_tokens=30):
        """
        Process audio input and return model response

        Args:
            audio_path: Path to the audio file. ``None`` or empty means
                nothing was recorded; a short notice is returned instead
                of calling the model.
            custom_prompt: Optional custom system prompt. Empty or
                whitespace-only values fall back to the default prompt.
            max_new_tokens: Generation cap forwarded to the pipeline.

        Returns:
            The model response as text, or a message starting with
            "Error processing audio:" if loading or inference failed.
        """
        if not audio_path:
            return "No audio received. Please record a clip before clicking 'Process Audio'."

        # Bound before the try block: the except handler reports its type,
        # and a failure before the pipeline call (e.g. while loading the
        # file) would otherwise raise UnboundLocalError inside the handler.
        result = None
        try:
            # Load and preprocess audio at the 16 kHz rate passed on below
            audio, sr = librosa.load(audio_path, sr=16000)

            # Prepare conversation turns
            has_custom_prompt = bool(custom_prompt and custom_prompt.strip())
            turns = [
                {
                    "role": "system",
                    "content": custom_prompt if has_custom_prompt else self.default_prompt
                }
            ]

            # Get model response
            result = self.pipe(
                {
                    'audio': audio,
                    'turns': turns,
                    'sampling_rate': sr
                },
                max_new_tokens=max_new_tokens
            )

            # The output format changed in v0_4 - handle every known shape
            return self._extract_text(result)

        except Exception as e:
            # UI boundary: report the failure as text instead of raising
            return f"Error processing audio: {str(e)}\nType of result: {type(result)}"

    def create_interface(self):
        """Create and configure the Gradio interface.

        Returns:
            The ``gr.Blocks`` instance; the caller is responsible for
            launching it.
        """

        with gr.Blocks(title="Ultravox Voice Interface") as interface:
            gr.Markdown("# 🎙️ Ultravox Voice Assistant")
            gr.Markdown("Speak into the microphone and get AI-generated responses!")

            with gr.Row():
                with gr.Column():
                    # Microphone input, handed to process_audio as a file path
                    audio_input = gr.Audio(
                        label="Speak here",
                        sources=["microphone"],  # Changed from source to sources
                        type="filepath"
                    )

                    # Optional system prompt, pre-filled with the default
                    system_prompt = gr.Textbox(
                        label="System Prompt (Optional)",
                        placeholder="Enter custom system prompt or leave empty for default",
                        value=self.default_prompt
                    )

                    # Submit button
                    submit_btn = gr.Button("Process Audio", variant="primary")

                with gr.Column():
                    # Output text area
                    output_text = gr.Textbox(
                        label="AI Response",
                        lines=5,
                        placeholder="AI response will appear here..."
                    )

            # Handle submission
            submit_btn.click(
                fn=self.process_audio,
                inputs=[audio_input, system_prompt],
                outputs=output_text
            )

            # Example usage instructions
            gr.Markdown("""
            ## How to use:
            1. Click the microphone icon and allow browser access
            2. Speak your question or prompt
            3. Click 'Stop' when finished
            4. Click 'Process Audio' to get the AI response

            ## Requirements:
            - GPU with 24GB+ VRAM recommended
            - Working microphone
            - Stable internet connection

            ## Note:
            First-time loading may take a few minutes as the model is downloaded.
            """)

        return interface
124
+
125
def main():
    """Build the Ultravox app and serve its Gradio UI.

    Constructs ``UltravoxInterface`` (which loads the model), creates the
    Blocks UI and launches it on every network interface, port 7860, with
    Gradio sharing switched on.
    """
    launch_options = {
        "share": True,             # Enable sharing via Gradio
        "server_name": "0.0.0.0",  # Make available on all network interfaces
        "server_port": 7860,       # Default Gradio port
    }

    demo = UltravoxInterface().create_interface()
    demo.launch(**launch_options)


if __name__ == "__main__":
    main()