Somnath3570 committed on
Commit
f5d5522
·
verified ·
1 Parent(s): 77b0422

Delete ultravox_app.py

Browse files
Files changed (1) hide show
  1. ultravox_app.py +0 -138
ultravox_app.py DELETED
@@ -1,138 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- import transformers
4
- import librosa
5
- import numpy as np
6
- import tempfile
7
- import os
8
-
9
class UltravoxInterface:
    """Gradio front end around the ``fixie-ai/ultravox-v0_4`` pipeline.

    The model is loaded once in ``__init__``; ``process_audio`` runs a single
    recorded clip through it and ``create_interface`` builds the web UI that
    calls ``process_audio``.
    """

    def __init__(self):
        """Initialize the Ultravox model and settings"""
        print("Loading Ultravox model... This may take a few minutes...")
        self.pipe = transformers.pipeline(
            model='fixie-ai/ultravox-v0_4',
            trust_remote_code=True
        )
        print("Model loaded successfully!")

        # Default system prompt
        self.default_prompt = "You are a friendly and helpful character. You love to answer questions for people."

    @staticmethod
    def _extract_text(result):
        """Normalize a pipeline result (str, list, dict, or other) to text."""
        if isinstance(result, str):
            return result
        if isinstance(result, list):
            # Normalize the first element too, so a list of dicts does not
            # leak a raw dict into the output textbox.
            if not result:
                return "No response generated"
            return UltravoxInterface._extract_text(result[0])
        if isinstance(result, dict):
            return result.get('generated_text', "No response generated")
        return str(result)

    def process_audio(self, audio_path, custom_prompt=None):
        """
        Process audio input and return model response

        Args:
            audio_path: Path to the audio file, or None when nothing was
                recorded.
            custom_prompt: Optional custom system prompt. Empty or
                whitespace-only values fall back to the default prompt.

        Returns:
            The model's reply as text, or a human-readable error message if
            loading the audio or running the model fails.
        """
        if audio_path is None:
            # The button was clicked before any audio was recorded.
            return "No audio provided. Please record audio before clicking 'Process Audio'."

        # Bound before the try block so the error handler below can always
        # report its type, even when the failure happens before the model
        # is called.
        result = None
        try:
            # Load and preprocess audio
            audio, sr = librosa.load(audio_path, sr=16000)

            if custom_prompt and str(custom_prompt).strip():
                system_prompt = custom_prompt
            else:
                system_prompt = self.default_prompt

            # Prepare conversation turns
            turns = [
                {
                    "role": "system",
                    "content": system_prompt
                }
            ]

            # Get model response
            result = self.pipe(
                {
                    'audio': audio,
                    'turns': turns,
                    'sampling_rate': sr
                },
                max_new_tokens=30
            )

            # The output format changed in v0_4 - handle every known shape
            return self._extract_text(result)

        except Exception as e:
            # UI boundary: report the failure in the output box instead of
            # letting the exception escape the click handler.
            return f"Error processing audio: {str(e)}\nType of result: {type(result)}"

    def create_interface(self):
        """Create and configure the Gradio interface

        Returns:
            The ``gr.Blocks`` app, not yet launched.
        """

        with gr.Blocks(title="Ultravox Voice Interface") as interface:
            gr.Markdown("# 🎙️ Ultravox Voice Assistant")
            gr.Markdown("Speak into the microphone and get AI-generated responses!")

            with gr.Row():
                with gr.Column():
                    # Updated Audio input component
                    audio_input = gr.Audio(
                        label="Speak here",
                        sources=["microphone"],  # Changed from source to sources
                        type="filepath"
                    )

                    # Optional system prompt
                    system_prompt = gr.Textbox(
                        label="System Prompt (Optional)",
                        placeholder="Enter custom system prompt or leave empty for default",
                        value=self.default_prompt
                    )

                    # Submit button
                    submit_btn = gr.Button("Process Audio", variant="primary")

                with gr.Column():
                    # Output text area
                    output_text = gr.Textbox(
                        label="AI Response",
                        lines=5,
                        placeholder="AI response will appear here..."
                    )

            # Handle submission
            submit_btn.click(
                fn=self.process_audio,
                inputs=[audio_input, system_prompt],
                outputs=output_text
            )

            # Example usage instructions
            gr.Markdown("""
            ## How to use:
            1. Click the microphone icon and allow browser access
            2. Speak your question or prompt
            3. Click 'Stop' when finished
            4. Click 'Process Audio' to get the AI response

            ## Requirements:
            - GPU with 24GB+ VRAM recommended
            - Working microphone
            - Stable internet connection

            ## Note:
            First-time loading may take a few minutes as the model is downloaded.
            """)

        return interface
124
-
125
def main():
    """Load the Ultravox model, build the Gradio UI, and start serving it."""
    launch_options = {
        "share": True,             # Enable sharing via Gradio
        "server_name": "0.0.0.0",  # Make available on all network interfaces
        "server_port": 7860,       # Default Gradio port
    }

    # Constructing the wrapper loads the model before the UI is built.
    ultravox = UltravoxInterface()
    ultravox.create_interface().launch(**launch_options)


if __name__ == "__main__":
    main()