Somnath3570 committed on
Commit
c336d2f
·
verified ·
1 Parent(s): b3ce7b4

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import gradio as gr
3
+ import torch
4
+ import transformers
5
+ import librosa
6
+ import numpy as np
7
+ import os
8
+
9
class UltravoxInterface:
    """Gradio front end for the ``fixie-ai/ultravox-v0_4`` pipeline.

    The instance owns the loaded pipeline (``self.pipe``) and the default
    system prompt (``self.default_prompt``). ``process_audio`` is the
    callback wired to the "Process Audio" button built by
    ``create_interface``.
    """

    # Sampling rate the recording is resampled to before inference.
    SAMPLE_RATE = 16000

    # User-facing fallback messages.
    NO_AUDIO_MESSAGE = (
        "No audio received. Please record a message before clicking "
        "'Process Audio'."
    )
    NO_RESPONSE_MESSAGE = "No response generated"

    # Built from explicit lines so rendering does not depend on how the
    # Markdown component treats source-code indentation.
    HELP_MARKDOWN = "\n".join(
        [
            "## How to use:",
            "1. Click the microphone icon and allow browser access",
            "2. Speak your question or prompt",
            "3. Click 'Stop' when finished",
            "4. Click 'Process Audio' to get the AI response",
            "",
            "## Note:",
            "First-time loading may take a few minutes as the model is downloaded.",
        ]
    )

    def __init__(self):
        """Load the Ultravox pipeline and set the default system prompt.

        The pipeline is placed on CUDA device 0 when
        ``torch.cuda.is_available()`` is true, otherwise on the CPU (-1).
        """
        print("Loading Ultravox model... This may take a few minutes...")
        self.pipe = transformers.pipeline(
            model='fixie-ai/ultravox-v0_4',
            trust_remote_code=True,
            device=0 if torch.cuda.is_available() else -1,
        )
        print("Model loaded successfully!")

        # Default system prompt
        self.default_prompt = "You are a friendly and helpful character. You love to answer questions for people."

    @staticmethod
    def _response_to_text(result):
        """Reduce a pipeline result to the string shown in the output box.

        Strings are returned unchanged. For a list or tuple the first element
        is used, and for a dict the ``'generated_text'`` entry; both are
        normalised recursively so a list of dicts still yields text. Empty
        containers and missing or ``None`` entries yield
        ``NO_RESPONSE_MESSAGE``. Anything else is passed through ``str``.
        """
        fallback = UltravoxInterface.NO_RESPONSE_MESSAGE
        if isinstance(result, str):
            return result
        if isinstance(result, (list, tuple)):
            if not result:
                return fallback
            return UltravoxInterface._response_to_text(result[0])
        if isinstance(result, dict):
            generated = result.get('generated_text')
            if generated is None:
                return fallback
            return UltravoxInterface._response_to_text(generated)
        if result is None:
            return fallback
        return str(result)

    def process_audio(self, audio_path, custom_prompt=None, max_new_tokens=30):
        """Run the model on a recorded audio file and return its reply.

        Args:
            audio_path: Path of the recording. ``None`` or an empty value
                means nothing was recorded.
            custom_prompt: Optional system prompt. ``None``, empty, or
                whitespace-only values fall back to ``self.default_prompt``.
            max_new_tokens: Generation limit forwarded to the pipeline.

        Returns:
            The response text, or a human-readable message when there is no
            audio or processing fails. This method does not raise: it is a UI
            callback, so failures are reported as text.
        """
        # Clicking the button before recording delivers no path; answer with
        # a clear hint instead of a loader error.
        if not audio_path:
            return self.NO_AUDIO_MESSAGE

        # A prompt made only of whitespace is treated as "not provided".
        system_prompt = (custom_prompt or "").strip() or self.default_prompt

        try:
            # Load and resample the recording.
            audio, sr = librosa.load(audio_path, sr=self.SAMPLE_RATE)
            if len(audio) == 0:
                return self.NO_AUDIO_MESSAGE

            turns = [{"role": "system", "content": system_prompt}]

            result = self.pipe(
                {
                    'audio': audio,
                    'turns': turns,
                    'sampling_rate': sr,
                },
                max_new_tokens=max_new_tokens,
            )
        except Exception as e:
            # UI boundary: report the failure in the output box rather than
            # letting the callback raise.
            return f"Error processing audio: {str(e)}"

        return self._response_to_text(result)

    def create_interface(self):
        """Build and return the Gradio ``Blocks`` interface.

        Layout: a microphone input, a system-prompt box pre-filled with
        ``self.default_prompt`` and a submit button in the first column; the
        response text box in the second. The button is wired to
        ``process_audio``.
        """
        theme = gr.themes.Soft(
            primary_hue="orange",
            secondary_hue="gray",
        )

        with gr.Blocks(title="Ultravox Voice Assistant", theme=theme) as interface:
            gr.Markdown("# 🎙️ Ultravox Voice Assistant")
            gr.Markdown("Speak into the microphone and get AI-generated responses!")

            with gr.Row():
                with gr.Column():
                    audio_input = gr.Audio(
                        label="Speak here",
                        sources=["microphone"],
                        type="filepath",
                    )

                    system_prompt = gr.Textbox(
                        label="System Prompt (Optional)",
                        placeholder="Enter custom system prompt or leave empty for default",
                        value=self.default_prompt,
                    )

                    submit_btn = gr.Button(
                        "Process Audio",
                        variant="primary",
                    )

                with gr.Column():
                    output_text = gr.Textbox(
                        label="AI Response",
                        lines=5,
                        placeholder="AI response will appear here...",
                    )

            submit_btn.click(
                fn=self.process_audio,
                inputs=[audio_input, system_prompt],
                outputs=output_text,
            )

            gr.Markdown(self.HELP_MARKDOWN)

        return interface
114
+
115
# Build the model wrapper and the UI at import time so `app` and `interface`
# stay available as module-level names.
app = UltravoxInterface()
interface = app.create_interface()

# Start the web server only when this file is executed as a script; without
# this call the process would build the UI and exit without serving it.
if __name__ == "__main__":
    interface.launch()