Somnath3570 committed
Commit 04cf931 · verified · 1 Parent(s): 3e435ed

Update app.py

Files changed (1): app.py (+30, -48)
app.py CHANGED
@@ -10,13 +10,13 @@ class UltravoxInterface:
         """Initialize with smaller model footprint"""
         print("Initializing voice interface...")
 
-        # Use smaller whisper model instead of full Ultravox
+        # Use smaller whisper model
         self.model_name = "openai/whisper-small"
         self.pipe = transformers.pipeline(
             "automatic-speech-recognition",
             model=self.model_name,
-            torch_dtype=torch.float16,  # Use half precision
-            device=0 if torch.cuda.is_available() else -1
+            torch_dtype=torch.float16,
+            device="cpu"  # Explicitly set to CPU
         )
 
         print("Model loaded successfully!")
@@ -50,51 +50,33 @@ class UltravoxInterface:
     def create_interface(self):
         """Create and configure the Gradio interface"""
 
-        with gr.Blocks(title="Voice Assistant", theme=gr.themes.Soft(
-            primary_hue="orange",
-            secondary_hue="gray",
-        )) as interface:
-            gr.Markdown("# 🎙️ Voice Assistant")
-            gr.Markdown("Speak into the microphone and get text transcription!")
-
-            with gr.Row():
-                with gr.Column():
-                    audio_input = gr.Audio(
-                        label="Speak here",
-                        sources=["microphone"],
-                        type="filepath"
-                    )
-
-                    submit_btn = gr.Button(
-                        "Process Audio",
-                        variant="primary"
-                    )
-
-                with gr.Column():
-                    output_text = gr.Textbox(
-                        label="Transcription",
-                        lines=5,
-                        placeholder="Transcription will appear here..."
-                    )
-
-            submit_btn.click(
-                fn=self.process_audio,
-                inputs=[audio_input],
-                outputs=output_text
-            )
-
-            gr.Markdown("""
-            ## How to use:
-            1. Click the microphone icon and allow browser access
-            2. Speak your message
-            3. Click 'Stop' when finished
-            4. Click 'Process Audio' to get the transcription
-
-            ## Note:
-            Optimized for short audio clips (up to 30 seconds).
-            """)
-
+        interface = gr.Interface(
+            fn=self.process_audio,
+            inputs=[
+                gr.Audio(
+                    label="Speak here",
+                    sources=["microphone"],
+                    type="filepath"
+                )
+            ],
+            outputs=[
+                gr.Textbox(
+                    label="Transcription",
+                    lines=5,
+                    placeholder="Transcription will appear here..."
+                )
+            ],
+            title="Voice Assistant",
+            description="Speak into the microphone and get text transcription!",
+            theme=gr.themes.Soft(primary_hue="orange"),
+            examples=[[None]],
+        )
+
         return interface
 
+# Create the interface
 app = UltravoxInterface()
-interface = app.create_interface()
+interface = app.create_interface()
+
+# Launch the interface - this is crucial for Hugging Face Spaces
+interface.launch()
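
For reference, a minimal sketch of the pipeline setup this commit converges on, runnable outside the Space (the sample filename is a placeholder; everything else mirrors the diff):

import torch
import transformers

# Mirrors the post-commit configuration: whisper-small, half precision, CPU.
pipe = transformers.pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    torch_dtype=torch.float16,
    device="cpu",
)

# The Gradio component hands the callback a filepath, so the pipeline
# is invoked the same way here.
result = pipe("recording.wav")  # placeholder path, not from the commit
print(result["text"])

One caveat on the design choice: PyTorch's float16 kernel coverage on CPU is limited, so combining torch_dtype=torch.float16 with device="cpu" can raise dtype errors or run slowly for some models; if that happens, torch.float32 is the safer CPU default.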
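
The interface wires its callback to self.process_audio, which this diff does not show. A hypothetical body consistent with the type="filepath" audio input might look like the following (the method name and signature come from the diff; the implementation is an assumption, not the committed code):

    def process_audio(self, audio_path):
        """Transcribe a recorded clip. Hypothetical body, not part of this commit."""
        if audio_path is None:
            return "No audio received. Please record something first."
        # gr.Audio(type="filepath") passes the callback a path to a temporary
        # audio file, which the ASR pipeline accepts directly.
        result = self.pipe(audio_path)
        return result["text"]

As the committed comment notes, the trailing interface.launch() is what actually starts the Gradio server when Hugging Face Spaces runs app.py; constructing the Interface alone does not serve anything.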