Update app.py
app.py CHANGED
@@ -133,25 +133,15 @@ pipe_asr = pipeline(
 )
 
 # Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
-def transcribe_and_respond(stream, new_chunk):
-
-
-    except TypeError:
-        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
-        return stream, "", None
-
-    y = y.astype(np.float32)
-    max_abs_y = np.max(np.abs(y))
-    if max_abs_y > 0:
-        y = y / max_abs_y
+def transcribe_and_respond(audio):
+    if audio is None:
+        return None, "No audio provided."
 
-
-
-    else:
-        stream = y
+    sr, y = audio
+    y = np.array(y).astype(np.float32)
 
     # Transcribe the audio using Whisper
-    result = pipe_asr({"array":
+    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
     question = result.get("text", "")
 
     # Retrieve information from Neo4j
@@ -160,20 +150,15 @@ def transcribe_and_respond(stream, new_chunk):
     # Convert the response to audio using Eleven Labs TTS
     audio_path = generate_audio_elevenlabs(response_text) if response_text else None
 
-    return
+    return audio_path, response_text
 
-# Function to clear the transcription state
-def clear_transcription_state():
-    return None, "", None
 
 # Define the Gradio interface with only audio input and output
 with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     with gr.Row():
         audio_input = gr.Audio(
             sources=["microphone"],
-            streaming=True,
             type='numpy',
-            every=0.1,
             label="Speak to Ask"
         )
         audio_output = gr.Audio(
@@ -183,19 +168,18 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
             interactive=False
         )
 
-    #
-
-
-        transcribe_and_respond,
-        inputs=
-        outputs=[
-        api_name="api_voice_to_neo4j_response"
+    # Submit button to process the audio input
+    submit_btn = gr.Button("Submit")
+    submit_btn.click(
+        fn=transcribe_and_respond,
+        inputs=audio_input,
+        outputs=[audio_output, gr.Textbox(label="Transcription")]
     )
 
     # Clear state interaction
     gr.Button("Clear State").click(
         fn=clear_transcription_state,
-        outputs=[
+        outputs=[audio_output],
         api_name="api_clean_state"
     )
 
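For context, a runnable sketch of the reworked handler. The Neo4j retrieval and ElevenLabs helpers are stubbed (get_response_from_neo4j is a hypothetical name; only the "# Retrieve information from Neo4j" comment and the response_text variable appear in the diff), the Whisper model name is an assumption, and the old max-abs normalization is restored as a judgment call, since the commit drops it while Whisper expects waveforms roughly in [-1, 1]:

import numpy as np
from transformers import pipeline

# Model name is an assumption; the diff only shows that pipe_asr is an ASR pipeline.
pipe_asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")

def get_response_from_neo4j(question):
    # Hypothetical stand-in for the app's Neo4j retrieval step (not shown in the diff).
    return f"You asked: {question}"

def generate_audio_elevenlabs(text):
    # Stub for the app's existing ElevenLabs TTS helper, which returns an audio file path.
    return None

def transcribe_and_respond(audio):
    if audio is None:
        return None, "No audio provided."

    # Gradio's type='numpy' microphone input arrives as a (sample_rate, samples) tuple.
    sr, y = audio
    y = np.array(y).astype(np.float32)

    # The commit removes the old normalization, but int16 microphone samples likely
    # still need rescaling into [-1, 1] before Whisper sees them.
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y

    # The transformers ASR pipeline accepts {"array": ..., "sampling_rate": ...}.
    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
    question = result.get("text", "")

    response_text = get_response_from_neo4j(question)
    audio_path = generate_audio_elevenlabs(response_text) if response_text else None
    return audio_path, response_text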
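The commit also deletes the clear_transcription_state definition while the "Clear State" button still passes fn=clear_transcription_state, and the old three-value return no longer matches the single outputs=[audio_output]. A sketch of the Blocks wiring with a one-output helper re-added (an assumption; the definition may simply live elsewhere in app.py after this commit):

import gradio as gr

def clear_transcription_state():
    return None  # one value per output component: resets audio_output

with gr.Blocks(theme="rawrsor1/Everforest") as demo:
    with gr.Row():
        audio_input = gr.Audio(sources=["microphone"], type='numpy', label="Speak to Ask")
        audio_output = gr.Audio(interactive=False)
    transcript = gr.Textbox(label="Transcription")  # named instead of created inline

    submit_btn = gr.Button("Submit")
    submit_btn.click(
        fn=transcribe_and_respond,  # handler sketched above
        inputs=audio_input,
        outputs=[audio_output, transcript],
    )

    gr.Button("Clear State").click(
        fn=clear_transcription_state,
        outputs=[audio_output],
        api_name="api_clean_state",
    )

demo.launch()

Creating the Textbox inline inside .click(), as the commit does, renders it at that point in the Blocks layout; binding it to a name first keeps the layout explicit and lets other handlers target the same component.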