Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -129,21 +129,40 @@ pipe_asr = pipeline(
|
|
129 |
# Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
|
130 |
def transcribe_and_respond(audio):
|
131 |
if audio is None:
|
|
|
132 |
return None, "No audio provided."
|
133 |
|
134 |
sr, y = audio
|
135 |
y = np.array(y).astype(np.float32)
|
136 |
|
137 |
-
#
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
question = result.get("text", "")
|
140 |
|
|
|
|
|
|
|
141 |
# Retrieve information from Neo4j
|
142 |
response_text = structured_retriever(question) if question else "I didn't understand the question."
|
143 |
|
144 |
# Convert the response to audio using Eleven Labs TTS
|
145 |
audio_path = generate_audio_elevenlabs(response_text) if response_text else None
|
146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
return audio_path, response_text
|
148 |
|
149 |
# Function to clear the transcription state
|
@@ -177,9 +196,9 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
|
|
177 |
# Clear state interaction
|
178 |
gr.Button("Clear State").click(
|
179 |
fn=clear_transcription_state,
|
180 |
-
outputs=[audio_output],
|
181 |
api_name="api_clean_state"
|
182 |
)
|
183 |
|
184 |
# Launch the Gradio interface
|
185 |
-
demo.launch(show_error=True, share=True)
|
|
|
129 |
# Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
def transcribe_and_respond(audio):
    """Handle one voice interaction end to end.

    Transcribes the recorded audio with the Whisper ASR pipeline, retrieves an
    answer from Neo4j via ``structured_retriever``, and synthesizes the answer
    to speech with Eleven Labs TTS.

    Args:
        audio: Gradio audio payload as a ``(sample_rate, samples)`` tuple, or
            ``None`` when nothing was recorded.

    Returns:
        tuple: ``(audio_path, response_text)`` where ``audio_path`` is the path
        of the generated speech file (``None`` on failure) and
        ``response_text`` is the textual answer.
    """
    if audio is None:
        logging.error("No audio provided.")
        return None, "No audio provided."

    sr, y = audio
    y = np.asarray(y, dtype=np.float32)

    # Guard against an empty recording: a reduction over a zero-size array
    # (np.max below) raises ValueError, which previously crashed the handler.
    if y.size == 0:
        logging.error("No audio provided.")
        return None, "No audio provided."

    # Peak-normalize to [-1, 1]; skip pure silence to avoid division by zero.
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y

    # Prepare input_features for the Whisper model.
    input_features = processor(y, sampling_rate=sr, return_tensors="pt").input_features

    # Transcribe the audio using Whisper with English forced.
    # NOTE(review): HF ASR pipelines normally take {"raw": ..., "sampling_rate": ...};
    # confirm pipe_asr really accepts this precomputed-features dict.
    result = pipe_asr({"input_features": input_features, "language": "en"},
                      return_timestamps=False)
    question = result.get("text", "")

    # Log the transcribed text for debugging
    logging.debug(f"Transcribed text: {question}")

    # Retrieve information from Neo4j
    response_text = structured_retriever(question) if question else "I didn't understand the question."

    # Convert the response to audio using Eleven Labs TTS
    audio_path = generate_audio_elevenlabs(response_text) if response_text else None

    # Only hand back a path that actually exists on disk.
    if audio_path and os.path.exists(audio_path):
        logging.debug(f"Generated audio file path: {audio_path}")
    else:
        logging.error("Failed to generate audio or save audio to file.")
        audio_path = None

    return audio_path, response_text
|
167 |
|
168 |
# Function to clear the transcription state
|
|
|
196 |
# Clear state interaction
|
197 |
gr.Button("Clear State").click(
|
198 |
fn=clear_transcription_state,
|
199 |
+
outputs=[audio_output, gr.Textbox(label="Transcription")],
|
200 |
api_name="api_clean_state"
|
201 |
)
|
202 |
|
203 |
# Launch the Gradio interface
|
204 |
+
demo.launch(show_error=True, share=True)
|