Update app.py
Browse files
app.py
CHANGED
@@ -104,16 +104,19 @@ def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
|
|
104 |
return f"Error processing the PDF: {str(e)}"
|
105 |
|
106 |
# Function to transcribe audio to text using OpenAI Whisper API
|
107 |
-
def transcribe_audio(
|
108 |
if not openai_api_key:
|
109 |
return "Error: No API key provided."
|
110 |
|
111 |
openai.api_key = openai_api_key
|
112 |
|
113 |
try:
|
114 |
-
#
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
117 |
return audio_file_transcription.text
|
118 |
except Exception as e:
|
119 |
return f"Error transcribing audio: {str(e)}"
|
@@ -170,10 +173,7 @@ with gr.Blocks() as demo:
|
|
170 |
pdf_button = gr.Button("Ask")
|
171 |
|
172 |
with gr.Tab("Voice Chat"):
|
173 |
-
|
174 |
-
audio_record = gr.Audio(label="Record your Voice", type="filepath", show_label=True)
|
175 |
-
# Upload Audio File Component
|
176 |
-
audio_upload = gr.File(label="Or Upload an Audio File", type="file", file_types=["audio/wav", "audio/mp3"])
|
177 |
audio_query = gr.Textbox(label="Ask about the transcription")
|
178 |
audio_output = gr.Textbox(label="Response", interactive=False)
|
179 |
audio_button = gr.Button("Ask")
|
@@ -188,27 +188,18 @@ with gr.Blocks() as demo:
|
|
188 |
image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
|
189 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
190 |
|
191 |
-
# For Voice Chat
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
return "Please either record or upload an audio file."
|
196 |
-
|
197 |
-
# Process the audio (either from recording or upload)
|
198 |
-
transcription = transcribe_audio(audio.name, api_key)
|
199 |
-
if transcription.startswith("Error"):
|
200 |
-
return transcription # Return transcription error
|
201 |
-
return query_openai(
|
202 |
-
[{"role": "user", "content": [{"type": "text", "text": transcription}, {"type": "text", "text": query}]}],
|
203 |
temperature, top_p, max_output_tokens
|
204 |
-
)
|
205 |
-
|
206 |
-
audio_button.click(process_audio, [audio_record, audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output)
|
207 |
|
208 |
# Fix: Clear button resets all necessary fields correctly
|
209 |
clear_button.click(
|
210 |
clear_chat,
|
211 |
-
outputs=[
|
212 |
image_url, image_query, image_url_output,
|
213 |
text_query, text_output,
|
214 |
image_text_query, image_output,
|
|
|
104 |
return f"Error processing the PDF: {str(e)}"
|
105 |
|
106 |
# Function to transcribe audio to text using OpenAI Whisper API
|
107 |
+
def transcribe_audio(audio_binary, openai_api_key):
    """Transcribe raw audio bytes to text using OpenAI's ``whisper-1`` model.

    Args:
        audio_binary: The audio content as bytes (the voice tab's file
            component is created with ``type="binary"``).
        openai_api_key: API key assigned to ``openai.api_key`` before the call.

    Returns:
        The transcription text on success. On failure a human-readable string
        is returned instead of raising:
        ``"Error: No API key provided."`` when the key is empty,
        ``"Error: No audio file provided."`` when no audio bytes were given,
        or ``"Error transcribing audio: ..."`` when the API call fails.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    # Reject a missing/empty upload up front: io.BytesIO(None) and
    # io.BytesIO(b"") both yield a zero-byte buffer, which would otherwise be
    # sent to the API and come back as an opaque error.
    if not audio_binary:
        return "Error: No audio file provided."

    # NOTE: this sets the key on the openai module itself (module-wide state).
    openai.api_key = openai_api_key

    try:
        # Wrap the bytes in an in-memory file object for the API client.
        audio_file_obj = io.BytesIO(audio_binary)
        # Give the file object a name; the original code notes OpenAI expects one.
        audio_file_obj.name = 'audio.wav'

        # Transcribe the audio to text using OpenAI's whisper model.
        audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
        return audio_file_transcription.text
    except Exception as e:
        # Best-effort boundary: surface any failure as text for the UI.
        return f"Error transcribing audio: {str(e)}"
|
|
|
173 |
pdf_button = gr.Button("Ask")
|
174 |
|
175 |
with gr.Tab("Voice Chat"):
|
176 |
+
audio_upload = gr.File(label="Upload an Audio File", type="binary")
|
|
|
|
|
|
|
177 |
audio_query = gr.Textbox(label="Ask about the transcription")
|
178 |
audio_output = gr.Textbox(label="Response", interactive=False)
|
179 |
audio_button = gr.Button("Ask")
|
|
|
188 |
image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
|
189 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
190 |
|
191 |
+
# For Voice Chat
|
192 |
+
def _voice_chat(audio_binary, query, temperature, top_p, max_output_tokens):
    """Transcribe the uploaded audio, then ask the model about it.

    Returns the transcription error string unchanged when transcription
    fails, so the error is shown to the user rather than being sent to the
    model as if it were the transcript. Otherwise returns the result of
    ``query_openai`` for the transcript plus the user's question.
    """
    # `api_key` is resolved from the enclosing scope at call time, exactly as
    # the original lambda did.
    transcription = transcribe_audio(audio_binary, api_key)
    # transcribe_audio reports failures as strings beginning with "Error".
    # NOTE(review): a genuine transcript that starts with "Error" would also
    # be short-circuited here — same trade-off as the earlier process_audio.
    if transcription.startswith("Error"):
        return transcription
    return query_openai(
        [{"role": "user", "content": [{"type": "text", "text": transcription}, {"type": "text", "text": query}]}],
        temperature, top_p, max_output_tokens
    )

audio_button.click(
    _voice_chat,
    [audio_upload, audio_query, temperature, top_p, max_output_tokens],
    audio_output,
)
|
|
|
198 |
|
199 |
# Fix: Clear button resets all necessary fields correctly
|
200 |
clear_button.click(
|
201 |
clear_chat,
|
202 |
+
outputs=[
|
203 |
image_url, image_query, image_url_output,
|
204 |
text_query, text_output,
|
205 |
image_text_query, image_output,
|