shukdevdatta123 committed on
Commit
f2955b8
·
verified ·
1 Parent(s): a56f35d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -24
app.py CHANGED
@@ -104,16 +104,19 @@ def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
104
  return f"Error processing the PDF: {str(e)}"
105
 
106
  # Function to transcribe audio to text using OpenAI Whisper API
107
- def transcribe_audio(audio_filepath, openai_api_key):
108
  if not openai_api_key:
109
  return "Error: No API key provided."
110
 
111
  openai.api_key = openai_api_key
112
 
113
  try:
114
- # Open the audio file and transcribe using OpenAI's Whisper model
115
- with open(audio_filepath, "rb") as audio_file:
116
- audio_file_transcription = openai.Audio.transcribe(file=audio_file, model="whisper-1")
 
 
 
117
  return audio_file_transcription.text
118
  except Exception as e:
119
  return f"Error transcribing audio: {str(e)}"
@@ -170,10 +173,7 @@ with gr.Blocks() as demo:
170
  pdf_button = gr.Button("Ask")
171
 
172
  with gr.Tab("Voice Chat"):
173
- # Record Audio Component for Voice Chat
174
- audio_record = gr.Audio(label="Record your Voice", type="filepath", show_label=True)
175
- # Upload Audio File Component
176
- audio_upload = gr.File(label="Or Upload an Audio File", type="file", file_types=["audio/wav", "audio/mp3"])
177
  audio_query = gr.Textbox(label="Ask about the transcription")
178
  audio_output = gr.Textbox(label="Response", interactive=False)
179
  audio_button = gr.Button("Ask")
@@ -188,27 +188,18 @@ with gr.Blocks() as demo:
188
  image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
189
  pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
190
 
191
- # For Voice Chat (record or upload audio and process query)
192
- def process_audio(audio, query, temperature, top_p, max_output_tokens):
193
- # Check if audio is recorded or uploaded
194
- if audio is None:
195
- return "Please either record or upload an audio file."
196
-
197
- # Process the audio (either from recording or upload)
198
- transcription = transcribe_audio(audio.name, api_key)
199
- if transcription.startswith("Error"):
200
- return transcription # Return transcription error
201
- return query_openai(
202
- [{"role": "user", "content": [{"type": "text", "text": transcription}, {"type": "text", "text": query}]}],
203
  temperature, top_p, max_output_tokens
204
- )
205
-
206
- audio_button.click(process_audio, [audio_record, audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output)
207
 
208
  # Fix: Clear button resets all necessary fields correctly
209
  clear_button.click(
210
  clear_chat,
211
- outputs=[
212
  image_url, image_query, image_url_output,
213
  text_query, text_output,
214
  image_text_query, image_output,
 
104
  return f"Error processing the PDF: {str(e)}"
105
 
106
  # Function to transcribe audio to text using OpenAI Whisper API
107
+ def transcribe_audio(audio_binary, openai_api_key):
108
  if not openai_api_key:
109
  return "Error: No API key provided."
110
 
111
  openai.api_key = openai_api_key
112
 
113
  try:
114
+ # Use the correct transcription API call
115
+ audio_file_obj = io.BytesIO(audio_binary)
116
+ audio_file_obj.name = 'audio.wav' # Set a name for the file object (as OpenAI expects it)
117
+
118
+ # Transcribe the audio to text using OpenAI's whisper model
119
+ audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
120
  return audio_file_transcription.text
121
  except Exception as e:
122
  return f"Error transcribing audio: {str(e)}"
 
173
  pdf_button = gr.Button("Ask")
174
 
175
  with gr.Tab("Voice Chat"):
176
+ audio_upload = gr.File(label="Upload an Audio File", type="binary")
 
 
 
177
  audio_query = gr.Textbox(label="Ask about the transcription")
178
  audio_output = gr.Textbox(label="Response", interactive=False)
179
  audio_button = gr.Button("Ask")
 
188
  image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
189
  pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
190
 
191
+ # For Voice Chat
192
+ audio_button.click(
193
+ lambda audio_binary, query, temperature, top_p, max_output_tokens: query_openai(
194
+ [{"role": "user", "content": [{"type": "text", "text": transcribe_audio(audio_binary, api_key)}, {"type": "text", "text": query}]}],
 
 
 
 
 
 
 
 
195
  temperature, top_p, max_output_tokens
196
+ ), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output
197
+ )
 
198
 
199
  # Fix: Clear button resets all necessary fields correctly
200
  clear_button.click(
201
  clear_chat,
202
+ outputs=[
203
  image_url, image_query, image_url_output,
204
  text_query, text_output,
205
  image_text_query, image_output,