shukdevdatta123 committed on
Commit
f386ba9
·
verified ·
1 Parent(s): da6faec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -20
app.py CHANGED
@@ -2,7 +2,6 @@ import gradio as gr
2
  import openai
3
  import fitz # PyMuPDF for PDF processing
4
  import base64
5
- import soundfile as sf
6
  import io
7
 
8
  # Variable to store API key
@@ -95,28 +94,27 @@ def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
95
  ]
96
  return query_openai(messages, temperature, top_p, max_output_tokens)
97
 
98
- # Function to process uploaded audio and transcribe to text
99
- def transcribe_audio(audio_file):
100
- if audio_file is None:
101
- return "Please upload an audio file."
 
 
102
 
103
  try:
104
- # Load the audio file
105
- audio_data, samplerate = sf.read(io.BytesIO(audio_file))
106
- audio_buffer = io.BytesIO()
107
- sf.write(audio_buffer, audio_data, samplerate, format='WAV')
108
- audio_buffer.seek(0)
109
-
110
- # Transcribe the audio using OpenAI's Whisper API
111
- transcript = openai.Audio.transcribe(
112
- model="whisper-1",
113
- file=audio_buffer
114
- )
115
-
116
- return transcript["text"]
117
 
 
 
 
118
  except Exception as e:
119
- return f"Error in transcription: {str(e)}"
120
 
121
  # Function to clear the chat (Fix: Returns the correct number of outputs)
122
  def clear_chat():
@@ -184,7 +182,14 @@ with gr.Blocks() as demo:
184
  text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
185
  image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
186
  pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
187
- audio_button.click(lambda audio, query, temperature, top_p, max_output_tokens: query_openai([{"role": "user", "content": [{"type": "text", "text": transcribe_audio(audio)}, {"type": "text", "text": query}]}], temperature, top_p, max_output_tokens), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output)
 
 
 
 
 
 
 
188
 
189
  # Fix: Clear button resets all necessary fields correctly
190
  clear_button.click(
 
2
  import openai
3
  import fitz # PyMuPDF for PDF processing
4
  import base64
 
5
  import io
6
 
7
  # Variable to store API key
 
94
  ]
95
  return query_openai(messages, temperature, top_p, max_output_tokens)
96
 
97
# Function to transcribe audio to text using OpenAI Whisper API
def transcribe_audio(audio, openai_api_key):
    """Transcribe an audio file to text with OpenAI's ``whisper-1`` model.

    Args:
        audio: Filesystem path of the audio file to transcribe.
        openai_api_key: OpenAI API key; it is stored on ``openai.api_key``
            before the request is made.

    Returns:
        The transcribed text on success. On failure a human-readable string
        starting with ``"Error"`` is returned instead of raising, so the
        result can always be shown directly in the UI.
    """
    import os  # local import: only needed to derive the upload's filename

    if not openai_api_key:
        return "Error: No API key provided."

    # Fail early with a clear message instead of surfacing the TypeError
    # that open(None) would otherwise produce.
    if not audio:
        return "Error: No audio file provided."

    openai.api_key = openai_api_key

    try:
        # Read the file fully so the handle is closed before the network call.
        with open(audio, "rb") as audio_file:
            audio_buffer = io.BytesIO(audio_file.read())

        # The upload needs a filename. Keep the real one so its extension
        # matches the actual format (mp3, m4a, ...); fall back to the
        # previous hard-coded name only when the path has no extension.
        upload_name = os.path.basename(audio)
        if not os.path.splitext(upload_name)[1]:
            upload_name = "audio.wav"
        audio_buffer.name = upload_name

        # Transcribe the audio to text using OpenAI's whisper model
        transcription = openai.Audio.transcribe(file=audio_buffer, model="whisper-1")
        return transcription.text
    except Exception as e:
        # Boundary handler: report any I/O or API failure as text for the UI.
        return f"Error transcribing audio: {str(e)}"
118
 
119
  # Function to clear the chat (Fix: Returns the correct number of outputs)
120
  def clear_chat():
 
182
  text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
183
  image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
184
  pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
185
+
186
+ # For Voice Chat
187
+ audio_button.click(
188
+ lambda audio, query, temperature, top_p, max_output_tokens: query_openai(
189
+ [{"role": "user", "content": [{"type": "text", "text": transcribe_audio(audio.name, api_key)}, {"type": "text", "text": query}]}],
190
+ temperature, top_p, max_output_tokens
191
+ ), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output
192
+ )
193
 
194
  # Fix: Clear button resets all necessary fields correctly
195
  clear_button.click(