Spaces:

shukdevdatta123
/

GPT-4.5-Multimodal-Chatbot

Running

App Files Community

shukdevdatta123 committed on Mar 15

Commit

a56f35d

verified ·

1 Parent(s): b69685a

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -10

app.py CHANGED Viewed

@@ -104,19 +104,16 @@ def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
         return f"Error processing the PDF: {str(e)}"
 # Function to transcribe audio to text using OpenAI Whisper API
-def transcribe_audio(audio_binary, openai_api_key):
     # Pre-commit version: transcribe in-memory audio bytes with the
     # "whisper-1" model and return the `text` attribute of the API result.
     # Failures are returned as strings, not raised:
     #   - "Error: No API key provided." when `openai_api_key` is falsy;
     #   - "Error transcribing audio: <message>" for any exception raised
     #     inside the try block.
     if not openai_api_key:
         return "Error: No API key provided."
     # Stored on the `openai` object itself, so the key stays set after this
     # call returns.
     openai.api_key = openai_api_key
     try:
-        # Use the correct transcription API call
-        audio_file_obj = io.BytesIO(audio_binary)
-        audio_file_obj.name = 'audio.wav'  # Set a name for the file object (as OpenAI expects it)
-        # Transcribe the audio to text using OpenAI's whisper model
-        audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
         return audio_file_transcription.text
     except Exception as e:
         return f"Error transcribing audio: {str(e)}"
@@ -173,8 +170,8 @@ with gr.Blocks() as demo:
             pdf_button = gr.Button("Ask")
         with gr.Tab("Voice Chat"):
-            # Record Audio Component for Voice Chat (No 'source' argument needed)
-            audio_record = gr.Audio(label="Record your Voice", type="bytes", show_label=True)
             # Upload Audio File Component
             audio_upload = gr.File(label="Or Upload an Audio File", type="file", file_types=["audio/wav", "audio/mp3"])
             audio_query = gr.Textbox(label="Ask about the transcription")
@@ -198,7 +195,7 @@ with gr.Blocks() as demo:
             return "Please either record or upload an audio file."
         # Process the audio (either from recording or upload)
-        transcription = transcribe_audio(audio, api_key)
         if transcription.startswith("Error"):
             return transcription  # Return transcription error
         return query_openai(

         return f"Error processing the PDF: {str(e)}"
 # Function to transcribe audio to text using OpenAI Whisper API
+def transcribe_audio(audio_filepath, openai_api_key):
     # Transcribe the audio file at `audio_filepath` with the "whisper-1"
     # model and return the `text` attribute of the API result.
     # Failures are returned as strings, not raised:
     #   - "Error: No API key provided." when `openai_api_key` is falsy;
     #   - "Error transcribing audio: <message>" for any exception raised
     #     inside the try block (opening the file, the API call, or
     #     reading `.text`).
     # NOTE(review): `audio_filepath` is handed straight to open(), so it
     # must be a filesystem path. The visible call site passes `audio.name`,
     # which presumably works only for an uploaded file object and not for
     # the path string a type="filepath" recording yields - verify.
     # NOTE(review): the visible call site detects failure with
     # startswith("Error"), so a genuine transcript beginning with "Error"
     # would be treated as a failure - confirm this is acceptable.
     if not openai_api_key:
         return "Error: No API key provided."
     # Stored on the `openai` object itself, so the key stays set after this
     # call returns.
     openai.api_key = openai_api_key
     try:
+        # Open the audio file and transcribe using OpenAI's Whisper model
+        with open(audio_filepath, "rb") as audio_file:
+            audio_file_transcription = openai.Audio.transcribe(file=audio_file, model="whisper-1")
         # The `with` block has exited, so the file is closed; only the
         # returned result object is read here.
         return audio_file_transcription.text
     except Exception as e:
         return f"Error transcribing audio: {str(e)}"
             pdf_button = gr.Button("Ask")
         with gr.Tab("Voice Chat"):
+            # Record Audio Component for Voice Chat
+            audio_record = gr.Audio(label="Record your Voice", type="filepath", show_label=True)
             # Upload Audio File Component
             audio_upload = gr.File(label="Or Upload an Audio File", type="file", file_types=["audio/wav", "audio/mp3"])
             audio_query = gr.Textbox(label="Ask about the transcription")
             return "Please either record or upload an audio file."
         # Process the audio (either from recording or upload)
+        transcription = transcribe_audio(audio.name, api_key)
         if transcription.startswith("Error"):
             return transcription  # Return transcription error
         return query_openai(