Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,6 @@ import gradio as gr
|
|
2 |
import openai
|
3 |
import fitz # PyMuPDF for PDF processing
|
4 |
import base64
|
5 |
-
import soundfile as sf
|
6 |
import io
|
7 |
|
8 |
# Variable to store API key
|
@@ -95,28 +94,27 @@ def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
|
|
95 |
]
|
96 |
return query_openai(messages, temperature, top_p, max_output_tokens)
|
97 |
|
98 |
-
# Function to
|
99 |
-
def transcribe_audio(
|
100 |
-
if
|
101 |
-
return "
|
|
|
|
|
102 |
|
103 |
try:
|
104 |
-
#
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
#
|
111 |
-
transcript = openai.Audio.transcribe(
|
112 |
-
model="whisper-1",
|
113 |
-
file=audio_buffer
|
114 |
-
)
|
115 |
-
|
116 |
-
return transcript["text"]
|
117 |
|
|
|
|
|
|
|
118 |
except Exception as e:
|
119 |
-
return f"Error
|
120 |
|
121 |
# Function to clear the chat (Fix: Returns the correct number of outputs)
|
122 |
def clear_chat():
|
@@ -184,7 +182,14 @@ with gr.Blocks() as demo:
|
|
184 |
text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
|
185 |
image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
|
186 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
|
189 |
# Fix: Clear button resets all necessary fields correctly
|
190 |
clear_button.click(
|
|
|
2 |
import openai
|
3 |
import fitz # PyMuPDF for PDF processing
|
4 |
import base64
|
|
|
5 |
import io
|
6 |
|
7 |
# Variable to store API key
|
|
|
94 |
]
|
95 |
return query_openai(messages, temperature, top_p, max_output_tokens)
|
96 |
|
97 |
# Function to transcribe audio to text using OpenAI Whisper API
def transcribe_audio(audio, openai_api_key):
    """Transcribe an audio file to text with OpenAI's Whisper API.

    Args:
        audio: Path to the uploaded audio file on disk.
        openai_api_key: API key used to authenticate with OpenAI.

    Returns:
        The transcribed text on success, or an error string. Callers
        display the return value directly, so failures are reported as
        strings rather than raised.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    try:
        # Pass the open file object straight to the API. This avoids
        # copying the whole file through an intermediate BytesIO, and it
        # preserves the real filename/extension, which Whisper uses to
        # detect the audio format (hard-coding 'audio.wav' would
        # mislabel non-wav uploads such as mp3 or m4a).
        with open(audio, 'rb') as audio_file:
            audio_file_transcription = openai.Audio.transcribe(file=audio_file, model="whisper-1")
        return audio_file_transcription.text
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
|
118 |
|
119 |
# Function to clear the chat (Fix: Returns the correct number of outputs)
|
120 |
def clear_chat():
|
|
|
182 |
text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
|
183 |
image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
|
184 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
185 |
# For Voice Chat: transcribe the uploaded audio, pair it with the typed
# query, and send both to the model as one user message.
def _voice_chat(audio, query, temperature, top_p, max_output_tokens):
    transcription = transcribe_audio(audio.name, api_key)
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": transcription},
            {"type": "text", "text": query},
        ],
    }]
    return query_openai(messages, temperature, top_p, max_output_tokens)

audio_button.click(_voice_chat, [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output)
|
193 |
|
194 |
# Fix: Clear button resets all necessary fields correctly
|
195 |
clear_button.click(
|