Update app.py
Browse files
app.py
CHANGED
@@ -103,6 +103,38 @@ def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
|
|
103 |
except Exception as e:
|
104 |
return f"Error processing the PDF: {str(e)}"
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
# Function to transcribe audio to text using OpenAI Whisper API
|
107 |
def transcribe_audio(audio_binary, openai_api_key):
|
108 |
if not openai_api_key:
|
@@ -121,9 +153,9 @@ def transcribe_audio(audio_binary, openai_api_key):
|
|
121 |
except Exception as e:
|
122 |
return f"Error transcribing audio: {str(e)}"
|
123 |
|
124 |
-
# Function to clear the chat
|
125 |
def clear_chat():
# Pre-change version (the "-" side of this diff).
# Returns 14 reset values: seven empty strings, then None, "", None, "",
# and finally 1.0, 1.0, 2048.
# NOTE(review): presumably one value per component listed in
# clear_button.click(outputs=[...]); that list is cut off in this view, so the
# count could not be confirmed here.
|
126 |
-
return "", "", "", "", "", "", "", None, "", None, "", 1.0, 1.0, 2048
|
127 |
|
128 |
# Gradio UI Layout
|
129 |
with gr.Blocks() as demo:
|
@@ -176,14 +208,14 @@ with gr.Blocks() as demo:
|
|
176 |
pdf_button = gr.Button("Ask", elem_id="ask_button")
|
177 |
|
178 |
with gr.Tab("Voice Chat (Upload)"):
|
179 |
-
audio_upload = gr.File(label="Upload an Audio File"
|
180 |
audio_query = gr.Textbox(label="Ask about the transcription")
|
181 |
audio_output = gr.Textbox(label="Response", interactive=False)
|
182 |
audio_button = gr.Button("Ask", elem_id="ask_button")
|
183 |
|
184 |
with gr.Tab("Voice(Record) Chat"):
|
185 |
-
# Fix: Changed
|
186 |
-
audio_record = gr.Audio(label="Record your voice", type="
|
187 |
audio_record_query = gr.Textbox(label="Ask about the transcription")
|
188 |
audio_record_output = gr.Textbox(label="Response", interactive=False)
|
189 |
audio_record_button = gr.Button("Ask", elem_id="ask_button")
|
@@ -199,29 +231,25 @@ with gr.Blocks() as demo:
|
|
199 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
200 |
|
201 |
# For Voice Chat (Upload)
|
202 |
-
audio_button.click(
|
203 |
-
|
204 |
-
|
205 |
-
temperature, top_p, max_output_tokens
|
206 |
-
), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output
|
207 |
-
)
|
208 |
|
209 |
# For Voice Chat (Record)
|
210 |
-
audio_record_button.click(
|
211 |
-
|
212 |
-
|
213 |
-
temperature, top_p, max_output_tokens
|
214 |
-
), [audio_record, audio_record_query, temperature, top_p, max_output_tokens], audio_record_output
|
215 |
-
)
|
216 |
|
217 |
-
#
|
218 |
clear_button.click(
|
219 |
clear_chat,
|
220 |
outputs=[
|
221 |
image_url, image_query, image_url_output,
|
222 |
text_query, text_output,
|
223 |
image_text_query, image_output,
|
224 |
-
pdf_upload, pdf_text_query, pdf_output,
|
|
|
|
|
225 |
temperature, top_p, max_output_tokens
|
226 |
]
|
227 |
)
|
|
|
103 |
except Exception as e:
|
104 |
return f"Error processing the PDF: {str(e)}"
|
105 |
|
106 |
+
# Function to process audio file and convert to text
|
107 |
+
def _read_audio_bytes(audio_file):
    """Return the raw bytes of an audio input, encoding numpy audio as WAV."""
    import io
    import os

    if isinstance(audio_file, tuple):
        # Numpy-style audio: encode it as an in-memory WAV file.
        import numpy as np
        import scipy.io.wavfile as wav

        first, second = audio_file
        # Gradio documents the numpy tuple as (sample_rate, data). The scalar
        # element is the sample rate, so the legacy (data, sample_rate) order
        # is still accepted.
        if np.ndim(first) == 0:
            sample_rate, audio_data = first, second
        else:
            audio_data, sample_rate = first, second

        buffer = io.BytesIO()
        wav.write(buffer, int(sample_rate), np.asarray(audio_data))
        return buffer.getvalue()

    if isinstance(audio_file, (str, bytes, os.PathLike)):
        path = audio_file
    else:
        # Upload components may hand over a file-like wrapper instead of a
        # path; such wrappers expose the on-disk location through `.name`.
        path = getattr(audio_file, "name", audio_file)

    with open(path, "rb") as f:
        return f.read()


def process_audio(audio_file, query, temperature, top_p, max_output_tokens):
    """Transcribe an audio clip and answer a query about the transcription.

    Args:
        audio_file: The audio to process. Accepted forms are a
            ``(sample_rate, data)`` tuple of numpy audio (the reversed order
            is tolerated), a filesystem path, or a file-like object whose
            ``.name`` attribute is the path of the uploaded file.
        query: The user's question about the transcription.
        temperature: Sampling temperature forwarded to ``query_openai``.
        top_p: Nucleus-sampling value forwarded to ``query_openai``.
        max_output_tokens: Output token limit forwarded to ``query_openai``.

    Returns:
        The result of ``query_openai``, or a human-readable string starting
        with ``"Error"`` when the audio is missing, cannot be read, or cannot
        be transcribed. Exceptions are not propagated because the result is
        displayed directly in a UI text box.
    """
    if audio_file is None:
        return "Error processing audio: no audio provided."

    try:
        audio_binary = _read_audio_bytes(audio_file)

        # Transcribe the audio
        transcription = transcribe_audio(audio_binary, api_key)

        # transcribe_audio reports failures as a string rather than raising;
        # surface that message instead of sending it to the model as if it
        # were the transcription.
        if isinstance(transcription, str) and transcription.startswith(
            "Error transcribing audio:"
        ):
            return transcription

        # Use the transcription and query to get a response
        messages = [
            {"role": "user", "content": [
                {"type": "text", "text": f"Transcription: {transcription}"},
                {"type": "text", "text": f"Query: {query}"}
            ]},
        ]
        return query_openai(messages, temperature, top_p, max_output_tokens)
    except Exception as e:
        return f"Error processing audio: {str(e)}"
|
137 |
+
|
138 |
# Function to transcribe audio to text using OpenAI Whisper API
|
139 |
def transcribe_audio(audio_binary, openai_api_key):
|
140 |
if not openai_api_key:
|
|
|
153 |
except Exception as e:
|
154 |
return f"Error transcribing audio: {str(e)}"
|
155 |
|
156 |
+
# Function to clear the chat
|
157 |
def clear_chat():
    """Return the reset value for every component wired to the clear button.

    The tuple must contain exactly one value per component in the
    ``outputs`` list of ``clear_button.click`` (19 components), in the same
    order. Text boxes are reset to ``""``, file/audio inputs to ``None``,
    and the three generation settings to 1.0, 1.0 and 2048.
    """
    return (
        "", "", "",       # image_url, image_query, image_url_output
        "", "",           # text_query, text_output
        "", "",           # image_text_query, image_output
        None, "", "",     # pdf_upload, pdf_text_query, pdf_output (assumed text box)
        None, "", "",     # audio_upload, audio_query, audio_output
        None, "", "",     # audio_record, audio_record_query, audio_record_output
        1.0, 1.0, 2048,   # temperature, top_p, max_output_tokens
    )
|
159 |
|
160 |
# Gradio UI Layout
|
161 |
with gr.Blocks() as demo:
|
|
|
208 |
pdf_button = gr.Button("Ask", elem_id="ask_button")
|
209 |
|
210 |
with gr.Tab("Voice Chat (Upload)"):
|
211 |
+
audio_upload = gr.File(label="Upload an Audio File")
|
212 |
audio_query = gr.Textbox(label="Ask about the transcription")
|
213 |
audio_output = gr.Textbox(label="Response", interactive=False)
|
214 |
audio_button = gr.Button("Ask", elem_id="ask_button")
|
215 |
|
216 |
with gr.Tab("Voice(Record) Chat"):
|
217 |
+
# Fix: Changed type to "numpy" which is supported in your Gradio version
|
218 |
+
audio_record = gr.Audio(label="Record your voice", type="numpy")
|
219 |
audio_record_query = gr.Textbox(label="Ask about the transcription")
|
220 |
audio_record_output = gr.Textbox(label="Response", interactive=False)
|
221 |
audio_record_button = gr.Button("Ask", elem_id="ask_button")
|
|
|
231 |
pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
|
232 |
|
233 |
# For Voice Chat (Upload)
|
234 |
+
audio_button.click(process_audio,
|
235 |
+
[audio_upload, audio_query, temperature, top_p, max_output_tokens],
|
236 |
+
audio_output)
|
|
|
|
|
|
|
237 |
|
238 |
# For Voice Chat (Record)
|
239 |
+
audio_record_button.click(process_audio,
|
240 |
+
[audio_record, audio_record_query, temperature, top_p, max_output_tokens],
|
241 |
+
audio_record_output)
|
|
|
|
|
|
|
242 |
|
243 |
+
# Clear button resets all necessary fields
|
244 |
clear_button.click(
|
245 |
clear_chat,
|
246 |
outputs=[
|
247 |
image_url, image_query, image_url_output,
|
248 |
text_query, text_output,
|
249 |
image_text_query, image_output,
|
250 |
+
pdf_upload, pdf_text_query, pdf_output,
|
251 |
+
audio_upload, audio_query, audio_output,
|
252 |
+
audio_record, audio_record_query, audio_record_output,
|
253 |
temperature, top_p, max_output_tokens
|
254 |
]
|
255 |
)
|