Update app.py
app.py CHANGED

@@ -2,6 +2,9 @@ import gradio as gr
 import openai
 import fitz  # PyMuPDF for PDF processing
 import base64
+import openai
+import soundfile as sf
+import io
 
 # Variable to store API key
 api_key = ""
@@ -45,7 +48,7 @@ def image_url_chat(image_url, text_query, temperature, top_p, max_output_tokens)
         {"role": "user", "content": [
             {"type": "image_url", "image_url": {"url": image_url}},  # Corrected format
             {"type": "text", "text": text_query}
-        ]}
+        ]},
     ]
     return query_openai(messages, temperature, top_p, max_output_tokens)
 
@@ -72,7 +75,7 @@ def image_chat(image_file, text_query, temperature, top_p, max_output_tokens):
         {"role": "user", "content": [
             {"type": "image_url", "image_url": {"url": image_data}},  # Fixed format
             {"type": "text", "text": text_query}
-        ]}
+        ]},
     ]
     return query_openai(messages, temperature, top_p, max_output_tokens)
 
@@ -89,10 +92,33 @@ def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
         {"role": "user", "content": [
             {"type": "text", "text": text},  # Fixed format
             {"type": "text", "text": text_query}
-        ]}
+        ]},
     ]
     return query_openai(messages, temperature, top_p, max_output_tokens)
 
+# Function to process uploaded audio and transcribe to text
+def transcribe_audio(audio_file):
+    if audio_file is None:
+        return "Please upload an audio file."
+
+    try:
+        # Load the audio file
+        audio_data, samplerate = sf.read(audio_file.name)
+        audio_buffer = io.BytesIO()
+        sf.write(audio_buffer, audio_data, samplerate, format='WAV')
+        audio_buffer.seek(0)
+
+        # Transcribe the audio using OpenAI's Whisper API
+        transcript = openai.Audio.transcribe(
+            model="whisper-1",
+            file=audio_buffer
+        )
+
+        return transcript["text"]
+
+    except Exception as e:
+        return f"Error in transcription: {str(e)}"
+
 # Function to clear the chat (Fix: Returns the correct number of outputs)
 def clear_chat():
     return "", "", "", "", "", "", "", None, "", None, "", 1.0, 1.0, 2048
@@ -144,6 +170,12 @@ with gr.Blocks() as demo:
         pdf_output = gr.Textbox(label="Response", interactive=False)
         pdf_button = gr.Button("Ask")
 
+    with gr.Tab("Voice Chat"):
+        audio_upload = gr.File(label="Upload an Audio File", type="file")
+        audio_query = gr.Textbox(label="Ask about the transcription")
+        audio_output = gr.Textbox(label="Response", interactive=False)
+        audio_button = gr.Button("Ask")
+
     # Clear chat button
     clear_button = gr.Button("Clear Chat")
 
@@ -153,6 +185,7 @@ with gr.Blocks() as demo:
     text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
     image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
     pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
+    audio_button.click(lambda audio, query, temperature, top_p, max_output_tokens: query_openai([{"role": "user", "content": [{"type": "text", "text": transcribe_audio(audio)}, {"type": "text", "text": query}]}], temperature, top_p, max_output_tokens), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output)
 
     # Fix: Clear button resets all necessary fields correctly
     clear_button.click(
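
For reference, a minimal standalone sketch of the transcription path that this commit adds, assuming the pre-1.0 openai SDK (the one that exposes openai.Audio.transcribe) plus the soundfile package, and a hypothetical local file sample.wav. The only step added beyond the diff is giving the in-memory buffer a name: Whisper infers the audio format from the filename, so a bare BytesIO upload may be rejected, and naming the buffer is a common workaround (an assumption, not something the commit itself does).

import io

import openai          # pre-1.0 SDK, exposes openai.Audio
import soundfile as sf

openai.api_key = "YOUR_API_KEY"  # placeholder; supply your own key

def transcribe_path(path):
    # Read the audio and re-encode it as WAV in memory, mirroring transcribe_audio in app.py
    audio_data, samplerate = sf.read(path)
    audio_buffer = io.BytesIO()
    sf.write(audio_buffer, audio_data, samplerate, format="WAV")
    audio_buffer.seek(0)
    # Assumption: give the buffer a filename so the Whisper API can infer the format
    audio_buffer.name = "audio.wav"
    transcript = openai.Audio.transcribe(model="whisper-1", file=audio_buffer)
    return transcript["text"]

print(transcribe_path("sample.wav"))  # hypothetical input file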
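The one-line lambda wired to audio_button in the last hunk can also be written as a named callback. The sketch below is an equivalent drop-in for that line inside app.py (it assumes the transcribe_audio and query_openai helpers and the Gradio components defined above, so it is not self-contained on its own):

def voice_chat(audio_file, text_query, temperature, top_p, max_output_tokens):
    # Transcribe the upload first, then send transcript and question as one user message
    transcript_text = transcribe_audio(audio_file)
    messages = [
        {"role": "user", "content": [
            {"type": "text", "text": transcript_text},
            {"type": "text", "text": text_query}
        ]}
    ]
    return query_openai(messages, temperature, top_p, max_output_tokens)

audio_button.click(
    voice_chat,
    [audio_upload, audio_query, temperature, top_p, max_output_tokens],
    audio_output,
)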