shukdevdatta123 committed on
Commit 9f74220 · verified · 1 Parent(s): a169fea

Update app.py

Files changed (1)
  1. app.py +48 -67
app.py CHANGED
@@ -44,7 +44,7 @@ def image_url_chat(image_url, text_query, temperature, top_p, max_output_tokens)
 
     messages = [
         {"role": "user", "content": [
-            {"type": "image_url", "image_url": {"url": image_url}},
+            {"type": "image_url", "image_url": {"url": image_url}}, # Corrected format
             {"type": "text", "text": text_query}
         ]},
     ]
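Note: `query_openai` is defined elsewhere in app.py and is not part of this hunk. A minimal sketch of what such a helper might look like, assuming the openai v1.x Python SDK and a vision-capable chat model (the model name and the module-level `api_key` are assumptions, not taken from this diff):

```python
from openai import OpenAI

def query_openai(messages, temperature, top_p, max_output_tokens):
    # Forward the user message list (text and image_url parts) to the
    # Chat Completions endpoint and return the text of the first choice.
    client = OpenAI(api_key=api_key)  # assumed module-level key set via set_api_key
    response = client.chat.completions.create(
        model="gpt-4o-mini",          # assumed vision-capable model
        messages=messages,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_output_tokens,
    )
    return response.choices[0].message.content
```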
@@ -71,7 +71,7 @@ def image_chat(image_file, text_query, temperature, top_p, max_output_tokens):
 
     messages = [
         {"role": "user", "content": [
-            {"type": "image_url", "image_url": {"url": image_data}},
+            {"type": "image_url", "image_url": {"url": image_data}}, # Fixed format
             {"type": "text", "text": text_query}
         ]},
     ]
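In `image_chat`, the uploaded file has to reach the API as something the `image_url` content type accepts; a common approach is a base64 data URL. A sketch of a hypothetical helper (`encode_image_as_data_url` does not appear in app.py) that could produce `image_data` from the uploaded filepath:

```python
import base64
import mimetypes

def encode_image_as_data_url(path):
    # Guess the MIME type from the filename and embed the bytes as a data URL,
    # which the "image_url" content part accepts in place of a web URL.
    mime = mimetypes.guess_type(path)[0] or "image/jpeg"
    with open(path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime};base64,{encoded}"
```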
@@ -103,38 +103,6 @@ def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
     except Exception as e:
         return f"Error processing the PDF: {str(e)}"
 
-# Function to process audio file and convert to text
-def process_audio(audio_file, query, temperature, top_p, max_output_tokens):
-    # Modified to handle numpy array or filepath depending on Audio component output
-    try:
-        if isinstance(audio_file, tuple):  # In case audio is returned as tuple (numpy array, sample rate)
-            # Convert numpy array to WAV bytes in memory
-            import numpy as np
-            import scipy.io.wavfile as wav
-
-            audio_data, sample_rate = audio_file
-            buffer = io.BytesIO()
-            wav.write(buffer, sample_rate, audio_data)
-            buffer.seek(0)
-            audio_binary = buffer.read()
-        else:  # Filepath
-            with open(audio_file, "rb") as f:
-                audio_binary = f.read()
-
-        # Transcribe the audio
-        transcription = transcribe_audio(audio_binary, api_key)
-
-        # Use the transcription and query to get a response
-        messages = [
-            {"role": "user", "content": [
-                {"type": "text", "text": f"Transcription: {transcription}"},
-                {"type": "text", "text": f"Query: {query}"}
-            ]},
-        ]
-        return query_openai(messages, temperature, top_p, max_output_tokens)
-    except Exception as e:
-        return f"Error processing audio: {str(e)}"
-
 # Function to transcribe audio to text using OpenAI Whisper API
 def transcribe_audio(audio_binary, openai_api_key):
     if not openai_api_key:
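The body of `transcribe_audio` sits outside the changed lines, so only its signature and first line are visible here. A minimal sketch of how it might call the Whisper endpoint, assuming the openai v1.x SDK and the `whisper-1` model; wrapping the raw bytes in a named buffer is an assumption about how the file-like argument is built:

```python
import io
from openai import OpenAI

def transcribe_audio(audio_binary, openai_api_key):
    # Wrap the raw bytes in a named file-like object (the endpoint uses the
    # name to infer the audio format) and request a transcription.
    if not openai_api_key:
        return "Error: OpenAI API key not set."
    client = OpenAI(api_key=openai_api_key)
    audio_file = io.BytesIO(audio_binary)
    audio_file.name = "audio.wav"  # assumed container format
    result = client.audio.transcriptions.create(model="whisper-1", file=audio_file)
    return result.text
```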
@@ -153,9 +121,9 @@ def transcribe_audio(audio_binary, openai_api_key):
     except Exception as e:
         return f"Error transcribing audio: {str(e)}"
 
-# Function to clear the chat
+# Function to clear the chat (Fix: Returns the correct number of outputs)
 def clear_chat():
-    return "", "", "", "", "", "", "", None, "", None, "", None, "", None, "", 1.0, 1.0, 2048
+    return "", "", "", "", "", "", "", None, "", None, "", 1.0, 1.0, 2048
 
 # Gradio UI Layout
 with gr.Blocks() as demo:
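Gradio assigns the values returned by a click handler positionally to the components listed in `outputs`, which is why `clear_chat`'s return tuple has to line up with the `clear_button.click(...)` call further down. A self-contained toy illustration (all names hypothetical):

```python
import gradio as gr

def reset_fields():
    # One value per output component, in order: "" clears a Textbox,
    # None clears a File upload, and a number resets a Slider.
    return "", None, 1.0

with gr.Blocks() as toy:
    query_box = gr.Textbox(label="Query")
    file_upload = gr.File(label="Upload")
    temperature_slider = gr.Slider(0, 2, value=1.0, label="Temperature")
    reset_button = gr.Button("Reset")
    reset_button.click(reset_fields, outputs=[query_box, file_upload, temperature_slider])
```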
@@ -163,7 +131,7 @@ with gr.Blocks() as demo:
 
     # Accordion for explaining hyperparameters
     with gr.Accordion("Hyperparameters", open=False):
-        gr.Markdown("""
+        gr.Markdown("""
         ### Temperature:
         Controls the randomness of the model's output. A lower temperature makes the model more deterministic, while a higher temperature makes it more creative and varied.
         ### Top-P (Nucleus Sampling):
@@ -172,6 +140,30 @@ with gr.Blocks() as demo:
         Limits the number of tokens (words or subwords) the model can generate in its response. You can use this to control the length of the response.
         """)
 
+    gr.HTML("""
+    <style>
+    #api_key_button {
+        margin-top: 27px; /* Add margin-top to the button */
+        background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
+    }
+    #api_key_button:hover {
+        background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
+    }
+    #clear_chat_button {
+        background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%); /* Red gradient */
+    }
+    #clear_chat_button:hover {
+        background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
+    }
+    #ask_button {
+        background: linear-gradient(135deg, #fbd38d 0%, #f6e05e 100%); /* Yellow gradient */
+    }
+    #ask_button:hover {
+        background: linear-gradient(135deg, #ecc94b 0%, #fbd38d 100%); /* Slightly darker yellow gradient on hover */
+    }
+    </style>
+    """)
+
     # API Key Input
     with gr.Row():
         api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
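The injected `<style>` block targets components through the `elem_id` values given to the buttons below (`#api_key_button`, `#ask_button`, `#clear_chat_button`). As a side note, Gradio can also take custom CSS via the `css` argument of `gr.Blocks`; a hypothetical sketch of the same styling routed that way (not what this commit does):

```python
import gradio as gr

# Hypothetical alternative to gr.HTML("<style>...</style>"): pass the rules
# through gr.Blocks(css=...). elem_id is what links a component to an #id rule.
custom_css = """
#ask_button { background: linear-gradient(135deg, #fbd38d 0%, #f6e05e 100%); }
"""

with gr.Blocks(css=custom_css) as styled_demo:
    ask_button = gr.Button("Ask", elem_id="ask_button")
```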
@@ -181,47 +173,40 @@ with gr.Blocks() as demo:
     with gr.Row():
         temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
         top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
-        max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens")
+        max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens") # Changed default to 2048
 
     with gr.Tabs():
         with gr.Tab("Image URL Chat"):
             image_url = gr.Textbox(label="Enter Image URL")
             image_query = gr.Textbox(label="Ask about the Image")
             image_url_output = gr.Textbox(label="Response", interactive=False)
-            image_url_button = gr.Button("Ask", elem_id="ask_button")
+            image_url_button = gr.Button("Ask",elem_id="ask_button")
 
         with gr.Tab("Text Chat"):
             text_query = gr.Textbox(label="Enter your query")
             text_output = gr.Textbox(label="Response", interactive=False)
-            text_button = gr.Button("Ask", elem_id="ask_button")
+            text_button = gr.Button("Ask",elem_id="ask_button")
 
         with gr.Tab("Image Chat"):
             image_upload = gr.File(label="Upload an Image", type="filepath")
             image_text_query = gr.Textbox(label="Ask about the uploaded image")
             image_output = gr.Textbox(label="Response", interactive=False)
-            image_button = gr.Button("Ask", elem_id="ask_button")
+            image_button = gr.Button("Ask",elem_id="ask_button")
 
         with gr.Tab("PDF Chat"):
             pdf_upload = gr.File(label="Upload a PDF", type="filepath")
             pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
             pdf_output = gr.Textbox(label="Response", interactive=False)
-            pdf_button = gr.Button("Ask", elem_id="ask_button")
-
-        with gr.Tab("Voice Chat (Upload)"):
-            audio_upload = gr.File(label="Upload an Audio File")
+            pdf_button = gr.Button("Ask",elem_id="ask_button")
+
+        with gr.Tab("Voice Chat"):
+            audio_upload = gr.File(label="Upload an Audio File", type="binary")
             audio_query = gr.Textbox(label="Ask about the transcription")
             audio_output = gr.Textbox(label="Response", interactive=False)
-            audio_button = gr.Button("Ask", elem_id="ask_button")
-
-        with gr.Tab("Voice(Record) Chat"):
-            # Fix: Changed type to "numpy" which is supported in your Gradio version
-            audio_record = gr.Audio(label="Record your voice", type="numpy")
-            audio_record_query = gr.Textbox(label="Ask about the transcription")
-            audio_record_output = gr.Textbox(label="Response", interactive=False)
-            audio_record_button = gr.Button("Ask", elem_id="ask_button")
+            audio_button = gr.Button("Ask",elem_id="ask_button")
 
     # Clear chat button
-    clear_button = gr.Button("Clear Chat", elem_id="clear_chat_button")
+    clear_button = gr.Button("Clear Chat",elem_id="clear_chat_button")
 
     # Button Click Actions
     api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
@@ -230,26 +215,22 @@ with gr.Blocks() as demo:
     image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
     pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
 
-    # For Voice Chat (Upload)
-    audio_button.click(process_audio,
-                       [audio_upload, audio_query, temperature, top_p, max_output_tokens],
-                       audio_output)
-
-    # For Voice Chat (Record)
-    audio_record_button.click(process_audio,
-                              [audio_record, audio_record_query, temperature, top_p, max_output_tokens],
-                              audio_record_output)
+    # For Voice Chat
+    audio_button.click(
+        lambda audio_binary, query, temperature, top_p, max_output_tokens: query_openai(
+            [{"role": "user", "content": [{"type": "text", "text": transcribe_audio(audio_binary, api_key)}, {"type": "text", "text": query}]}],
+            temperature, top_p, max_output_tokens
+        ), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output
+    )
 
-    # Clear button resets all necessary fields
+    # Fix: Clear button resets all necessary fields correctly
     clear_button.click(
         clear_chat,
         outputs=[
             image_url, image_query, image_url_output,
             text_query, text_output,
             image_text_query, image_output,
-            pdf_upload, pdf_text_query, pdf_output,
-            audio_upload, audio_query, audio_output,
-            audio_record, audio_record_query, audio_record_output,
+            pdf_upload, pdf_text_query, pdf_output,
             temperature, top_p, max_output_tokens
         ]
     )
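The new Voice Chat wiring feeds the raw bytes from `gr.File(type="binary")` into an inline lambda that first transcribes and then queries the model. For readability, an equivalent named handler might look like the sketch below; it mirrors the lambda above and is not part of the commit:

```python
def voice_chat(audio_binary, query, temperature, top_p, max_output_tokens):
    # Transcribe the uploaded audio, then ask the model about the
    # transcription together with the user's query.
    transcription = transcribe_audio(audio_binary, api_key)
    messages = [{"role": "user", "content": [
        {"type": "text", "text": transcription},
        {"type": "text", "text": query},
    ]}]
    return query_openai(messages, temperature, top_p, max_output_tokens)

# audio_button.click(voice_chat,
#                    [audio_upload, audio_query, temperature, top_p, max_output_tokens],
#                    audio_output)
```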
 