shukdevdatta123 committed on
Commit
ec3a27a
·
verified ·
1 Parent(s): 92fc24f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -150
app.py CHANGED
@@ -3,6 +3,8 @@ import openai
3
  import fitz # PyMuPDF for PDF processing
4
  import base64
5
  import io
 
 
6
 
7
  # Variable to store API key
8
  api_key = ""
@@ -19,217 +21,148 @@ def query_openai(messages, temperature, top_p, max_output_tokens):
19
  return "Please enter your OpenAI API key first."
20
 
21
  try:
22
- openai.api_key = api_key # Set API key dynamically
23
-
24
- # Ensure numeric values for OpenAI parameters
25
- temperature = float(temperature) if temperature else 1.0
26
- top_p = float(top_p) if top_p else 1.0
27
- max_output_tokens = int(max_output_tokens) if max_output_tokens else 2048
28
 
29
  response = openai.ChatCompletion.create(
30
  model="gpt-4.5-preview",
31
  messages=messages,
32
- temperature=temperature,
33
- top_p=top_p,
34
- max_tokens=max_output_tokens
35
  )
36
  return response["choices"][0]["message"]["content"]
37
  except Exception as e:
38
  return f"Error: {str(e)}"
39
 
40
- # Function to process image URL input
41
- def image_url_chat(image_url, text_query, temperature, top_p, max_output_tokens):
42
- if not image_url or not text_query:
43
- return "Please provide an image URL and a query."
44
-
45
- messages = [
46
- {"role": "user", "content": [
47
- {"type": "image_url", "image_url": {"url": image_url}}, # Corrected format
48
- {"type": "text", "text": text_query}
49
- ]},
50
- ]
51
- return query_openai(messages, temperature, top_p, max_output_tokens)
52
-
53
- # Function to process text input
54
- def text_chat(text_query, temperature, top_p, max_output_tokens):
55
- if not text_query:
56
- return "Please enter a query."
57
-
58
- messages = [{"role": "user", "content": [{"type": "text", "text": text_query}]}]
59
- return query_openai(messages, temperature, top_p, max_output_tokens)
60
-
61
- # Function to process uploaded image input
62
- def image_chat(image_file, text_query, temperature, top_p, max_output_tokens):
63
- if image_file is None or not text_query:
64
- return "Please upload an image and provide a query."
65
-
66
- # Encode image as base64
67
- with open(image_file, "rb") as img:
68
- base64_image = base64.b64encode(img.read()).decode("utf-8")
69
-
70
- image_data = f"data:image/jpeg;base64,{base64_image}"
71
-
72
- messages = [
73
- {"role": "user", "content": [
74
- {"type": "image_url", "image_url": {"url": image_data}}, # Fixed format
75
- {"type": "text", "text": text_query}
76
- ]},
77
- ]
78
- return query_openai(messages, temperature, top_p, max_output_tokens)
79
-
80
- # Function to process uploaded PDF input
81
- def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
82
- if pdf_file is None or not text_query:
83
- return "Please upload a PDF and provide a query."
84
-
85
- try:
86
- # Extract text from all pages of the PDF
87
- doc = fitz.open(pdf_file.name)
88
- text = "\n".join([page.get_text("text") for page in doc]) # Extract text from all pages
89
-
90
- # If no text found, return an error
91
- if not text.strip():
92
- return "No text found in the PDF."
93
-
94
- # Create the query message with the extracted text and the user's query
95
- messages = [
96
- {"role": "user", "content": [
97
- {"type": "text", "text": text}, # The extracted text from the PDF
98
- {"type": "text", "text": text_query}
99
- ]},
100
- ]
101
- return query_openai(messages, temperature, top_p, max_output_tokens)
102
-
103
- except Exception as e:
104
- return f"Error processing the PDF: {str(e)}"
105
-
106
- # Function to transcribe audio to text using OpenAI Whisper API
107
- def transcribe_audio(audio_binary, openai_api_key):
108
- if not openai_api_key:
109
  return "Error: No API key provided."
110
 
111
- openai.api_key = openai_api_key
112
 
113
  try:
114
- # Use the correct transcription API call
115
- audio_file_obj = io.BytesIO(audio_binary)
116
- audio_file_obj.name = 'audio.wav' # Set a name for the file object (as OpenAI expects it)
117
-
118
- # Transcribe the audio to text using OpenAI's whisper model
119
- audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
120
- return audio_file_transcription.text
 
 
 
 
 
121
  except Exception as e:
122
  return f"Error transcribing audio: {str(e)}"
123
 
124
- # Function to clear the chat (Fix: Returns the correct number of outputs)
125
  def clear_chat():
126
- return "", "", "", "", "", "", "", None, "", None, "", 1.0, 1.0, 2048
127
 
128
  # Gradio UI Layout
129
  with gr.Blocks() as demo:
130
- gr.Markdown("## GPT-4.5 Preview Chatbot")
131
-
132
- # Accordion for explaining hyperparameters
133
- with gr.Accordion("Hyperparameters", open=False):
134
- gr.Markdown("""
135
- ### Temperature:
136
- Controls the randomness of the model's output. A lower temperature makes the model more deterministic, while a higher temperature makes it more creative and varied.
137
-
138
- ### Top-P (Nucleus Sampling):
139
- Controls the cumulative probability distribution from which the model picks the next word. A lower value makes the model more focused and deterministic, while a higher value increases randomness.
140
-
141
- ### Max Output Tokens:
142
- Limits the number of tokens (words or subwords) the model can generate in its response. You can use this to control the length of the response.
143
- """)
144
 
 
145
  gr.HTML("""
146
  <style>
147
  #api_key_button {
148
- margin-top: 27px; /* Add margin-top to the button */
149
- background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
 
 
 
150
  }
151
  #api_key_button:hover {
152
- background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
153
  }
154
  #clear_chat_button {
155
- background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%); /* Red gradient */
 
 
 
156
  }
157
  #clear_chat_button:hover {
158
- background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
159
  }
160
  </style>
161
  """)
162
-
163
  # API Key Input
164
  with gr.Row():
165
  api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
166
  api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
167
  api_key_output = gr.Textbox(label="API Key Status", interactive=False)
168
 
169
- with gr.Row():
170
- temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
171
- top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
172
- max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens") # Changed default to 2048
173
-
 
 
 
 
 
 
 
174
  with gr.Tabs():
175
- with gr.Tab("Image URL Chat"):
 
 
 
 
 
176
  image_url = gr.Textbox(label="Enter Image URL")
177
  image_query = gr.Textbox(label="Ask about the Image")
178
  image_url_output = gr.Textbox(label="Response", interactive=False)
179
  image_url_button = gr.Button("Ask")
180
-
181
- with gr.Tab("Text Chat"):
182
- text_query = gr.Textbox(label="Enter your query")
183
- text_output = gr.Textbox(label="Response", interactive=False)
184
- text_button = gr.Button("Ask")
185
-
186
- with gr.Tab("Image Chat"):
187
  image_upload = gr.File(label="Upload an Image", type="filepath")
188
  image_text_query = gr.Textbox(label="Ask about the uploaded image")
189
  image_output = gr.Textbox(label="Response", interactive=False)
190
  image_button = gr.Button("Ask")
191
-
192
- with gr.Tab("PDF Chat"):
193
  pdf_upload = gr.File(label="Upload a PDF", type="filepath")
194
  pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
195
  pdf_output = gr.Textbox(label="Response", interactive=False)
196
  pdf_button = gr.Button("Ask")
197
 
198
- with gr.Tab("Voice Chat"):
199
- audio_upload = gr.File(label="Upload an Audio File", type="binary")
200
- audio_query = gr.Textbox(label="Ask about the transcription")
 
201
  audio_output = gr.Textbox(label="Response", interactive=False)
202
  audio_button = gr.Button("Ask")
203
 
204
  # Clear chat button
205
- clear_button = gr.Button("Clear Chat",elem_id="clear_chat_button")
206
 
207
  # Button Click Actions
208
  api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
209
- image_url_button.click(image_url_chat, [image_url, image_query, temperature, top_p, max_output_tokens], image_url_output)
210
- text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
211
- image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
212
- pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
213
 
214
- # For Voice Chat
215
- audio_button.click(
216
- lambda audio_binary, query, temperature, top_p, max_output_tokens: query_openai(
217
- [{"role": "user", "content": [{"type": "text", "text": transcribe_audio(audio_binary, api_key)}, {"type": "text", "text": query}]}],
218
- temperature, top_p, max_output_tokens
219
- ), [audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output
220
- )
221
-
222
- # Fix: Clear button resets all necessary fields correctly
223
- clear_button.click(
224
- clear_chat,
225
- outputs=[
226
- image_url, image_query, image_url_output,
227
- text_query, text_output,
228
- image_text_query, image_output,
229
- pdf_upload, pdf_text_query, pdf_output,
230
- temperature, top_p, max_output_tokens
231
- ]
232
- )
233
 
234
  # Launch Gradio App
235
  if __name__ == "__main__":
 
3
  import fitz # PyMuPDF for PDF processing
4
  import base64
5
  import io
6
+ import numpy as np
7
+ import soundfile as sf
8
 
9
  # Variable to store API key
10
  api_key = ""
 
21
  return "Please enter your OpenAI API key first."
22
 
23
  try:
24
+ openai.api_key = api_key
 
 
 
 
 
25
 
26
  response = openai.ChatCompletion.create(
27
  model="gpt-4.5-preview",
28
  messages=messages,
29
+ temperature=float(temperature),
30
+ top_p=float(top_p),
31
+ max_tokens=int(max_output_tokens)
32
  )
33
  return response["choices"][0]["message"]["content"]
34
  except Exception as e:
35
  return f"Error: {str(e)}"
36
 
37
+ # Function to transcribe audio
38
+ def transcribe_audio(audio_input):
39
+ if not api_key:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  return "Error: No API key provided."
41
 
42
+ openai.api_key = api_key
43
 
44
  try:
45
+ if isinstance(audio_input, np.ndarray):
46
+ wav_io = io.BytesIO()
47
+ sf.write(wav_io, audio_input, samplerate=16000, format="WAV")
48
+ wav_io.seek(0)
49
+ audio_file_obj = wav_io
50
+ audio_file_obj.name = "recorded_audio.wav"
51
+ else:
52
+ audio_file_obj = io.BytesIO(audio_input)
53
+ audio_file_obj.name = "uploaded_audio.wav"
54
+
55
+ transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
56
+ return transcription["text"]
57
  except Exception as e:
58
  return f"Error transcribing audio: {str(e)}"
59
 
60
+ # Function to clear chat
61
  def clear_chat():
62
+ return "", "", "", "", "", "", "", None, "", None, "", None, "", 1.0, 1.0, 2048
63
 
64
  # Gradio UI Layout
65
  with gr.Blocks() as demo:
66
+ gr.Markdown("## πŸ”₯ GPT-4.5 AI Chatbot: Text, Image, PDF, & Voice Support")
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
+ # Custom CSS for buttons
69
  gr.HTML("""
70
  <style>
71
  #api_key_button {
72
+ margin-top: 27px;
73
+ background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);
74
+ color: white;
75
+ font-weight: bold;
76
+ border-radius: 5px;
77
  }
78
  #api_key_button:hover {
79
+ background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%);
80
  }
81
  #clear_chat_button {
82
+ background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%);
83
+ color: white;
84
+ font-weight: bold;
85
+ border-radius: 5px;
86
  }
87
  #clear_chat_button:hover {
88
+ background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%);
89
  }
90
  </style>
91
  """)
92
+
93
  # API Key Input
94
  with gr.Row():
95
  api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
96
  api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
97
  api_key_output = gr.Textbox(label="API Key Status", interactive=False)
98
 
99
+ # Accordion for Hyperparameters
100
+ with gr.Accordion("πŸ”§ Advanced Settings (Hyperparameters)", open=False):
101
+ gr.Markdown("""
102
+ - **Temperature**: Controls randomness. Lower values make responses more predictable.
103
+ - **Top-P (Nucleus Sampling)**: Determines how many top probable words can be chosen.
104
+ - **Max Output Tokens**: Limits the length of the response.
105
+ """)
106
+ with gr.Row():
107
+ temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
108
+ top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
109
+ max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens")
110
+
111
  with gr.Tabs():
112
+ with gr.Tab("πŸ’¬ Text Chat"):
113
+ text_query = gr.Textbox(label="Enter your query")
114
+ text_output = gr.Textbox(label="Response", interactive=False)
115
+ text_button = gr.Button("Ask")
116
+
117
+ with gr.Tab("πŸ–ΌοΈ Image URL Chat"):
118
  image_url = gr.Textbox(label="Enter Image URL")
119
  image_query = gr.Textbox(label="Ask about the Image")
120
  image_url_output = gr.Textbox(label="Response", interactive=False)
121
  image_url_button = gr.Button("Ask")
122
+
123
+ with gr.Tab("πŸ“Έ Image Upload Chat"):
 
 
 
 
 
124
  image_upload = gr.File(label="Upload an Image", type="filepath")
125
  image_text_query = gr.Textbox(label="Ask about the uploaded image")
126
  image_output = gr.Textbox(label="Response", interactive=False)
127
  image_button = gr.Button("Ask")
128
+
129
+ with gr.Tab("πŸ“„ PDF Chat"):
130
  pdf_upload = gr.File(label="Upload a PDF", type="filepath")
131
  pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
132
  pdf_output = gr.Textbox(label="Response", interactive=False)
133
  pdf_button = gr.Button("Ask")
134
 
135
+ with gr.Tab("🎀 Voice Chat"):
136
+ audio_record = gr.Audio(source="microphone", type="numpy", label="πŸŽ™οΈ Record Audio")
137
+ audio_upload = gr.File(label="πŸ“‚ Upload an Audio File", type="binary")
138
+ audio_query = gr.Textbox(label="Ask a question about the transcription")
139
  audio_output = gr.Textbox(label="Response", interactive=False)
140
  audio_button = gr.Button("Ask")
141
 
142
  # Clear chat button
143
+ clear_button = gr.Button("🧹 Clear Chat", elem_id="clear_chat_button")
144
 
145
  # Button Click Actions
146
  api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
147
+ text_button.click(lambda q, t, p, m: query_openai([{"role": "user", "content": [{"type": "text", "text": q}]}], t, p, m),
148
+ inputs=[text_query, temperature, top_p, max_output_tokens],
149
+ outputs=[text_output])
 
150
 
151
+ image_url_button.click(lambda u, q, t, p, m: query_openai([{"role": "user", "content": [{"type": "image_url", "image_url": {"url": u}}, {"type": "text", "text": q}]}], t, p, m),
152
+ inputs=[image_url, image_query, temperature, top_p, max_output_tokens],
153
+ outputs=[image_url_output])
154
+
155
+ image_button.click(lambda f, q, t, p, m: query_openai([{"role": "user", "content": [{"type": "image_url", "image_url": {"url": f}}, {"type": "text", "text": q}]}], t, p, m),
156
+ inputs=[image_upload, image_text_query, temperature, top_p, max_output_tokens],
157
+ outputs=[image_output])
158
+
159
+ pdf_button.click(lambda f, q, t, p, m: query_openai([{"role": "user", "content": [{"type": "text", "text": f.read()}, {"type": "text", "text": q}]}], t, p, m),
160
+ inputs=[pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens],
161
+ outputs=[pdf_output])
162
+
163
+ audio_button.click(lambda a, q, t, p, m: query_openai([{"role": "user", "content": [{"type": "text", "text": transcribe_audio(a)}, {"type": "text", "text": q}]}], t, p, m),
164
+ inputs=[audio_record, audio_query, temperature, top_p, max_output_tokens],
165
+ outputs=[audio_output])
 
 
 
 
166
 
167
  # Launch Gradio App
168
  if __name__ == "__main__":