File size: 15,663 Bytes
8b97f99
 
 
 
ec333f1
8b97f99
c2740a5
8b97f99
 
c2740a5
8b97f99
 
 
 
 
c2740a5
8b97f99
 
c2740a5
8b97f99
 
b06076e
 
 
 
 
 
63271b3
8b97f99
 
 
b06076e
 
 
8b97f99
c2740a5
8b97f99
c2740a5
8b97f99
b06076e
 
 
 
 
 
 
cc3538d
b06076e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc3538d
b06076e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f386ba9
 
b06076e
ec333f1
 
b06076e
 
 
 
 
 
 
ec333f1
f386ba9
ec333f1
cc3538d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a0fb13
a840067
cc3538d
 
 
a840067
 
cc3538d
 
 
5a11a8d
8b97f99
5a11a8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b97f99
 
 
b06076e
 
e595396
cdd2b64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e595396
b06076e
 
9f74220
b06076e
 
 
 
 
 
 
a0d1236
9f74220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a0fb13
9f74220
 
3a0fb13
9f74220
 
3a0fb13
 
 
 
 
 
 
9f74220
 
 
8b97f99
 
 
7057cb9
8b97f99
 
b06076e
 
 
cc3538d
b06076e
8b97f99
b06076e
8b97f99
 
c2740a5
cc3538d
b06076e
 
 
 
cc3538d
b06076e
 
8b97f99
 
c2740a5
cc3538d
b06076e
 
8b97f99
 
c2740a5
cc3538d
9f74220
 
cc3538d
3a0fb13
 
 
 
 
 
 
cc3538d
 
a840067
6bdf7a7
cc3538d
 
 
 
30d63ae
8b97f99
cc3538d
8b97f99
 
 
b06076e
 
 
 
f386ba9
3a0fb13
 
 
 
 
 
cc3538d
a840067
3a0fb13
 
 
 
 
cc3538d
 
 
 
 
 
 
 
a840067
cc3538d
3a0fb13
4110b67
cc3538d
9f74220
30d63ae
3a0fb13
b06076e
 
 
 
 
 
cc3538d
ae381f5
 
b06076e
 
 
8b97f99
 
 
a723167
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
import gradio as gr
import openai
import fitz  # PyMuPDF for PDF processing
import base64
import io

# Variable to store API key
# Module-level OpenAI API key read by the request handlers in this file.
# An empty string means no key has been configured yet.
api_key = ""


# Function to update API key
def set_api_key(key):
    """Store the OpenAI API key used by subsequent requests.

    Surrounding whitespace (a common copy/paste artefact) is stripped before
    the key is stored. A missing or blank key clears the stored key and is
    reported as such instead of being announced as a success.

    Args:
        key: Key text entered by the user; may be None or empty.

    Returns:
        A status message suitable for display in the UI.
    """
    global api_key
    api_key = (key or "").strip()
    if not api_key:
        return "Please enter a valid OpenAI API key."
    return "API Key Set Successfully!"

# Function to interact with OpenAI API
def query_openai(messages, temperature, top_p, max_output_tokens):
    """Send a chat request to the OpenAI API and return the reply text.

    Args:
        messages: Chat messages to send, passed through unchanged.
        temperature: Sampling temperature; None or "" falls back to 1.0.
        top_p: Nucleus-sampling value; None or "" falls back to 1.0.
        max_output_tokens: Cap on generated tokens; any falsy value
            (including 0) falls back to 2048.

    Returns:
        The content of the first returned choice, or a human-readable
        message when no API key is set or the request raises.
    """
    if not api_key:
        return "Please enter your OpenAI API key first."

    try:
        openai.api_key = api_key  # Set API key dynamically

        # Only a *missing* value gets the default. A plain truthiness test
        # would silently turn a deliberate 0 (e.g. temperature 0 for the
        # most deterministic output) into 1.0.
        temperature = 1.0 if temperature in (None, "") else float(temperature)
        top_p = 1.0 if top_p in (None, "") else float(top_p)
        # For the token cap, 0 keeps meaning "use the default": a cap of
        # zero tokens could not produce any output.
        max_output_tokens = int(max_output_tokens) if max_output_tokens else 2048

        response = openai.ChatCompletion.create(
            model="gpt-4.5-preview",
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_output_tokens
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # Surface the failure as text so the UI can display it.
        return f"Error: {str(e)}"

# Function to process image URL input
def image_url_chat(image_url, text_query, temperature, top_p, max_output_tokens):
    """Ask the model a question about an image referenced by URL.

    Returns a prompt to fill in the fields when either the URL or the
    question is missing; otherwise returns whatever ``query_openai`` returns.
    """
    if not (image_url and text_query):
        return "Please provide an image URL and a query."

    # Single user turn: the image first, then the question about it.
    image_part = {"type": "image_url", "image_url": {"url": image_url}}
    question_part = {"type": "text", "text": text_query}
    conversation = [{"role": "user", "content": [image_part, question_part]}]

    return query_openai(conversation, temperature, top_p, max_output_tokens)

# Function to process text input
def text_chat(text_query, temperature, top_p, max_output_tokens):
    """Send a plain-text question to the model.

    Returns a prompt to enter a query when ``text_query`` is empty;
    otherwise returns whatever ``query_openai`` returns.
    """
    if text_query:
        user_turn = {
            "role": "user",
            "content": [{"type": "text", "text": text_query}],
        }
        return query_openai([user_turn], temperature, top_p, max_output_tokens)

    return "Please enter a query."

# Function to process uploaded image input
def image_chat(image_file, text_query, temperature, top_p, max_output_tokens):
    """Ask the model a question about an uploaded image.

    The image is read from disk, base64-encoded and embedded in the request
    as a ``data:`` URL whose MIME type is guessed from the file name.

    Args:
        image_file: Path of the uploaded image, or None when nothing was
            uploaded.
        text_query: The user's question about the image.
        temperature, top_p, max_output_tokens: Forwarded to ``query_openai``.

    Returns:
        The result of ``query_openai``, or a human-readable message when
        input is missing or the file cannot be read.
    """
    import mimetypes  # stdlib; imported locally because only this function needs it

    if image_file is None or not text_query:
        return "Please upload an image and provide a query."

    # Encode image as base64; report unreadable files as text like the
    # other handlers do instead of letting the exception escape.
    try:
        with open(image_file, "rb") as img:
            base64_image = base64.b64encode(img.read()).decode("utf-8")
    except OSError as e:
        return f"Error processing the image: {str(e)}"

    # Label the payload with its real type (PNG, WebP, ...) rather than
    # always claiming JPEG; fall back to JPEG when the type is unknown.
    mime_type, _ = mimetypes.guess_type(str(image_file))
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    image_data = f"data:{mime_type};base64,{base64_image}"

    messages = [
        {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": image_data}},
            {"type": "text", "text": text_query}
        ]},
    ]
    return query_openai(messages, temperature, top_p, max_output_tokens)

# Function to process uploaded PDF input
def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
    """Ask the model a question about the text of an uploaded PDF.

    Text is extracted from every page and sent ahead of the user's question
    in a single user message.

    Args:
        pdf_file: The uploaded PDF, either as a path string or as an object
            exposing the path through a ``name`` attribute; None when
            nothing was uploaded.
        text_query: The user's question about the document.
        temperature, top_p, max_output_tokens: Forwarded to ``query_openai``.

    Returns:
        The result of ``query_openai``, or a human-readable message when
        input is missing, the PDF has no extractable text, or processing
        raises.
    """
    if pdf_file is None or not text_query:
        return "Please upload a PDF and provide a query."

    try:
        # The upload component is configured with type="filepath", so the
        # value is a plain string with no ``.name``; still accept file-like
        # wrappers that carry the path in ``.name``.
        pdf_path = getattr(pdf_file, "name", pdf_file)

        # Extract text from all pages of the PDF
        doc = fitz.open(pdf_path)
        try:
            text = "\n".join(page.get_text("text") for page in doc)
        finally:
            doc.close()  # release the file handle even if extraction fails

        # If no text found, return an error
        if not text.strip():
            return "No text found in the PDF."

        # Create the query message with the extracted text and the user's query
        messages = [
            {"role": "user", "content": [
                {"type": "text", "text": text},  # The extracted text from the PDF
                {"type": "text", "text": text_query}
            ]},
        ]
        return query_openai(messages, temperature, top_p, max_output_tokens)

    except Exception as e:
        return f"Error processing the PDF: {str(e)}"

# Function to transcribe audio to text using OpenAI Whisper API
def transcribe_audio(audio_binary, openai_api_key):
    """Transcribe raw audio bytes with the "whisper-1" model.

    Returns the transcript text, or an error message when no API key is
    given or the transcription call raises.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    try:
        # Wrap the bytes in a named in-memory file before handing them to
        # the transcription call.
        buffer = io.BytesIO(audio_binary)
        buffer.name = 'audio.wav'

        result = openai.Audio.transcribe(file=buffer, model="whisper-1")
        return result.text
    except Exception as exc:
        return f"Error transcribing audio: {str(exc)}"

# Function to handle uploaded audio transcription
def process_uploaded_audio(audio_binary):
    """Transcribe an uploaded audio file using the stored API key.

    Returns the output of ``transcribe_audio``, or a message explaining
    what is missing (audio first, then API key) or what went wrong.
    """
    if audio_binary and api_key:
        try:
            return transcribe_audio(audio_binary, api_key)
        except Exception as exc:
            return f"Error transcribing audio: {str(exc)}"

    # Something is missing; missing audio is reported before a missing key.
    if not audio_binary:
        return "Please upload an audio file first."
    return "Please enter your OpenAI API key first."

# Function to handle recorded audio transcription
def process_recorded_audio(audio_path):
    """Transcribe a recording stored at ``audio_path`` using the stored API key.

    Returns the output of ``transcribe_audio``, or a message when there is
    no recording, no API key, or reading/transcribing raises.
    """
    if not audio_path:
        return "No audio recorded."

    if not api_key:
        return "Please enter your OpenAI API key first."

    try:
        # Load the whole recording into memory before transcribing it.
        with open(audio_path, "rb") as recording:
            raw_audio = recording.read()
        return transcribe_audio(raw_audio, api_key)
    except Exception as exc:
        return f"Error transcribing recorded audio: {str(exc)}"

# Function to process the voice chat queries
# Status messages the transcription handlers in this file return verbatim in
# place of a transcript.
_TRANSCRIPTION_STATUS_MESSAGES = frozenset({
    "Please upload an audio file first.",
    "Please enter your OpenAI API key first.",
    "No audio recorded.",
    "Error: No API key provided.",
})

# Prefixes of the failure messages that carry exception details after them.
_TRANSCRIPTION_ERROR_PREFIXES = (
    "Error transcribing audio:",
    "Error transcribing recorded audio:",
)


def process_voice_query(transcription, temperature, top_p, max_output_tokens):
    """Send a transcript to the model as the user's query.

    The transcription box can hold either a real transcript or a status
    message from the transcription step. Only the known status messages are
    rejected, so genuine speech that happens to begin with "Please" or
    "Error" is still answered.

    Args:
        transcription: Text from the transcription box.
        temperature, top_p, max_output_tokens: Forwarded to ``query_openai``.

    Returns:
        The result of ``query_openai``, or a prompt to transcribe first when
        the text is blank or is a status/error message.
    """
    text = (transcription or "").strip()
    is_status = (
        text in _TRANSCRIPTION_STATUS_MESSAGES
        or text.startswith(_TRANSCRIPTION_ERROR_PREFIXES)
    )
    if not text or is_status:
        return "Please ensure audio is transcribed successfully first."

    # Use the transcription as the query
    messages = [{"role": "user", "content": [{"type": "text", "text": transcription}]}]

    return query_openai(messages, temperature, top_p, max_output_tokens)

# Function to clear the chat
def clear_chat():
    """Return reset values for the components wired to the clear button.

    Text boxes reset to an empty string, file/audio inputs to None and the
    sliders to their defaults. The order of the returned tuple must match
    the ``outputs`` list passed to ``clear_button.click()``.
    """
    blank, no_file = "", None

    image_url_tab = (blank, blank, blank)        # image_url, image_query, image_url_output
    text_tab = (blank, blank)                    # text_query, text_output
    image_tab = (blank, blank)                   # image_text_query, image_output
    pdf_tab = (no_file, blank, blank)            # pdf_upload, pdf_text_query, pdf_output
    upload_audio_tab = (no_file, blank, blank)   # audio_upload, upload_transcription, upload_audio_output
    record_audio_tab = (no_file, blank, blank)   # audio_recorder, record_transcription, record_audio_output
    slider_defaults = (1.0, 1.0, 2048)           # temperature, top_p, max_output_tokens

    return (
        image_url_tab
        + text_tab
        + image_tab
        + pdf_tab
        + upload_audio_tab
        + record_audio_tab
        + slider_defaults
    )

# Gradio UI Layout
# Builds the whole interface: help accordions, button styling, the API-key
# row, the shared sampling sliders, one tab per input mode, and the event
# wiring that connects each button to its handler.
with gr.Blocks() as demo:
    gr.Markdown("## GPT-4.5 Preview Chatbot")

    with gr.Accordion("How to Use This App!", open=False):
        gr.Markdown("""
        ### Getting Started:
        1. Enter your OpenAI API key in the field at the top and click "Set API Key"
        2. Adjust the hyperparameters if needed (Temperature, Top-P, Max Output Tokens)
        
        ### Using the Different Tabs:
        
        #### Image URL Chat
        - Paste an image URL in the field
        - Enter your question about the image
        - Click "Ask" to get a response
        
        #### Text Chat
        - Simply type your query in the text field
        - Click "Ask" to get a response
        
        #### Image Chat
        - Upload an image from your device
        - Enter your question about the uploaded image
        - Click "Ask" to get a response
        
        #### PDF Chat
        - Upload a PDF document
        - Ask questions about the PDF content
        - Click "Ask" to get a response
        
        #### Voice Chat
        - **Upload Audio:** Upload an audio file, click "Transcribe Audio", then click "Ask"
        - **Record Audio:** Record your voice, click "Transcribe Recording", then click "Ask"
        
        ### Tips:
        - Use the "Clear Chat" button to reset all fields
        - For more creative responses, try increasing the Temperature
        - For longer responses, increase the Max Output Tokens
        """)

    # Accordion for explaining hyperparameters
    with gr.Accordion("Hyperparameters", open=False):
        gr.Markdown("""
        ### Temperature: 
        Controls the randomness of the model's output. A lower temperature makes the model more deterministic, while a higher temperature makes it more creative and varied.
        ### Top-P (Nucleus Sampling): 
        Controls the cumulative probability distribution from which the model picks the next word. A lower value makes the model more focused and deterministic, while a higher value increases randomness.
        ### Max Output Tokens: 
        Limits the number of tokens (words or subwords) the model can generate in its response. You can use this to control the length of the response.
        """)

    # Button colours, keyed by the elem_id given to each button below.
    gr.HTML("""
    <style>
        #api_key_button {
            margin-top: 27px; /* Add margin-top to the button */
            background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
        }
        #api_key_button:hover {
            background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
        }
        #clear_chat_button {
            background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%); /* Red gradient */
        }
        #clear_chat_button:hover {
            background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
        }
        #ask_button {
            background: linear-gradient(135deg, #fbd38d 0%, #f6e05e 100%); /* Yellow gradient */
        }
        #ask_button:hover {
            background: linear-gradient(135deg, #ecc94b 0%, #fbd38d 100%); /* Slightly darker yellow gradient on hover */
        }
        #transcribe_button {
            background: linear-gradient(135deg, #68d391 0%, #48bb78 100%); /* Green gradient */
        }
        
        #transcribe_button:hover {
            background: linear-gradient(135deg, #38a169 0%, #68d391 100%); /* Slightly darker green gradient on hover */
        }
    </style>
    """)
    
    # API Key Input
    with gr.Row():
        api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
        api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
        api_key_output = gr.Textbox(label="API Key Status", interactive=False)

    # Sampling controls shared by every tab.
    with gr.Row():
        temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
        top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
        max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens")
    
    with gr.Tabs():
        with gr.Tab("Image URL Chat"):
            image_url = gr.Textbox(label="Enter Image URL")
            image_query = gr.Textbox(label="Ask about the Image")
            image_url_output = gr.Textbox(label="Response", interactive=False)
            image_url_button = gr.Button("Ask", elem_id="ask_button")
        
        with gr.Tab("Text Chat"):
            text_query = gr.Textbox(label="Enter your query")
            text_output = gr.Textbox(label="Response", interactive=False)
            text_button = gr.Button("Ask", elem_id="ask_button")
        
        with gr.Tab("Image Chat"):
            image_upload = gr.File(label="Upload an Image", type="filepath")
            image_text_query = gr.Textbox(label="Ask about the uploaded image")
            image_output = gr.Textbox(label="Response", interactive=False)
            image_button = gr.Button("Ask", elem_id="ask_button")
        
        with gr.Tab("PDF Chat"):
            pdf_upload = gr.File(label="Upload a PDF", type="filepath")
            pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
            pdf_output = gr.Textbox(label="Response", interactive=False)
            pdf_button = gr.Button("Ask", elem_id="ask_button")

        with gr.Tab("Voice Chat"):
            with gr.Tabs():
                with gr.Tab("Upload Audio"):
                    # Upload audio section
                    audio_upload = gr.File(label="Upload an Audio File", type="binary")
                    upload_transcribe_button = gr.Button("Transcribe Audio", elem_id="transcribe_button")
                    upload_transcription = gr.Textbox(label="Transcription", interactive=False)
                    upload_audio_output = gr.Textbox(label="Response", interactive=False)
                    upload_audio_button = gr.Button("Ask", elem_id="ask_button")
                
                with gr.Tab("Record Audio"):
                    # Record audio section
                    audio_recorder = gr.Audio(label="Record your voice", type="filepath")
                    record_transcribe_button = gr.Button("Transcribe Recording", elem_id="transcribe_button")
                    record_transcription = gr.Textbox(label="Transcription", interactive=False)
                    record_audio_output = gr.Textbox(label="Response", interactive=False)
                    record_audio_button = gr.Button("Ask", elem_id="ask_button")

    # Clear chat button
    clear_button = gr.Button("Clear Chat", elem_id="clear_chat_button")

    # Button Click Actions
    api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
    image_url_button.click(image_url_chat, [image_url, image_query, temperature, top_p, max_output_tokens], image_url_output)
    text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
    image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
    pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
    
    # Voice Chat - Upload Audio tab actions
    upload_transcribe_button.click(
        process_uploaded_audio,
        inputs=[audio_upload],
        outputs=[upload_transcription]
    )
    
    # Input order must match the process_voice_query signature
    upload_audio_button.click(
        process_voice_query,
        inputs=[upload_transcription, temperature, top_p, max_output_tokens],
        outputs=[upload_audio_output]
    )
    
    # Voice Chat - Record Audio tab actions
    record_transcribe_button.click(
        process_recorded_audio,
        inputs=[audio_recorder],
        outputs=[record_transcription]
    )
    
    # Input order must match the process_voice_query signature
    record_audio_button.click(
        process_voice_query,
        inputs=[record_transcription, temperature, top_p, max_output_tokens],
        outputs=[record_audio_output]
    )

    # Clear button resets all necessary fields.
    # clear_chat() does not return a value for the uploaded image, so a None
    # is prepended here and image_upload is listed first; without it the
    # previous image would survive "Clear Chat".
    clear_button.click(
        lambda: (None, *clear_chat()),
        outputs=[
            image_upload,
            image_url, image_query, image_url_output, 
            text_query, text_output, 
            image_text_query, image_output, 
            pdf_upload, pdf_text_query, pdf_output,
            audio_upload, upload_transcription, upload_audio_output,
            audio_recorder, record_transcription, record_audio_output,
            temperature, top_p, max_output_tokens
        ]
    )

# Launch Gradio App
# Only launch when this file is executed as a script, so importing the module
# (for example to reuse the handlers) does not start the app.
if __name__ == "__main__":
    demo.launch()