File size: 8,900 Bytes
8b97f99
 
 
 
ec333f1
8b97f99
c2740a5
8b97f99
 
c2740a5
8b97f99
 
 
 
 
c2740a5
8b97f99
 
c2740a5
8b97f99
 
c2740a5
63271b3
c2740a5
63271b3
 
892bcef
63271b3
8b97f99
 
 
 
 
 
 
c2740a5
8b97f99
c2740a5
8b97f99
c2740a5
 
e6cee82
4105a3b
c2740a5
 
 
 
 
ec333f1
c2740a5
4105a3b
8b97f99
c2740a5
 
e6cee82
c2740a5
e6cee82
c2740a5
8b97f99
 
c2740a5
 
e6cee82
4105a3b
8b97f99
 
 
 
e96901a
8b97f99
e96901a
c2740a5
 
 
 
ec333f1
c2740a5
4105a3b
8b97f99
c2740a5
 
e6cee82
c2740a5
8b97f99
a723167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b97f99
f386ba9
a56f35d
f386ba9
 
 
 
ec333f1
 
a56f35d
 
 
f386ba9
ec333f1
f386ba9
ec333f1
c2740a5
8b97f99
c2740a5
8b97f99
 
 
c2740a5
7057cb9
 
 
 
 
 
 
 
c2740a5
8b97f99
 
 
7057cb9
8b97f99
 
 
 
 
c2740a5
 
8b97f99
 
 
 
c2740a5
8b97f99
c2740a5
8b97f99
 
c2740a5
8b97f99
c2740a5
8b97f99
 
 
c2740a5
8b97f99
c2740a5
8b97f99
 
 
c2740a5
8b97f99
 
ec333f1
a56f35d
 
610a3c2
 
ec333f1
 
 
 
8b97f99
 
 
 
 
4105a3b
c2740a5
4105a3b
c2740a5
f386ba9
610a3c2
 
 
 
 
 
 
a56f35d
610a3c2
 
 
 
f386ba9
610a3c2
 
 
e6cee82
d60058e
e6cee82
 
610a3c2
4105a3b
e6cee82
4105a3b
d60058e
e6cee82
 
 
8b97f99
 
 
a723167
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import gradio as gr
import openai
import fitz  # PyMuPDF for PDF processing
import base64
import io

# Module-level storage for the user's OpenAI API key, populated at runtime
# by set_api_key() via the UI; request helpers refuse to run while it is "".
api_key = ""

# Click handler for the "Set API Key" button.
def set_api_key(key):
    """Remember *key* in the module-level ``api_key`` and confirm to the UI."""
    global api_key
    api_key = key
    return "API Key Set Successfully!"

# Function to interact with OpenAI API
def query_openai(messages, temperature, top_p, max_output_tokens):
    """Send *messages* to the GPT-4.5 preview chat model and return the reply.

    Args:
        messages: list of chat messages in the OpenAI content-parts format.
        temperature, top_p: sampling parameters from the UI sliders.
        max_output_tokens: cap on generated tokens.

    Returns:
        The model's reply text, or a human-readable error string (this
        function never raises — callers show the return value in the UI).
    """
    if not api_key:
        return "Please enter your OpenAI API key first."

    try:
        openai.api_key = api_key  # Set API key dynamically

        # Coerce UI values to numbers. Compare against None — not truthiness —
        # so an explicit 0, which is valid for temperature and top_p, is kept
        # rather than silently replaced by the default.
        temperature = float(temperature) if temperature is not None else 1.0
        top_p = float(top_p) if top_p is not None else 1.0
        # 0 is not a usable token budget, so the falsy check stays here.
        max_output_tokens = int(max_output_tokens) if max_output_tokens else 2048

        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK surface;
        # migrating to openai.OpenAI().chat.completions.create is advisable —
        # confirm the pinned openai package version first.
        response = openai.ChatCompletion.create(
            model="gpt-4.5-preview",
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_output_tokens
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error: {str(e)}"

# Function to process image URL input
def image_url_chat(image_url, text_query, temperature, top_p, max_output_tokens):
    """Ask the model *text_query* about the image at *image_url*.

    Returns a prompt string when either input is missing, otherwise the
    model's reply (or error string) from query_openai.
    """
    if not (image_url and text_query):
        return "Please provide an image URL and a query."

    # Content parts: the remote image first, then the user's question.
    content = [
        {"type": "image_url", "image_url": {"url": image_url}},
        {"type": "text", "text": text_query},
    ]
    return query_openai([{"role": "user", "content": content}], temperature, top_p, max_output_tokens)

# Function to process text input
def text_chat(text_query, temperature, top_p, max_output_tokens):
    """Send a plain-text *text_query* to the model and return its reply."""
    if not text_query:
        return "Please enter a query."

    payload = [{"role": "user", "content": [{"type": "text", "text": text_query}]}]
    return query_openai(payload, temperature, top_p, max_output_tokens)

# Function to process uploaded image input
def image_chat(image_file, text_query, temperature, top_p, max_output_tokens):
    """Ask the model *text_query* about a locally uploaded image.

    The image is inlined as a base64 data URI so no public URL is needed.
    Returns a prompt string when input is missing, otherwise the model's
    reply (or error string) from query_openai.
    """
    if image_file is None or not text_query:
        return "Please upload an image and provide a query."

    # Derive the real MIME type from the filename; the previous version
    # hard-coded image/jpeg, which mislabeled PNG/GIF/WebP uploads.
    import mimetypes
    mime_type, _ = mimetypes.guess_type(image_file)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"  # fall back to the old behavior

    # Encode image as base64
    with open(image_file, "rb") as img:
        base64_image = base64.b64encode(img.read()).decode("utf-8")

    image_data = f"data:{mime_type};base64,{base64_image}"

    messages = [
        {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": image_data}},
            {"type": "text", "text": text_query}
        ]},
    ]
    return query_openai(messages, temperature, top_p, max_output_tokens)

# Function to process uploaded PDF input
def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
    """Answer *text_query* using the text content of an uploaded PDF.

    Returns a prompt string when input is missing, an explanatory message
    when the PDF has no extractable text, or the model's reply. Extraction
    failures are caught and returned as an error string.
    """
    if pdf_file is None or not text_query:
        return "Please upload a PDF and provide a query."

    try:
        # gr.File(type="filepath") passes a plain str path; older Gradio
        # versions passed a tempfile wrapper exposing .name. The previous
        # code used pdf_file.name unconditionally, which fails on str
        # (image_chat already treats its upload as a path) — accept both.
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

        # Extract text from all pages of the PDF
        doc = fitz.open(pdf_path)
        text = "\n".join(page.get_text("text") for page in doc)

        # If no text found (e.g. a scanned/image-only PDF), bail out early
        if not text.strip():
            return "No text found in the PDF."

        # Send the extracted text as context, followed by the user's query
        messages = [
            {"role": "user", "content": [
                {"type": "text", "text": text},
                {"type": "text", "text": text_query}
            ]},
        ]
        return query_openai(messages, temperature, top_p, max_output_tokens)

    except Exception as e:
        return f"Error processing the PDF: {str(e)}"

# Function to transcribe audio to text using OpenAI Whisper API
def transcribe_audio(audio_filepath, openai_api_key):
    """Transcribe the audio file at *audio_filepath* with Whisper.

    Returns the transcription text, or an "Error: ..." string on failure
    (missing key, unreadable file, API error) — this function never raises.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    try:
        # Stream the file to OpenAI's Whisper model for transcription.
        with open(audio_filepath, "rb") as audio_file:
            transcription = openai.Audio.transcribe(file=audio_file, model="whisper-1")
            return transcription.text
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"

# Reset every component wired to the Clear button.
def clear_chat():
    """Return one reset value per Clear-button output component, in order.

    The wiring lists exactly 13 outputs: image_url, image_query,
    image_url_output, text_query, text_output, image_text_query,
    image_output, pdf_upload, pdf_text_query, pdf_output, temperature,
    top_p, max_output_tokens. The previous version returned 14 values
    (an extra None/""), which made Gradio fail on every click.
    """
    return "", "", "", "", "", "", "", None, "", "", 1.0, 1.0, 2048

# Gradio UI Layout: one Blocks app with shared sampling sliders and one tab
# per input modality (image URL, text, uploaded image, PDF, voice).
with gr.Blocks() as demo:
    gr.Markdown("## GPT-4.5 Preview Chatbot")

    # Inline CSS: nudge the "Set API Key" button down so it lines up with
    # the neighboring textboxes in the same row.
    gr.HTML("""
    <style>
        #api_key_button {
            margin-top: 27px; /* Add margin-top to the button */
        }
    </style>
    """)
    
    # API Key Input row: key entry, the styled button, and a status readout.
    with gr.Row():
        api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
        api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
        api_key_output = gr.Textbox(label="API Key Status", interactive=False)

    # Sampling controls shared by every tab's request.
    with gr.Row():
        temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
        top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
        max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens")  # Changed default to 2048
    
    with gr.Tabs():
        with gr.Tab("Image URL Chat"):
            image_url = gr.Textbox(label="Enter Image URL")
            image_query = gr.Textbox(label="Ask about the Image")
            image_url_output = gr.Textbox(label="Response", interactive=False)
            image_url_button = gr.Button("Ask")
        
        with gr.Tab("Text Chat"):
            text_query = gr.Textbox(label="Enter your query")
            text_output = gr.Textbox(label="Response", interactive=False)
            text_button = gr.Button("Ask")
        
        with gr.Tab("Image Chat"):
            image_upload = gr.File(label="Upload an Image", type="filepath")
            image_text_query = gr.Textbox(label="Ask about the uploaded image")
            image_output = gr.Textbox(label="Response", interactive=False)
            image_button = gr.Button("Ask")
        
        with gr.Tab("PDF Chat"):
            pdf_upload = gr.File(label="Upload a PDF", type="filepath")
            pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
            pdf_output = gr.Textbox(label="Response", interactive=False)
            pdf_button = gr.Button("Ask")

        with gr.Tab("Voice Chat"):
            # Record Audio Component for Voice Chat (type="filepath" -> str path)
            audio_record = gr.Audio(label="Record your Voice", type="filepath", show_label=True)
            # Upload Audio File Component.
            # NOTE(review): type="file" is deprecated (removed in Gradio 4;
            # use type="filepath"), and file_types normally takes extensions
            # such as [".wav", ".mp3"] — confirm against the pinned Gradio
            # version.
            audio_upload = gr.File(label="Or Upload an Audio File", type="file", file_types=["audio/wav", "audio/mp3"])
            audio_query = gr.Textbox(label="Ask about the transcription")
            audio_output = gr.Textbox(label="Response", interactive=False)
            audio_button = gr.Button("Ask")

    # Clear chat button
    clear_button = gr.Button("Clear Chat")

    # Button Click Actions: each tab's Ask button feeds its inputs plus the
    # shared sliders into the matching handler above.
    api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
    image_url_button.click(image_url_chat, [image_url, image_query, temperature, top_p, max_output_tokens], image_url_output)
    text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
    image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
    pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
    
    # For Voice Chat (record or upload audio and process query)
    def process_audio(recorded_audio, uploaded_audio, query, temperature, top_p, max_output_tokens):
        """Transcribe recorded or uploaded audio, then ask the model about it.

        Takes six arguments to match the six input components wired to
        audio_button.click below (the previous five-argument signature made
        Gradio raise a TypeError on every click). The recorded clip is
        preferred when both sources are provided.
        """
        audio = recorded_audio if recorded_audio is not None else uploaded_audio
        if audio is None:
            return "Please either record or upload an audio file."

        # gr.Audio(type="filepath") supplies a plain str path, while an
        # uploaded-file object may expose the path via .name — accept both
        # (the old code called audio.name unconditionally, failing on str).
        audio_path = audio if isinstance(audio, str) else audio.name
        transcription = transcribe_audio(audio_path, api_key)
        if transcription.startswith("Error"):
            return transcription  # surface the transcription failure as-is
        return query_openai(
            [{"role": "user", "content": [{"type": "text", "text": transcription}, {"type": "text", "text": query}]}],
            temperature, top_p, max_output_tokens
        )

    audio_button.click(process_audio, [audio_record, audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output)

    # Fix: Clear button resets all necessary fields correctly
    clear_button.click(
        clear_chat,
        outputs=[ 
            image_url, image_query, image_url_output, 
            text_query, text_output, 
            image_text_query, image_output, 
            pdf_upload, pdf_text_query, pdf_output, 
            temperature, top_p, max_output_tokens
        ]
    )

# Launch Gradio App: start the local web server only when run as a script,
# so importing this module elsewhere does not block.
if __name__ == "__main__":
    demo.launch()