File size: 7,113 Bytes
8b97f99
 
 
 
ec333f1
ec3a27a
 
8b97f99
c2740a5
8b97f99
 
c2740a5
8b97f99
 
 
 
 
c2740a5
8b97f99
 
c2740a5
8b97f99
 
ec3a27a
63271b3
8b97f99
 
 
ec3a27a
 
 
8b97f99
c2740a5
8b97f99
c2740a5
8b97f99
ec3a27a
 
 
f386ba9
 
ec3a27a
ec333f1
 
ec3a27a
 
 
 
 
 
 
 
 
 
 
 
ec333f1
f386ba9
ec333f1
ec3a27a
8b97f99
ec3a27a
8b97f99
 
 
ec3a27a
a0d1236
ec3a27a
7057cb9
 
 
ec3a27a
 
 
 
 
7057cb9
2be48c9
ec3a27a
2be48c9
92fc24f
ec3a27a
 
 
 
92fc24f
 
ec3a27a
92fc24f
7057cb9
 
ec3a27a
8b97f99
 
 
7057cb9
8b97f99
 
ec3a27a
 
 
 
 
 
 
 
 
 
 
 
8b97f99
ec3a27a
 
 
 
 
 
8b97f99
 
c2740a5
8b97f99
ec3a27a
 
8b97f99
 
c2740a5
8b97f99
ec3a27a
 
8b97f99
 
c2740a5
8b97f99
 
ec3a27a
 
 
 
ec333f1
 
 
8b97f99
ec3a27a
8b97f99
 
 
ec3a27a
 
 
f386ba9
ec3a27a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b97f99
 
 
a723167
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import base64
import io
import mimetypes

import fitz  # PyMuPDF for PDF processing
import gradio as gr
import numpy as np
import openai
import soundfile as sf

# Module-level storage for the user's OpenAI API key (set via the UI).
api_key = ""

def set_api_key(key):
    """Remember the OpenAI API key so later request helpers can use it."""
    global api_key
    api_key = key
    return "API Key Set Successfully!"

# Send one chat request to the OpenAI API.
def query_openai(messages, temperature, top_p, max_output_tokens):
    """Return the model's reply text for *messages*, or an error string.

    Uses the module-global ``api_key``; sampling is controlled by
    ``temperature`` / ``top_p`` and the reply length by ``max_output_tokens``.
    """
    if not api_key:
        return "Please enter your OpenAI API key first."

    try:
        openai.api_key = api_key

        completion = openai.ChatCompletion.create(
            model="gpt-4.5-preview",
            messages=messages,
            # Sliders hand back numbers that may arrive as strings; coerce.
            temperature=float(temperature),
            top_p=float(top_p),
            max_tokens=int(max_output_tokens),
        )
    except Exception as exc:
        # Surface the failure in the response box rather than crashing the UI.
        return f"Error: {str(exc)}"
    return completion["choices"][0]["message"]["content"]

# Function to transcribe audio
def transcribe_audio(audio_input):
    """Transcribe recorded or uploaded audio with Whisper.

    Accepts three input forms:
      - ``(sample_rate, np.ndarray)`` tuple, as produced by
        ``gr.Audio(type="numpy")`` — the previous code only checked for a bare
        ndarray, so recordings fell into the bytes branch and crashed;
      - bare ``np.ndarray`` (kept for backward compatibility; assumes 16 kHz,
        as before — no rate information is available in this form);
      - raw ``bytes`` of an audio file (``gr.File(type="binary")``).

    Returns the transcription text, or an error string on failure.
    """
    if not api_key:
        return "Error: No API key provided."

    openai.api_key = api_key

    try:
        if isinstance(audio_input, tuple):
            # gr.Audio(type="numpy") yields (sample_rate, data): encode WAV
            # in memory at the *actual* recording rate, not a hard-coded one.
            sample_rate, data = audio_input
            wav_io = io.BytesIO()
            sf.write(wav_io, data, samplerate=int(sample_rate), format="WAV")
            wav_io.seek(0)
            audio_file_obj = wav_io
            audio_file_obj.name = "recorded_audio.wav"
        elif isinstance(audio_input, np.ndarray):
            # Legacy path: bare array with no rate attached; keep the old
            # 16 kHz assumption so existing callers behave unchanged.
            wav_io = io.BytesIO()
            sf.write(wav_io, audio_input, samplerate=16000, format="WAV")
            wav_io.seek(0)
            audio_file_obj = wav_io
            audio_file_obj.name = "recorded_audio.wav"
        else:
            # Raw bytes from an uploaded file; the API needs a named file obj.
            audio_file_obj = io.BytesIO(audio_input)
            audio_file_obj.name = "uploaded_audio.wav"

        transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
        return transcription["text"]
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"

# Reset every chat field and restore default hyperparameters.
def clear_chat():
    """Return blank values for all UI fields plus slider defaults."""
    blank = ""
    return (
        blank, blank, blank, blank, blank, blank, blank,  # text fields
        None, blank, None, blank, None, blank,            # file inputs + fields
        1.0, 1.0, 2048,                                   # temperature, top-p, max tokens
    )

# Helper: read all text out of a PDF on disk (pdf_upload provides a filepath).
# The previous handler called .read() on the filepath string, which raised
# AttributeError; fitz was imported for exactly this purpose but never used.
def _extract_pdf_text(pdf_path):
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text() for page in doc)

# Helper: convert a local image file into a base64 data URL for the vision API.
# The previous handler sent the local filepath as "image_url", which the API
# cannot fetch; base64 was imported for this but never used.
def _image_file_to_data_url(image_path):
    mime = mimetypes.guess_type(image_path)[0] or "image/jpeg"
    with open(image_path, "rb") as img_file:
        encoded = base64.b64encode(img_file.read()).decode("utf-8")
    return f"data:{mime};base64,{encoded}"

# Gradio UI Layout
with gr.Blocks() as demo:
    gr.Markdown("## πŸ”₯ GPT-4.5 AI Chatbot: Text, Image, PDF, & Voice Support")

    # Custom CSS for buttons
    gr.HTML("""
    <style>
        #api_key_button {
            margin-top: 27px; 
            background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);
            color: white;
            font-weight: bold;
            border-radius: 5px;
        }
        #api_key_button:hover {
            background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%);
        }
        #clear_chat_button {
            background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%);
            color: white;
            font-weight: bold;
            border-radius: 5px;
        }
        #clear_chat_button:hover {
            background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%);
        }
    </style>
    """)

    # API Key Input
    with gr.Row():
        api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
        api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
        api_key_output = gr.Textbox(label="API Key Status", interactive=False)

    # Accordion for Hyperparameters
    with gr.Accordion("πŸ”§ Advanced Settings (Hyperparameters)", open=False):
        gr.Markdown("""
        - **Temperature**: Controls randomness. Lower values make responses more predictable.
        - **Top-P (Nucleus Sampling)**: Determines how many top probable words can be chosen.
        - **Max Output Tokens**: Limits the length of the response.
        """)
        with gr.Row():
            temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
            top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
            max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens")

    with gr.Tabs():
        with gr.Tab("πŸ’¬ Text Chat"):
            text_query = gr.Textbox(label="Enter your query")
            text_output = gr.Textbox(label="Response", interactive=False)
            text_button = gr.Button("Ask")

        with gr.Tab("πŸ–ΌοΈ Image URL Chat"):
            image_url = gr.Textbox(label="Enter Image URL")
            image_query = gr.Textbox(label="Ask about the Image")
            image_url_output = gr.Textbox(label="Response", interactive=False)
            image_url_button = gr.Button("Ask")

        with gr.Tab("πŸ“Έ Image Upload Chat"):
            image_upload = gr.File(label="Upload an Image", type="filepath")
            image_text_query = gr.Textbox(label="Ask about the uploaded image")
            image_output = gr.Textbox(label="Response", interactive=False)
            image_button = gr.Button("Ask")

        with gr.Tab("πŸ“„ PDF Chat"):
            pdf_upload = gr.File(label="Upload a PDF", type="filepath")
            pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
            pdf_output = gr.Textbox(label="Response", interactive=False)
            pdf_button = gr.Button("Ask")

        with gr.Tab("🎀 Voice Chat"):
            audio_record = gr.Audio(source="microphone", type="numpy", label="πŸŽ™οΈ Record Audio")
            audio_upload = gr.File(label="πŸ“‚ Upload an Audio File", type="binary")
            audio_query = gr.Textbox(label="Ask a question about the transcription")
            audio_output = gr.Textbox(label="Response", interactive=False)
            audio_button = gr.Button("Ask")

    # Clear chat button
    clear_button = gr.Button("🧹 Clear Chat", elem_id="clear_chat_button")

    # Button Click Actions
    api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])

    text_button.click(lambda q, t, p, m: query_openai([{"role": "user", "content": [{"type": "text", "text": q}]}], t, p, m), 
                      inputs=[text_query, temperature, top_p, max_output_tokens], 
                      outputs=[text_output])

    image_url_button.click(lambda u, q, t, p, m: query_openai([{"role": "user", "content": [{"type": "image_url", "image_url": {"url": u}}, {"type": "text", "text": q}]}], t, p, m), 
                           inputs=[image_url, image_query, temperature, top_p, max_output_tokens], 
                           outputs=[image_url_output])

    # Fixed: encode the uploaded image as a data URL instead of sending the
    # local filepath, which the remote API cannot resolve.
    image_button.click(lambda f, q, t, p, m: query_openai([{"role": "user", "content": [{"type": "image_url", "image_url": {"url": _image_file_to_data_url(f)}}, {"type": "text", "text": q}]}], t, p, m), 
                       inputs=[image_upload, image_text_query, temperature, top_p, max_output_tokens], 
                       outputs=[image_output])

    # Fixed: f is a filepath string (gr.File type="filepath"); extract the PDF
    # text with fitz instead of calling .read() on the path.
    pdf_button.click(lambda f, q, t, p, m: query_openai([{"role": "user", "content": [{"type": "text", "text": _extract_pdf_text(f)}, {"type": "text", "text": q}]}], t, p, m), 
                     inputs=[pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], 
                     outputs=[pdf_output])

    # Fixed: also accept an uploaded audio file — prefer the microphone
    # recording when present, otherwise fall back to the upload.
    audio_button.click(lambda rec, up, q, t, p, m: query_openai([{"role": "user", "content": [{"type": "text", "text": transcribe_audio(rec if rec is not None else up)}, {"type": "text", "text": q}]}], t, p, m), 
                       inputs=[audio_record, audio_upload, audio_query, temperature, top_p, max_output_tokens], 
                       outputs=[audio_output])

    # Fixed: the clear button existed but was never wired to clear_chat.
    # Output order matches clear_chat's 16 return values positionally.
    clear_button.click(clear_chat,
                       outputs=[text_query, text_output, image_url, image_query,
                                image_url_output, image_text_query, image_output,
                                image_upload, pdf_text_query, pdf_upload, pdf_output,
                                audio_record, audio_query,
                                temperature, top_p, max_output_tokens])

# Launch Gradio App — guard keeps the server from starting when this
# module is imported (e.g. by tests) rather than run as a script.
if __name__ == "__main__":
    demo.launch()