import gradio as gr
import openai
import fitz # PyMuPDF for PDF processing
import base64
import io
import numpy as np
import soundfile as sf
# Module-level store for the user's OpenAI API key; written by set_api_key()
# and read by query_openai() / transcribe_audio(). Empty string means "unset".
api_key: str = ""
# Remembers the user-supplied OpenAI API key for subsequent requests.
def set_api_key(key):
    """Store *key* in the module-level ``api_key`` variable.

    Returns a confirmation string that is shown in the UI status box.
    """
    global api_key
    api_key = key
    return "API Key Set Successfully!"
# Sends a chat request to the GPT-4.5 preview model and returns the reply text.
def query_openai(messages, temperature, top_p, max_output_tokens):
    """Call the OpenAI ChatCompletion endpoint with *messages*.

    Parameters are the raw UI values; sliders may deliver them as strings,
    hence the explicit float()/int() coercion. Returns the assistant's reply
    text, or a human-readable error string when the key is missing or the
    request fails (the UI displays whatever comes back).
    """
    if not api_key:
        return "Please enter your OpenAI API key first."
    try:
        openai.api_key = api_key
        request = {
            "model": "gpt-4.5-preview",
            "messages": messages,
            "temperature": float(temperature),
            "top_p": float(top_p),
            "max_tokens": int(max_output_tokens),
        }
        response = openai.ChatCompletion.create(**request)
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # Surface any failure (bad key, network, malformed response) as text.
        return f"Error: {str(e)}"
# Converts recorded or uploaded audio to text with OpenAI's Whisper model.
def transcribe_audio(audio_input):
    """Transcribe *audio_input* via the Whisper API and return the text.

    Accepts any of:
      * ``(sample_rate, samples)`` tuple — what ``gr.Audio(type="numpy")``
        actually delivers (the original ndarray check missed this and the
        tuple crashed in the bytes branch);
      * a bare ``np.ndarray`` of samples — kept for backward compatibility,
        assumed 16 kHz as before;
      * raw audio file bytes from an upload.

    Returns the transcription text, or an error string on any failure.
    """
    if not api_key:
        return "Error: No API key provided."
    openai.api_key = api_key
    try:
        samplerate = 16000  # fallback only for the bare-ndarray case
        samples = None
        if isinstance(audio_input, tuple) and len(audio_input) == 2:
            # Gradio microphone capture: use the real capture rate instead of
            # hard-coding 16 kHz, which distorted pitch/speed of the clip.
            samplerate, samples = audio_input
        elif isinstance(audio_input, np.ndarray):
            samples = audio_input
        if samples is not None:
            wav_io = io.BytesIO()
            sf.write(wav_io, samples, samplerate=int(samplerate), format="WAV")
            wav_io.seek(0)
            audio_file_obj = wav_io
            # The API infers the format from the filename attribute.
            audio_file_obj.name = "recorded_audio.wav"
        else:
            # Assume raw bytes of an uploaded audio file.
            # NOTE(review): the ".wav" name is a guess for non-WAV uploads —
            # confirm which formats users actually upload.
            audio_file_obj = io.BytesIO(audio_input)
            audio_file_obj.name = "uploaded_audio.wav"
        transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
        return transcription["text"]
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
# Resets every wired input/output widget and the sliders to their defaults.
def clear_chat():
    """Return the 16 default values for the components wired to the clear button."""
    text_defaults = [""] * 7
    upload_defaults = [None, "", None, "", None, ""]
    slider_defaults = [1.0, 1.0, 2048]
    return tuple(text_defaults + upload_defaults + slider_defaults)
# Gradio UI Layout

def _encode_image_as_data_url(image_path):
    """Return the image at *image_path* inlined as a base64 data URL.

    The chat API cannot fetch local file paths, so uploaded images must be
    embedded directly in the request body.
    """
    with open(image_path, "rb") as img_file:
        encoded = base64.b64encode(img_file.read()).decode("utf-8")
    # NOTE(review): subtype is a guess; the API tolerates common raster
    # formats regardless — confirm if uploads other than JPEG/PNG are expected.
    return f"data:image/jpeg;base64,{encoded}"


def _extract_pdf_text(pdf_path):
    """Extract the plain text of every page of the PDF at *pdf_path* via PyMuPDF."""
    with fitz.open(pdf_path) as doc:
        return "\n".join(page.get_text() for page in doc)


with gr.Blocks() as demo:
    gr.Markdown("## π₯ GPT-4.5 AI Chatbot: Text, Image, PDF, & Voice Support")

    # Custom CSS for buttons
    gr.HTML("""
    <style>
        #api_key_button {
            margin-top: 27px;
            background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%);
            color: white;
            font-weight: bold;
            border-radius: 5px;
        }
        #api_key_button:hover {
            background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%);
        }
        #clear_chat_button {
            background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%);
            color: white;
            font-weight: bold;
            border-radius: 5px;
        }
        #clear_chat_button:hover {
            background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%);
        }
    </style>
    """)

    # API Key Input
    with gr.Row():
        api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
        api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
        api_key_output = gr.Textbox(label="API Key Status", interactive=False)

    # Accordion for Hyperparameters
    with gr.Accordion("π§ Advanced Settings (Hyperparameters)", open=False):
        gr.Markdown("""
        - **Temperature**: Controls randomness. Lower values make responses more predictable.
        - **Top-P (Nucleus Sampling)**: Determines how many top probable words can be chosen.
        - **Max Output Tokens**: Limits the length of the response.
        """)
        with gr.Row():
            temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
            top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
            max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens")

    with gr.Tabs():
        with gr.Tab("π¬ Text Chat"):
            text_query = gr.Textbox(label="Enter your query")
            text_output = gr.Textbox(label="Response", interactive=False)
            text_button = gr.Button("Ask")

        with gr.Tab("πΌοΈ Image URL Chat"):
            image_url = gr.Textbox(label="Enter Image URL")
            image_query = gr.Textbox(label="Ask about the Image")
            image_url_output = gr.Textbox(label="Response", interactive=False)
            image_url_button = gr.Button("Ask")

        with gr.Tab("πΈ Image Upload Chat"):
            image_upload = gr.File(label="Upload an Image", type="filepath")
            image_text_query = gr.Textbox(label="Ask about the uploaded image")
            image_output = gr.Textbox(label="Response", interactive=False)
            image_button = gr.Button("Ask")

        with gr.Tab("π PDF Chat"):
            pdf_upload = gr.File(label="Upload a PDF", type="filepath")
            pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
            pdf_output = gr.Textbox(label="Response", interactive=False)
            pdf_button = gr.Button("Ask")

        with gr.Tab("π€ Voice Chat"):
            audio_record = gr.Audio(source="microphone", type="numpy", label="ποΈ Record Audio")
            audio_upload = gr.File(label="π Upload an Audio File", type="binary")
            audio_query = gr.Textbox(label="Ask a question about the transcription")
            audio_output = gr.Textbox(label="Response", interactive=False)
            audio_button = gr.Button("Ask")

    # Clear chat button
    clear_button = gr.Button("π§Ή Clear Chat", elem_id="clear_chat_button")

    # Button Click Actions
    api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])

    text_button.click(
        lambda q, t, p, m: query_openai(
            [{"role": "user", "content": [{"type": "text", "text": q}]}], t, p, m),
        inputs=[text_query, temperature, top_p, max_output_tokens],
        outputs=[text_output])

    image_url_button.click(
        lambda u, q, t, p, m: query_openai(
            [{"role": "user", "content": [
                {"type": "image_url", "image_url": {"url": u}},
                {"type": "text", "text": q}]}], t, p, m),
        inputs=[image_url, image_query, temperature, top_p, max_output_tokens],
        outputs=[image_url_output])

    # FIX: a bare local filepath is useless as an image_url — inline the
    # uploaded file as a base64 data URL instead.
    image_button.click(
        lambda f, q, t, p, m: query_openai(
            [{"role": "user", "content": [
                {"type": "image_url", "image_url": {"url": _encode_image_as_data_url(f)}},
                {"type": "text", "text": q}]}], t, p, m),
        inputs=[image_upload, image_text_query, temperature, top_p, max_output_tokens],
        outputs=[image_output])

    # FIX: pdf_upload is type="filepath" (a str), so the old `f.read()` raised
    # AttributeError; extract the document text with PyMuPDF (imported as fitz
    # at the top of the file but previously unused).
    pdf_button.click(
        lambda f, q, t, p, m: query_openai(
            [{"role": "user", "content": [
                {"type": "text", "text": _extract_pdf_text(f)},
                {"type": "text", "text": q}]}], t, p, m),
        inputs=[pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens],
        outputs=[pdf_output])

    # FIX: the upload widget existed but was ignored — fall back to the
    # uploaded file when nothing was recorded.
    audio_button.click(
        lambda rec, up, q, t, p, m: query_openai(
            [{"role": "user", "content": [
                {"type": "text", "text": transcribe_audio(rec if rec is not None else up)},
                {"type": "text", "text": q}]}], t, p, m),
        inputs=[audio_record, audio_upload, audio_query, temperature, top_p, max_output_tokens],
        outputs=[audio_output])

    # FIX: clear_button was defined but never wired to clear_chat. The output
    # order mirrors clear_chat()'s 16-value return; audio_upload/audio_output
    # are not covered by that contract and keep their state.
    clear_button.click(
        clear_chat,
        outputs=[text_query, text_output, image_url, image_query, image_url_output,
                 image_text_query, image_output, image_upload, pdf_text_query,
                 pdf_upload, pdf_output, audio_record, audio_query,
                 temperature, top_p, max_output_tokens])

# Launch Gradio App
if __name__ == "__main__":
    demo.launch()