Spaces:

shukdevdatta123
/

GPT-4.5-Multimodal-Chatbot

Running

File size: 8,372 Bytes

8b97f99
 
 
 
ec333f1
8b97f99
c2740a5
8b97f99
 
c2740a5
8b97f99
 
 
 
 
c2740a5
8b97f99
 
c2740a5
8b97f99
 
c2740a5
63271b3
c2740a5
63271b3
 
892bcef
63271b3
8b97f99
 
 
 
 
 
 
c2740a5
8b97f99
c2740a5
8b97f99
c2740a5
 
e6cee82
4105a3b
c2740a5
 
 
 
 
ec333f1
c2740a5
4105a3b
8b97f99
c2740a5
 
e6cee82
c2740a5
e6cee82
c2740a5
8b97f99
 
c2740a5
 
e6cee82
4105a3b
8b97f99
 
 
 
e96901a
8b97f99
e96901a
c2740a5
 
 
 
ec333f1
c2740a5
4105a3b
8b97f99
c2740a5
 
e6cee82
c2740a5
8b97f99
a723167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b97f99
f386ba9
0381207
f386ba9
 
 
 
ec333f1
 
f386ba9
0381207
f386ba9
ec333f1
f386ba9
 
 
ec333f1
f386ba9
ec333f1
c2740a5
8b97f99
c2740a5
8b97f99
 
 
c2740a5
7057cb9
 
 
 
 
 
 
 
c2740a5
8b97f99
 
 
7057cb9
8b97f99
 
 
 
 
c2740a5
 
8b97f99
 
 
 
c2740a5
8b97f99
c2740a5
8b97f99
 
c2740a5
8b97f99
c2740a5
8b97f99
 
 
c2740a5
8b97f99
c2740a5
8b97f99
 
 
c2740a5
8b97f99
 
ec333f1
da6faec
ec333f1
 
 
 
8b97f99
 
 
 
 
4105a3b
c2740a5
4105a3b
c2740a5
f386ba9
 
 
0381207
 
f386ba9
 
 
e6cee82
d60058e
e6cee82
 
 
4105a3b
e6cee82
4105a3b
d60058e
e6cee82
 
 
8b97f99
 
 
a723167

import base64
import io
import mimetypes

import fitz  # PyMuPDF for PDF processing
import gradio as gr
import openai

# Module-level OpenAI API key. Written by set_api_key() and read by
# query_openai() and the Voice Chat handler; empty until the user submits one.
# NOTE(review): as a module global this is presumably shared by every visitor
# of the running app -- confirm whether per-session state is needed instead.
api_key = ""

# Function to update API key
def set_api_key(key):
    """Store the OpenAI API key used by later requests.

    Surrounding whitespace (e.g. a trailing newline from copy/paste) is
    stripped before the key is stored in the module-level ``api_key``.

    Args:
        key: The key typed into the API-key textbox; may be ``None`` or empty.

    Returns:
        A status message for the "API Key Status" textbox. Success is only
        reported when a non-empty key was actually stored.
    """
    global api_key
    api_key = (key or "").strip()
    if not api_key:
        # Previously an empty submission was reported as a success, which hid
        # the real problem until the first request failed.
        return "Error: No API key provided."
    return "API Key Set Successfully!"

# Function to interact with OpenAI API
def query_openai(messages, temperature, top_p, max_output_tokens):
    """Send *messages* to the chat-completion endpoint and return the reply text.

    Args:
        messages: Chat messages in the OpenAI chat format.
        temperature: Sampling temperature; ``None`` or ``""`` falls back to 1.0.
        top_p: Nucleus-sampling value; ``None`` or ``""`` falls back to 1.0.
        max_output_tokens: Completion token limit; any falsy value (including
            0) falls back to 2048.

    Returns:
        The content of the first choice, or a human-readable message when no
        API key has been set or the request fails. Errors are returned as
        strings (not raised) so they can be shown directly in the UI.
    """
    if not api_key:
        return "Please enter your OpenAI API key first."

    try:
        openai.api_key = api_key  # Set API key dynamically

        # Only substitute the default when the value is genuinely missing.
        # A truthiness test would turn a deliberately chosen 0 (which the
        # sliders allow) into 1.0.
        temperature = 1.0 if temperature in (None, "") else float(temperature)
        top_p = 1.0 if top_p in (None, "") else float(top_p)
        # A limit of 0 tokens cannot produce a reply, so 0 keeps falling back
        # to the default here.
        max_output_tokens = int(max_output_tokens) if max_output_tokens else 2048

        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface --
        # confirm the pinned openai version before upgrading.
        response = openai.ChatCompletion.create(
            model="gpt-4.5-preview",
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_output_tokens,
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error: {str(e)}"

# Function to process image URL input
def image_url_chat(image_url, text_query, temperature, top_p, max_output_tokens):
    """Ask the model a question about an image referenced by URL.

    Returns a prompt-for-input message when either the URL or the question is
    missing; otherwise returns whatever query_openai() returns for a single
    user turn containing the image followed by the question.
    """
    if not (image_url and text_query):
        return "Please provide an image URL and a query."

    image_part = {"type": "image_url", "image_url": {"url": image_url}}
    question_part = {"type": "text", "text": text_query}
    conversation = [{"role": "user", "content": [image_part, question_part]}]
    return query_openai(conversation, temperature, top_p, max_output_tokens)

# Function to process text input
def text_chat(text_query, temperature, top_p, max_output_tokens):
    """Send a plain-text question to the model.

    Returns a prompt-for-input message when the query is empty; otherwise
    returns the result of query_openai() for a single user turn.
    """
    if text_query:
        user_turn = {
            "role": "user",
            "content": [{"type": "text", "text": text_query}],
        }
        return query_openai([user_turn], temperature, top_p, max_output_tokens)
    return "Please enter a query."

# Function to process uploaded image input
def image_chat(image_file, text_query, temperature, top_p, max_output_tokens):
    """Ask the model a question about an uploaded image.

    The image is read from disk and embedded in the request as a base64
    ``data:`` URL whose MIME type is guessed from the file extension.

    Args:
        image_file: Path of the uploaded image, or ``None`` if nothing was
            uploaded.
        text_query: The user's question about the image.
        temperature, top_p, max_output_tokens: Forwarded to query_openai().

    Returns:
        The result of query_openai(), or a prompt-for-input message when the
        image or the question is missing.

    Raises:
        OSError: If the uploaded file cannot be opened or read.
    """
    if image_file is None or not text_query:
        return "Please upload an image and provide a query."

    # Label the payload with the real image type instead of always claiming
    # JPEG; fall back to JPEG (the previous behaviour) when the extension is
    # unknown or not an image type.
    mime_type, _ = mimetypes.guess_type(image_file)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # Encode image as base64
    with open(image_file, "rb") as img:
        base64_image = base64.b64encode(img.read()).decode("utf-8")

    image_data = f"data:{mime_type};base64,{base64_image}"

    messages = [
        {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": image_data}},
            {"type": "text", "text": text_query},
        ]},
    ]
    return query_openai(messages, temperature, top_p, max_output_tokens)

# Function to process uploaded PDF input
def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
    """Ask the model a question about the text of an uploaded PDF.

    Args:
        pdf_file: The uploaded PDF. Either a filesystem path string (what the
            upload widget in this file, configured with ``type="filepath"``,
            passes) or an object exposing the path as ``.name``; ``None`` if
            nothing was uploaded.
        text_query: The user's question about the document.
        temperature, top_p, max_output_tokens: Forwarded to query_openai().

    Returns:
        The result of query_openai(), or a human-readable message when input
        is missing, the PDF cannot be processed, or it contains no text.
    """
    if pdf_file is None or not text_query:
        return "Please upload a PDF and provide a query."

    try:
        # A plain path string has no ``.name`` attribute, so only dereference
        # it for file-like wrappers.
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

        # Extract text from all pages; the context manager closes the
        # document even if extraction fails part-way.
        with fitz.open(pdf_path) as doc:
            text = "\n".join(page.get_text("text") for page in doc)
    except Exception as e:
        return f"Error processing the PDF: {str(e)}"

    # If no text found (e.g. a scanned, image-only PDF), return an error
    if not text.strip():
        return "No text found in the PDF."

    # Create the query message with the extracted text and the user's query
    messages = [
        {"role": "user", "content": [
            {"type": "text", "text": text},  # The extracted text from the PDF
            {"type": "text", "text": text_query},
        ]},
    ]
    return query_openai(messages, temperature, top_p, max_output_tokens)

# Function to transcribe audio to text using OpenAI Whisper API
def transcribe_audio(audio_binary, openai_api_key):
    """Transcribe raw audio bytes to text with the ``whisper-1`` model.

    Args:
        audio_binary: The audio file content as bytes; ``None`` or empty when
            nothing was uploaded.
        openai_api_key: API key to use for the transcription request.

    Returns:
        The transcribed text, or a message starting with ``"Error"`` when the
        key or audio is missing or the request fails. Errors are returned as
        strings rather than raised.
    """
    if not openai_api_key:
        return "Error: No API key provided."
    if not audio_binary:
        # Without this guard a missing upload would be sent to the API as an
        # empty file and only fail remotely.
        return "Error: No audio file provided."

    openai.api_key = openai_api_key

    try:
        audio_file_obj = io.BytesIO(audio_binary)
        # The in-memory buffer needs a file name for the upload.
        # NOTE(review): the name is always "audio.wav" regardless of the
        # uploaded format -- confirm this is acceptable for non-WAV input.
        audio_file_obj.name = 'audio.wav'

        # Transcribe the audio to text using OpenAI's whisper model
        audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
        return audio_file_transcription.text
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"

# Function to clear the chat (Fix: Returns the correct number of outputs)
def clear_chat():
    """Return reset values for the components wired to the "Clear Chat" button.

    The tuple holds exactly one value per output component, in the order the
    button's ``outputs`` list declares them (13 in total). The previous
    version returned 14 values, so the stray empty string was applied to the
    temperature slider and the slider defaults were shifted by one position.

    Returns:
        A 13-tuple: empty strings for the text fields, ``None`` for the PDF
        upload, and the slider defaults (1.0, 1.0, 2048).
    """
    return (
        "", "", "",        # image_url, image_query, image_url_output
        "", "",            # text_query, text_output
        "", "",            # image_text_query, image_output
        None, "", "",      # pdf_upload, pdf_text_query, pdf_output
        1.0, 1.0, 2048,    # temperature, top_p, max_output_tokens
    )

# Gradio UI Layout
with gr.Blocks() as demo:
    gr.Markdown("## GPT-4.5 Preview Chatbot")

    # Inline stylesheet: adds a top margin to the "Set API Key" button.
    gr.HTML("""
    <style>
        #api_key_button {
            margin-top: 27px; /* Add margin-top to the button */
        }
    </style>
    """)
    
    # API Key Input
    with gr.Row():
        api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
        api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
        api_key_output = gr.Textbox(label="API Key Status", interactive=False)

    # Sampling controls shared by every tab
    with gr.Row():
        temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
        top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
        max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens")  # Changed default to 2048
    
    with gr.Tabs():
        with gr.Tab("Image URL Chat"):
            image_url = gr.Textbox(label="Enter Image URL")
            image_query = gr.Textbox(label="Ask about the Image")
            image_url_output = gr.Textbox(label="Response", interactive=False)
            image_url_button = gr.Button("Ask")
        
        with gr.Tab("Text Chat"):
            text_query = gr.Textbox(label="Enter your query")
            text_output = gr.Textbox(label="Response", interactive=False)
            text_button = gr.Button("Ask")
        
        with gr.Tab("Image Chat"):
            image_upload = gr.File(label="Upload an Image", type="filepath")
            image_text_query = gr.Textbox(label="Ask about the uploaded image")
            image_output = gr.Textbox(label="Response", interactive=False)
            image_button = gr.Button("Ask")
        
        with gr.Tab("PDF Chat"):
            pdf_upload = gr.File(label="Upload a PDF", type="filepath")
            pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
            pdf_output = gr.Textbox(label="Response", interactive=False)
            pdf_button = gr.Button("Ask")

        with gr.Tab("Voice Chat"):
            audio_upload = gr.File(label="Upload an Audio File", type="binary")
            audio_query = gr.Textbox(label="Ask about the transcription")
            audio_output = gr.Textbox(label="Response", interactive=False)
            audio_button = gr.Button("Ask")

    # Clear chat button
    clear_button = gr.Button("Clear Chat")

    # Button Click Actions
    api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
    image_url_button.click(image_url_chat, [image_url, image_query, temperature, top_p, max_output_tokens], image_url_output)
    text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
    image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
    pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
    
    # For Voice Chat
    def voice_chat(audio_binary, query, temperature, top_p, max_output_tokens):
        """Transcribe the uploaded audio, then ask the model about it.

        Returns a prompt-for-input message when the audio or the question is
        missing, the transcription error message when transcription fails,
        and otherwise the result of query_openai().
        """
        if audio_binary is None or not query:
            return "Please upload an audio file and provide a query."

        transcript = transcribe_audio(audio_binary, api_key)
        # transcribe_audio() reports failures as strings with these prefixes;
        # show them to the user instead of sending them to the model as if
        # they were the transcript.
        if transcript.startswith(("Error: ", "Error transcribing audio: ")):
            return transcript

        messages = [
            {"role": "user", "content": [
                {"type": "text", "text": transcript},
                {"type": "text", "text": query},
            ]},
        ]
        return query_openai(messages, temperature, top_p, max_output_tokens)

    audio_button.click(
        voice_chat,
        [audio_upload, audio_query, temperature, top_p, max_output_tokens],
        audio_output,
    )

    # Clear button: clear_chat must return one value per component listed
    # here, in this order.
    clear_button.click(
        clear_chat,
        outputs=[
            image_url, image_query, image_url_output, 
            text_query, text_output, 
            image_text_query, image_output, 
            pdf_upload, pdf_text_query, pdf_output, 
            temperature, top_p, max_output_tokens
        ]
    )

    # Second listener on the same button: also reset the image upload and the
    # Voice Chat tab, which the handler above does not cover.
    clear_button.click(
        lambda: (None, None, "", ""),
        outputs=[image_upload, audio_upload, audio_query, audio_output],
    )

# Launch Gradio App
if __name__ == "__main__":
    demo.launch()