File size: 8,900 Bytes
8b97f99
 
 
 
ec333f1
8b97f99
c2740a5
8b97f99
 
c2740a5
8b97f99
 
 
 
 
c2740a5
8b97f99
 
c2740a5
8b97f99
 
c2740a5
63271b3
c2740a5
63271b3
 
892bcef
63271b3
8b97f99
 
 
 
 
 
 
c2740a5
8b97f99
c2740a5
8b97f99
c2740a5
 
e6cee82
4105a3b
c2740a5
 
 
 
 
ec333f1
c2740a5
4105a3b
8b97f99
c2740a5
 
e6cee82
c2740a5
e6cee82
c2740a5
8b97f99
 
c2740a5
 
e6cee82
4105a3b
8b97f99
 
 
 
e96901a
8b97f99
e96901a
c2740a5
 
 
 
ec333f1
c2740a5
4105a3b
8b97f99
c2740a5
 
e6cee82
c2740a5
8b97f99
a723167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b97f99
f386ba9
a56f35d
f386ba9
 
 
 
ec333f1
 
a56f35d
 
 
f386ba9
ec333f1
f386ba9
ec333f1
c2740a5
8b97f99
c2740a5
8b97f99
 
 
c2740a5
7057cb9
 
 
 
 
 
 
 
c2740a5
8b97f99
 
 
7057cb9
8b97f99
 
 
 
 
c2740a5
 
8b97f99
 
 
 
c2740a5
8b97f99
c2740a5
8b97f99
 
c2740a5
8b97f99
c2740a5
8b97f99
 
 
c2740a5
8b97f99
c2740a5
8b97f99
 
 
c2740a5
8b97f99
 
ec333f1
a56f35d
 
610a3c2
 
ec333f1
 
 
 
8b97f99
 
 
 
 
4105a3b
c2740a5
4105a3b
c2740a5
f386ba9
610a3c2
 
 
 
 
 
 
a56f35d
610a3c2
 
 
 
f386ba9
610a3c2
 
 
e6cee82
d60058e
e6cee82
 
610a3c2
4105a3b
e6cee82
4105a3b
d60058e
e6cee82
 
 
8b97f99
 
 
a723167
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import gradio as gr
import openai
import fitz  # PyMuPDF for PDF processing
import base64
import io

# Module-level storage for the user's OpenAI API key, populated at runtime
# by set_api_key() via the UI; request helpers refuse to run while it is "".
api_key = ""

# Click handler for the "Set API Key" button.
def set_api_key(key):
    """Remember *key* in the module-level ``api_key`` and confirm to the UI."""
    global api_key
    api_key = key
    return "API Key Set Successfully!"

# Function to interact with OpenAI API
def query_openai(messages, temperature, top_p, max_output_tokens):
    """Send *messages* to the GPT-4.5 preview chat model and return the reply.

    Args:
        messages: list of chat messages in the OpenAI content-parts format.
        temperature, top_p: sampling parameters from the UI sliders.
        max_output_tokens: cap on generated tokens.

    Returns:
        The model's reply text, or a human-readable error string (this
        function never raises — callers show the return value in the UI).
    """
    if not api_key:
        return "Please enter your OpenAI API key first."

    try:
        openai.api_key = api_key  # Set API key dynamically

        # Coerce UI values to numbers. Compare against None — not truthiness —
        # so an explicit 0, which is valid for temperature and top_p, is kept
        # rather than silently replaced by the default.
        temperature = float(temperature) if temperature is not None else 1.0
        top_p = float(top_p) if top_p is not None else 1.0
        # 0 is not a usable token budget, so the falsy check stays here.
        max_output_tokens = int(max_output_tokens) if max_output_tokens else 2048

        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK surface;
        # migrating to openai.OpenAI().chat.completions.create is advisable —
        # confirm the pinned openai package version first.
        response = openai.ChatCompletion.create(
            model="gpt-4.5-preview",
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_output_tokens
        )
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error: {str(e)}"

# Function to process image URL input
def image_url_chat(image_url, text_query, temperature, top_p, max_output_tokens):
    """Ask the model *text_query* about the image at *image_url*.

    Returns a prompt string when either input is missing, otherwise the
    model's reply (or error string) from query_openai.
    """
    if not (image_url and text_query):
        return "Please provide an image URL and a query."

    # Content parts: the remote image first, then the user's question.
    content = [
        {"type": "image_url", "image_url": {"url": image_url}},
        {"type": "text", "text": text_query},
    ]
    return query_openai([{"role": "user", "content": content}], temperature, top_p, max_output_tokens)

# Function to process text input
def text_chat(text_query, temperature, top_p, max_output_tokens):
    """Send a plain-text *text_query* to the model and return its reply."""
    if not text_query:
        return "Please enter a query."

    payload = [{"role": "user", "content": [{"type": "text", "text": text_query}]}]
    return query_openai(payload, temperature, top_p, max_output_tokens)

# Function to process uploaded image input
def image_chat(image_file, text_query, temperature, top_p, max_output_tokens):
    """Ask the model *text_query* about a locally uploaded image.

    The image is inlined as a base64 data URI so no public URL is needed.
    Returns a prompt string when input is missing, otherwise the model's
    reply (or error string) from query_openai.
    """
    if image_file is None or not text_query:
        return "Please upload an image and provide a query."

    # Derive the real MIME type from the filename; the previous version
    # hard-coded image/jpeg, which mislabeled PNG/GIF/WebP uploads.
    import mimetypes
    mime_type, _ = mimetypes.guess_type(image_file)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"  # fall back to the old behavior

    # Encode image as base64
    with open(image_file, "rb") as img:
        base64_image = base64.b64encode(img.read()).decode("utf-8")

    image_data = f"data:{mime_type};base64,{base64_image}"

    messages = [
        {"role": "user", "content": [
            {"type": "image_url", "image_url": {"url": image_data}},
            {"type": "text", "text": text_query}
        ]},
    ]
    return query_openai(messages, temperature, top_p, max_output_tokens)

# Function to process uploaded PDF input
def pdf_chat(pdf_file, text_query, temperature, top_p, max_output_tokens):
    """Answer *text_query* using the text content of an uploaded PDF.

    Returns a prompt string when input is missing, an explanatory message
    when the PDF has no extractable text, or the model's reply. Extraction
    failures are caught and returned as an error string.
    """
    if pdf_file is None or not text_query:
        return "Please upload a PDF and provide a query."

    try:
        # gr.File(type="filepath") passes a plain str path; older Gradio
        # versions passed a tempfile wrapper exposing .name. The previous
        # code used pdf_file.name unconditionally, which fails on str
        # (image_chat already treats its upload as a path) — accept both.
        pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

        # Extract text from all pages of the PDF
        doc = fitz.open(pdf_path)
        text = "\n".join(page.get_text("text") for page in doc)

        # If no text found (e.g. a scanned/image-only PDF), bail out early
        if not text.strip():
            return "No text found in the PDF."

        # Send the extracted text as context, followed by the user's query
        messages = [
            {"role": "user", "content": [
                {"type": "text", "text": text},
                {"type": "text", "text": text_query}
            ]},
        ]
        return query_openai(messages, temperature, top_p, max_output_tokens)

    except Exception as e:
        return f"Error processing the PDF: {str(e)}"

# Function to transcribe audio to text using OpenAI Whisper API
def transcribe_audio(audio_filepath, openai_api_key):
    """Transcribe the audio file at *audio_filepath* with Whisper.

    Returns the transcription text, or an "Error: ..." string on failure
    (missing key, unreadable file, API error) — this function never raises.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    try:
        # Stream the file to OpenAI's Whisper model for transcription.
        with open(audio_filepath, "rb") as audio_file:
            transcription = openai.Audio.transcribe(file=audio_file, model="whisper-1")
            return transcription.text
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"

# Reset every component wired to the Clear button.
def clear_chat():
    """Return one reset value per Clear-button output component, in order.

    The wiring lists exactly 13 outputs: image_url, image_query,
    image_url_output, text_query, text_output, image_text_query,
    image_output, pdf_upload, pdf_text_query, pdf_output, temperature,
    top_p, max_output_tokens. The previous version returned 14 values
    (an extra None/""), which made Gradio fail on every click.
    """
    return "", "", "", "", "", "", "", None, "", "", 1.0, 1.0, 2048

# Gradio UI Layout: one Blocks app with shared sampling sliders and one tab
# per input modality (image URL, text, uploaded image, PDF, voice).
with gr.Blocks() as demo:
    gr.Markdown("## GPT-4.5 Preview Chatbot")

    # Inline CSS: nudge the "Set API Key" button down so it lines up with
    # the neighboring textboxes in the same row.
    gr.HTML("""
    <style>
        #api_key_button {
            margin-top: 27px; /* Add margin-top to the button */
        }
    </style>
    """)
    
    # API Key Input row: key entry, the styled button, and a status readout.
    with gr.Row():
        api_key_input = gr.Textbox(label="Enter OpenAI API Key", type="password")
        api_key_button = gr.Button("Set API Key", elem_id="api_key_button")
        api_key_output = gr.Textbox(label="API Key Status", interactive=False)

    # Sampling controls shared by every tab's request.
    with gr.Row():
        temperature = gr.Slider(0, 2, value=1.0, step=0.1, label="Temperature")
        top_p = gr.Slider(0, 1, value=1.0, step=0.1, label="Top-P")
        max_output_tokens = gr.Slider(0, 16384, value=2048, step=512, label="Max Output Tokens")  # Changed default to 2048
    
    with gr.Tabs():
        with gr.Tab("Image URL Chat"):
            image_url = gr.Textbox(label="Enter Image URL")
            image_query = gr.Textbox(label="Ask about the Image")
            image_url_output = gr.Textbox(label="Response", interactive=False)
            image_url_button = gr.Button("Ask")
        
        with gr.Tab("Text Chat"):
            text_query = gr.Textbox(label="Enter your query")
            text_output = gr.Textbox(label="Response", interactive=False)
            text_button = gr.Button("Ask")
        
        with gr.Tab("Image Chat"):
            image_upload = gr.File(label="Upload an Image", type="filepath")
            image_text_query = gr.Textbox(label="Ask about the uploaded image")
            image_output = gr.Textbox(label="Response", interactive=False)
            image_button = gr.Button("Ask")
        
        with gr.Tab("PDF Chat"):
            pdf_upload = gr.File(label="Upload a PDF", type="filepath")
            pdf_text_query = gr.Textbox(label="Ask about the uploaded PDF")
            pdf_output = gr.Textbox(label="Response", interactive=False)
            pdf_button = gr.Button("Ask")

        with gr.Tab("Voice Chat"):
            # Record Audio Component for Voice Chat (type="filepath" -> str path)
            audio_record = gr.Audio(label="Record your Voice", type="filepath", show_label=True)
            # Upload Audio File Component.
            # NOTE(review): type="file" is deprecated (removed in Gradio 4;
            # use type="filepath"), and file_types normally takes extensions
            # such as [".wav", ".mp3"] — confirm against the pinned Gradio
            # version.
            audio_upload = gr.File(label="Or Upload an Audio File", type="file", file_types=["audio/wav", "audio/mp3"])
            audio_query = gr.Textbox(label="Ask about the transcription")
            audio_output = gr.Textbox(label="Response", interactive=False)
            audio_button = gr.Button("Ask")

    # Clear chat button
    clear_button = gr.Button("Clear Chat")

    # Button Click Actions: each tab's Ask button feeds its inputs plus the
    # shared sliders into the matching handler above.
    api_key_button.click(set_api_key, inputs=[api_key_input], outputs=[api_key_output])
    image_url_button.click(image_url_chat, [image_url, image_query, temperature, top_p, max_output_tokens], image_url_output)
    text_button.click(text_chat, [text_query, temperature, top_p, max_output_tokens], text_output)
    image_button.click(image_chat, [image_upload, image_text_query, temperature, top_p, max_output_tokens], image_output)
    pdf_button.click(pdf_chat, [pdf_upload, pdf_text_query, temperature, top_p, max_output_tokens], pdf_output)
    
    # For Voice Chat (record or upload audio and process query)
    def process_audio(recorded_audio, uploaded_audio, query, temperature, top_p, max_output_tokens):
        """Transcribe recorded or uploaded audio, then ask the model about it.

        Takes six arguments to match the six input components wired to
        audio_button.click below (the previous five-argument signature made
        Gradio raise a TypeError on every click). The recorded clip is
        preferred when both sources are provided.
        """
        audio = recorded_audio if recorded_audio is not None else uploaded_audio
        if audio is None:
            return "Please either record or upload an audio file."

        # gr.Audio(type="filepath") supplies a plain str path, while an
        # uploaded-file object may expose the path via .name — accept both
        # (the old code called audio.name unconditionally, failing on str).
        audio_path = audio if isinstance(audio, str) else audio.name
        transcription = transcribe_audio(audio_path, api_key)
        if transcription.startswith("Error"):
            return transcription  # surface the transcription failure as-is
        return query_openai(
            [{"role": "user", "content": [{"type": "text", "text": transcription}, {"type": "text", "text": query}]}],
            temperature, top_p, max_output_tokens
        )

    audio_button.click(process_audio, [audio_record, audio_upload, audio_query, temperature, top_p, max_output_tokens], audio_output)

    # Fix: Clear button resets all necessary fields correctly
    clear_button.click(
        clear_chat,
        outputs=[ 
            image_url, image_query, image_url_output, 
            text_query, text_output, 
            image_text_query, image_output, 
            pdf_upload, pdf_text_query, pdf_output, 
            temperature, top_p, max_output_tokens
        ]
    )

# Launch Gradio App: start the local web server only when run as a script,
# so importing this module elsewhere does not block.
if __name__ == "__main__":
    demo.launch()