shukdevdatta123 committed on
Commit b39aa5d · verified · 1 Parent(s): 8faefa1

Update app.py: migrate from the legacy module-level `openai` calls to the v1 `OpenAI` client, map the UI model choices ("o1", "o3-mini") to concrete API models, send image inputs as multimodal text+image chat messages, rework `update_input_type` to return plain booleans for quiz mode, and replace the hidden quiz-mode Checkbox with a `gr.State`.

Files changed (1): app.py (+559, -521)
app.py CHANGED
@@ -1,522 +1,560 @@
- import gradio as gr
- import openai
- import base64
- from PIL import Image
- import io
- import os
- import tempfile
- import fitz  # PyMuPDF for PDF handling
-
- # Function to extract text from PDF files
- def extract_text_from_pdf(pdf_file):
-     try:
-         text = ""
-         pdf_document = fitz.open(pdf_file)
-
-         for page_num in range(len(pdf_document)):
-             page = pdf_document[page_num]
-             text += page.get_text()
-
-         pdf_document.close()
-         return text
-     except Exception as e:
-         return f"Error extracting text from PDF: {str(e)}"
-
- # Function to generate MCQ quiz from PDF content
- def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
-     if not openai_api_key:
-         return "Error: No API key provided."
-
-     openai.api_key = openai_api_key
-
-     # Limit content length to avoid token limits
-     limited_content = pdf_content[:8000] if len(pdf_content) > 8000 else pdf_content
-
-     prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
- For each question:
- 1. Create a clear question based on key concepts in the document
- 2. Provide 4 possible answers (A, B, C, D)
- 3. Indicate the correct answer
- 4. Briefly explain why the answer is correct
-
- Format the output clearly with each question numbered and separated.
-
- Document content:
- {limited_content}
- """
-
-     try:
-         messages = [
-             {"role": "user", "content": prompt}
-         ]
-
-         response = openai.ChatCompletion.create(
-             model=model_choice,
-             messages=messages
-         )
-
-         return response.choices[0].message.content
-     except Exception as e:
-         return f"Error generating quiz: {str(e)}"
-
- # Function to send the request to OpenAI API with an image, text or PDF input
- def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort="medium", model_choice="o1"):
-     if not openai_api_key:
-         return "Error: No API key provided."
-
-     openai.api_key = openai_api_key
-
-     # Process the input depending on whether it's text, image, or a PDF-related query
-     if pdf_content and input_text:
-         # For PDF queries, we combine the PDF content with the user's question
-         prompt = f"Based on the following document content, please answer this question: '{input_text}'\n\nDocument content:\n{pdf_content}"
-         input_content = prompt
-     elif image:
-         # Convert the image to base64 string
-         image_info = get_base64_string_from_image(image)
-         input_content = f"data:image/png;base64,{image_info}"
-     else:
-         # Plain text input
-         input_content = input_text
-
-     # Prepare the messages for OpenAI API
-     if model_choice == "o1":
-         if image and not pdf_content:
-             messages = [
-                 {"role": "user", "content": [{"type": "image_url", "image_url": {"url": input_content}}]}
-             ]
-         else:
-             messages = [
-                 {"role": "user", "content": input_content}
-             ]
-     elif model_choice == "o3-mini":
-         messages = [
-             {"role": "user", "content": input_content}
-         ]
-
-     try:
-         # Call OpenAI API with the selected model
-         response = openai.ChatCompletion.create(
-             model=model_choice,
-             messages=messages,
-             max_completion_tokens=2000
-         )
-
-         return response.choices[0].message.content
-     except Exception as e:
-         return f"Error calling OpenAI API: {str(e)}"
-
- # Function to convert an uploaded image to a base64 string
- def get_base64_string_from_image(pil_image):
-     # Convert PIL Image to bytes
-     buffered = io.BytesIO()
-     pil_image.save(buffered, format="PNG")
-     img_bytes = buffered.getvalue()
-     base64_str = base64.b64encode(img_bytes).decode("utf-8")
-     return base64_str
-
- # Function to transcribe audio to text using OpenAI Whisper API
- def transcribe_audio(audio, openai_api_key):
-     if not openai_api_key:
-         return "Error: No API key provided."
-
-     openai.api_key = openai_api_key
-
-     try:
-         # Open the audio file and pass it as a file object
-         with open(audio, 'rb') as audio_file:
-             audio_file_content = audio_file.read()
-
-         # Use the correct transcription API call
-         audio_file_obj = io.BytesIO(audio_file_content)
-         audio_file_obj.name = 'audio.wav'  # Set a name for the file object (as OpenAI expects it)
-
-         # Transcribe the audio to text using OpenAI's whisper model
-         audio_file_transcription = openai.Audio.transcribe(file=audio_file_obj, model="whisper-1")
-         return audio_file_transcription.text
-     except Exception as e:
-         return f"Error transcribing audio: {str(e)}"
-
- # The function that will be used by Gradio interface
- def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, history):
-     if history is None:
-         history = []
-
-     # If there's audio, transcribe it to text
-     if audio:
-         input_text = transcribe_audio(audio, openai_api_key)
-
-     # If a new PDF is uploaded, extract its text
-     new_pdf_content = pdf_content
-     if pdf_file is not None:
-         new_pdf_content = extract_text_from_pdf(pdf_file)
-
-     # Check if we're in PDF quiz mode
-     if pdf_quiz_mode:
-         if new_pdf_content:
-             # Generate MCQ quiz questions
-             quiz_response = generate_mcq_quiz(new_pdf_content, int(num_quiz_questions), openai_api_key, model_choice)
-             history.append((f"👤: [Uploaded PDF for Quiz - {int(num_quiz_questions)} questions]", f"🤖: {quiz_response}"))
-         else:
-             history.append(("👤: [Attempted to generate quiz without PDF]", "🤖: Please upload a PDF file to generate quiz questions."))
-     else:
-         # Regular chat mode - generate the response
-         response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
-
-         # Append the response to the history
-         if input_text:
-             history.append((f"👤: {input_text}", f"🤖: {response}"))
-         elif image is not None:
-             history.append((f"👤: [Uploaded image]", f"🤖: {response}"))
-         elif pdf_file is not None:
-             history.append((f"👤: [Uploaded PDF]", f"🤖: {response}"))
-         else:
-             history.append((f"👤: [No input provided]", f"🤖: Please provide some input (text, image, or PDF) for me to respond to."))
-
-     return "", None, None, None, new_pdf_content, history
-
- # Function to clear the chat history and PDF content
- def clear_history():
-     return "", None, None, None, "", []
-
- # Function to process a newly uploaded PDF
- def process_pdf(pdf_file):
-     if pdf_file is None:
-         return ""
-     return extract_text_from_pdf(pdf_file)
-
- # Function to update visible components based on input type selection
- def update_input_type(choice):
-     if choice == "Text":
-         return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
-     elif choice == "Image":
-         return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
-     elif choice == "Voice":
-         return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(value=False)
-     elif choice == "PDF":
-         return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(value=False)
-     elif choice == "PDF(QUIZ)":
-         return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(value=True)
-
- # Custom CSS styles with animations and button colors
- custom_css = """
- /* General body styles */
- .gradio-container {
-     font-family: 'Arial', sans-serif;
-     background-color: #f0f4f8; /* Lighter blue-gray background */
-     color: #2d3748;;
- }
- /* Header styles */
- .gradio-header {
-     background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
-     color: white;
-     padding: 20px;
-     text-align: center;
-     border-radius: 8px;
-     box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
-     animation: fadeIn 1s ease-out;
- }
- .gradio-header h1 {
-     font-size: 2.5rem;
- }
- .gradio-header h3 {
-     font-size: 1.2rem;
-     margin-top: 10px;
- }
- /* Chatbot container styles */
- .gradio-chatbot {
-     background-color: #fff;
-     border-radius: 10px;
-     padding: 20px;
-     box-shadow: 0 6px 18px rgba(0, 0, 0, 0.1);
-     border-left: 4px solid #4a00e0; /* Accent border */
- }
- /* Input field styles */
- .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
-     border-radius: 8px;
-     border: 2px solid #e2e8f0;
-     background-color: #f8fafc;
- }
- .gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus, .gradio-file:focus, .gradio-slider:focus {
-     border-color: #8e2de2;
-     box-shadow: 0 0 0 3px rgba(142, 45, 226, 0.2);
- }
- /* Button styles */
- /* Send Button: Sky Blue */
- #submit-btn {
-     background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
-     color: white;
-     border: none;
-     border-radius: 8px;
-     padding: 10px 19px;
-     font-size: 1.1rem;
-     cursor: pointer;
-     transition: all 0.3s ease;
-     margin-left: auto;
-     margin-right: auto;
-     display: block;
-     margin-top: 10px;
- }
- #submit-btn:hover {
-     background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
-     box-shadow: 0 6px 8px rgba(74, 0, 224, 0.4);
- }
- #submit-btn:active {
-     transform: scale(0.95);
- }
- #clear-history {
-     background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%); /* Red gradient */
-     color: white;
-     border: none;
-     border-radius: 8px;
-     padding: 10px 13px;
-     font-size: 1.1rem;
-     cursor: pointer;
-     transition: all 0.3s ease;
-     margin-top: 10px;
- }
- #clear-history:hover {
-     background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
-     box-shadow: 0 6px 8px rgba(229, 62, 62, 0.4);
- }
- #clear-history:active {
-     transform: scale(0.95);
- }
- /* Input type selector buttons */
- #input-type-group {
-     display: flex;
-     justify-content: center;
-     gap: 10px;
-     margin-bottom: 20px;
- }
- .input-type-btn {
-     background-color: #718096; /* Slate gray */
-     color: white;
-     border: none;
-     border-radius: 8px;
-     padding: 10px 15px;
-     font-size: 1rem;
-     cursor: pointer;
-     transition: all 0.3s ease;
- }
- .input-type-btn.selected {
-     background-color: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
- }
- .input-type-btn:hover {
-     background-color: #4a5568; /* Darker slate */
- }
- /* Chat history styles */
- .gradio-chatbot .message {
-     margin-bottom: 10px;
- }
- .gradio-chatbot .user {
-     background-color: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
-     color: white;
-     padding: 10px;
-     border-radius: 12px;
-     max-width: 70%;
-     animation: slideInUser 0.5s ease-out;
- }
- .gradio-chatbot .assistant {
-     background-color: #f0f4f8; /* Light blue-gray */
-     color: #2d3748;
-     padding: 10px;
-     border-radius: 12px;
-     max-width: 70%;
-     margin-left: auto;
-     animation: slideInAssistant 0.5s ease-out;
- }
- /* Animation keyframes */
- @keyframes fadeIn {
-     0% { opacity: 0; }
-     100% { opacity: 1; }
- }
- @keyframes slideInUser {
-     0% { transform: translateX(-100%); }
-     100% { transform: translateX(0); }
- }
- @keyframes slideInAssistant {
-     0% { transform: translateX(100%); }
-     100% { transform: translateX(0); }
- }
- /* Mobile responsiveness */
- @media (max-width: 768px) {
-     .gradio-header h1 {
-         font-size: 1.8rem;
-     }
-     .gradio-header h3 {
-         font-size: 1rem;
-     }
-     .gradio-chatbot {
-         max-height: 400px;
-     }
-     .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
-         width: 100%;
-     }
-     #submit-btn, #clear-history {
-         width: 100%;
-         margin-left: 0;
-     }
- }
- """
-
- # Gradio interface setup
- def create_interface():
-     with gr.Blocks(css=custom_css) as demo:
-         gr.Markdown("""
-         <div class="gradio-header">
-             <h1>Multimodal Chatbot (Text + Image + Voice + PDF + Quiz)</h1>
-             <h3>Interact with a chatbot using text, image, voice, or PDF inputs</h3>
-         </div>
-         """)
-
-         # Add a description with an expandable accordion
-         with gr.Accordion("Click to expand for details", open=False):
-             gr.Markdown("""
-             ### Description:
-             This is a multimodal chatbot that can handle text, image, voice, PDF inputs, and generate quizzes from PDFs.
-             - You can ask questions or provide text, and the assistant will respond.
-             - You can upload an image, and the assistant will process it and answer questions about the image.
-             - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
-             - PDF support: Upload a PDF and ask questions about its content.
-             - PDF Quiz: Upload a PDF and specify how many MCQ questions you want generated based on the content.
-             - Enter your OpenAI API key to start interacting with the model.
-             - You can use the 'Clear History' button to remove the conversation history.
-             - "o1" is for image, voice, PDF and text chat and "o3-mini" is for text, PDF and voice chat only.
-             ### Reasoning Effort:
-             The reasoning effort controls how complex or detailed the assistant's answers should be.
-             - **Low**: Provides quick, concise answers with minimal reasoning or details.
-             - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
-             - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
-             """)
-
-         # Store PDF content as a state variable
-         pdf_content = gr.State("")
-
-         with gr.Row():
-             openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
-
-         # Input type selector
-         with gr.Row():
-             input_type = gr.Radio(
-                 ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
-                 label="Choose Input Type",
-                 value="Text"
-             )
-
-         # Create the input components (initially text is visible, others are hidden)
-         with gr.Row():
-             # Text input
-             input_text = gr.Textbox(
-                 label="Enter Text Question",
-                 placeholder="Ask a question or provide text",
-                 lines=2,
-                 visible=True
-             )
-
-             # Image input
-             image_input = gr.Image(
-                 label="Upload an Image",
-                 type="pil",
-                 visible=False
-             )
-
-             # Audio input
-             audio_input = gr.Audio(
-                 label="Upload or Record Audio",
-                 type="filepath",
-                 visible=False
-             )
-
-             # PDF input
-             pdf_input = gr.File(
-                 label="Upload your PDF",
-                 file_types=[".pdf"],
-                 visible=False
-             )
-
-             # Quiz specific components
-             quiz_questions_slider = gr.Slider(
-                 minimum=1,
-                 maximum=20,
-                 value=5,
-                 step=1,
-                 label="Number of Quiz Questions",
-                 visible=False
-             )
-
-             # Hidden state for quiz mode
-             quiz_mode = gr.Checkbox(
-                 label="Quiz Mode",
-                 visible=False,
-                 value=False
-             )
-
-         with gr.Row():
-             reasoning_effort = gr.Dropdown(
-                 label="Reasoning Effort",
-                 choices=["low", "medium", "high"],
-                 value="medium"
-             )
-             model_choice = gr.Dropdown(
-                 label="Select Model",
-                 choices=["o1", "o3-mini"],
-                 value="o1"  # Default to 'o1' for image-related tasks
-             )
-             submit_btn = gr.Button("Ask!", elem_id="submit-btn")
-             clear_btn = gr.Button("Clear History", elem_id="clear-history")
-
-         chat_history = gr.Chatbot()
-
-         # Connect the input type selector to the update function
-         input_type.change(
-             fn=update_input_type,
-             inputs=[input_type],
-             outputs=[input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode]
-         )
-
-         # Process PDF when uploaded
-         pdf_input.change(
-             fn=process_pdf,
-             inputs=[pdf_input],
-             outputs=[pdf_content]
-         )
-
-         # Button interactions
-         submit_btn.click(
-             fn=chatbot,
-             inputs=[
-                 input_text,
-                 image_input,
-                 audio_input,
-                 pdf_input,
-                 openai_api_key,
-                 reasoning_effort,
-                 model_choice,
-                 pdf_content,
-                 quiz_questions_slider,
-                 quiz_mode,
-                 chat_history
-             ],
-             outputs=[
-                 input_text,
-                 image_input,
-                 audio_input,
-                 pdf_input,
-                 pdf_content,
-                 chat_history
-             ]
-         )
-
-         clear_btn.click(
-             fn=clear_history,
-             inputs=[],
-             outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
-         )
-
-     return demo
-
- # Run the interface
- if __name__ == "__main__":
-     demo = create_interface()
      demo.launch()
 
+ import gradio as gr
+ from openai import OpenAI
+ import base64
+ from PIL import Image
+ import io
+ import os
+ import tempfile
+ import fitz  # PyMuPDF for PDF handling
+
+ # Function to extract text from PDF files
+ def extract_text_from_pdf(pdf_file):
+     try:
+         text = ""
+         pdf_document = fitz.open(pdf_file)
+
+         for page_num in range(len(pdf_document)):
+             page = pdf_document[page_num]
+             text += page.get_text()
+
+         pdf_document.close()
+         return text
+     except Exception as e:
+         return f"Error extracting text from PDF: {str(e)}"
+
+ # Function to generate MCQ quiz from PDF content
+ def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice):
+     if not openai_api_key:
+         return "Error: No API key provided."
+
+     client = OpenAI(api_key=openai_api_key)
+
+     # Limit content length to avoid token limits
+     limited_content = pdf_content[:8000] if len(pdf_content) > 8000 else pdf_content
+
+     prompt = f"""Based on the following document content, generate {num_questions} multiple-choice quiz questions.
+ For each question:
+ 1. Create a clear question based on key concepts in the document
+ 2. Provide 4 possible answers (A, B, C, D)
+ 3. Indicate the correct answer
+ 4. Briefly explain why the answer is correct
+
+ Format the output clearly with each question numbered and separated.
+
+ Document content:
+ {limited_content}
+ """
+
+     try:
+         messages = [
+             {"role": "user", "content": prompt}
+         ]
+
+         # Use appropriate model based on choice
+         model_name = "gpt-4" if model_choice == "o1" else "gpt-3.5-turbo"
+
+         response = client.chat.completions.create(
+             model=model_name,
+             messages=messages,
+             max_tokens=2000
+         )
+
+         return response.choices[0].message.content
+     except Exception as e:
+         return f"Error generating quiz: {str(e)}"
+
+ # Function to send the request to OpenAI API with an image, text or PDF input
+ def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort="medium", model_choice="o1"):
+     if not openai_api_key:
+         return "Error: No API key provided."
+
+     client = OpenAI(api_key=openai_api_key)
+
+     # Process the input depending on whether it's text, image, or a PDF-related query
+     if pdf_content and input_text:
+         # For PDF queries, we combine the PDF content with the user's question
+         prompt = f"Based on the following document content, please answer this question: '{input_text}'\n\nDocument content:\n{pdf_content}"
+         messages = [{"role": "user", "content": prompt}]
+     elif image:
+         # Convert the image to base64 string
+         image_base64 = get_base64_string_from_image(image)
+         messages = [
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "text", "text": input_text or "Please describe this image."},
+                     {
+                         "type": "image_url",
+                         "image_url": {
+                             "url": f"data:image/png;base64,{image_base64}"
+                         }
+                     }
+                 ]
+             }
+         ]
+     else:
+         # Plain text input
+         messages = [{"role": "user", "content": input_text}]
+
+     try:
+         # Use appropriate model based on choice
+         if model_choice == "o1" and image:
+             model_name = "gpt-4-vision-preview"
+         elif model_choice == "o1":
+             model_name = "gpt-4"
+         else:
+             model_name = "gpt-3.5-turbo"
+
+         # Call OpenAI API with the selected model
+         response = client.chat.completions.create(
+             model=model_name,
+             messages=messages,
+             max_tokens=2000
+         )
+
+         return response.choices[0].message.content
+     except Exception as e:
+         return f"Error calling OpenAI API: {str(e)}"
+
+ # Function to convert an uploaded image to a base64 string
+ def get_base64_string_from_image(pil_image):
+     # Convert PIL Image to bytes
+     buffered = io.BytesIO()
+     pil_image.save(buffered, format="PNG")
+     img_bytes = buffered.getvalue()
+     base64_str = base64.b64encode(img_bytes).decode("utf-8")
+     return base64_str
+
+ # Function to transcribe audio to text using OpenAI Whisper API
+ def transcribe_audio(audio, openai_api_key):
+     if not openai_api_key:
+         return "Error: No API key provided."
+
+     client = OpenAI(api_key=openai_api_key)
+
+     try:
+         # Open the audio file and pass it as a file object
+         with open(audio, 'rb') as audio_file:
+             # Transcribe the audio to text using OpenAI's whisper model
+             transcript = client.audio.transcriptions.create(
+                 model="whisper-1",
+                 file=audio_file
+             )
+             return transcript.text
+     except Exception as e:
+         return f"Error transcribing audio: {str(e)}"
+
+ # The function that will be used by Gradio interface
+ def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, history):
+     if history is None:
+         history = []
+
+     # If there's audio, transcribe it to text
+     if audio:
+         input_text = transcribe_audio(audio, openai_api_key)
+
+     # If a new PDF is uploaded, extract its text
+     new_pdf_content = pdf_content
+     if pdf_file is not None:
+         new_pdf_content = extract_text_from_pdf(pdf_file)
+
+     # Check if we're in PDF quiz mode
+     if pdf_quiz_mode:
+         if new_pdf_content:
+             # Generate MCQ quiz questions
+             quiz_response = generate_mcq_quiz(new_pdf_content, int(num_quiz_questions), openai_api_key, model_choice)
+             history.append((f"👤: [Uploaded PDF for Quiz - {int(num_quiz_questions)} questions]", f"🤖: {quiz_response}"))
+         else:
+             history.append(("👤: [Attempted to generate quiz without PDF]", "🤖: Please upload a PDF file to generate quiz questions."))
+     else:
+         # Regular chat mode - generate the response
+         response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
+
+         # Append the response to the history
+         if input_text:
+             history.append((f"👤: {input_text}", f"🤖: {response}"))
+         elif image is not None:
+             history.append((f"👤: [Uploaded image]", f"🤖: {response}"))
+         elif pdf_file is not None:
+             history.append((f"👤: [Uploaded PDF]", f"🤖: {response}"))
+         else:
+             history.append((f"👤: [No input provided]", f"🤖: Please provide some input (text, image, or PDF) for me to respond to."))
+
+     return "", None, None, None, new_pdf_content, history
+
+ # Function to clear the chat history and PDF content
+ def clear_history():
+     return "", None, None, None, "", []
+
+ # Function to process a newly uploaded PDF
+ def process_pdf(pdf_file):
+     if pdf_file is None:
+         return ""
+     return extract_text_from_pdf(pdf_file)
+
+ # Function to update visible components based on input type selection
+ def update_input_type(choice):
+     if choice == "Text":
+         return (
+             gr.update(visible=True),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             False
+         )
+     elif choice == "Image":
+         return (
+             gr.update(visible=True),
+             gr.update(visible=True),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             False
+         )
+     elif choice == "Voice":
+         return (
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=True),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             False
+         )
+     elif choice == "PDF":
+         return (
+             gr.update(visible=True),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=True),
+             gr.update(visible=False),
+             False
+         )
+     elif choice == "PDF(QUIZ)":
+         return (
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=True),
+             gr.update(visible=True),
+             True
+         )
+
+ # Custom CSS styles with animations and button colors
+ custom_css = """
+ /* General body styles */
+ .gradio-container {
+     font-family: 'Arial', sans-serif;
+     background-color: #f0f4f8; /* Lighter blue-gray background */
+     color: #2d3748;;
+ }
+ /* Header styles */
+ .gradio-header {
+     background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
+     color: white;
+     padding: 20px;
+     text-align: center;
+     border-radius: 8px;
+     box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
+     animation: fadeIn 1s ease-out;
+ }
+ .gradio-header h1 {
+     font-size: 2.5rem;
+ }
+ .gradio-header h3 {
+     font-size: 1.2rem;
+     margin-top: 10px;
+ }
+ /* Chatbot container styles */
+ .gradio-chatbot {
+     background-color: #fff;
+     border-radius: 10px;
+     padding: 20px;
+     box-shadow: 0 6px 18px rgba(0, 0, 0, 0.1);
+     border-left: 4px solid #4a00e0; /* Accent border */
+ }
+ /* Input field styles */
+ .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
+     border-radius: 8px;
+     border: 2px solid #e2e8f0;
+     background-color: #f8fafc;
+ }
+ .gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus, .gradio-file:focus, .gradio-slider:focus {
+     border-color: #8e2de2;
+     box-shadow: 0 0 0 3px rgba(142, 45, 226, 0.2);
+ }
+ /* Button styles */
+ /* Send Button: Sky Blue */
+ #submit-btn {
+     background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
+     color: white;
+     border: none;
+     border-radius: 8px;
+     padding: 10px 19px;
+     font-size: 1.1rem;
+     cursor: pointer;
+     transition: all 0.3s ease;
+     margin-left: auto;
+     margin-right: auto;
+     display: block;
+     margin-top: 10px;
+ }
+ #submit-btn:hover {
+     background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
+     box-shadow: 0 6px 8px rgba(74, 0, 224, 0.4);
+ }
+ #submit-btn:active {
+     transform: scale(0.95);
+ }
+ #clear-history {
+     background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%); /* Red gradient */
+     color: white;
+     border: none;
+     border-radius: 8px;
+     padding: 10px 13px;
+     font-size: 1.1rem;
+     cursor: pointer;
+     transition: all 0.3s ease;
+     margin-top: 10px;
+ }
+ #clear-history:hover {
+     background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
+     box-shadow: 0 6px 8px rgba(229, 62, 62, 0.4);
+ }
+ #clear-history:active {
+     transform: scale(0.95);
+ }
+ /* Input type selector buttons */
+ #input-type-group {
+     display: flex;
+     justify-content: center;
+     gap: 10px;
+     margin-bottom: 20px;
+ }
+ .input-type-btn {
+     background-color: #718096; /* Slate gray */
+     color: white;
+     border: none;
+     border-radius: 8px;
+     padding: 10px 15px;
+     font-size: 1rem;
+     cursor: pointer;
+     transition: all 0.3s ease;
+ }
+ .input-type-btn.selected {
+     background-color: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
+ }
+ .input-type-btn:hover {
+     background-color: #4a5568; /* Darker slate */
+ }
+ /* Chat history styles */
+ .gradio-chatbot .message {
+     margin-bottom: 10px;
+ }
+ .gradio-chatbot .user {
+     background-color: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
+     color: white;
+     padding: 10px;
+     border-radius: 12px;
+     max-width: 70%;
+     animation: slideInUser 0.5s ease-out;
+ }
+ .gradio-chatbot .assistant {
+     background-color: #f0f4f8; /* Light blue-gray */
+     color: #2d3748;
+     padding: 10px;
+     border-radius: 12px;
+     max-width: 70%;
+     margin-left: auto;
+     animation: slideInAssistant 0.5s ease-out;
+ }
+ /* Animation keyframes */
+ @keyframes fadeIn {
+     0% { opacity: 0; }
+     100% { opacity: 1; }
+ }
+ @keyframes slideInUser {
+     0% { transform: translateX(-100%); }
+     100% { transform: translateX(0); }
+ }
+ @keyframes slideInAssistant {
+     0% { transform: translateX(100%); }
+     100% { transform: translateX(0); }
+ }
+ /* Mobile responsiveness */
+ @media (max-width: 768px) {
+     .gradio-header h1 {
+         font-size: 1.8rem;
+     }
+     .gradio-header h3 {
+         font-size: 1rem;
+     }
+     .gradio-chatbot {
+         max-height: 400px;
+     }
+     .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
+         width: 100%;
+     }
+     #submit-btn, #clear-history {
+         width: 100%;
+         margin-left: 0;
+     }
+ }
+ """
+
+ # Gradio interface setup
+ def create_interface():
+     with gr.Blocks(css=custom_css) as demo:
+         gr.Markdown("""
+         <div class="gradio-header">
+             <h1>Multimodal Chatbot (Text + Image + Voice + PDF + Quiz)</h1>
+             <h3>Interact with a chatbot using text, image, voice, or PDF inputs</h3>
+         </div>
+         """)
+
+         # Add a description with an expandable accordion
+         with gr.Accordion("Click to expand for details", open=False):
+             gr.Markdown("""
+             ### Description:
+             This is a multimodal chatbot that can handle text, image, voice, PDF inputs, and generate quizzes from PDFs.
+             - You can ask questions or provide text, and the assistant will respond.
+             - You can upload an image, and the assistant will process it and answer questions about the image.
+             - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
+             - PDF support: Upload a PDF and ask questions about its content.
+             - PDF Quiz: Upload a PDF and specify how many MCQ questions you want generated based on the content.
+             - Enter your OpenAI API key to start interacting with the model.
+             - You can use the 'Clear History' button to remove the conversation history.
+             - "o1" is for image, voice, PDF and text chat and "o3-mini" is for text, PDF and voice chat only.
+             ### Reasoning Effort:
+             The reasoning effort controls how complex or detailed the assistant's answers should be.
+             - **Low**: Provides quick, concise answers with minimal reasoning or details.
+             - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
+             - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
+             """)
+
+         # Store PDF content as a state variable
+         pdf_content = gr.State("")
+
+         with gr.Row():
+             openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)
+
+         # Input type selector
+         with gr.Row():
+             input_type = gr.Radio(
+                 ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
+                 label="Choose Input Type",
+                 value="Text"
+             )
+
+         # Create the input components (initially text is visible, others are hidden)
+         with gr.Row():
+             # Text input
+             input_text = gr.Textbox(
+                 label="Enter Text Question",
+                 placeholder="Ask a question or provide text",
+                 lines=2,
+                 visible=True
+             )
+
+             # Image input
+             image_input = gr.Image(
+                 label="Upload an Image",
+                 type="pil",
+                 visible=False
+             )
+
+             # Audio input
+             audio_input = gr.Audio(
+                 label="Upload or Record Audio",
+                 type="filepath",
+                 visible=False
+             )
+
+             # PDF input
+             pdf_input = gr.File(
+                 label="Upload your PDF",
+                 file_types=[".pdf"],
+                 visible=False
+             )
+
+             # Quiz specific components
+             quiz_questions_slider = gr.Slider(
+                 minimum=1,
+                 maximum=20,
+                 value=5,
+                 step=1,
+                 label="Number of Quiz Questions",
+                 visible=False
+             )
+
+             # State variable for quiz mode (not visible)
+             quiz_mode = gr.State(False)
+
+         with gr.Row():
+             reasoning_effort = gr.Dropdown(
+                 label="Reasoning Effort",
+                 choices=["low", "medium", "high"],
+                 value="medium"
+             )
+             model_choice = gr.Dropdown(
+                 label="Select Model",
+                 choices=["o1", "o3-mini"],
+                 value="o1"  # Default to 'o1' for image-related tasks
+             )
+             submit_btn = gr.Button("Ask!", elem_id="submit-btn")
+             clear_btn = gr.Button("Clear History", elem_id="clear-history")
+
+         chat_history = gr.Chatbot()
+
+         # Connect the input type selector to the update function
+         input_type.change(
+             fn=update_input_type,
+             inputs=[input_type],
+             outputs=[input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode]
+         )
+
+         # Process PDF when uploaded
+         pdf_input.change(
+             fn=process_pdf,
+             inputs=[pdf_input],
+             outputs=[pdf_content]
+         )
+
+         # Button interactions
+         submit_btn.click(
+             fn=chatbot,
+             inputs=[
+                 input_text,
+                 image_input,
+                 audio_input,
+                 pdf_input,
+                 openai_api_key,
+                 reasoning_effort,
+                 model_choice,
+                 pdf_content,
+                 quiz_questions_slider,
+                 quiz_mode,
+                 chat_history
+             ],
+             outputs=[
+                 input_text,
+                 image_input,
+                 audio_input,
+                 pdf_input,
+                 pdf_content,
+                 chat_history
+             ]
+         )
+
+         clear_btn.click(
+             fn=clear_history,
+             inputs=[],
+             outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history]
+         )
+
+     return demo
+
+ # Run the interface
+ if __name__ == "__main__":
+     demo = create_interface()
      demo.launch()
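
For reference when reviewing the diff: the central change is the move from the legacy module-level `openai` interface (`openai.ChatCompletion.create`, `openai.Audio.transcribe`) to the openai-python v1 client. Below is a minimal standalone sketch of the new calling pattern used above; the API key, prompt, and audio path are placeholders for illustration, not part of the commit.

from openai import OpenAI

# Placeholder key; the app itself reads the key from a Gradio textbox at runtime.
client = OpenAI(api_key="sk-...")

# v1 chat-completion call, the pattern generate_mcq_quiz and generate_response now use.
# The commit maps the UI choice "o1" to "gpt-4" and "o3-mini" to "gpt-3.5-turbo".
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Say hello."}],  # placeholder prompt
    max_tokens=2000,
)
print(response.choices[0].message.content)

# v1 Whisper transcription, the pattern transcribe_audio now uses.
with open("audio.wav", "rb") as audio_file:  # placeholder path
    transcript = client.audio.transcriptions.create(model="whisper-1", file=audio_file)
print(transcript.text)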