|
import gradio as gr
|
|
import openai
|
|
import base64
|
|
from PIL import Image
|
|
import io
|
|
import os
|
|
import tempfile
|
|
import fitz
|
|
|
|
|
|
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_file: Path to the PDF (``str`` or ``os.PathLike``), or an object
            that exposes the path through a ``name`` attribute.

    Returns:
        The concatenated text of all pages. This function never raises: on
        any failure it returns a string that starts with
        ``"Error extracting text from PDF: "``.
    """
    try:
        # Real paths are passed through untouched (``pathlib.Path.name`` is
        # only the last path component). Anything else is presumably an
        # upload wrapper carrying the temp-file path in ``.name`` -- verify
        # against the Gradio version in use.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = pdf_file
        else:
            pdf_path = getattr(pdf_file, "name", pdf_file)

        # The context manager closes the document even when a page fails to
        # parse; the previous explicit close() was skipped on any exception.
        with fitz.open(pdf_path) as pdf_document:
            return "".join(page.get_text() for page in pdf_document)
    except Exception as e:
        return f"Error extracting text from PDF: {str(e)}"
|
|
|
|
|
|
def generate_mcq_quiz(pdf_content, num_questions, openai_api_key, model_choice,
                      max_content_chars=8000):
    """Ask the selected model to write a multiple-choice quiz about a document.

    Args:
        pdf_content: Text of the document the quiz is based on.
        num_questions: Number of questions to request (must be >= 1).
        openai_api_key: API key; assigned to ``openai.api_key`` before the call.
        model_choice: Model name passed to ``openai.ChatCompletion.create``.
        max_content_chars: Only this many leading characters of
            ``pdf_content`` are placed in the prompt (default 8000).

    Returns:
        The model's reply text, or a string starting with ``"Error"`` when the
        key is missing, the input is unusable, or the API call fails.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    # Reject unusable input before spending an API call on it.
    if not pdf_content or not pdf_content.strip():
        return "Error generating quiz: the document contains no text."
    try:
        question_count = int(num_questions)
    except (TypeError, ValueError):
        return "Error generating quiz: the number of questions must be an integer."
    if question_count < 1:
        return "Error generating quiz: the number of questions must be at least 1."

    openai.api_key = openai_api_key

    # Slicing already copes with shorter strings, so no length check is needed.
    limited_content = pdf_content[:max_content_chars]

    prompt = (
        f"Based on the following document content, generate {question_count} "
        "multiple-choice quiz questions.\n"
        "For each question:\n"
        "1. Create a clear question based on key concepts in the document\n"
        "2. Provide 4 possible answers (A, B, C, D)\n"
        "3. Indicate the correct answer\n"
        "4. Briefly explain why the answer is correct\n"
        "\n"
        "Format the output clearly with each question numbered and separated.\n"
        "\n"
        "Document content:\n"
        f"{limited_content}\n"
    )

    try:
        response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Error generating quiz: {str(e)}"
|
|
|
|
|
|
def _build_user_content(input_text, image_url, pdf_content):
    """Build the ``content`` value of the single user message."""
    if pdf_content and input_text:
        text_part = (
            f"Based on the following document content, please answer this question: '{input_text}'"
            f"\n\nDocument content:\n{pdf_content}"
        )
    else:
        text_part = input_text or ""

    if image_url is None:
        return text_part

    # Image requests use the list-of-parts form so that a typed question
    # travels together with the picture instead of being dropped.
    parts = []
    if text_part:
        parts.append({"type": "text", "text": text_part})
    parts.append({"type": "image_url", "image_url": {"url": image_url}})
    return parts


def generate_response(input_text, image, pdf_content, openai_api_key, reasoning_effort="medium", model_choice="o1"):
    """Send the user's text and/or image to the chosen model and return the reply.

    Args:
        input_text: The user's question or message (may be empty or ``None``).
        image: Image object accepted by ``get_base64_string_from_image``, or
            ``None``. Only forwarded when ``model_choice`` is ``"o1"``.
        pdf_content: Document text; when present together with ``input_text``
            the question is wrapped in a "based on the following document"
            prompt.
        openai_api_key: API key; assigned to ``openai.api_key`` before the call.
        reasoning_effort: ``"low"``, ``"medium"`` or ``"high"``; forwarded to
            the API. Any other value is left out of the request.
        model_choice: Model name passed to ``openai.ChatCompletion.create``.

    Returns:
        The model's reply text, or a string starting with ``"Error"`` when the
        key is missing, the input is unusable, or the API call fails.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    has_image = image is not None
    if not input_text and not has_image:
        # An empty message would only produce an API error.
        return "Error: No input provided. Please enter a question or upload an image."
    if has_image and model_choice != "o1":
        # Previously the base64 payload was sent to other models as plain text.
        return f"Error: Image input is only supported by the 'o1' model, not '{model_choice}'."

    openai.api_key = openai_api_key

    try:
        image_url = None
        if has_image:
            image_url = f"data:image/png;base64,{get_base64_string_from_image(image)}"

        request = {
            "model": model_choice,
            "messages": [
                {"role": "user", "content": _build_user_content(input_text, image_url, pdf_content)}
            ],
            "max_completion_tokens": 2000,
        }
        # The argument used to be accepted and then silently ignored.
        if reasoning_effort in ("low", "medium", "high"):
            request["reasoning_effort"] = reasoning_effort

        response = openai.ChatCompletion.create(**request)
        return response.choices[0].message.content
    except Exception as e:
        return f"Error calling OpenAI API: {str(e)}"
|
|
|
|
|
|
def get_base64_string_from_image(pil_image):
    """Serialize an image to PNG and return the bytes as base64 text.

    Args:
        pil_image: Object exposing ``save(file_obj, format=...)``.

    Returns:
        The base64 encoding of the PNG bytes, as a ``str``.
    """
    with io.BytesIO() as png_buffer:
        pil_image.save(png_buffer, format="PNG")
        encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode("utf-8")
|
|
|
|
|
|
def transcribe_audio(audio, openai_api_key):
    """Transcribe an audio file with the ``whisper-1`` model.

    Args:
        audio: Path of the audio file to transcribe.
        openai_api_key: API key; assigned to ``openai.api_key`` before the call.

    Returns:
        The transcribed text, or a string starting with ``"Error"`` when the
        key is missing or reading/transcribing the file fails.
    """
    if not openai_api_key:
        return "Error: No API key provided."

    openai.api_key = openai_api_key

    try:
        # Hand over the open file itself: it keeps its real name (and thus
        # extension) instead of being relabelled "audio.wav", and the content
        # is not copied into a second in-memory buffer first.
        with open(audio, "rb") as audio_file:
            transcription = openai.Audio.transcribe(file=audio_file, model="whisper-1")
        return transcription.text
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
|
|
|
|
|
|
def chatbot(input_text, image, audio, pdf_file, openai_api_key, reasoning_effort, model_choice, pdf_content, num_quiz_questions, pdf_quiz_mode, history):
    """Handle one submit: route the inputs to the right helper and log the turn.

    Args:
        input_text: Typed question; replaced by the transcript when ``audio``
            is given.
        image: Uploaded image or ``None``.
        audio: Path of the recorded/uploaded audio, or a falsy value.
        pdf_file: Newly uploaded PDF or ``None``.
        openai_api_key: API key forwarded to the helpers.
        reasoning_effort: Forwarded to ``generate_response``.
        model_choice: Model name forwarded to the helpers.
        pdf_content: Previously extracted PDF text.
        num_quiz_questions: Requested number of quiz questions.
        pdf_quiz_mode: When true, generate a quiz instead of answering.
        history: List of ``(user, assistant)`` tuples, or ``None``.

    Returns:
        A 6-tuple ``("", None, None, None, pdf_text, history)``: cleared values
        for the four input widgets, the PDF text to keep, and the updated
        history.
    """
    if history is None:
        history = []

    if audio:
        input_text = transcribe_audio(audio, openai_api_key)
        # The helpers report failures as "Error..." strings; show a failed
        # transcription to the user instead of sending it on as a question.
        if isinstance(input_text, str) and input_text.startswith(
            ("Error: No API key provided.", "Error transcribing audio:")
        ):
            history.append(("👤: [Voice input]", f"🤖: {input_text}"))
            return "", None, None, None, pdf_content, history

    new_pdf_content = pdf_content
    if pdf_file is not None:
        new_pdf_content = extract_text_from_pdf(pdf_file)
        # Same convention: never treat an extraction error as document text.
        if isinstance(new_pdf_content, str) and new_pdf_content.startswith(
            "Error extracting text from PDF:"
        ):
            history.append(("👤: [Uploaded PDF]", f"🤖: {new_pdf_content}"))
            return "", None, None, None, pdf_content, history

    if pdf_quiz_mode:
        if new_pdf_content:
            question_count = int(num_quiz_questions)
            quiz_response = generate_mcq_quiz(new_pdf_content, question_count, openai_api_key, model_choice)
            history.append((f"👤: [Uploaded PDF for Quiz - {question_count} questions]", f"🤖: {quiz_response}"))
        else:
            history.append(("👤: [Attempted to generate quiz without PDF]", "🤖: Please upload a PDF file to generate quiz questions."))
    elif input_text or image is not None or pdf_file is not None:
        response = generate_response(input_text, image, new_pdf_content, openai_api_key, reasoning_effort, model_choice)
        if input_text:
            user_entry = f"👤: {input_text}"
        elif image is not None:
            user_entry = "👤: [Uploaded image]"
        else:
            user_entry = "👤: [Uploaded PDF]"
        history.append((user_entry, f"🤖: {response}"))
    else:
        # Nothing to answer: skip the API call that used to be made (and
        # thrown away) before reaching this branch.
        history.append(("👤: [No input provided]", "🤖: Please provide some input (text, image, or PDF) for me to respond to."))

    return "", None, None, None, new_pdf_content, history
|
|
|
|
|
|
def clear_history():
    """Return the reset values for the inputs, cached PDF text and chat log.

    Returns:
        A 6-tuple: empty text, three ``None`` values (image, audio, PDF
        widgets), empty PDF text and an empty history list.
    """
    empty_text, no_image, no_audio, no_pdf = "", None, None, None
    cached_pdf_text = ""
    fresh_history = []
    return empty_text, no_image, no_audio, no_pdf, cached_pdf_text, fresh_history
|
|
|
|
|
|
def process_pdf(pdf_file):
    """Return the text of ``pdf_file``, or ``""`` when no file is given.

    The extraction itself is delegated to ``extract_text_from_pdf``.
    """
    return "" if pdf_file is None else extract_text_from_pdf(pdf_file)
|
|
|
|
|
|
def update_input_type(choice):
    """Map the selected input type to widget updates.

    Args:
        choice: One of ``"Text"``, ``"Image"``, ``"Voice"``, ``"PDF"`` or
            ``"PDF(QUIZ)"``.

    Returns:
        Six ``gr.update`` objects: visibility for the text box, image, audio,
        PDF and quiz-slider widgets (in that order), then the value of the
        quiz-mode checkbox. ``None`` for any other choice.
    """
    # Columns: text, image, audio, pdf, quiz slider (visibility), quiz mode (value).
    layouts = {
        "Text": (True, False, False, False, False, False),
        "Image": (True, True, False, False, False, False),
        "Voice": (False, False, True, False, False, False),
        "PDF": (True, False, False, True, False, False),
        "PDF(QUIZ)": (False, False, False, True, True, True),
    }
    layout = layouts.get(choice)
    if layout is None:
        return None

    *visibility_flags, quiz_enabled = layout
    widget_updates = [gr.update(visible=flag) for flag in visibility_flags]
    widget_updates.append(gr.update(value=quiz_enabled))
    return tuple(widget_updates)
|
|
|
|
|
|
# Stylesheet handed to gr.Blocks(css=...).
# Gradients are CSS images, so they are set through `background`; a
# `background-color: linear-gradient(...)` declaration is invalid and ignored.
custom_css = """
/* General body styles */
.gradio-container {
    font-family: 'Arial', sans-serif;
    background-color: #f0f4f8; /* Lighter blue-gray background */
    color: #2d3748;
}

/* Header styles */
.gradio-header {
    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
    color: white;
    padding: 20px;
    text-align: center;
    border-radius: 8px;
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
    animation: fadeIn 1s ease-out;
}
.gradio-header h1 {
    font-size: 2.5rem;
}
.gradio-header h3 {
    font-size: 1.2rem;
    margin-top: 10px;
}

/* Chatbot container styles */
.gradio-chatbot {
    background-color: #fff;
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 6px 18px rgba(0, 0, 0, 0.1);
    border-left: 4px solid #4a00e0; /* Accent border */
}

/* Input field styles */
.gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
    border-radius: 8px;
    border: 2px solid #e2e8f0;
    background-color: #f8fafc;
}
.gradio-textbox:focus, .gradio-dropdown:focus, .gradio-image:focus, .gradio-audio:focus, .gradio-file:focus, .gradio-slider:focus {
    border-color: #8e2de2;
    box-shadow: 0 0 0 3px rgba(142, 45, 226, 0.2);
}

/* Button styles */
/* Send button: purple gradient */
#submit-btn {
    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
    color: white;
    border: none;
    border-radius: 8px;
    padding: 10px 19px;
    font-size: 1.1rem;
    cursor: pointer;
    transition: all 0.3s ease;
    margin-left: auto;
    margin-right: auto;
    display: block;
    margin-top: 10px;
}
#submit-btn:hover {
    background: linear-gradient(135deg, #5b10f1 0%, #9f3ef3 100%); /* Slightly lighter */
    box-shadow: 0 6px 8px rgba(74, 0, 224, 0.4);
}
#submit-btn:active {
    transform: scale(0.95);
}
#clear-history {
    background: linear-gradient(135deg, #e53e3e 0%, #f56565 100%); /* Red gradient */
    color: white;
    border: none;
    border-radius: 8px;
    padding: 10px 13px;
    font-size: 1.1rem;
    cursor: pointer;
    transition: all 0.3s ease;
    margin-top: 10px;
}
#clear-history:hover {
    background: linear-gradient(135deg, #c53030 0%, #e53e3e 100%); /* Slightly darker red gradient on hover */
    box-shadow: 0 6px 8px rgba(229, 62, 62, 0.4);
}
#clear-history:active {
    transform: scale(0.95);
}

/* Input type selector buttons */
#input-type-group {
    display: flex;
    justify-content: center;
    gap: 10px;
    margin-bottom: 20px;
}
.input-type-btn {
    background-color: #718096; /* Slate gray */
    color: white;
    border: none;
    border-radius: 8px;
    padding: 10px 15px;
    font-size: 1rem;
    cursor: pointer;
    transition: all 0.3s ease;
}
.input-type-btn.selected {
    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
}
.input-type-btn:hover {
    background-color: #4a5568; /* Darker slate */
}

/* Chat history styles */
.gradio-chatbot .message {
    margin-bottom: 10px;
}
.gradio-chatbot .user {
    background: linear-gradient(135deg, #4a00e0 0%, #8e2de2 100%); /* Purple gradient */
    color: white;
    padding: 10px;
    border-radius: 12px;
    max-width: 70%;
    animation: slideInUser 0.5s ease-out;
}
.gradio-chatbot .assistant {
    background-color: #f0f4f8; /* Light blue-gray */
    color: #2d3748;
    padding: 10px;
    border-radius: 12px;
    max-width: 70%;
    margin-left: auto;
    animation: slideInAssistant 0.5s ease-out;
}

/* Animation keyframes */
@keyframes fadeIn {
    0% { opacity: 0; }
    100% { opacity: 1; }
}
@keyframes slideInUser {
    0% { transform: translateX(-100%); }
    100% { transform: translateX(0); }
}
@keyframes slideInAssistant {
    0% { transform: translateX(100%); }
    100% { transform: translateX(0); }
}

/* Mobile responsiveness */
@media (max-width: 768px) {
    .gradio-header h1 {
        font-size: 1.8rem;
    }
    .gradio-header h3 {
        font-size: 1rem;
    }
    .gradio-chatbot {
        max-height: 400px;
    }
    .gradio-textbox, .gradio-dropdown, .gradio-image, .gradio-audio, .gradio-file, .gradio-slider {
        width: 100%;
    }
    #submit-btn, #clear-history {
        width: 100%;
        margin-left: 0;
    }
}
"""
|
|
|
|
|
|
def create_interface():
    """Build the Gradio Blocks UI and wire its events.

    Returns:
        The ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(css=custom_css) as demo:
        gr.Markdown("""
            <div class="gradio-header">
            <h1>Multimodal Chatbot (Text + Image + Voice + PDF + Quiz)</h1>
            <h3>Interact with a chatbot using text, image, voice, or PDF inputs</h3>
            </div>
        """)

        with gr.Accordion("Click to expand for details", open=False):
            gr.Markdown("""
                ### Description:
                This is a multimodal chatbot that can handle text, image, voice, PDF inputs, and generate quizzes from PDFs.
                - You can ask questions or provide text, and the assistant will respond.
                - You can upload an image, and the assistant will process it and answer questions about the image.
                - Voice input is supported: You can upload or record an audio file, and it will be transcribed to text and sent to the assistant.
                - PDF support: Upload a PDF and ask questions about its content.
                - PDF Quiz: Upload a PDF and specify how many MCQ questions you want generated based on the content.
                - Enter your OpenAI API key to start interacting with the model.
                - You can use the 'Clear History' button to remove the conversation history.
                - "o1" is for image, voice, PDF and text chat and "o3-mini" is for text, PDF and voice chat only.
                ### Reasoning Effort:
                The reasoning effort controls how complex or detailed the assistant's answers should be.
                - **Low**: Provides quick, concise answers with minimal reasoning or details.
                - **Medium**: Offers a balanced response with a reasonable level of detail and thought.
                - **High**: Produces more detailed, analytical, or thoughtful responses, requiring deeper reasoning.
            """)

        # Extracted PDF text, kept between submits.
        pdf_content = gr.State("")

        with gr.Row():
            openai_api_key = gr.Textbox(label="Enter OpenAI API Key", type="password", placeholder="sk-...", interactive=True)

        with gr.Row():
            input_type = gr.Radio(
                ["Text", "Image", "Voice", "PDF", "PDF(QUIZ)"],
                label="Choose Input Type",
                value="Text",
            )

        with gr.Row():
            input_text = gr.Textbox(
                label="Enter Text Question",
                placeholder="Ask a question or provide text",
                lines=2,
                visible=True,
            )

            image_input = gr.Image(
                label="Upload an Image",
                type="pil",
                visible=False,
            )

            audio_input = gr.Audio(
                label="Upload or Record Audio",
                type="filepath",
                visible=False,
            )

            pdf_input = gr.File(
                label="Upload your PDF",
                file_types=[".pdf"],
                visible=False,
            )

            quiz_questions_slider = gr.Slider(
                minimum=1,
                maximum=20,
                value=5,
                step=1,
                label="Number of Quiz Questions",
                visible=False,
            )

            # Hidden flag, set by update_input_type when "PDF(QUIZ)" is chosen.
            quiz_mode = gr.Checkbox(
                label="Quiz Mode",
                visible=False,
                value=False,
            )

        # NOTE(review): the original indentation was lost; the two buttons are
        # assumed to share this row with the dropdowns -- confirm the layout.
        with gr.Row():
            reasoning_effort = gr.Dropdown(
                label="Reasoning Effort",
                choices=["low", "medium", "high"],
                value="medium",
            )
            model_choice = gr.Dropdown(
                label="Select Model",
                choices=["o1", "o3-mini"],
                value="o1",
            )
            submit_btn = gr.Button("Ask!", elem_id="submit-btn")
            clear_btn = gr.Button("Clear History", elem_id="clear-history")

        chat_history = gr.Chatbot()

        input_type.change(
            fn=update_input_type,
            inputs=[input_type],
            outputs=[input_text, image_input, audio_input, pdf_input, quiz_questions_slider, quiz_mode],
        )

        # `upload` instead of `change`: chatbot() resets the file widget to
        # None after every submit, and a `change` listener would then run
        # process_pdf(None) and overwrite the stored PDF text with "".
        pdf_input.upload(
            fn=process_pdf,
            inputs=[pdf_input],
            outputs=[pdf_content],
        )

        submit_btn.click(
            fn=chatbot,
            inputs=[
                input_text,
                image_input,
                audio_input,
                pdf_input,
                openai_api_key,
                reasoning_effort,
                model_choice,
                pdf_content,
                quiz_questions_slider,
                quiz_mode,
                chat_history,
            ],
            outputs=[
                input_text,
                image_input,
                audio_input,
                pdf_input,
                pdf_content,
                chat_history,
            ],
        )

        clear_btn.click(
            fn=clear_history,
            inputs=[],
            outputs=[input_text, image_input, audio_input, pdf_input, pdf_content, chat_history],
        )

    return demo
|
|
|
|
|
|
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()