# KubraBashir's picture
# Update app.py
# 911ed13 verified
# raw
# history blame
# 5.94 kB
import gradio as gr
import os
import fitz # PyMuPDF for PDF extraction
from pptx import Presentation
from docx import Document
from groq import Groq
# Initialize Groq Client
# The API key is read from the GROQ_API_KEY environment variable so that no
# secret is stored in the source file.
# NOTE(review): a key was previously hard-coded on this line; treat it as
# compromised and revoke it.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    # Fail fast with a clear message instead of failing later on the first
    # request with an authentication error.
    raise RuntimeError("GROQ_API_KEY environment variable is not set.")
client = Groq(api_key=api_key)
# File Extraction Functions
def extract_text_from_pdf(file_path):
    """Extract the text of every page of a PDF file.

    Args:
        file_path: Path of the PDF document to read.

    Returns:
        The concatenated text of all pages, or a string starting with
        "Error reading PDF:" when the document cannot be opened or parsed.
    """
    try:
        # The context manager closes the document (and its file handle) even
        # when a page fails to parse; previously the document was never closed.
        with fitz.open(file_path) as pdf_file:
            return "".join(page.get_text() for page in pdf_file)
    except Exception as e:
        # Errors are reported as text rather than raised, which is what the
        # callers of this function expect.
        return f"Error reading PDF: {e}"
def extract_text_from_ppt(file_path):
    """Collect the text of all text-bearing shapes in a PowerPoint file.

    Args:
        file_path: Path of the presentation to read.

    Returns:
        The text of every shape that exposes a ``text`` attribute, one shape
        per line in slide order, or a string starting with
        "Error reading PPT:" when the file cannot be read.
    """
    try:
        deck = Presentation(file_path)
        pieces = [
            shape.text + "\n"
            for slide in deck.slides
            for shape in slide.shapes
            if hasattr(shape, "text")  # skip shapes that carry no text
        ]
    except Exception as err:
        return f"Error reading PPT: {err}"
    return "".join(pieces)
def extract_text_from_word(file_path):
    """Read the paragraph text of a Word document.

    Args:
        file_path: Path of the document to read.

    Returns:
        The text of every paragraph, each followed by a newline, or a string
        starting with "Error reading Word file:" when the file cannot be read.
    """
    try:
        paragraphs = Document(file_path).paragraphs
        body = "".join(para.text + "\n" for para in paragraphs)
    except Exception as err:
        return f"Error reading Word file: {err}"
    return body
def process_files(file_paths):
    """Extract and concatenate the text of several uploaded files.

    The extractor is chosen from the file extension, compared
    case-insensitively so that e.g. "REPORT.PDF" is handled like "report.pdf".

    Args:
        file_paths: Iterable of file path strings; may be None or empty.

    Returns:
        The extracted text of all files joined in input order. Files with an
        unrecognised extension contribute an
        "Unsupported file format: <path>" line instead of text. Returns an
        empty string when no paths are given.
    """
    if not file_paths:
        return ""

    chunks = []
    for file_path in file_paths:
        # Only the comparison uses the lower-cased name; the original path is
        # what gets opened and what appears in messages.
        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            chunks.append(extract_text_from_pdf(file_path))
        elif lowered.endswith(".pptx"):
            chunks.append(extract_text_from_ppt(file_path))
        elif lowered.endswith(".docx"):
            chunks.append(extract_text_from_word(file_path))
        else:
            chunks.append(f"Unsupported file format: {file_path}\n")
    return "".join(chunks)
# Generate MCQs and Subjective Questions Using Groq
def _split_question_sections(response):
    """Split a model response into (mcqs, subjective) at the subjective heading."""
    mcq_lines = []
    subjective_lines = []
    current = mcq_lines
    for line in response.split("\n"):
        if current is mcq_lines:
            # Accept the exact bold marker anywhere in the line, or a line
            # that is only the heading once markdown decoration is removed
            # (e.g. "## Subjective Questions:"). A looser substring match
            # would trigger on introductory sentences that merely mention
            # "subjective questions".
            heading = line.strip().strip("*#:_- ").lower()
            if "**Subjective Questions**" in line or heading == "subjective questions":
                current = subjective_lines
        current.append(line + "\n")
    return "".join(mcq_lines), "".join(subjective_lines)


def generate_questions(text, num_mcqs=5, num_subjective=2, difficulty_mcqs="medium", difficulty_subjective="medium", question_type="mix"):
    """Ask the Groq chat model for MCQs and subjective questions about a text.

    Args:
        text: Source material the questions are generated from.
        num_mcqs: Requested number of multiple choice questions (clamped to 0-40).
        num_subjective: Requested number of subjective questions (clamped to 0-20).
        difficulty_mcqs: "easy", "medium" or "hard"; unknown values fall back
            to the "medium" description.
        difficulty_subjective: Same as ``difficulty_mcqs``, for the subjective
            questions.
        question_type: "reason", "short", "long", "case study" or "mix";
            unknown values fall back to the "mix" instruction.

    Returns:
        A ``(mcqs, subjective)`` tuple of strings. Everything before the
        subjective heading is returned as ``mcqs``; the heading and everything
        after it as ``subjective``. On any failure the tuple is
        ``("Error generating questions: <reason>", "")``.
    """
    try:
        # int() tolerates float values; the lower bound keeps negative counts
        # out of the prompt.
        num_mcqs = max(0, min(int(num_mcqs), 40))
        num_subjective = max(0, min(int(num_subjective), 20))
        difficulty_levels = {
            "easy": "simple questions with direct answers.",
            "medium": "moderate complexity questions requiring reasoning.",
            "hard": "challenging questions requiring deep understanding."
        }
        question_type_map = {
            "reason": "Generate reasoning-based questions.",
            "short": "Generate short-answer questions.",
            "long": "Generate long-answer questions.",
            "case study": "Generate case study-based questions.",
            "mix": "Generate a mix of question types."
        }
        # Fall back to the full description / instruction, not the bare key,
        # so an unknown option still yields a meaningful prompt.
        mcq_level = difficulty_levels.get(difficulty_mcqs, difficulty_levels["medium"])
        subjective_level = difficulty_levels.get(difficulty_subjective, difficulty_levels["medium"])
        type_instruction = question_type_map.get(question_type, question_type_map["mix"])
        prompt = (
            f"Generate {num_mcqs} multiple choice questions and {num_subjective} subjective questions "
            f"from the following text: {text}. Include the correct answers for each question. "
            f"The questions should be {mcq_level} for MCQs and {subjective_level} for Subjective questions. "
            f"{type_instruction} "
            # The response is split on this heading, so the model is told
            # explicitly to emit it.
            "List all multiple choice questions first, then begin the subjective section "
            "with a line containing exactly '**Subjective Questions**'."
        )
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",
        )
        response = chat_completion.choices[0].message.content.strip()
        return _split_question_sections(response)
    except Exception as e:
        return f"Error generating questions: {e}", ""
# Gradio Interface Function
def process_and_generate(file_paths, raw_text, num_mcqs, num_subjective, difficulty_mcqs, difficulty_subjective, question_type):
    """Gather text from uploaded files and/or raw input and generate questions.

    Args:
        file_paths: Paths of uploaded files; may be None or empty.
        raw_text: Free text supplied by the user; may be None or empty.
        num_mcqs: Number of multiple choice questions to request.
        num_subjective: Number of subjective questions to request.
        difficulty_mcqs: Difficulty level for the MCQs.
        difficulty_subjective: Difficulty level for the subjective questions.
        question_type: Kind of questions to request.

    Returns:
        A ``(mcqs, subjective)`` tuple of strings. When no usable text is
        available both entries are "No text provided to generate questions.";
        when generation raises, both entries carry the error message.
    """
    sources = []

    # Extract text from uploaded files
    if file_paths:
        extracted_text = process_files(file_paths)
        if extracted_text.strip():
            sources.append(extracted_text)

    # Add raw text if provided. Normalise None to "" first so a missing value
    # does not raise AttributeError on .strip().
    raw_text = raw_text or ""
    if raw_text.strip():
        sources.append(raw_text)

    # File text and raw text are separated by a single newline.
    combined_text = "\n".join(sources)
    if not combined_text.strip():
        return "No text provided to generate questions.", "No text provided to generate questions."

    try:
        mcqs, subjective = generate_questions(
            combined_text, num_mcqs, num_subjective, difficulty_mcqs, difficulty_subjective, question_type
        )
        return mcqs, subjective
    except Exception as e:
        return f"Error generating questions: {e}", f"Error generating questions: {e}"
# Gradio Inputs and Outputs
# Input components, listed in the order of process_and_generate's parameters:
# files, raw text, MCQ count, subjective count, MCQ difficulty,
# subjective difficulty, question type.
inputs = [
    gr.File(
        label="Upload Files (.pdf, .pptx, .docx)",
        file_count="multiple",
        type="filepath",
    ),
    gr.Textbox(
        label="Raw Text",
        placeholder="Enter raw text here (Optional)...",
        lines=3,
    ),
    gr.Slider(
        label="Number of MCQs (Max 40)",
        minimum=2,
        maximum=40,
        step=1,
        value=5,
    ),
    gr.Slider(
        label="Number of Subjective Questions (Max 20)",
        minimum=2,
        maximum=20,
        step=1,
        value=2,
    ),
    gr.Radio(
        ["easy", "medium", "hard"],
        value="medium",
        label="Select Difficulty Level for MCQs",
    ),
    gr.Radio(
        ["easy", "medium", "hard"],
        value="medium",
        label="Select Difficulty Level for Subjective Questions",
    ),
    gr.Radio(
        ["reason", "short", "long", "case study", "mix"],
        value="mix",
        label="Select Type of Question",
    ),
]
# Two text areas: the first receives the MCQ section, the second the
# subjective section.
outputs = [
    gr.Textbox(lines=10, label="Generated MCQs"),
    gr.Textbox(lines=10, label="Generated Subjective Questions"),
]
# Gradio Interface
# Build the interface first, then start it; live=False means the function
# runs on submit rather than on every input change.
demo = gr.Interface(
    title="MCQ & Subjective Question Generator",
    fn=process_and_generate,
    inputs=inputs,
    outputs=outputs,
    live=False,
)
demo.launch()