KubraBashir's picture
Update app.py
1e3edd6 verified
import gradio as gr
import os
import fitz # PyMuPDF for PDF extraction
import requests
from bs4 import BeautifulSoup
from pptx import Presentation
from docx import Document # For Word files
from groq import Groq # Assuming Groq is available as a Python library
# Build the shared Groq client from the key stored in the 'MCQs' environment
# variable; stop at import time when the key is missing or empty.
api_key = os.getenv('MCQs')
if not api_key:
    raise ValueError("API Key not found in environment variables!")
client = Groq(api_key=api_key)
# Step 1: File Extraction Functions
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Object whose ``name`` attribute is the path of the PDF on disk.

    Returns:
        The combined page text, or a string starting with
        ``"Error reading PDF: "`` when the document cannot be opened or read.
    """
    try:
        # The context manager closes the document even when a page fails to
        # load, so the underlying file handle is never left open.
        with fitz.open(file.name) as pdf_file:
            return "".join(page.get_text() for page in pdf_file)
    except Exception as e:
        # Callers concatenate the result into a larger string, so failures
        # are reported in-band rather than raised.
        return f"Error reading PDF: {e}"
def extract_text_from_ppt(file):
    """Gather the text of every shape that exposes a ``text`` attribute.

    Args:
        file: Object whose ``name`` attribute is the path of the .pptx file.

    Returns:
        Each shape's text followed by a newline, in slide order, or a string
        starting with ``"Error reading PPT: "`` if anything goes wrong.
    """
    try:
        deck = Presentation(file.name)
        chunks = [
            shape.text + "\n"
            for slide in deck.slides
            for shape in slide.shapes
            if hasattr(shape, 'text')
        ]
    except Exception as e:
        return f"Error reading PPT: {e}"
    return "".join(chunks)
def extract_text_from_word(file):
    """Return the paragraphs of a Word document, one per line.

    Args:
        file: Object whose ``name`` attribute is the path of the .docx file.

    Returns:
        Every paragraph's text followed by a newline, or a string starting
        with ``"Error reading Word file: "`` if the document cannot be read.
    """
    try:
        paragraphs = Document(file.name).paragraphs
        body = "".join(f"{item.text}\n" for item in paragraphs)
    except Exception as e:
        return f"Error reading Word file: {e}"
    return body
def extract_text_from_url(url):
    """Download a web page and return its text content, one fragment per line.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text with ``<script>``, ``<style>`` and ``<noscript>``
        content removed, or a string starting with ``"Error reading URL: "``
        when the request fails, times out, or the server answers with an
        HTTP error status.
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive
        # server, which would stall the caller.
        response = requests.get(url, timeout=15)
        # Report 4xx/5xx answers as errors instead of parsing the server's
        # error page as if it were the requested content.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # get_text() would otherwise include JavaScript and CSS source.
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        return soup.get_text(separator="\n", strip=True)
    except Exception as e:
        # Callers concatenate the result, so failures are reported in-band.
        return f"Error reading URL: {e}"
def process_files(files, url):
    """Extract and concatenate text from uploaded files and an optional URL.

    Args:
        files: Iterable of objects whose ``name`` attribute is a file path,
            or ``None``/empty when there are no files.
        url: Web address to extract text from; skipped when falsy.

    Returns:
        The extracted text of every supported file (.pdf, .pptx, .docx,
        matched case-insensitively) in input order, an
        ``"Unsupported file format: <name>"`` line for every other file,
        followed by the URL text when ``url`` is given.
    """
    parts = []
    # Tolerate None so the function is safe to call when nothing was
    # uploaded, mirroring the `if files:` guard in process_and_generate.
    for file in files or []:
        # Lower-case only for matching so "REPORT.PDF" is recognized; the
        # original name is kept for the message shown to the user.
        lowered = file.name.lower()
        if lowered.endswith(".pdf"):
            parts.append(extract_text_from_pdf(file))
        elif lowered.endswith(".pptx"):
            parts.append(extract_text_from_ppt(file))
        elif lowered.endswith(".docx"):
            parts.append(extract_text_from_word(file))
        else:
            # Inform user of unsupported format
            parts.append("Unsupported file format: " + file.name + "\n")
    if url:
        parts.append(extract_text_from_url(url))
    return "".join(parts)
# Step 2: Generate MCQs and Subjective Questions Using Groq
def generate_questions(text, num_mcqs=5, num_subjective=2, difficulty_mcqs="medium", difficulty_subjective="medium", model="llama3-8b-8192"):
    """Ask the Groq chat model for MCQs and subjective questions about ``text``.

    Args:
        text: Source material the questions should be based on.
        num_mcqs: Requested number of MCQs; coerced to int, clamped to 0-40.
        num_subjective: Requested number of subjective questions; coerced to
            int, clamped to 0-20.
        difficulty_mcqs: "easy", "medium" or "hard"; any other value is
            treated as "medium".
        difficulty_subjective: Same choices as ``difficulty_mcqs``.
        model: Groq model identifier passed to the chat-completion call.

    Returns:
        A ``(mcqs, subjective)`` tuple of strings. Everything in the reply
        before the subjective-questions header goes into ``mcqs``; the header
        and everything after it go into ``subjective``. On any failure the
        tuple is ``("Error generating questions: <reason>", "")``.
    """
    try:
        num_mcqs = max(0, min(int(num_mcqs), 40))  # Limit MCQs to 40
        num_subjective = max(0, min(int(num_subjective), 20))  # Limit Subjective Questions to 20
        difficulty_levels = {
            "easy": "simple questions with direct answers.",
            "medium": "moderate complexity questions requiring reasoning.",
            "hard": "challenging questions requiring deep understanding."
        }
        # Fall back to the full "medium" description, not the bare word, so
        # the prompt reads the same way for unknown difficulty values.
        fallback = difficulty_levels["medium"]
        mcq_style = difficulty_levels.get(difficulty_mcqs, fallback)
        subjective_style = difficulty_levels.get(difficulty_subjective, fallback)
        prompt = (
            f"Generate {num_mcqs} multiple choice questions and {num_subjective} subjective questions "
            f"from the following text: {text}. Include the correct answers for each question. "
            f"The questions should be {mcq_style} for MCQs and {subjective_style} for Subjective questions."
        )
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
        )
        response = chat_completion.choices[0].message.content.strip()
        return _split_questions(response)
    except Exception as e:
        # The UI shows both return values as text, so errors are reported
        # in the first output instead of being raised.
        return f"Error generating questions: {e}", ""


def _is_subjective_header(line):
    """Return True when ``line`` looks like the subjective-section heading."""
    # Drop leading markdown decoration (**, ##, >, -, _) and ignore case so
    # variants such as "## Subjective Questions" or "**Subjective questions:**"
    # are recognized alongside the exact "**Subjective Questions**" form.
    core = line.strip().lstrip("#*_>- \t").lower()
    return core.startswith("subjective question")


def _split_questions(response):
    """Split a model reply into ``(mcq_text, subjective_text)`` at the first subjective header."""
    lines = response.split("\n")
    split_at = len(lines)
    for index, line in enumerate(lines):
        if _is_subjective_header(line):
            split_at = index
            break
    mcqs = "".join(item + "\n" for item in lines[:split_at])
    subjective = "".join(item + "\n" for item in lines[split_at:])
    return mcqs, subjective
# Step 3: Gradio Interface
def process_and_generate(files, raw_text, url, num_mcqs=5, num_subjective=2, difficulty_mcqs="medium", difficulty_subjective="medium"):
    """Collect text from every input source and generate questions from it.

    Args:
        files: Iterable of objects whose ``name`` attribute is a file path,
            or ``None``/empty when nothing was uploaded.
        raw_text: Text typed by the user; skipped when falsy.
        url: Web address to extract text from; skipped when falsy.
        num_mcqs: Number of MCQs to request.
        num_subjective: Number of subjective questions to request.
        difficulty_mcqs: Difficulty label for the MCQs.
        difficulty_subjective: Difficulty label for the subjective questions.

    Returns:
        A ``(mcqs, subjective)`` tuple of strings. When no usable text was
        supplied, the first element is "No valid input provided to generate
        questions." (followed by any unsupported-file notices) and the second
        is empty. Otherwise the tuple comes from ``generate_questions``, with
        unsupported-file notices prepended to the first element.
    """
    extracted_parts = []
    # Unsupported-file notices are meant for the user, not the model, so they
    # are kept out of the text that is sent for question generation.
    notices = []
    for file in files or []:
        # Lower-case only for matching so "REPORT.PDF" is recognized.
        lowered = file.name.lower()
        if lowered.endswith(".pdf"):
            extracted_parts.append(extract_text_from_pdf(file))
        elif lowered.endswith(".pptx"):
            extracted_parts.append(extract_text_from_ppt(file))
        elif lowered.endswith(".docx"):
            extracted_parts.append(extract_text_from_word(file))
        else:
            notices.append(f"Unsupported file format: {file.name}")
    # Append raw text input if provided
    if raw_text:
        extracted_parts.append(raw_text)
    # Extract text from URL if provided
    if url:
        extracted_parts.append(extract_text_from_url(url))
    # NOTE(review): the extract_* helpers report failures as "Error reading ..."
    # strings, which are still forwarded to the model as if they were content.
    extracted_text = "".join(part + "\n" for part in extracted_parts)
    # Check if there's any extracted text to process
    if not extracted_text.strip():
        message = "No valid input provided to generate questions."
        if notices:
            message += "\n" + "\n".join(notices)
        return message, ""
    # Generate questions from the extracted text
    mcqs, subjective = generate_questions(extracted_text, num_mcqs, num_subjective, difficulty_mcqs, difficulty_subjective)
    if notices:
        mcqs = "\n".join(notices) + "\n\n" + mcqs
    return mcqs, subjective
# Input widgets, listed in the positional order of process_and_generate's
# parameters: files, raw text, URL, question counts, then difficulty levels.
inputs = [
    gr.File(type="filepath", file_count="multiple"),
    gr.Textbox(label="Raw Text (Optional)", placeholder="Enter raw text here...", lines=2),
    gr.Textbox(label="URL (Optional)", placeholder="Enter URL here...", lines=1),
    gr.Slider(label="Number of MCQs (Max 40)", minimum=1, maximum=40, step=1, value=5),
    gr.Slider(label="Number of Subjective Questions (Max 20)", minimum=1, maximum=20, step=1, value=2),
    gr.Radio(["easy", "medium", "hard"], value="medium", label="Select Difficulty Level for MCQs"),
    gr.Radio(["easy", "medium", "hard"], value="medium", label="Select Difficulty Level for Subjective Questions"),
]

# One ten-line text box per element of the (mcqs, subjective) tuple returned
# by process_and_generate.
mcq_output = gr.Textbox(lines=10, label="Generated MCQs")
subjective_output = gr.Textbox(lines=10, label="Generated Subjective Questions")

# Assemble the interface and start it with a public share link.
demo = gr.Interface(
    title="MCQ & Subjective Question Generator",
    fn=process_and_generate,
    inputs=inputs,
    outputs=[mcq_output, subjective_output],
    live=False,
    theme="default",
)
demo.launch(share=True)