File size: 5,939 Bytes
fdcd573 911ed13 fdcd573 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import gradio as gr
import os
import fitz # PyMuPDF for PDF extraction
from pptx import Presentation
from docx import Document
from groq import Groq
# Initialize Groq Client
# The API key is read from the environment so that no secret is stored in the
# source file. NOTE(review): a key that was ever committed with this file must
# be treated as leaked and revoked in the Groq console.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    # Fail at start-up with an actionable message instead of on the first
    # request made through the UI.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "define it (for example as a deployment secret) before starting the app."
    )
client = Groq(api_key=api_key)
# File Extraction Functions
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of a PDF.

    Args:
        file_path: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        The text of all pages joined in page order, or the string
        ``"Error reading PDF: <exception>"`` if opening or reading fails.
    """
    try:
        # The context manager closes the document on every exit path,
        # including when a page fails to load part-way through.
        with fitz.open(file_path) as pdf_file:
            return "".join(
                pdf_file.load_page(page_num).get_text()
                for page_num in range(pdf_file.page_count)
            )
    except Exception as e:
        # The caller appends the return value to a text buffer, so failures
        # are reported as text rather than raised.
        return f"Error reading PDF: {e}"
def extract_text_from_ppt(file_path):
    """Collect the text of every text-bearing shape in a presentation.

    Slides are visited in order, and within each slide every shape that
    exposes a ``text`` attribute contributes its text followed by a newline.

    Returns the collected text, or the string
    ``"Error reading PPT: <exception>"`` if loading or reading fails.
    """
    chunks = []
    try:
        for slide in Presentation(file_path).slides:
            for shape in slide.shapes:
                # Shapes without a text attribute are skipped.
                if not hasattr(shape, 'text'):
                    continue
                chunks.append(shape.text + "\n")
    except Exception as err:
        # Any partially collected text is discarded in favour of the message.
        return f"Error reading PPT: {err}"
    return "".join(chunks)
def extract_text_from_word(file_path):
    """Return the text of all paragraphs of a Word document, one per line.

    Every paragraph's text is followed by a newline. If the document cannot
    be opened or read, the string ``"Error reading Word file: <exception>"``
    is returned instead.
    """
    try:
        paragraphs = Document(file_path).paragraphs
        body = "".join(paragraph.text + "\n" for paragraph in paragraphs)
    except Exception as err:
        return f"Error reading Word file: {err}"
    return body
def process_files(file_paths):
    """Extract and concatenate the text of every supported file.

    Args:
        file_paths: Iterable of file path strings. ``None`` or an empty
            iterable yields an empty string.

    Returns:
        The text extracted from all files, concatenated in input order. A
        file whose extension is not ``.pdf``, ``.pptx`` or ``.docx``
        contributes the line ``"Unsupported file format: <path>"`` instead.
    """
    parts = []
    for file_path in file_paths or ():
        # Match the extension on a lower-cased copy so "REPORT.PDF" is routed
        # like "report.pdf"; the original path is what gets opened/reported.
        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            parts.append(extract_text_from_pdf(file_path))
        elif lowered.endswith(".pptx"):
            parts.append(extract_text_from_ppt(file_path))
        elif lowered.endswith(".docx"):
            parts.append(extract_text_from_word(file_path))
        else:
            parts.append(f"Unsupported file format: {file_path}\n")
    return "".join(parts)
# Generate MCQs and Subjective Questions Using Groq
def generate_questions(text, num_mcqs=5, num_subjective=2, difficulty_mcqs="medium", difficulty_subjective="medium", question_type="mix"):
    """Ask the Groq chat model for MCQs and subjective questions about *text*.

    Args:
        text: Source material the questions are generated from.
        num_mcqs: Requested number of multiple choice questions; coerced to
            an int and clamped to the range 0-40.
        num_subjective: Requested number of subjective questions; coerced to
            an int and clamped to the range 0-20.
        difficulty_mcqs: ``"easy"``, ``"medium"`` or ``"hard"``; any other
            value is treated as ``"medium"``.
        difficulty_subjective: Same choices as ``difficulty_mcqs``.
        question_type: ``"reason"``, ``"short"``, ``"long"``,
            ``"case study"`` or ``"mix"``; any other value is treated as
            ``"mix"``.

    Returns:
        A ``(mcqs, subjective)`` tuple of strings. Everything before the
        subjective heading goes into ``mcqs``; the heading line and what
        follows go into ``subjective``. On any failure the tuple is
        ``("Error generating questions: <exception>", "")``.
    """
    try:
        num_mcqs = max(0, min(int(num_mcqs), 40))
        num_subjective = max(0, min(int(num_subjective), 20))
        difficulty_levels = {
            "easy": "simple questions with direct answers.",
            "medium": "moderate complexity questions requiring reasoning.",
            "hard": "challenging questions requiring deep understanding.",
        }
        question_type_map = {
            "reason": "Generate reasoning-based questions.",
            "short": "Generate short-answer questions.",
            "long": "Generate long-answer questions.",
            "case study": "Generate case study-based questions.",
            "mix": "Generate a mix of question types.",
        }
        # Unknown keys fall back to the full description of the default
        # option, not to the bare key, so the prompt stays a valid sentence.
        mcq_level = difficulty_levels.get(difficulty_mcqs, difficulty_levels["medium"])
        subjective_level = difficulty_levels.get(difficulty_subjective, difficulty_levels["medium"])
        type_instruction = question_type_map.get(question_type, question_type_map["mix"])

        # The response is split on this heading below, so the prompt has to
        # request it explicitly.
        heading = "**Subjective Questions**"
        prompt = (
            f"Generate {num_mcqs} multiple choice questions and {num_subjective} subjective questions from the following text: {text}. "
            "Include the correct answers for each question. "
            f"The questions should be {mcq_level} for MCQs and {subjective_level} for Subjective questions. "
            f"{type_instruction} "
            f'List the multiple choice questions first, then put the heading "{heading}" '
            "on its own line before the subjective questions."
        )
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",
        )
        response = chat_completion.choices[0].message.content.strip()

        # Split response into MCQs and Subjective questions
        mcq_lines, subjective_lines = [], []
        target = mcq_lines
        for line in response.split("\n"):
            # Accept the exact heading anywhere in the line, or a line that
            # is only the heading with different decoration, such as
            # "## Subjective Questions" or "**Subjective Questions:**".
            bare = line.strip().strip("*#: ").lower()
            if heading in line or bare == "subjective questions":
                target = subjective_lines
            target.append(line + "\n")
        return "".join(mcq_lines), "".join(subjective_lines)
    except Exception as e:
        # UI boundary: report the failure as text in the first output box.
        return f"Error generating questions: {e}", ""
# Gradio Interface Function
def process_and_generate(file_paths, raw_text, num_mcqs, num_subjective, difficulty_mcqs, difficulty_subjective, question_type):
    """Combine uploaded-file text and raw text, then generate questions.

    Args:
        file_paths: Paths of uploaded files, or a falsy value when nothing
            was uploaded.
        raw_text: Free text typed by the user; may be empty or ``None``.
        num_mcqs: Number of multiple choice questions to request.
        num_subjective: Number of subjective questions to request.
        difficulty_mcqs: Difficulty label for the MCQs.
        difficulty_subjective: Difficulty label for the subjective questions.
        question_type: Kind of questions to request.

    Returns:
        A ``(mcqs, subjective)`` tuple of strings. When no usable text is
        supplied both items are ``"No text provided to generate questions."``;
        when generation raises, both items carry the error message.
    """
    sources = []

    # Extract text from uploaded files
    if file_paths:
        extracted_text = process_files(file_paths)
        if extracted_text.strip():
            sources.append(extracted_text)

    # Add raw text if provided. The truthiness check guards against None as
    # well as "", so a missing textbox value does not raise AttributeError.
    if raw_text and raw_text.strip():
        sources.append(raw_text)

    if not sources:
        message = "No text provided to generate questions."
        return message, message

    # File text comes first, separated from the raw text by a newline.
    combined_text = "\n".join(sources)
    try:
        mcqs, subjective = generate_questions(
            combined_text, num_mcqs, num_subjective, difficulty_mcqs, difficulty_subjective, question_type
        )
        return mcqs, subjective
    except Exception as e:
        return f"Error generating questions: {e}", f"Error generating questions: {e}"
# Gradio Inputs and Outputs
# The seven input components are listed in the same order as the parameters
# of process_and_generate; the two outputs match its two return values.
inputs = [
    gr.File(
        label="Upload Files (.pdf, .pptx, .docx)",
        file_count="multiple",
        type="filepath",
    ),
    gr.Textbox(
        label="Raw Text",
        placeholder="Enter raw text here (Optional)...",
        lines=3,
    ),
    gr.Slider(
        label="Number of MCQs (Max 40)",
        minimum=2,
        maximum=40,
        step=1,
        value=5,
    ),
    gr.Slider(
        label="Number of Subjective Questions (Max 20)",
        minimum=2,
        maximum=20,
        step=1,
        value=2,
    ),
    gr.Radio(
        choices=["easy", "medium", "hard"],
        value="medium",
        label="Select Difficulty Level for MCQs",
    ),
    gr.Radio(
        choices=["easy", "medium", "hard"],
        value="medium",
        label="Select Difficulty Level for Subjective Questions",
    ),
    gr.Radio(
        choices=["reason", "short", "long", "case study", "mix"],
        value="mix",
        label="Select Type of Question",
    ),
]
outputs = [
    gr.Textbox(lines=10, label="Generated MCQs"),
    gr.Textbox(lines=10, label="Generated Subjective Questions"),
]

# Gradio Interface
# Build the UI around process_and_generate and start serving it; live=False
# is passed through unchanged.
gr.Interface(
    title="MCQ & Subjective Question Generator",
    fn=process_and_generate,
    inputs=inputs,
    outputs=outputs,
    live=False,
).launch()
|