KubraBashir
committed on
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
import fitz # PyMuPDF for PDF extraction
|
4 |
+
from pptx import Presentation
|
5 |
+
from docx import Document
|
6 |
+
from groq import Groq
|
7 |
+
|
8 |
+
# Groq client setup: the API key is looked up in the process environment
# (GROQ_API_KEY) rather than being written into the source.
api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=api_key)
|
11 |
+
|
12 |
+
# File Extraction Functions
|
13 |
+
def extract_text_from_pdf(file_path):
    """Return the plain text of every page of a PDF, in page order.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The text of all pages concatenated together, or a string starting
        with "Error reading PDF:" when the file cannot be opened or parsed.
    """
    try:
        # Opening the document as a context manager guarantees it is closed
        # (releasing the underlying file handle) even if a page fails to
        # parse part-way through.
        with fitz.open(file_path) as pdf_file:
            return "".join(
                pdf_file.load_page(page_num).get_text()
                for page_num in range(pdf_file.page_count)
            )
    except Exception as e:
        # Failures are reported as text rather than raised, so the function
        # always returns a string.
        return f"Error reading PDF: {e}"
|
23 |
+
|
24 |
+
def extract_text_from_ppt(file_path):
    """Collect the text of every text-bearing shape in a presentation.

    Args:
        file_path: Path of the presentation file to read.

    Returns:
        Each shape's text followed by a newline, in slide and shape order,
        or a string starting with "Error reading PPT:" on failure.
    """
    try:
        deck = Presentation(file_path)
        # Shapes without a ``text`` attribute are skipped.
        chunks = [
            shape.text + "\n"
            for slide in deck.slides
            for shape in slide.shapes
            if hasattr(shape, 'text')
        ]
    except Exception as e:
        return f"Error reading PPT: {e}"
    return "".join(chunks)
|
35 |
+
|
36 |
+
def extract_text_from_word(file_path):
    """Collect the text of every paragraph in a Word document.

    Args:
        file_path: Path of the document file to read.

    Returns:
        Each paragraph's text followed by a newline, in document order, or a
        string starting with "Error reading Word file:" on failure.
    """
    try:
        paragraphs = Document(file_path).paragraphs
        body = "".join(para.text + "\n" for para in paragraphs)
    except Exception as e:
        return f"Error reading Word file: {e}"
    return body
|
45 |
+
|
46 |
+
def process_files(file_paths):
    """Extract and concatenate the text of a batch of uploaded files.

    Files are dispatched on their extension (.pdf, .pptx, .docx). The match
    is case-insensitive so that names such as ``REPORT.PDF`` are handled
    instead of being reported as unsupported.

    Args:
        file_paths: Iterable of file path strings; ``None`` or an empty
            iterable yields an empty string.

    Returns:
        The extracted text of all files in input order. Files with an
        unrecognised extension contribute an
        "Unsupported file format: <path>" line instead.
    """
    pieces = []
    for file_path in file_paths or ():
        # Compare on a lower-cased copy; the original path is kept for both
        # the extractor call and the message shown to the user.
        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            pieces.append(extract_text_from_pdf(file_path))
        elif lowered.endswith(".pptx"):
            pieces.append(extract_text_from_ppt(file_path))
        elif lowered.endswith(".docx"):
            pieces.append(extract_text_from_word(file_path))
        else:
            pieces.append(f"Unsupported file format: {file_path}\n")
    return "".join(pieces)
|
58 |
+
|
59 |
+
# Generate MCQs and Subjective Questions Using Groq
|
60 |
+
def _split_question_sections(response):
    """Split a model reply into (mcq_text, subjective_text) at the subjective heading."""
    mcq_lines, subjective_lines = [], []
    in_subjective = False
    for line in response.split("\n"):
        # Accept the exact bold marker anywhere in the line, and also a line
        # that *begins* with the heading in another markdown style
        # ("## Subjective Questions", "Subjective questions:", ...).
        heading = line.strip().lstrip("*# ").lower()
        if "**Subjective Questions**" in line or heading.startswith("subjective questions"):
            in_subjective = True
        (subjective_lines if in_subjective else mcq_lines).append(line + "\n")
    return "".join(mcq_lines), "".join(subjective_lines)


def generate_questions(text, num_mcqs=5, num_subjective=2, difficulty_mcqs="medium", difficulty_subjective="medium", question_type="mix"):
    """Ask the Groq chat model for MCQs and subjective questions about ``text``.

    Args:
        text: Source material the questions are generated from.
        num_mcqs: Requested number of multiple choice questions; coerced to
            int and clamped to the range 0-40.
        num_subjective: Requested number of subjective questions; coerced to
            int and clamped to the range 0-20.
        difficulty_mcqs: "easy", "medium" or "hard"; any other value is
            treated as "medium".
        difficulty_subjective: Same as ``difficulty_mcqs``, for the
            subjective questions.
        question_type: "reason", "short", "long", "case study" or "mix"; any
            other value is treated as "mix".

    Returns:
        A ``(mcqs, subjective)`` tuple of strings. On any failure the first
        element is "Error generating questions: <reason>" and the second is
        an empty string.
    """
    try:
        # UI sliders may hand over floats; negative counts make no sense.
        num_mcqs = max(0, min(int(num_mcqs), 40))
        num_subjective = max(0, min(int(num_subjective), 20))

        difficulty_levels = {
            "easy": "simple questions with direct answers.",
            "medium": "moderate complexity questions requiring reasoning.",
            "hard": "challenging questions requiring deep understanding.",
        }

        question_type_map = {
            "reason": "Generate reasoning-based questions.",
            "short": "Generate short-answer questions.",
            "long": "Generate long-answer questions.",
            "case study": "Generate case study-based questions.",
            "mix": "Generate a mix of question types.",
        }

        # Unknown keys fall back to the full default instruction; previously
        # the bare word 'medium' / 'mix' was spliced into the prompt.
        mcq_level = difficulty_levels.get(difficulty_mcqs, difficulty_levels["medium"])
        subjective_level = difficulty_levels.get(difficulty_subjective, difficulty_levels["medium"])
        type_instruction = question_type_map.get(question_type, question_type_map["mix"])

        # The closing sentence asks for the exact section headings the
        # response splitter looks for; without it the model is free to pick
        # any layout and the subjective output may come back empty.
        prompt = (
            f"Generate {num_mcqs} multiple choice questions and {num_subjective} subjective questions from the following text: {text}. "
            "Include the correct answers for each question. "
            f"The questions should be {mcq_level} for MCQs and {subjective_level} for Subjective questions. "
            f"{type_instruction} "
            "Start the multiple choice section with a line reading exactly **Multiple Choice Questions** "
            "and the subjective section with a line reading exactly **Subjective Questions**."
        )

        # NOTE(review): the whole extracted text is sent in a single message;
        # very long documents may exceed the model's context window — confirm
        # whether truncation or chunking is wanted.
        # NOTE(review): confirm "llama3-8b-8192" is still served by Groq.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",
        )

        # Guard against an empty (None) message body.
        response = (chat_completion.choices[0].message.content or "").strip()

        return _split_question_sections(response)

    except Exception as e:
        return f"Error generating questions: {e}", ""
|
104 |
+
|
105 |
+
# Gradio Interface Function
|
106 |
+
def process_and_generate(file_paths, raw_text, num_mcqs, num_subjective, difficulty_mcqs, difficulty_subjective, question_type):
    """Gradio callback: gather text from uploads and the textbox, then generate questions.

    Args:
        file_paths: Uploaded file paths, or ``None`` / empty when nothing was
            uploaded.
        raw_text: Free text typed by the user; ``None`` is treated as empty.
        num_mcqs: Requested number of multiple choice questions.
        num_subjective: Requested number of subjective questions.
        difficulty_mcqs: Difficulty label for the MCQs.
        difficulty_subjective: Difficulty label for the subjective questions.
        question_type: Question style label.

    Returns:
        A ``(mcqs, subjective)`` tuple of strings for the two output boxes.
        When no usable text is supplied both elements carry the same
        "No text provided" message.
    """
    sources = []

    # Text extracted from uploaded files comes first.
    if file_paths:
        extracted_text = process_files(file_paths)
        if extracted_text.strip():
            sources.append(extracted_text)

    # A cleared textbox can arrive as None; normalise it before calling
    # .strip() so the callback does not raise AttributeError.
    raw_text = raw_text or ""
    if raw_text.strip():
        sources.append(raw_text)

    if not sources:
        return "No text provided to generate questions.", "No text provided to generate questions."

    combined_text = "\n".join(sources)

    try:
        mcqs, subjective = generate_questions(
            combined_text, num_mcqs, num_subjective, difficulty_mcqs, difficulty_subjective, question_type
        )
        return mcqs, subjective
    except Exception as e:
        return f"Error generating questions: {e}", f"Error generating questions: {e}"
|
132 |
+
|
133 |
+
# Input widgets, in the positional order expected by process_and_generate.
inputs = [
    gr.File(
        label="Upload Files (.pdf, .pptx, .docx)",
        file_count="multiple",
        type="filepath",
    ),
    gr.Textbox(
        label="Raw Text",
        placeholder="Enter raw text here (Optional)...",
        lines=3,
    ),
    gr.Slider(
        label="Number of MCQs (Max 40)",
        minimum=2,
        maximum=40,
        step=1,
        value=5,
    ),
    gr.Slider(
        label="Number of Subjective Questions (Max 20)",
        minimum=2,
        maximum=20,
        step=1,
        value=2,
    ),
    gr.Radio(
        ["easy", "medium", "hard"],
        label="Select Difficulty Level for MCQs",
        value="medium",
    ),
    gr.Radio(
        ["easy", "medium", "hard"],
        label="Select Difficulty Level for Subjective Questions",
        value="medium",
    ),
    gr.Radio(
        ["reason", "short", "long", "case study", "mix"],
        label="Select Type of Question",
        value="mix",
    ),
]

# Output widgets: one box per element of the callback's returned tuple.
outputs = [
    gr.Textbox(lines=10, label="Generated MCQs"),
    gr.Textbox(lines=10, label="Generated Subjective Questions"),
]

# Build the interface and start serving it; live=False means the callback
# runs on submit rather than on every input change.
gr.Interface(
    title="MCQ & Subjective Question Generator",
    fn=process_and_generate,
    inputs=inputs,
    outputs=outputs,
    live=False,
).launch()
|