AruniAnkur committed on
Commit
5aca58a
·
verified ·
1 Parent(s): 05b526e

Upload 2 files

Browse files
Files changed (2) hide show
  1. functionbloom.py +9 -4
  2. main.py +476 -0
functionbloom.py CHANGED
@@ -106,7 +106,7 @@ def get_bloom_taxonomy_scores(question: str) -> Dict[str, float]:
106
  return default_scores
107
 
108
 
109
- def generate_ai_response(api_key, assistant_context, user_query, role_description, response_instructions, bloom_taxonomy_weights, num_questions, question_length, include_numericals):
110
  try:
111
  url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={api_key}"
112
 
@@ -123,6 +123,8 @@ def generate_ai_response(api_key, assistant_context, user_query, role_descriptio
123
  **Role**: {role_description}
124
 
125
  **Context**: {assistant_context}
 
 
126
 
127
  **Instructions**: {response_instructions}
128
  Question Length Requirement: {length_guidelines[question_length]}
@@ -221,7 +223,7 @@ def generate_pdf(questions, filename="questions.pdf"):
221
  st.error(f"Error generating PDF: {e}")
222
  return None
223
 
224
- def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_description, response_instructions, bloom_taxonomy_weights, num_questions, question_length, include_numericals):
225
  try:
226
 
227
  pdf_path = get_pdf_path(pdf_source, uploaded_file)
@@ -245,7 +247,8 @@ def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_
245
  normalized_bloom_weights,
246
  num_questions,
247
  question_length,
248
- include_numericals
 
249
  )
250
 
251
  # Clean up temporary PDF file
@@ -385,4 +388,6 @@ def sendtogemini(inputpath, question):
385
  d['question'] = i
386
  d['score'] = predict_with_loaded_model(i)
387
  data.append(d)
388
- return data
 
 
 
106
  return default_scores
107
 
108
 
109
+ def generate_ai_response(api_key, assistant_context, user_query, role_description, response_instructions, bloom_taxonomy_weights, num_questions, question_length, include_numericals, user_input):
110
  try:
111
  url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={api_key}"
112
 
 
123
  **Role**: {role_description}
124
 
125
  **Context**: {assistant_context}
126
+
127
+ **User Query**: {user_input}
128
 
129
  **Instructions**: {response_instructions}
130
  Question Length Requirement: {length_guidelines[question_length]}
 
223
  st.error(f"Error generating PDF: {e}")
224
  return None
225
 
226
+ def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_description, response_instructions, bloom_taxonomy_weights, num_questions, question_length, include_numericals, user_input):
227
  try:
228
 
229
  pdf_path = get_pdf_path(pdf_source, uploaded_file)
 
247
  normalized_bloom_weights,
248
  num_questions,
249
  question_length,
250
+ include_numericals,
251
+ user_input
252
  )
253
 
254
  # Clean up temporary PDF file
 
388
  d['question'] = i
389
  d['score'] = predict_with_loaded_model(i)
390
  data.append(d)
391
+ return data
392
+
393
+
main.py ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional, Dict
2
+ import streamlit as st
3
+ import os
4
+ from dotenv import load_dotenv
5
+ import torch
6
+ from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
7
+ from torch.nn.functional import softmax
8
+ from doctr.models import ocr_predictor
9
+ from doctr.io import DocumentFile
10
+ from functionbloom import save_uploaded_file, get_pdf_path, extract_text_pymupdf, get_bloom_taxonomy_scores,generate_ai_response,normalize_bloom_weights, generate_pdf,process_pdf_and_generate_questions,get_bloom_taxonomy_details
11
+ from functionbloom import predict_with_loaded_model, process_document, sendtogemini
12
+
13
+
14
# Load variables from a local .env file into the process environment so that
# later os.getenv() lookups (e.g. the Gemini API key read in main()) can see them.
load_dotenv()

# Pick the GPU when one is available; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Fine-tuned DistilBERT classifier and its tokenizer, both read from the same
# local checkpoint directory.
tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')
model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')
model.to(device)

# Label name -> class index, and the inverse lookup (class index -> label name).
mapping = {
    label: index
    for index, label in enumerate(
        ("Remembering", "Understanding", "Applying", "Analyzing", "Evaluating", "Creating")
    )
}
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Pretrained docTR OCR pipeline (DB-ResNet50 detector + CRNN-VGG16-BN recognizer).
modelocr = ocr_predictor(
    det_arch='db_resnet50',
    reco_arch='crnn_vgg16_bn',
    pretrained=True,
)
23
+
24
# Bloom's taxonomy levels, in the order they are displayed in the scorer tab.
_BLOOM_CATEGORIES = (
    'Remembering', 'Understanding', 'Applying', 'Analyzing', 'Evaluating', 'Creating',
)


def _score_color(score, mid_threshold):
    """Return the CSS color name used to render *score*.

    Scores above 0.7 are green, scores above *mid_threshold* are orange and
    everything else is red.
    """
    if score > .7:
        return 'green'
    if score > mid_threshold:
        return 'orange'
    return 'red'


def _average_scores(scored_items):
    """Return the per-category mean of ``item['score']`` over *scored_items*.

    Each mean is rounded to three decimals. An empty *scored_items* yields 0
    for every category instead of dividing by zero.
    """
    count = len(scored_items)
    if count == 0:
        return {category: 0 for category in _BLOOM_CATEGORIES}
    return {
        category: round(
            sum(item['score'][category] for item in scored_items) / count,
            ndigits=3,
        )
        for category in _BLOOM_CATEGORIES
    }


def _inject_global_styles():
    """Emit the page-wide CSS (tabs, info button, modal, question container)."""
    st.markdown("""
    <style>
    .stTabs [data-baseweb="tab"] {
        margin-bottom: 1rem;
        flex: 1;
        justify-content: center;
    }
    .stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
        font-size: 2rem;
        padding: 0 2rem;
        font-weight: bold;
        margin: 0;
    }

    /* Information Button Styling */
    .info-button {
        background-color: #f0f2f6;
        border: 1px solid #4a6cf7;
        border-radius: 50%;
        width: 24px;
        height: 24px;
        display: inline-flex;
        align-items: center;
        justify-content: center;
        cursor: pointer;
        margin-left: 8px;
        font-weight: bold;
        color: #4a6cf7;
    }

    /* Modal Styling */
    .modal {
        display: none;
        position: fixed;
        z-index: 1000;
        left: 0;
        top: 0;
        width: 100%;
        height: 100%;
        overflow: auto;
        background-color: rgba(0,0,0,0.4);
    }

    .modal-content {
        background-color: #fefefe;
        margin: 15% auto;
        padding: 20px;
        border: 1px solid #888;
        width: 80%;
        max-width: 500px;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
    }

    .close-button {
        color: #aaa;
        float: right;
        font-size: 28px;
        font-weight: bold;
        cursor: pointer;
    }

    .close-button:hover,
    .close-button:focus {
        color: black;
        text-decoration: none;
        cursor: pointer;
    }

    /* Question Container Styling */
    .question-container {
        display: flex;
        align-items: start;
        gap: 10px;
        margin-bottom: 10px;
    }

    /* Info Button Styling */
    .info-button {
        background-color: #f0f2f6;
        border: 1px solid #4a6cf7;
        border-radius: 50%;
        width: 24px;
        height: 24px;
        display: inline-flex;
        align-items: center;
        justify-content: center;
        cursor: pointer;
        font-weight: bold;
        color: #4a6cf7;
        flex-shrink: 0;
        font-size: 14px;
    }

    .info-button:hover {
        background-color: #4a6cf7;
        color: white;
    }

    /* Modal Styling */
    .modal {
        display: none;
        position: fixed;
        z-index: 9999;
        left: 0;
        top: 0;
        width: 100%;
        height: 100%;
        background-color: rgba(0,0,0,0.4);
    }

    .modal-content {
        background-color: #fefefe;
        margin: 15% auto;
        padding: 20px;
        border: 1px solid #888;
        width: 80%;
        max-width: 500px;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0,0,0,0.1);
        position: relative;
    }

    .close-button {
        position: absolute;
        right: 10px;
        top: 5px;
        color: #aaa;
        font-size: 28px;
        font-weight: bold;
        cursor: pointer;
    }

    .close-button:hover,
    .close-button:focus {
        color: black;
        text-decoration: none;
        cursor: pointer;
    }
    </style>
    """, unsafe_allow_html=True)


def _init_session_state():
    """Seed every session-state key the app reads, without overwriting existing values."""
    # Built inside the function so each session gets its own list/dict objects.
    defaults = {
        'totalscore': None,
        'show_details': False,
        'question_scores': {},
        'pdf_source_type': "URL",
        'pdf_url': "",
        'uploaded_file': None,
        'questions': [],
        'accepted_questions': [],
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _render_question_review():
    """Show generated questions with Accept/Discard controls and the PDF export."""
    st.header("Generated Questions")

    # A form batches the radio changes so the page only reruns on submit.
    with st.form(key='questions_form'):
        for idx, question in enumerate(st.session_state.questions, 1):
            question_col, choice_col = st.columns([4, 1])

            with question_col:
                st.write(f"Q{idx}: {question}")
                with st.expander("Show Bloom's Taxonomy Details"):
                    taxonomy_details = get_bloom_taxonomy_details(
                        st.session_state.question_scores.get(question)
                    )
                    st.text(taxonomy_details)

            with choice_col:
                selected_option = st.radio(
                    f"Select an option for Q{idx}",
                    ["Accept", "Discard"],
                    key=f"radio_{idx}",
                    index=1
                )

            # Keep accepted_questions in sync with the current radio selection.
            accepted = st.session_state.accepted_questions
            if selected_option == "Accept":
                if question not in accepted:
                    accepted.append(question)
            elif question in accepted:
                accepted.remove(question)

        st.form_submit_button("Update Accepted Questions")

    if not st.session_state.accepted_questions:
        st.info("No questions selected yet.")
        return

    st.header("Accepted Questions")
    for accepted_question in st.session_state.accepted_questions:
        st.write(accepted_question)

    if st.button("Download Accepted Questions as PDF"):
        filename = generate_pdf(st.session_state.accepted_questions, filename="accepted_questions.pdf")
        if filename:
            with open(filename, "rb") as pdf_file:
                st.download_button(
                    label="Click to Download PDF",
                    data=pdf_file,
                    file_name="accepted_questions.pdf",
                    mime="application/pdf"
                )
            st.success("PDF generated successfully!")


def _render_question_generator_tab():
    """Render the "Question Generator" tab: input form, generation and review."""
    st.markdown("<h1 style='font-size: 28px;'>🎓 Academic Paper Question Generator</h1>", unsafe_allow_html=True)
    st.markdown("Generate insightful questions from academic papers using Bloom's Taxonomy")

    # The key is read from the environment (populated by load_dotenv at import time).
    api_key = os.getenv('GEMINI_API_KEY')

    with st.form(key='pdf_generation_form'):
        st.subheader("PDF Source")

        st.session_state.pdf_url = st.text_input(
            "Enter the URL of the PDF",
            value=st.session_state.pdf_url,
            key="pdf_url_input"
        )

        st.markdown("<h4 style='text-align: center;'>OR</h4>", unsafe_allow_html=True)

        st.session_state.uploaded_file = st.file_uploader(
            "Upload a PDF file",
            type=['pdf'],
            key="pdf_file_upload"
        )

        st.session_state.user_input = st.text_area("Enter your query here", key="input", height=100)

        question_length = st.select_slider(
            "Select Question Length",
            options=["Short", "Medium", "Long"],
            value="Medium",
            help="Short: 10-15 words, Medium: 20-25 words, Long: 30-40 words"
        )

        st.session_state.include_numericals = st.checkbox("Include Numericals", key="include_numericals_checkbox")

        st.subheader("Adjust Bloom's Taxonomy Weights")
        col1, col2, col3 = st.columns(3)

        with col1:
            knowledge = st.slider("Knowledge: Remembering", 0, 100, 20, key='knowledge_slider')
            application = st.slider("Applying: Using abstractions in concrete situations", 0, 100, 20, key='application_slider')

        with col2:
            comprehension = st.slider("Understanding: Explaining the meaning of information", 0, 100, 20, key='comprehension_slider')
            analysis = st.slider("Analyzing: Breaking down a whole into component parts", 0, 100, 20, key='analysis_slider')

        with col3:
            synthesis = st.slider("Creating: Putting parts together to form a new and integrated whole", 0, 100, 10, key='synthesis_slider')
            evaluation = st.slider("Evaluation: Making and defending judgments based on internal evidence or external criteria", 0, 100, 10, key='evaluation_slider')

        bloom_taxonomy_weights = {
            "Knowledge": knowledge,
            "Comprehension": comprehension,
            "Application": application,
            "Analysis": analysis,
            "Synthesis": synthesis,
            "Evaluation": evaluation
        }

        num_questions = st.slider("How many questions would you like to generate?", min_value=1, max_value=20, value=5, key='num_questions_slider')

        submit_button = st.form_submit_button(label='Generate Questions')

    if submit_button:
        if not api_key:
            st.error("Please enter a valid Gemini API key.")
        elif not st.session_state.pdf_url and not st.session_state.uploaded_file:
            st.error("Please enter a PDF URL or upload a PDF file.")
        else:
            normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)

            st.info("Normalized Bloom's Taxonomy Weights:")
            st.json(normalized_bloom_weights)

            role_description = "You are a question-generating AI agent, given context and instruction, you need to generate questions from the context."
            response_instructions = "Please generate questions that are clear and relevant to the content of the paper. Generate questions which are separated by new lines, without any numbering or additional context."

            with st.spinner('Generating questions...'):
                st.session_state.questions = process_pdf_and_generate_questions(
                    pdf_source=st.session_state.pdf_url if st.session_state.pdf_url else None,
                    uploaded_file=st.session_state.uploaded_file if st.session_state.uploaded_file else None,
                    api_key=api_key,
                    role_description=role_description,
                    response_instructions=response_instructions,
                    bloom_taxonomy_weights=normalized_bloom_weights,
                    num_questions=num_questions,
                    question_length=question_length,
                    include_numericals=st.session_state.include_numericals,
                    user_input=st.session_state.user_input
                )

    if st.session_state.questions:
        _render_question_review()

    # Footer shown regardless of whether questions exist yet.
    st.markdown("---")
    st.markdown("""
    ### About this Tool
    - Generate academic paper questions using Bloom's Taxonomy
    - Customize question generation weights
    - Select and refine generated questions
    - Support for PDF via URL or local upload
    """)


def _render_score_results(scored_items):
    """Render the averaged score row and the per-question score breakdown."""
    # Function-scope import: the file's top-level import block is left untouched.
    import html

    averages = _average_scores(scored_items)

    for col, category in zip(st.columns(6), _BLOOM_CATEGORIES):
        with col:
            score = averages[category]
            color = _score_color(score, .4)
            # The value is a mean of per-question scores, so it is shown out
            # of 1 (same scale as the per-question rows below).
            st.markdown(f"""
            <div class="score-breakdown">
                <div class="score-header" style="color: {color}">{category}</div>
                <div style="font-size: 24px; color: {color};">{score}/1</div>
            </div>
            """, unsafe_allow_html=True)

    with st.expander("Show Detailed Scores", expanded=True):
        total = len(scored_items)
        for idx, item in enumerate(scored_items, 1):
            # Question text comes from OCR / model output; escape it before
            # embedding in raw HTML so it cannot inject markup.
            safe_question = html.escape(str(item["question"]))
            st.markdown(
                f'<div class="score-header">Question {idx}: {safe_question}</div>',
                unsafe_allow_html=True,
            )

            for col, category in zip(st.columns(6), _BLOOM_CATEGORIES):
                with col:
                    score = round(item['score'][category], ndigits=3)
                    color = _score_color(score, .3)
                    st.markdown(f"""
                    <div style="text-align: center;
                                background-color: #f1f1f1;
                                border-radius: 5px;
                                padding: 5px;
                                margin-bottom: 5px;">
                        <div style="font-weight: bold; color: {color};">{category}</div>
                        <div style="font-size: 18px; color: {color};">{score}/1</div>
                    </div>
                    """, unsafe_allow_html=True)

            # Separator between questions, but not after the last one.
            if idx < total:
                st.markdown('---')


def _render_paper_scorer_tab():
    """Render the "Paper Scorer" tab: upload/paste form and score display."""
    st.markdown("<h1 style='font-size: 28px;'>📄 Academic Paper Scorer</h1>", unsafe_allow_html=True)
    st.markdown("Evaluate the Quality of Your Academic Paper")

    st.markdown("""
    <style>
    .upload-container {
        background-color: #f0f2f6;
        border-radius: 10px;
        padding: 20px;
        border: 2px dashed #4a6cf7;
        text-align: center;
    }
    .score-breakdown {
        background-color: #f8f9fa;
        border-radius: 8px;
        padding: 15px;
        margin-bottom: 15px;
    }
    .score-header {
        font-weight: bold;
        color: #4a6cf7;
        margin-bottom: 10px;
    }
    </style>
    """, unsafe_allow_html=True)

    with st.form(key='paper_scorer_form'):
        st.header("Upload Your Academic Paper")
        uploaded_file = st.file_uploader(
            "Choose a PDF file",
            type=['pdf', 'jpg', 'png', 'jpeg'],
            label_visibility="collapsed"
        )

        st.markdown("<div style='text-align: center; margin-top: 20px;'><strong>OR</strong></div>", unsafe_allow_html=True)
        # The widget key keeps the text in st.session_state.question_typed.
        question_typed = st.text_area("Paste your question here", key="question_typed")
        submit_button = st.form_submit_button(
            "Score Paper",
            use_container_width=True,
            type="primary"
        )

    if not submit_button:
        return

    # Nothing to score: neither a file nor pasted text was supplied.
    if uploaded_file is None and not (question_typed or "").strip():
        st.error("Please upload a file or paste a question to score.")
        return

    # Only persist the upload when there is one; the pasted-question path has no file.
    pdf_path = save_uploaded_file(uploaded_file) if uploaded_file is not None else None
    scored_items = sendtogemini(inputpath=pdf_path, question=question_typed)

    # Guard against an empty/None result so averaging never divides by zero.
    if not scored_items:
        st.warning("No questions could be extracted to score.")
        return

    _render_score_results(scored_items)


def main():
    """Build the Streamlit page: global styles, then the generator and scorer tabs."""
    # set_page_config has to run before any other Streamlit command.
    st.set_page_config(page_title="Academic Paper Tool", page_icon="📝", layout="wide")

    _inject_global_styles()

    tab1, tab2 = st.tabs(["Question Generator", "Paper Scorer"])

    _init_session_state()

    with tab1:
        _render_question_generator_tab()

    with tab2:
        _render_paper_scorer_tab()


# Run Streamlit app
if __name__ == "__main__":
    main()