AruniAnkur committed on
Commit
8c84d02
·
verified ·
1 Parent(s): a42c968

added a large thing

Browse files
Files changed (1) hide show
  1. app.py +499 -40
app.py CHANGED
@@ -1,54 +1,513 @@
1
  import streamlit as st
2
- import torch
3
- from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
4
- from torch.nn.functional import softmax
 
 
 
 
 
 
5
 
6
- # Load the model and tokenizer
7
- model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')
8
- tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')
9
 
10
- # Device setup
11
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
- model.to(device)
 
 
 
 
 
 
 
 
 
 
13
 
14
- mapping = {"Remembering": 0, "Understanding": 1, "Applying": 2, "Analyzing": 3, "Evaluating": 4, "Creating": 5}
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- # Reverse the mapping to get the class name from the index
17
- reverse_mapping = {v: k for k, v in mapping.items()}
 
 
 
 
18
 
19
- def predict_with_loaded_model(text):
20
- # Tokenize the input text
21
- inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
22
- input_ids = inputs['input_ids'].to(device)
 
 
 
 
 
 
 
 
23
 
24
- model.eval()
25
- with torch.no_grad():
26
- # Get the raw logits from the model
27
- outputs = model(input_ids)
28
- logits = outputs.logits
29
 
30
- # Apply softmax to get probabilities
31
- probabilities = softmax(logits, dim=-1)
32
-
33
- # Convert probabilities to a list or dictionary of class probabilities
34
- probabilities = probabilities.squeeze().cpu().numpy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- # Map the probabilities to the class labels using the reverse mapping
37
- class_probabilities = {reverse_mapping[i]: prob for i, prob in enumerate(probabilities)}
 
 
 
 
 
 
38
 
39
- return class_probabilities
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- # Streamlit App
42
- st.title("Question Bloom Score Prediction")
43
 
44
- # Create an input box for the user to enter a question
45
- question = st.text_area("Enter a question:")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- # If a question is entered, make the prediction
48
- if question:
49
- class_probabilities = predict_with_loaded_model(question)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- # Display the probabilities for each class label
52
- st.write("**Class Probabilities (Bloom Scores)**")
53
- for class_label, prob in class_probabilities.items():
54
- st.write(f"{class_label}: {prob:.4f}")
 
1
  import streamlit as st
2
+ import requests
3
+ import json
4
+ import fitz # PyMuPDF
5
+ from fpdf import FPDF
6
+ import os
7
+ import tempfile
8
+ import base64
9
+ import dotenv
10
+ from dotenv import load_dotenv
11
 
12
+ load_dotenv()
 
 
13
 
14
+ # Previous functions from Question Generator
15
def get_pdf_path(pdf_source=None, uploaded_file=None):
    """Store a PDF in a fresh temporary directory and return its path.

    An uploaded file takes precedence over a URL when both are supplied.

    Args:
        pdf_source: URL of a PDF to download, or a falsy value.
        uploaded_file: Object exposing ``name`` and ``getvalue()`` (for
            example a Streamlit upload), or ``None``.

    Returns:
        The path of the saved PDF, or ``None`` when no source was given or
        saving/downloading failed (the error is reported via ``st.error``).
    """
    try:
        # Locally uploaded file
        if uploaded_file is not None:
            temp_dir = tempfile.mkdtemp()
            # Keep only the final path component so a crafted upload name
            # such as "../x.pdf" cannot escape the temporary directory.
            safe_name = os.path.basename(uploaded_file.name) or "uploaded.pdf"
            pdf_path = os.path.join(temp_dir, safe_name)

            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(uploaded_file.getvalue())
            return pdf_path

        # Remote PDF referenced by URL
        if pdf_source:
            response = requests.get(pdf_source, timeout=30)
            response.raise_for_status()

            # The directory is created only after the download succeeded so
            # a failed request leaves nothing behind.
            temp_dir = tempfile.mkdtemp()
            pdf_path = os.path.join(temp_dir, "downloaded.pdf")

            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(response.content)
            return pdf_path

        # Neither an upload nor a URL was provided
        st.error("No PDF source provided.")
        return None
    except Exception as e:
        st.error(f"Error getting PDF: {e}")
        return None
47
 
48
def extract_text_pymupdf(pdf_path):
    """Return the text of every page of a PDF joined into one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined with single spaces, or ``""`` when the file
        cannot be opened or parsed (the error is reported via ``st.error``).
    """
    try:
        # The context manager closes the document even when a page fails
        # to parse part-way through.
        with fitz.open(pdf_path) as doc:
            return " ".join(page.get_text() for page in doc)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""
60
 
61
def generate_ai_response(api_key, assistant_context, user_query, role_description, response_instructions, bloom_taxonomy_weights, num_questions):
    """Ask the Gemini API for questions about the given context.

    Args:
        api_key: Gemini API key.
        assistant_context: Text the questions should be based on.
        user_query: The request placed in the prompt's "Query" section.
        role_description: Role text placed in the prompt.
        response_instructions: Formatting instructions placed in the prompt.
        bloom_taxonomy_weights: Mapping with the keys "Knowledge",
            "Comprehension", "Application", "Analysis", "Synthesis" and
            "Evaluation"; each value is rendered as a percentage.
        num_questions: Number of questions requested in the prompt.

    Returns:
        The non-empty, stripped lines of the model's text reply, or ``[]``
        when the request fails or the reply holds no text (the problem is
        reported through Streamlit).
    """
    try:
        url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"

        weight_lines = [
            f"{level}: {bloom_taxonomy_weights[level]}%"
            for level in (
                "Knowledge",
                "Comprehension",
                "Application",
                "Analysis",
                "Synthesis",
                "Evaluation",
            )
        ]
        prompt = "\n".join([
            "You are a highly knowledgeable assistant. Your task is to assist the user with the following context from an academic paper.",
            "",
            f"**Role**: {role_description}",
            "",
            f"**Context**: {assistant_context}",
            "",
            f"**Instructions**: {response_instructions}",
            "",
            "**Bloom's Taxonomy Weights**:",
            *weight_lines,
            "",
            f"**Query**: {user_query}",
            "",
            f"**Number of Questions**: {num_questions}",
        ])

        payload = {"contents": [{"parts": [{"text": prompt}]}]}
        # The key travels in a header rather than the query string so it
        # cannot show up in the URL embedded in request error messages,
        # which are displayed to the user below.
        headers = {
            "Content-Type": "application/json",
            "x-goog-api-key": api_key,
        }

        response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=60)
        response.raise_for_status()

        result = response.json()
        # `or [{}]` also covers keys that are present but hold an empty list.
        candidates = result.get("candidates") or [{}]
        parts = candidates[0].get("content", {}).get("parts") or [{}]
        questions = parts[0].get("text", "")
        if not questions:
            st.warning("The model returned no questions.")
            return []
        return [line.strip() for line in questions.split("\n") if line.strip()]
    except requests.RequestException as e:
        st.error(f"API request error: {e}")
        return []
    except Exception as e:
        st.error(f"Error generating questions: {e}")
        return []
111
+
112
def normalize_bloom_weights(bloom_weights):
    """Scale the weights so that they add up to 100.

    Args:
        bloom_weights: Mapping of category name to a numeric weight.

    Returns:
        The same mapping object when it is empty or already sums to 100;
        otherwise a new dict whose values are rescaled and rounded to two
        decimals. When every weight is zero the 100 points are split evenly,
        because no proportions exist to scale.
    """
    if not bloom_weights:
        return bloom_weights

    total = sum(bloom_weights.values())
    if total == 100:
        return bloom_weights

    if total == 0:
        # Avoid dividing by zero: fall back to an even distribution.
        even_share = round(100 / len(bloom_weights), 2)
        return {key: even_share for key in bloom_weights}

    normalization_factor = 100 / total
    return {
        key: round(value * normalization_factor, 2)
        for key, value in bloom_weights.items()
    }
119
+
120
def generate_pdf(questions, filename="questions.pdf"):
    """Write the questions to a PDF file, one "Q<n>: ..." entry each.

    Args:
        questions: Iterable of question strings.
        filename: Path of the PDF to create.

    Returns:
        ``filename`` on success, or ``None`` when the PDF could not be
        produced (the error is reported via ``st.error``).
    """
    try:
        pdf = FPDF()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.add_page()
        pdf.set_font("Arial", size=12)

        # Centered heading followed by a gap before the first question
        pdf.cell(200, 10, txt="Generated Questions", ln=True, align="C")
        pdf.ln(10)

        for i, question in enumerate(questions, 1):
            # The built-in Arial font only covers Latin-1; substitute any
            # other character so one curly quote or symbol in a generated
            # question does not abort the whole document.
            printable = question.encode("latin-1", "replace").decode("latin-1")
            # multi_cell wraps text that is too long for a single line
            pdf.multi_cell(0, 10, f"Q{i}: {printable}")

        pdf.output(filename)
        return filename
    except Exception as e:
        st.error(f"Error generating PDF: {e}")
        return None
146
+
147
def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_description, response_instructions, bloom_taxonomy_weights, num_questions):
    """Fetch a PDF, extract its text and generate questions from it.

    Args:
        pdf_source: URL of the PDF, or a falsy value.
        uploaded_file: Uploaded PDF object, or ``None``.
        api_key: Gemini API key passed to ``generate_ai_response``.
        role_description: Role text for the prompt.
        response_instructions: Formatting instructions for the prompt.
        bloom_taxonomy_weights: Mapping of Bloom category to weight; it is
            normalized before being sent.
        num_questions: Number of questions to request.

    Returns:
        The list of generated questions, or ``[]`` when the PDF could not be
        obtained, contained no text, or any step failed.
    """
    try:
        pdf_path = get_pdf_path(pdf_source, uploaded_file)
        if not pdf_path:
            return []

        try:
            pdf_text = extract_text_pymupdf(pdf_path)
            if not pdf_text:
                return []

            return generate_ai_response(
                api_key,
                pdf_text,
                "Generate questions based on the above context.",
                role_description,
                response_instructions,
                normalize_bloom_weights(bloom_taxonomy_weights),
                num_questions,
            )
        finally:
            # Runs on every exit path (including the empty-text return and
            # exceptions) so the temporary PDF and its directory never leak.
            try:
                os.remove(pdf_path)
                os.rmdir(os.path.dirname(pdf_path))
            except OSError as e:
                st.warning(f"Could not delete temporary PDF file: {e}")
    except Exception as e:
        st.error(f"Error processing PDF and generating questions: {e}")
        return []
185
+
186
# Placeholder scoring results: one entry per question, each carrying a 0-10
# mark for the six Bloom categories listed in _DUMMY_LEVELS (same order).
_DUMMY_LEVELS = (
    "Knowledge",
    "Comprehension",
    "Application",
    "Analysis",
    "Synthesis",
    "Evaluation",
)

dummydata = [
    {"question": text, "score": dict(zip(_DUMMY_LEVELS, marks))}
    for text, marks in (
        ("What is the main idea of the paper?", (10, 9, 8, 7, 6, 5)),
        ("What are the key findings of the paper?", (9, 8, 7, 6, 5, 4)),
        ("How does the paper contribute to the field?", (8, 7, 6, 5, 4, 3)),
        ("What are the limitations of the paper?", (7, 6, 5, 4, 3, 2)),
        ("What are the future research directions?", (6, 5, 4, 3, 2, 1)),
        ("How does the paper compare to existing work?", (5, 4, 3, 2, 1, 0)),
    )
]
238
+
239
def _init_generator_state():
    """Seed the session-state keys used by the question generator tab."""
    defaults = {
        "pdf_source_type": "URL",
        "pdf_url": "https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf",
        "uploaded_file": None,
        "questions": [],
        "accepted_questions": [],
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _render_question_generator_tab():
    """Render the form that generates questions and the accept/discard UI."""
    st.title("🎓 Academic Paper Question Generator")
    st.markdown("Generate insightful questions from academic papers using Bloom's Taxonomy")

    _init_generator_state()

    # The key is read from the environment (.env is loaded at import time)
    api_key = os.getenv('GEMINI_API_KEY')

    with st.form(key='pdf_generation_form'):
        st.header("PDF Source Configuration")

        # Passing the stored URL as `value` keeps the default paper URL in
        # the box; without it the widget's empty string replaced the default
        # on the very first run.
        st.session_state.pdf_url = st.text_input(
            "Enter the URL of the PDF",
            value=st.session_state.pdf_url,
            key="pdf_url_input"
        )

        st.markdown("<h3 style='text-align: center;'>OR</h3>", unsafe_allow_html=True)

        st.session_state.uploaded_file = st.file_uploader(
            "Upload a PDF file",
            type=['pdf'],
            key="pdf_file_upload"
        )

        st.subheader("Adjust Bloom's Taxonomy Weights")
        col1, col2, col3 = st.columns(3)

        with col1:
            knowledge = st.slider("Knowledge: Remembering information", 0, 100, 20, key='knowledge_slider')
            application = st.slider("Application: Using abstractions in concrete situations", 0, 100, 20, key='application_slider')

        with col2:
            comprehension = st.slider("Comprehension: Explaining the meaning of information", 0, 100, 20, key='comprehension_slider')
            analysis = st.slider("Analysis: Breaking down a whole into component parts", 0, 100, 20, key='analysis_slider')

        with col3:
            synthesis = st.slider("Synthesis: Putting parts together to form a new and integrated whole", 0, 100, 10, key='synthesis_slider')
            evaluation = st.slider("Evaluation: Making and defending judgments based on internal evidence or external criteria", 0, 100, 10, key='evaluation_slider')

        bloom_taxonomy_weights = {
            "Knowledge": knowledge,
            "Comprehension": comprehension,
            "Application": application,
            "Analysis": analysis,
            "Synthesis": synthesis,
            "Evaluation": evaluation
        }

        num_questions = st.slider("How many questions would you like to generate?", min_value=1, max_value=20, value=5, key='num_questions_slider')

        submit_button = st.form_submit_button(label='Generate Questions')

    if submit_button:
        if not api_key:
            # There is no key input in the UI, so point at the real source.
            st.error("Gemini API key not found. Set the GEMINI_API_KEY environment variable.")
        elif not st.session_state.pdf_url and not st.session_state.uploaded_file:
            st.error("Please enter a PDF URL or upload a PDF file.")
        else:
            normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)

            st.info("Normalized Bloom's Taxonomy Weights:")
            st.json(normalized_bloom_weights)

            role_description = "You are a question-generating AI agent, given context and instruction, you need to generate questions from the context."
            response_instructions = "Please generate questions that are clear and relevant to the content of the paper. Generate questions which are separated by new lines, without any numbering or additional context."

            with st.spinner('Generating questions...'):
                st.session_state.questions = process_pdf_and_generate_questions(
                    pdf_source=st.session_state.pdf_url or None,
                    uploaded_file=st.session_state.uploaded_file,
                    api_key=api_key,
                    role_description=role_description,
                    response_instructions=response_instructions,
                    bloom_taxonomy_weights=normalized_bloom_weights,
                    num_questions=num_questions
                )

    if st.session_state.questions:
        st.header("Generated Questions")

        # A form batches the radio changes so each click does not rerun
        # the script.
        with st.form(key='questions_form'):
            for idx, question in enumerate(st.session_state.questions, 1):
                cols = st.columns([4, 1])

                with cols[0]:
                    st.write(f"Q{idx}: {question}")

                with cols[1]:
                    # 'Discard' is preselected; the user opts in to 'Accept'
                    selected_option = st.radio(f"Select an option for Q{idx}", ["Accept", "Discard"], key=f"radio_{idx}", index=1)

                accepted = st.session_state.accepted_questions
                if selected_option == "Accept":
                    if question not in accepted:
                        accepted.append(question)
                elif question in accepted:
                    accepted.remove(question)

            st.form_submit_button("Update Accepted Questions")

        if st.session_state.accepted_questions:
            st.header("Accepted Questions")
            for q in st.session_state.accepted_questions:
                st.write(q)

            if st.button("Download Accepted Questions as PDF"):
                filename = generate_pdf(st.session_state.accepted_questions, filename="accepted_questions.pdf")
                if filename:
                    # Read the bytes up front so the file handle is closed
                    # before the widget is created.
                    with open(filename, "rb") as pdf_file:
                        pdf_bytes = pdf_file.read()
                    st.download_button(
                        label="Click to Download PDF",
                        data=pdf_bytes,
                        file_name="accepted_questions.pdf",
                        mime="application/pdf"
                    )
                    st.success("PDF generated successfully!")
        else:
            st.info("No questions selected yet.")

    st.markdown("---")
    st.markdown("""
### About this Tool
- Generate academic paper questions using Bloom's Taxonomy
- Customize question generation weights
- Select and refine generated questions
- Support for PDF via URL or local upload
""")


def _render_paper_scorer_tab():
    """Render the upload form and the score breakdown built from dummydata."""
    st.title("📄 Academic Paper Scorer")
    st.markdown("### Evaluate the Quality of Your Academic Paper")

    # Styles for the upload area and the per-question score headers
    st.markdown("""
<style>
.upload-container {
    background-color: #f0f2f6;
    border-radius: 10px;
    padding: 20px;
    border: 2px dashed #4a6cf7;
    text-align: center;
}
.score-breakdown {
    background-color: #f8f9fa;
    border-radius: 8px;
    padding: 15px;
    margin-bottom: 15px;
}
.score-header {
    font-weight: bold;
    color: #4a6cf7;
    margin-bottom: 10px;
}
</style>
""", unsafe_allow_html=True)

    with st.form(key='paper_scorer_form'):
        st.header("Upload Your Academic Paper")
        uploaded_file = st.file_uploader(
            "Choose a PDF file",
            type=['pdf', 'jpg', 'png', 'jpeg'],
            label_visibility="collapsed"
        )

        submit_button = st.form_submit_button(
            "Score Paper",
            use_container_width=True,
            type="primary"
        )

    if not submit_button:
        return

    if uploaded_file is None:
        # Do not report a score when nothing was submitted for scoring.
        st.warning("Please upload a file to score.")
        return

    # NOTE: the scores come from the placeholder `dummydata`, not from the
    # uploaded file.
    categories = ['Knowledge', 'Comprehension', 'Application', 'Analysis', 'Synthesis', 'Evaluation']
    total_score = sum(
        sum(question['score'].values())
        for question in dummydata
    )
    # Each question has 6 categories worth up to 10 points each
    average_score = total_score / (len(dummydata) * 6 * 10) * 100

    col1, _col2 = st.columns([2, 1])

    with col1:
        st.metric(label="Total Paper Score", value=f"{average_score:.2f}/100")

    with st.expander("Show Detailed Scores", expanded=True):
        for idx, item in enumerate(dummydata, 1):
            st.markdown(f'<div class="score-header">Question {idx}: {item["question"]}</div>', unsafe_allow_html=True)

            score_cols = st.columns(6)

            for col, category in zip(score_cols, categories):
                with col:
                    score = item['score'][category]
                    # green above 7, orange above 4, red otherwise
                    color = 'green' if score > 7 else 'orange' if score > 4 else 'red'

                    st.markdown(f"""
<div style="text-align: center;
background-color: #f1f1f1;
border-radius: 5px;
padding: 5px;
margin-bottom: 5px;">
<div style="font-weight: bold; color: {color};">{category}</div>
<div style="font-size: 18px; color: {color};">{score}/10</div>
</div>
""", unsafe_allow_html=True)

            # Separator between questions, but not after the last one
            if idx < len(dummydata):
                st.markdown('---')


def main():
    """Configure the page and render the generator and scorer tabs."""
    st.set_page_config(page_title="Academic Paper Tool", page_icon="📝", layout="wide")

    tab1, tab2 = st.tabs(["Question Generator", "Paper Scorer"])

    if 'totalscore' not in st.session_state:
        st.session_state.totalscore = None
    if 'show_details' not in st.session_state:
        st.session_state.show_details = False

    with tab1:
        _render_question_generator_tab()

    with tab2:
        _render_paper_scorer_tab()


# Run Streamlit app
if __name__ == "__main__":
    main()