BloomScore / app.py
AruniAnkur's picture
added a large thing
8c84d02 verified
raw
history blame
20.7 kB
import streamlit as st
import requests
import json
import fitz # PyMuPDF
from fpdf import FPDF
import os
import tempfile
import base64
import dotenv
from dotenv import load_dotenv
load_dotenv()
# Previous functions from Question Generator
def get_pdf_path(pdf_source=None, uploaded_file=None):
    """Materialise a PDF on local disk and return its path.

    An uploaded file takes precedence over a URL when both are supplied.

    Args:
        pdf_source: URL of a PDF to download, or a falsy value.
        uploaded_file: Object exposing ``.name`` and ``.getvalue()`` (as the
            value returned by ``st.file_uploader`` is used here), or ``None``.

    Returns:
        Path of the PDF inside a freshly created temporary directory, or
        ``None`` when no source was given or the file could not be obtained
        (the problem is reported through ``st.error``).
    """
    temp_dir = None
    pdf_path = None
    try:
        if uploaded_file is not None:
            # The file name comes from the client: keep only its last path
            # component so it can never point outside the temp directory.
            safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                safe_name = "uploaded.pdf"
            temp_dir = tempfile.mkdtemp()
            pdf_path = os.path.join(temp_dir, safe_name)
            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(uploaded_file.getvalue())
            return pdf_path

        if pdf_source:
            response = requests.get(pdf_source, timeout=30)
            response.raise_for_status()
            temp_dir = tempfile.mkdtemp()
            pdf_path = os.path.join(temp_dir, "downloaded.pdf")
            with open(pdf_path, "wb") as pdf_file:
                pdf_file.write(response.content)
            return pdf_path

        st.error("No PDF source provided.")
        return None
    except Exception as e:
        # Best-effort removal of a half-created temp directory so failed
        # attempts do not accumulate on disk.
        if temp_dir is not None:
            try:
                if pdf_path and os.path.exists(pdf_path):
                    os.remove(pdf_path)
                os.rmdir(temp_dir)
            except OSError:
                pass  # cleanup is best-effort; the original error is reported below
        st.error(f"Error getting PDF: {e}")
        return None
def extract_text_pymupdf(pdf_path):
    """Extract the text of every page of a PDF with PyMuPDF.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages joined with single spaces, or ``""`` when the
        file cannot be opened or read (the error is shown via ``st.error``).
    """
    try:
        # The context manager closes the document even if a page fails to
        # extract, which the previous explicit close() did not guarantee.
        with fitz.open(pdf_path) as doc:
            pages_content = [page.get_text() for page in doc]
        return " ".join(pages_content)  # one large context string
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""
def generate_ai_response(api_key, assistant_context, user_query, role_description, response_instructions, bloom_taxonomy_weights, num_questions):
    """Ask the Gemini ``generateContent`` endpoint for questions.

    Args:
        api_key: Gemini API key.
        assistant_context: Text the questions should be based on.
        user_query: The request appended to the prompt.
        role_description: Role text placed in the prompt.
        response_instructions: Formatting instructions placed in the prompt.
        bloom_taxonomy_weights: Mapping containing the six Bloom level names
            ("Knowledge" ... "Evaluation") to their percentage weights.
        num_questions: Number of questions requested in the prompt.

    Returns:
        List of non-empty, stripped lines of the model's text answer; an
        empty list when the request fails or no text is returned (the
        problem is reported through ``st.error``).
    """
    try:
        url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
        bloom_levels = ("Knowledge", "Comprehension", "Application", "Analysis", "Synthesis", "Evaluation")
        # Leading/trailing empty items reproduce the newline that opens and
        # closes the prompt.
        prompt = "\n".join([
            "",
            "You are a highly knowledgeable assistant. Your task is to assist the user with the following context from an academic paper.",
            f"**Role**: {role_description}",
            f"**Context**: {assistant_context}",
            f"**Instructions**: {response_instructions}",
            "**Bloom's Taxonomy Weights**:",
            *(f"{level}: {bloom_taxonomy_weights[level]}%" for level in bloom_levels),
            f"**Query**: {user_query}",
            f"**Number of Questions**: {num_questions}",
            "",
        ])
        payload = {"contents": [{"parts": [{"text": prompt}]}]}
        # Send the key as a header rather than in the query string: request
        # errors include the URL in their message, and that message is shown
        # to the user below.
        headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()

        candidates = result.get("candidates") or []
        if not candidates:
            st.error("Error generating questions: the API returned no candidates.")
            return []
        parts = (candidates[0].get("content") or {}).get("parts") or []
        questions = "".join(part.get("text", "") for part in parts)
        return [line.strip() for line in questions.split("\n") if line.strip()]
    except requests.RequestException as e:
        st.error(f"API request error: {e}")
        return []
    except Exception as e:
        st.error(f"Error generating questions: {e}")
        return []
def normalize_bloom_weights(bloom_weights):
    """Rescale Bloom's Taxonomy weights so that they sum to 100.

    Args:
        bloom_weights: Mapping of level name to numeric weight.

    Returns:
        The same mapping object when it already sums to 100 or is empty;
        otherwise a new dict whose values are scaled to total 100 and rounded
        to two decimals. When every weight is zero there is no ratio to
        preserve, so the 100 points are shared equally instead of dividing
        by zero.
    """
    total = sum(bloom_weights.values())
    if total == 100 or not bloom_weights:
        return bloom_weights
    if total == 0:
        even_share = round(100 / len(bloom_weights), 2)
        return {key: even_share for key in bloom_weights}
    normalization_factor = 100 / total
    return {key: round(value * normalization_factor, 2) for key, value in bloom_weights.items()}
def _latin1_safe(text):
    """Return *text* with characters outside Latin-1 replaced by '?'."""
    return str(text).encode("latin-1", "replace").decode("latin-1")


def generate_pdf(questions, filename="questions.pdf"):
    """Write the given questions to a PDF file.

    Args:
        questions: Iterable of question strings.
        filename: Path of the PDF to create.

    Returns:
        ``filename`` on success, or ``None`` when PDF creation fails (the
        error is shown via ``st.error``).
    """
    try:
        pdf = FPDF()
        pdf.set_auto_page_break(auto=True, margin=15)
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        # Centered heading followed by a gap before the first question.
        pdf.cell(200, 10, txt="Generated Questions", ln=True, align="C")
        pdf.ln(10)
        for i, question in enumerate(questions, 1):
            # multi_cell wraps long questions. The built-in Arial font is
            # limited to Latin-1, so anything else (curly quotes, dashes,
            # non-Latin scripts) is replaced instead of aborting the export.
            pdf.multi_cell(0, 10, _latin1_safe(f"Q{i}: {question}"))
        pdf.output(filename)
        return filename
    except Exception as e:
        st.error(f"Error generating PDF: {e}")
        return None
def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_description, response_instructions, bloom_taxonomy_weights, num_questions):
    """Run the whole pipeline: obtain the PDF, extract its text, ask for questions.

    Args:
        pdf_source: URL of the PDF, or ``None``.
        uploaded_file: Uploaded file object, or ``None``.
        api_key: Gemini API key.
        role_description: Role text for the prompt.
        response_instructions: Formatting instructions for the prompt.
        bloom_taxonomy_weights: Mapping of Bloom level name to weight; it is
            normalised to sum to 100 before use.
        num_questions: Number of questions to request.

    Returns:
        List of generated question strings, or an empty list when any step
        fails.
    """
    pdf_path = None
    try:
        pdf_path = get_pdf_path(pdf_source, uploaded_file)
        if not pdf_path:
            return []

        pdf_text = extract_text_pymupdf(pdf_path)
        if not pdf_text:
            return []
        if not pdf_text.strip():
            # Pages were read but held no text (e.g. image-only pages);
            # sending an empty context to the model would be pointless.
            st.warning("The PDF contains no extractable text.")
            return []

        normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)
        return generate_ai_response(
            api_key,
            pdf_text,
            "Generate questions based on the above context.",
            role_description,
            response_instructions,
            normalized_bloom_weights,
            num_questions,
        )
    except Exception as e:
        st.error(f"Error processing PDF and generating questions: {e}")
        return []
    finally:
        # Runs on every exit path, so the temporary PDF and its directory
        # are also removed after an early return or an exception.
        if pdf_path:
            try:
                os.remove(pdf_path)
                os.rmdir(os.path.dirname(pdf_path))
            except OSError as e:
                st.warning(f"Could not delete temporary PDF file: {e}")
# Placeholder scoring data: one entry per question, each carrying a 0-10
# mark for every Bloom's Taxonomy level.
_DUMMY_LEVELS = ("Knowledge", "Comprehension", "Application", "Analysis", "Synthesis", "Evaluation")
_DUMMY_ROWS = (
    ("What is the main idea of the paper?", (10, 9, 8, 7, 6, 5)),
    ("What are the key findings of the paper?", (9, 8, 7, 6, 5, 4)),
    ("How does the paper contribute to the field?", (8, 7, 6, 5, 4, 3)),
    ("What are the limitations of the paper?", (7, 6, 5, 4, 3, 2)),
    ("What are the future research directions?", (6, 5, 4, 3, 2, 1)),
    ("How does the paper compare to existing work?", (5, 4, 3, 2, 1, 0)),
)
dummydata = [
    {"question": text, "score": dict(zip(_DUMMY_LEVELS, marks))}
    for text, marks in _DUMMY_ROWS
]
def _render_question_generator_tab():
    """Render the "Question Generator" tab: source form, generation, review, export."""
    st.title("\U0001F393 Academic Paper Question Generator")  # graduation-cap emoji
    st.markdown("Generate insightful questions from academic papers using Bloom's Taxonomy")

    # The Gemini key is read from the environment, not entered in the UI.
    api_key = os.getenv('GEMINI_API_KEY')

    with st.form(key='pdf_generation_form'):
        st.header("PDF Source Configuration")
        # Pre-fill with the stored URL so the session default is actually
        # used instead of being overwritten by an empty input.
        st.session_state.pdf_url = st.text_input(
            "Enter the URL of the PDF",
            value=st.session_state.pdf_url,
            key="pdf_url_input",
        )
        st.markdown("<h3 style='text-align: center;'>OR</h3>", unsafe_allow_html=True)
        st.session_state.uploaded_file = st.file_uploader(
            "Upload a PDF file",
            type=['pdf'],
            key="pdf_file_upload",
        )

        st.subheader("Adjust Bloom's Taxonomy Weights")
        col1, col2, col3 = st.columns(3)
        with col1:
            knowledge = st.slider("Knowledge: Remembering information", 0, 100, 20, key='knowledge_slider')
            application = st.slider("Application: Using abstractions in concrete situations", 0, 100, 20, key='application_slider')
        with col2:
            comprehension = st.slider("Comprehension: Explaining the meaning of information", 0, 100, 20, key='comprehension_slider')
            analysis = st.slider("Analysis: Breaking down a whole into component parts", 0, 100, 20, key='analysis_slider')
        with col3:
            synthesis = st.slider("Synthesis: Putting parts together to form a new and integrated whole", 0, 100, 10, key='synthesis_slider')
            evaluation = st.slider("Evaluation: Making and defending judgments based on internal evidence or external criteria", 0, 100, 10, key='evaluation_slider')

        bloom_taxonomy_weights = {
            "Knowledge": knowledge,
            "Comprehension": comprehension,
            "Application": application,
            "Analysis": analysis,
            "Synthesis": synthesis,
            "Evaluation": evaluation,
        }
        num_questions = st.slider("How many questions would you like to generate?", min_value=1, max_value=20, value=5, key='num_questions_slider')
        submit_button = st.form_submit_button(label='Generate Questions')

    if submit_button:
        if not api_key:
            st.error("Please enter a valid Gemini API key.")
        elif not st.session_state.pdf_url and not st.session_state.uploaded_file:
            st.error("Please enter a PDF URL or upload a PDF file.")
        else:
            normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)
            st.info("Normalized Bloom's Taxonomy Weights:")
            st.json(normalized_bloom_weights)

            role_description = "You are a question-generating AI agent, given context and instruction, you need to generate questions from the context."
            response_instructions = "Please generate questions that are clear and relevant to the content of the paper. Generate questions which are separated by new lines, without any numbering or additional context."

            with st.spinner('Generating questions...'):
                st.session_state.questions = process_pdf_and_generate_questions(
                    pdf_source=st.session_state.pdf_url if st.session_state.pdf_url else None,
                    uploaded_file=st.session_state.uploaded_file if st.session_state.uploaded_file else None,
                    api_key=api_key,
                    role_description=role_description,
                    response_instructions=response_instructions,
                    bloom_taxonomy_weights=normalized_bloom_weights,
                    num_questions=num_questions,
                )

    if st.session_state.questions:
        st.header("Generated Questions")
        # A form batches the radio changes so they are applied on submit
        # rather than on every click.
        with st.form(key='questions_form'):
            for idx, question in enumerate(st.session_state.questions, 1):
                cols = st.columns([4, 1])
                with cols[0]:
                    st.write(f"Q{idx}: {question}")
                with cols[1]:
                    # Defaults to "Discard"; the user opts questions in.
                    selected_option = st.radio(f"Select an option for Q{idx}", ["Accept", "Discard"], key=f"radio_{idx}", index=1)
                # Keep the accepted list in sync with the radio selection.
                if selected_option == "Accept":
                    if question not in st.session_state.accepted_questions:
                        st.session_state.accepted_questions.append(question)
                elif question in st.session_state.accepted_questions:
                    st.session_state.accepted_questions.remove(question)
            st.form_submit_button("Update Accepted Questions")

        if st.session_state.accepted_questions:
            st.header("Accepted Questions")
            for q in st.session_state.accepted_questions:
                st.write(q)
            if st.button("Download Accepted Questions as PDF"):
                filename = generate_pdf(st.session_state.accepted_questions, filename="accepted_questions.pdf")
                if filename:
                    with open(filename, "rb") as pdf_file:
                        st.download_button(
                            label="Click to Download PDF",
                            data=pdf_file,
                            file_name="accepted_questions.pdf",
                            mime="application/pdf",
                        )
                    st.success("PDF generated successfully!")
        else:
            st.info("No questions selected yet.")

    # Footer. Built line by line so the markdown starts at column 0
    # regardless of how this function is indented.
    st.markdown("---")
    st.markdown("\n".join([
        "",
        "### About this Tool",
        "- Generate academic paper questions using Bloom's Taxonomy",
        "- Customize question generation weights",
        "- Select and refine generated questions",
        "- Support for PDF via URL or local upload",
        "",
    ]))


def _render_paper_scorer_tab():
    """Render the "Paper Scorer" tab, which displays scores taken from ``dummydata``."""
    st.title("\U0001F4C4 Academic Paper Scorer")  # page emoji
    st.markdown("### Evaluate the Quality of Your Academic Paper")

    # CSS classes used by the score display below.
    st.markdown("\n".join([
        "",
        "<style>",
        ".upload-container {",
        "background-color: #f0f2f6;",
        "border-radius: 10px;",
        "padding: 20px;",
        "border: 2px dashed #4a6cf7;",
        "text-align: center;",
        "}",
        ".score-breakdown {",
        "background-color: #f8f9fa;",
        "border-radius: 8px;",
        "padding: 15px;",
        "margin-bottom: 15px;",
        "}",
        ".score-header {",
        "font-weight: bold;",
        "color: #4a6cf7;",
        "margin-bottom: 10px;",
        "}",
        "</style>",
        "",
    ]), unsafe_allow_html=True)

    with st.form(key='paper_scorer_form'):
        st.header("Upload Your Academic Paper")
        # NOTE: the uploaded file is not read anywhere yet; the scores shown
        # after submitting come from the module-level placeholder `dummydata`.
        st.file_uploader(
            "Choose a PDF file",
            type=['pdf', 'jpg', 'png', 'jpeg'],
            label_visibility="collapsed",
        )
        submit_button = st.form_submit_button(
            "Score Paper",
            use_container_width=True,
            type="primary",
        )

    if not submit_button:
        return

    categories = ['Knowledge', 'Comprehension', 'Application', 'Analysis', 'Synthesis', 'Evaluation']
    total_score = sum(sum(question['score'].values()) for question in dummydata)
    # Each of the 6 categories is marked out of 10; express the total as a percentage.
    average_score = total_score / (len(dummydata) * 6 * 10) * 100

    col1, _ = st.columns([2, 1])
    with col1:
        st.metric(label="Total Paper Score", value=f"{average_score:.2f}/100")

    with st.expander("Show Detailed Scores", expanded=True):
        for idx, item in enumerate(dummydata, 1):
            st.markdown(f'<div class="score-header">Question {idx}: {item["question"]}</div>', unsafe_allow_html=True)
            score_cols = st.columns(6)
            for col, category in zip(score_cols, categories):
                with col:
                    score = item['score'][category]
                    # Traffic-light colouring: >7 green, >4 orange, else red.
                    color = 'green' if score > 7 else 'orange' if score > 4 else 'red'
                    st.markdown(
                        '<div style="text-align: center; background-color: #f1f1f1; '
                        'border-radius: 5px; padding: 5px; margin-bottom: 5px;">'
                        f'<div style="font-weight: bold; color: {color};">{category}</div>'
                        f'<div style="font-size: 18px; color: {color};">{score}/10</div>'
                        '</div>',
                        unsafe_allow_html=True,
                    )
            # Separator between questions, but not after the last one.
            if idx < len(dummydata):
                st.markdown('---')


def main():
    """Build the Streamlit page: configuration, session defaults and both tabs."""
    st.set_page_config(page_title="Academic Paper Tool", page_icon="\U0001F4DD", layout="wide")  # memo emoji
    tab1, tab2 = st.tabs(["Question Generator", "Paper Scorer"])

    # Seed session state once; values already present are left untouched.
    session_defaults = {
        "totalscore": None,
        "show_details": False,
        "pdf_source_type": "URL",
        "pdf_url": "https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf",
        "uploaded_file": None,
        "questions": [],
        "accepted_questions": [],
    }
    for state_key, default in session_defaults.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = default

    with tab1:
        _render_question_generator_tab()
    with tab2:
        _render_paper_scorer_tab()
# Build the Streamlit UI only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()