from typing import Optional, Dict
import streamlit as st
import os
from dotenv import load_dotenv
import torch
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
from torch.nn.functional import softmax
from doctr.models import ocr_predictor
from doctr.io import DocumentFile
from functionbloom import (
    save_uploaded_file, get_pdf_path, extract_text_pymupdf, get_bloom_taxonomy_scores,
    generate_ai_response, normalize_bloom_weights, generate_pdf,
    process_pdf_and_generate_questions, get_bloom_taxonomy_details,
    predict_with_loaded_model, process_document, sendtogemini,
)
load_dotenv()
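# Load the fine-tuned DistilBERT classifier and tokenizer used to score text against Bloom's taxonomy levels.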
model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')
tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
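# Map Bloom's taxonomy levels to the classifier's label indices (and back, for decoding predictions).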
mapping = {"Remembering": 0, "Understanding": 1, "Applying": 2, "Analyzing": 3, "Evaluating": 4, "Creating": 5}
reverse_mapping = {v: k for k, v in mapping.items()}
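# docTR OCR pipeline for extracting text from scanned PDFs and images uploaded to the Paper Scorer tab.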
modelocr = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
def main():
st.set_page_config(page_title="Academic Paper Tool", page_icon="📝", layout="wide")
# Tabs for different functionalities
st.markdown("""
""", unsafe_allow_html=True)
tab1, tab2 = st.tabs(["Question Generator", "Paper Scorer"])
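# Session-state defaults for the Paper Scorer tab so results survive Streamlit reruns.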
if 'totalscore' not in st.session_state:
st.session_state.totalscore = None
if 'show_details' not in st.session_state:
st.session_state.show_details = False
if 'question_scores' not in st.session_state:
st.session_state.question_scores = {}
# Question Generator Tab
with tab1:
st.markdown("
🎓 Academic Paper Question Generator
", unsafe_allow_html=True)
st.markdown("Generate insightful questions from academic papers using Bloom's Taxonomy")
# Initialize session state variables with defaults
if 'pdf_source_type' not in st.session_state:
st.session_state.pdf_source_type = "URL"
if 'pdf_url' not in st.session_state:
st.session_state.pdf_url = ""
if 'uploaded_file' not in st.session_state:
st.session_state.uploaded_file = None
if 'questions' not in st.session_state:
st.session_state.questions = []
if 'accepted_questions' not in st.session_state:
st.session_state.accepted_questions = []
# API Configuration
api_key = os.getenv('GEMINI_API_KEY')
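# The Gemini API key is read from the environment (.env); generation is blocked below if it is missing.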
# Main form for PDF and question generation
with st.form(key='pdf_generation_form'):
st.subheader("PDF Source")
st.session_state.pdf_url = st.text_input(
"Enter the URL of the PDF",
value=st.session_state.pdf_url,
key="pdf_url_input"
)
st.markdown("OR
", unsafe_allow_html=True)
st.session_state.uploaded_file = st.file_uploader(
"Upload a PDF file",
type=['pdf'],
key="pdf_file_upload"
)
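# Optional free-text instructions that are passed through to the question generator.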
st.session_state.user_input = st.text_area("Enter your query here", key="input", height=100)
# Question Length Selection
question_length = st.select_slider(
"Select Question Length",
options=["Short", "Medium", "Long"],
value="Medium",
help="Short: 10-15 words, Medium: 20-25 words, Long: 30-40 words"
)
st.session_state.include_numericals = st.checkbox("Include Numericals", key="include_numericals_checkbox")
# Bloom's Taxonomy Weights
st.subheader("Adjust Bloom's Taxonomy Weights")
col1, col2, col3 = st.columns(3)
with col1:
knowledge = st.slider("Knowledge: Remembering", 0, 100, 20, key='knowledge_slider')
application = st.slider("Applying: Using abstractions in concrete situations", 0, 100, 20, key='application_slider')
with col2:
comprehension = st.slider("Understanding: Explaining the meaning of information", 0, 100, 20, key='comprehension_slider')
analysis = st.slider("Analyzing: Breaking down a whole into component parts", 0, 100, 20, key='analysis_slider')
with col3:
synthesis = st.slider("Creating: Putting parts together to form a new and integrated whole", 0, 100, 10, key='synthesis_slider')
evaluation = st.slider("Evaluation: Making and defending judgments based on internal evidence or external criteria", 0, 100, 10, key='evaluation_slider')
# Collect the Bloom's Taxonomy weights
bloom_taxonomy_weights = {
"Knowledge": knowledge,
"Comprehension": comprehension,
"Application": application,
"Analysis": analysis,
"Synthesis": synthesis,
"Evaluation": evaluation
}
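# The raw slider values are normalized via normalize_bloom_weights before being passed to the generator.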
# Number of questions
num_questions = st.slider("How many questions would you like to generate?", min_value=1, max_value=20, value=5, key='num_questions_slider')
# Submit button within the form
submit_button = st.form_submit_button(label='Generate Questions')
# Process form submission
if submit_button:
# Validate API key
if not api_key:
st.error("Please enter a valid Gemini API key.")
# Validate PDF source
elif not st.session_state.pdf_url and not st.session_state.uploaded_file:
st.error("Please enter a PDF URL or upload a PDF file.")
else:
# Normalize the Bloom's weights
normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)
st.info("Normalized Bloom's Taxonomy Weights:")
st.json(normalized_bloom_weights)
# Role and instructions for the AI
role_description = "You are a question-generating AI agent, given context and instruction, you need to generate questions from the context."
response_instructions = "Please generate questions that are clear and relevant to the content of the paper. Generate questions which are separated by new lines, without any numbering or additional context."
# Generate questions
with st.spinner('Generating questions...'):
st.session_state.questions = process_pdf_and_generate_questions(
pdf_source=st.session_state.pdf_url if st.session_state.pdf_url else None,
uploaded_file=st.session_state.uploaded_file if st.session_state.uploaded_file else None,
api_key=api_key,
role_description=role_description,
response_instructions=response_instructions,
bloom_taxonomy_weights=normalized_bloom_weights,
num_questions=num_questions,
question_length=question_length,
include_numericals=st.session_state.include_numericals,
user_input=st.session_state.user_input
)
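# Show the generated questions with per-question Bloom's details and accept/discard controls.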
if st.session_state.questions:
st.header("Generated Questions")
# Create a form for question management to prevent reload
with st.form(key='questions_form'):
for idx, question in enumerate(st.session_state.questions, 1):
cols = st.columns([4, 1]) # Create two columns
with cols[0]:
# Display the question
st.write(f"Q{idx}: {question}")
# Add info button using Streamlit's expander
with st.expander("Show Bloom's Taxonomy Details"):
taxonomy_details = get_bloom_taxonomy_details(st.session_state.question_scores.get(question))
st.text(taxonomy_details)
# Use radio buttons for selection
with cols[1]:
selected_option = st.radio(
f"Select an option for Q{idx}",
["Accept", "Discard"],
key=f"radio_{idx}",
index=1
)
# Handle radio button state changes
if selected_option == "Accept":
if question not in st.session_state.accepted_questions:
st.session_state.accepted_questions.append(question)
else:
if question in st.session_state.accepted_questions:
st.session_state.accepted_questions.remove(question)
# Submit button for question selection
submit_questions = st.form_submit_button("Update Accepted Questions")
# Show accepted questions
if st.session_state.accepted_questions:
st.header("Accepted Questions")
for q in st.session_state.accepted_questions:
st.write(q)
# Download button for accepted questions
if st.button("Download Accepted Questions as PDF"):
filename = generate_pdf(st.session_state.accepted_questions, filename="accepted_questions.pdf")
if filename:
with open(filename, "rb") as pdf_file:
st.download_button(
label="Click to Download PDF",
data=pdf_file,
file_name="accepted_questions.pdf",
mime="application/pdf"
)
st.success("PDF generated successfully!")
else:
st.info("No questions selected yet.")
# Add some footer information
st.markdown("---")
st.markdown("""
### About this Tool
- Generate academic paper questions using Bloom's Taxonomy
- Customize question generation weights
- Select and refine generated questions
- Support for PDF via URL or local upload
""")
with tab2:
st.markdown("📄 Academic Paper Scorer
", unsafe_allow_html=True)
st.markdown("Evaluate the Quality of Your Academic Paper")
with st.form(key='paper_scorer_form'):
st.header("Upload Your Academic Paper")
uploaded_file = st.file_uploader(
"Choose a PDF file",
type=['pdf','jpg','png','jpeg'],
label_visibility="collapsed"
)
st.markdown("OR
", unsafe_allow_html=True)
if 'question_typed' not in st.session_state:
st.session_state.question_typed = ""
st.text_area("Paste your question here", value=st.session_state.question_typed, key="question_typed")
question_typed = st.session_state.question_typed
submit_button = st.form_submit_button(
"Score Paper",
use_container_width=True,
type="primary"
)
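# On submit: persist any upload to disk, score each detected question, and aggregate per-level totals.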
if submit_button:
if uploaded_file is None and not st.session_state.question_typed.strip():
st.error("Please upload a file or paste a question to score.")
st.stop()
# If only a question was pasted, inputpath stays None (sendtogemini is assumed to handle that case).
pdf_path = save_uploaded_file(uploaded_file) if uploaded_file is not None else None
dummydata = sendtogemini(inputpath=pdf_path, question=st.session_state.question_typed)
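# Each entry of dummydata is expected to carry the question text plus a 'score' dict keyed by Bloom's level.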
# Aggregate raw per-category scores across all scored questions.
total_score = {'Remembering': 0, 'Understanding': 0, 'Applying': 0, 'Analyzing': 0, 'Evaluating': 0, 'Creating': 0}
for item in dummydata:
for category in total_score:
total_score[category] += item['score'][category]
# average_score = total_score / (len(dummydata) * 6 * 10) * 100
# Score display columns
categories = ['Remembering', 'Understanding', 'Applying', 'Analyzing', 'Evaluating', 'Creating']
# Create 6 columns in a single row
cols = st.columns(6)
# Iterate through categories and populate columns
for i, category in enumerate(categories):
with cols[i]:
score = round(total_score[category] / len(dummydata), ndigits=3)
color = 'green' if score > 0.7 else 'orange' if score > 0.4 else 'red'
st.markdown(f"""
""", unsafe_allow_html=True)
with st.expander("Show Detailed Scores", expanded=True):
for idx, item in enumerate(dummydata, 1):
# Question header
st.markdown(f"<h4>Q{idx}: {item.get('question', '')}</h4>", unsafe_allow_html=True)
# Create columns for score display
score_cols = st.columns(6)
# Scoring categories
categories = ['Remembering', 'Understanding', 'Applying', 'Analyzing', 'Evaluating', 'Creating']
for col, category in zip(score_cols, categories):
with col:
# Determine color based on score
score = round(item['score'][category], ndigits=3)
color = 'green' if score > 0.7 else 'orange' if score > 0.3 else 'red'
st.markdown(f"<div style='text-align: center; color: {color};'><b>{category}</b><br>{score}</div>", unsafe_allow_html=True)
# Add a separator between questions
if idx < len(dummydata):
st.markdown('---')
# Run Streamlit app
if __name__ == "__main__":
main()