"""Streamlit front end with two tabs: a question generator and a paper scorer.

The first tab builds questions from a PDF (URL or upload) weighted by Bloom's
Taxonomy sliders; the second tab sends an uploaded paper or a pasted question
to ``sendtogemini`` and displays the per-category scores it returns.
"""
import html
import os
from typing import Dict, Optional

import streamlit as st
from dotenv import load_dotenv
import torch
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
from torch.nn.functional import softmax
from doctr.models import ocr_predictor
from doctr.io import DocumentFile

from functionbloom import (
    save_uploaded_file,
    get_pdf_path,
    extract_text_pymupdf,
    get_bloom_taxonomy_scores,
    generate_ai_response,
    normalize_bloom_weights,
    generate_pdf,
    process_pdf_and_generate_questions,
    get_bloom_taxonomy_details,
)
from functionbloom import predict_with_loaded_model, process_document, sendtogemini

load_dotenv()

# NOTE(review): these models are loaded at module level, so the work is
# repeated every time this script is executed. Consider caching the loaders
# if start-up cost becomes a problem.
model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')
tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
mapping = {"Remembering": 0, "Understanding": 1, "Applying": 2, "Analyzing": 3, "Evaluating": 4, "Creating": 5}
reverse_mapping = {v: k for k, v in mapping.items()}
modelocr = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)

# Category names shown by the scorer, in the same order as ``mapping``.
_SCORE_CATEGORIES = tuple(mapping)


def _init_session_defaults(defaults: Dict[str, object]) -> None:
    """Store each default in ``st.session_state`` unless the key already exists."""
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _score_color(score: float, orange_above: float) -> str:
    """Return 'green' above 0.7, 'orange' above ``orange_above``, else 'red'."""
    if score > .7:
        return 'green'
    return 'orange' if score > orange_above else 'red'


def _score_cell(category: str, score: float, out_of: object, color: str) -> str:
    """Build the markdown for one score cell, with the score in ``color``.

    The colour is applied with a minimal inline span; callers render the
    result with ``unsafe_allow_html=True``.
    """
    return f'\n{category}\n<span style="color: {color};">{score}/{out_of}</span>\n'


def _generate_questions(
    api_key: str,
    bloom_taxonomy_weights: Dict[str, int],
    num_questions: int,
    question_length: str,
) -> None:
    """Normalize the weights, call the generator and store its result.

    The result of ``process_pdf_and_generate_questions`` is written to
    ``st.session_state.questions``.
    """
    normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)
    st.info("Normalized Bloom's Taxonomy Weights:")
    st.json(normalized_bloom_weights)

    # Role and instructions for the AI
    role_description = "You are a question-generating AI agent, given context and instruction, you need to generate questions from the context."
    response_instructions = "Please generate questions that are clear and relevant to the content of the paper. Generate questions which are separated by new lines, without any numbering or additional context."

    with st.spinner('Generating questions...'):
        st.session_state.questions = process_pdf_and_generate_questions(
            # Empty URL / missing upload are passed on as None.
            pdf_source=st.session_state.pdf_url or None,
            uploaded_file=st.session_state.uploaded_file or None,
            api_key=api_key,
            role_description=role_description,
            response_instructions=response_instructions,
            bloom_taxonomy_weights=normalized_bloom_weights,
            num_questions=num_questions,
            question_length=question_length,
            include_numericals=st.session_state.include_numericals,
            user_input=st.session_state.user_input,
        )


def _render_question_review() -> None:
    """Show the generated questions with Accept/Discard controls and PDF export."""
    st.header("Generated Questions")

    # A form keeps the radio buttons from triggering a rerun on every click.
    with st.form(key='questions_form'):
        for idx, question in enumerate(st.session_state.questions, 1):
            cols = st.columns([4, 1])

            with cols[0]:
                st.write(f"Q{idx}: {question}")
                with st.expander("Show Bloom's Taxonomy Details"):
                    taxonomy_details = get_bloom_taxonomy_details(
                        st.session_state.question_scores.get(question)
                    )
                    st.text(taxonomy_details)

            with cols[1]:
                selected_option = st.radio(
                    f"Select an option for Q{idx}",
                    ["Accept", "Discard"],
                    key=f"radio_{idx}",
                    index=1,
                )

            # Keep the accepted list in sync with the current radio choice.
            accepted = st.session_state.accepted_questions
            if selected_option == "Accept":
                if question not in accepted:
                    accepted.append(question)
            elif question in accepted:
                accepted.remove(question)

        st.form_submit_button("Update Accepted Questions")

    if not st.session_state.accepted_questions:
        st.info("No questions selected yet.")
        return

    st.header("Accepted Questions")
    for q in st.session_state.accepted_questions:
        st.write(q)

    if st.button("Download Accepted Questions as PDF"):
        filename = generate_pdf(st.session_state.accepted_questions, filename="accepted_questions.pdf")
        if filename:
            with open(filename, "rb") as pdf_file:
                st.download_button(
                    label="Click to Download PDF",
                    data=pdf_file,
                    file_name="accepted_questions.pdf",
                    mime="application/pdf",
                )
            st.success("PDF generated successfully!")


def _render_question_generator_tab() -> None:
    """Render the 'Question Generator' tab: input form, generation, review, footer."""
    st.markdown("\n\n🎓 Academic Paper Question Generator\n\n", unsafe_allow_html=True)
    st.markdown("Generate insightful questions from academic papers using Bloom's Taxonomy")

    _init_session_defaults({
        'pdf_source_type': "URL",
        'pdf_url': "",
        'uploaded_file': None,
        'questions': [],
        'accepted_questions': [],
    })

    api_key: Optional[str] = os.getenv('GEMINI_API_KEY')

    with st.form(key='pdf_generation_form'):
        st.subheader("PDF Source")
        st.session_state.pdf_url = st.text_input(
            "Enter the URL of the PDF",
            value=st.session_state.pdf_url,
            key="pdf_url_input",
        )
        st.markdown("\n\nOR\n\n", unsafe_allow_html=True)
        st.session_state.uploaded_file = st.file_uploader(
            "Upload a PDF file",
            type=['pdf'],
            key="pdf_file_upload",
        )
        st.session_state.user_input = st.text_area("Enter your query here", key="input", height=100)

        question_length = st.select_slider(
            "Select Question Length",
            options=["Short", "Medium", "Long"],
            value="Medium",
            help="Short: 10-15 words, Medium: 20-25 words, Long: 30-40 words",
        )
        st.session_state.include_numericals = st.checkbox("Include Numericals", key="include_numericals_checkbox")

        st.subheader("Adjust Bloom's Taxonomy Weights")
        col1, col2, col3 = st.columns(3)
        with col1:
            knowledge = st.slider("Knowledge: Remembering", 0, 100, 20, key='knowledge_slider')
            application = st.slider("Applying: Using abstractions in concrete situations", 0, 100, 20, key='application_slider')
        with col2:
            comprehension = st.slider("Understanding: Explaining the meaning of information", 0, 100, 20, key='comprehension_slider')
            analysis = st.slider("Analyzing: Breaking down a whole into component parts", 0, 100, 20, key='analysis_slider')
        with col3:
            synthesis = st.slider("Creating: Putting parts together to form a new and integrated whole", 0, 100, 10, key='synthesis_slider')
            evaluation = st.slider("Evaluation: Making and defending judgments based on internal evidence or external criteria", 0, 100, 10, key='evaluation_slider')

        bloom_taxonomy_weights = {
            "Knowledge": knowledge,
            "Comprehension": comprehension,
            "Application": application,
            "Analysis": analysis,
            "Synthesis": synthesis,
            "Evaluation": evaluation,
        }

        num_questions = st.slider(
            "How many questions would you like to generate?",
            min_value=1,
            max_value=20,
            value=5,
            key='num_questions_slider',
        )

        submit_button = st.form_submit_button(label='Generate Questions')

    if submit_button:
        if not api_key:
            st.error("Please enter a valid Gemini API key.")
        elif not st.session_state.pdf_url and not st.session_state.uploaded_file:
            st.error("Please enter a PDF URL or upload a PDF file.")
        else:
            _generate_questions(api_key, bloom_taxonomy_weights, num_questions, question_length)

    if st.session_state.questions:
        _render_question_review()

    # Footer
    st.markdown("---")
    st.markdown(
        "\n"
        "### About this Tool\n"
        "- Generate academic paper questions using Bloom's Taxonomy\n"
        "- Customize question generation weights\n"
        "- Select and refine generated questions\n"
        "- Support for PDF via URL or local upload\n"
    )


def _render_score_results(results: list) -> None:
    """Display per-category averages and the per-question score breakdown.

    ``results`` must be non-empty. Each item is indexed as
    ``item['question']`` and ``item['score'][category]``.
    """
    question_count = len(results)
    totals = {
        category: sum(item['score'][category] for item in results)
        for category in _SCORE_CATEGORIES
    }

    # Summary row: one column per category with the average score.
    # NOTE(review): the value shown is an average but the denominator printed
    # is the question count; kept as-is - confirm the intended label.
    for col, category in zip(st.columns(6), _SCORE_CATEGORIES):
        with col:
            score = round(totals[category] / question_count, ndigits=3)
            color = _score_color(score, .4)
            st.markdown(_score_cell(category, score, question_count, color), unsafe_allow_html=True)

    with st.expander("Show Detailed Scores", expanded=True):
        for idx, item in enumerate(results, 1):
            # The question text is externally derived and rendered with HTML
            # enabled, so escape it before interpolating.
            safe_question = html.escape(str(item["question"]))
            st.markdown(f'\nQuestion {idx}: {safe_question}\n', unsafe_allow_html=True)

            # NOTE(review): the orange threshold here is 0.3 while the summary
            # row uses 0.4; both are preserved from the original.
            for col, category in zip(st.columns(6), _SCORE_CATEGORIES):
                with col:
                    score = round(item['score'][category], ndigits=3)
                    color = _score_color(score, .3)
                    st.markdown(_score_cell(category, score, 1, color), unsafe_allow_html=True)

            st.markdown('', unsafe_allow_html=True)

            # Separator between questions, but not after the last one.
            if idx < question_count:
                st.markdown('---')


def _render_paper_scorer_tab() -> None:
    """Render the 'Paper Scorer' tab: upload/paste form and score display."""
    st.markdown("\n\n📄 Academic Paper Scorer\n\n", unsafe_allow_html=True)
    st.markdown("Evaluate the Quality of Your Academic Paper")
    st.markdown(""" """, unsafe_allow_html=True)

    with st.form(key='paper_scorer_form'):
        st.header("Upload Your Academic Paper")
        uploaded_file = st.file_uploader(
            "Choose a PDF file",
            type=['pdf', 'jpg', 'png', 'jpeg'],
            label_visibility="collapsed",
        )
        st.markdown("\nOR\n", unsafe_allow_html=True)
        if 'question_typed' not in st.session_state:
            st.session_state.question_typed = ""
        st.text_area("Paste your question here", value=st.session_state.question_typed, key="question_typed")
        question_typed = st.session_state.question_typed
        submit_button = st.form_submit_button(
            "Score Paper",
            use_container_width=True,
            type="primary",
        )

    if not submit_button:
        return

    # Nothing to score: stop before calling the backend.
    if uploaded_file is None and not (question_typed or "").strip():
        st.error("Please upload a file or paste a question to score.")
        return

    # presumably save_uploaded_file tolerates None when only a question was
    # pasted - verify against functionbloom.
    pdf_path = save_uploaded_file(uploaded_file)
    results = sendtogemini(inputpath=pdf_path, question=question_typed)

    # Guard the averages below against division by zero / a None result.
    if not results:
        st.warning("No scored questions were returned for this input.")
        return

    _render_score_results(results)


def main() -> None:
    """Configure the page, create both tabs and render their contents."""
    st.set_page_config(page_title="Academic Paper Tool", page_icon="📝", layout="wide")

    st.markdown(""" """, unsafe_allow_html=True)
    tab1, tab2 = st.tabs(["Question Generator", "Paper Scorer"])

    _init_session_defaults({
        'totalscore': None,
        'show_details': False,
        'question_scores': {},
    })

    with tab1:
        _render_question_generator_tab()

    with tab2:
        _render_paper_scorer_tab()


# Run Streamlit app
if __name__ == "__main__":
    main()