Spaces:

AruniAnkur
/

BloomScore

Sleeping

File size: 19,601 Bytes

from typing import Optional, Dict
import streamlit as st
import os
from dotenv import load_dotenv
import torch
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
from torch.nn.functional import softmax
from doctr.models import ocr_predictor
from doctr.io import DocumentFile
from functionbloom import save_uploaded_file, get_pdf_path, extract_text_pymupdf, get_bloom_taxonomy_scores,generate_ai_response,normalize_bloom_weights, generate_pdf,process_pdf_and_generate_questions,get_bloom_taxonomy_details
from functionbloom import predict_with_loaded_model, process_document, sendtogemini


load_dotenv()

model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')
tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
mapping = {"Remembering": 0, "Understanding": 1, "Applying": 2, "Analyzing": 3, "Evaluating": 4, "Creating": 5}
reverse_mapping = {v: k for k, v in mapping.items()}
modelocr = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)

def main():
    st.set_page_config(page_title="Academic Paper Tool", page_icon="📝", layout="wide")
    
    # Tabs for different functionalities
    st.markdown("""
    <style>
        .stTabs [data-baseweb="tab"] {
            margin-bottom: 1rem;
            flex: 1;
            justify-content: center;
        }
        .stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
            font-size: 2rem;
            padding: 0 2rem;
            font-weight: bold;
            margin: 0;
        }
        
        /* Information Button Styling */
        .info-button {
            background-color: #f0f2f6;
            border: 1px solid #4a6cf7;
            border-radius: 50%;
            width: 24px;
            height: 24px;
            display: inline-flex;
            align-items: center;
            justify-content: center;
            cursor: pointer;
            margin-left: 8px;
            font-weight: bold;
            color: #4a6cf7;
        }
        
        /* Modal Styling */
        .modal {
            display: none;
            position: fixed;
            z-index: 1000;
            left: 0;
            top: 0;
            width: 100%;
            height: 100%;
            overflow: auto;
            background-color: rgba(0,0,0,0.4);
        }
        
        .modal-content {
            background-color: #fefefe;
            margin: 15% auto;
            padding: 20px;
            border: 1px solid #888;
            width: 80%;
            max-width: 500px;
            border-radius: 10px;
            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
        }
        
        .close-button {
            color: #aaa;
            float: right;
            font-size: 28px;
            font-weight: bold;
            cursor: pointer;
        }
        
        .close-button:hover,
        .close-button:focus {
            color: black;
            text-decoration: none;
            cursor: pointer;
        }
        
        /* Question Container Styling */
        .question-container {
            display: flex;
            align-items: start;
            gap: 10px;
            margin-bottom: 10px;
        }
        
        /* Info Button Styling */
        .info-button {
            background-color: #f0f2f6;
            border: 1px solid #4a6cf7;
            border-radius: 50%;
            width: 24px;
            height: 24px;
            display: inline-flex;
            align-items: center;
            justify-content: center;
            cursor: pointer;
            font-weight: bold;
            color: #4a6cf7;
            flex-shrink: 0;
            font-size: 14px;
        }
        
        .info-button:hover {
            background-color: #4a6cf7;
            color: white;
        }
        
        /* Modal Styling */
        .modal {
            display: none;
            position: fixed;
            z-index: 9999;
            left: 0;
            top: 0;
            width: 100%;
            height: 100%;
            background-color: rgba(0,0,0,0.4);
        }
        
        .modal-content {
            background-color: #fefefe;
            margin: 15% auto;
            padding: 20px;
            border: 1px solid #888;
            width: 80%;
            max-width: 500px;
            border-radius: 10px;
            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
            position: relative;
        }
        
        .close-button {
            position: absolute;
            right: 10px;
            top: 5px;
            color: #aaa;
            font-size: 28px;
            font-weight: bold;
            cursor: pointer;
        }
        
        .close-button:hover,
        .close-button:focus {
            color: black;
            text-decoration: none;
            cursor: pointer;
        }
    </style>
    """, unsafe_allow_html=True)

    tab1, tab2 = st.tabs(["Question Generator", "Paper Scorer"])

    if 'totalscore' not in st.session_state:
        st.session_state.totalscore = None
    if 'show_details' not in st.session_state:
        st.session_state.show_details = False
    if 'question_scores' not in st.session_state:
        st.session_state.question_scores = {}

    # Question Generator Tab
    with tab1:
        st.markdown("<h1 style='font-size: 28px;'>🎓 Academic Paper Question Generator</h1>", unsafe_allow_html=True)
        st.markdown("Generate insightful questions from academic papers using Bloom's Taxonomy")

        # Initialize session state variables with defaults
        if 'pdf_source_type' not in st.session_state:
            st.session_state.pdf_source_type = "URL"
        if 'pdf_url' not in st.session_state:
            st.session_state.pdf_url = "https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"
        if 'uploaded_file' not in st.session_state:
            st.session_state.uploaded_file = None
        if 'questions' not in st.session_state:
            st.session_state.questions = []
        if 'accepted_questions' not in st.session_state:
            st.session_state.accepted_questions = []

        # API Configuration
        api_key = os.getenv('GEMINI_API_KEY')
        
        # Main form for PDF and question generation
        with st.form(key='pdf_generation_form'):
            st.subheader("PDF Source")
            
            st.session_state.pdf_url = st.text_input(
                    "Enter the URL of the PDF", 
                    value=st.session_state.pdf_url, 
                    key="pdf_url_input"
                )
            
            st.markdown("<h4 style='text-align: center;'>OR</h4>", unsafe_allow_html=True)

            st.session_state.uploaded_file = st.file_uploader(
                    "Upload a PDF file", 
                    type=['pdf'], 
                    key="pdf_file_upload"
            )

            # Question Length Selection
            question_length = st.select_slider(
                "Select Question Length",
                options=["Short", "Medium", "Long"],
                value="Medium",
                help="Short: 10-15 words, Medium: 20-25 words, Long: 30-40 words"
            )

            st.session_state.include_numericals = st.checkbox("Include Numericals", key="include_numericals_checkbox")

            # Bloom's Taxonomy Weights
            st.subheader("Adjust Bloom's Taxonomy Weights")
            col1, col2, col3 = st.columns(3)

            with col1:
                knowledge = st.slider("Knowledge: Remembering information", 0, 100, 20, key='knowledge_slider')
                application = st.slider("Application: Using abstractions in concrete situations", 0, 100, 20, key='application_slider')

            with col2:
                comprehension = st.slider("Comprehension: Explaining the meaning of information", 0, 100, 20, key='comprehension_slider')
                analysis = st.slider("Analysis: Breaking down a whole into component parts", 0, 100, 20, key='analysis_slider')

            with col3:
                synthesis = st.slider("Synthesis: Putting parts together to form a new and integrated whole", 0, 100, 10, key='synthesis_slider')
                evaluation = st.slider("Evaluation: Making and defending judgments based on internal evidence or external criteria", 0, 100, 10, key='evaluation_slider')

            # Collect the Bloom's Taxonomy weights
            bloom_taxonomy_weights = {
                "Knowledge": knowledge,
                "Comprehension": comprehension,
                "Application": application,
                "Analysis": analysis,
                "Synthesis": synthesis,
                "Evaluation": evaluation
            }

            # Number of questions
            num_questions = st.slider("How many questions would you like to generate?", min_value=1, max_value=20, value=5, key='num_questions_slider')

            # Submit button within the form
            submit_button = st.form_submit_button(label='Generate Questions')

        # Process form submission
        if submit_button:
            # Validate API key
            if not api_key:
                st.error("Please enter a valid Gemini API key.")
            # Validate PDF source
            elif not st.session_state.pdf_url and not st.session_state.uploaded_file:
                st.error("Please enter a PDF URL or upload a PDF file.")
            else:
                # Normalize the Bloom's weights
                normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)
                
                st.info("Normalized Bloom's Taxonomy Weights:")
                st.json(normalized_bloom_weights)

                # Role and instructions for the AI
                role_description = "You are a question-generating AI agent, given context and instruction, you need to generate questions from the context."
                response_instructions = "Please generate questions that are clear and relevant to the content of the paper. Generate questions which are separated by new lines, without any numbering or additional context."

                # Generate questions
                with st.spinner('Generating questions...'):
                    st.session_state.questions = process_pdf_and_generate_questions(
                        pdf_source=st.session_state.pdf_url if st.session_state.pdf_url else None, 
                        uploaded_file=st.session_state.uploaded_file if st.session_state.uploaded_file else None,
                        api_key=api_key, 
                        role_description=role_description, 
                        response_instructions=response_instructions, 
                        bloom_taxonomy_weights=normalized_bloom_weights, 
                        num_questions=num_questions,
                        question_length=question_length,
                        include_numericals=st.session_state.include_numericals
                    )
                    
        if st.session_state.questions:
            st.header("Generated Questions")
            
            # Create a form for question management to prevent reload
            with st.form(key='questions_form'):
                for idx, question in enumerate(st.session_state.questions, 1):
                    cols = st.columns([4, 1])  # Create two columns
                    
                    with cols[0]:
                        # Display the question
                        st.write(f"Q{idx}: {question}")
                        
                        # Add info button using Streamlit's expander
                        with st.expander("Show Bloom's Taxonomy Details"):
                            taxonomy_details = get_bloom_taxonomy_details(st.session_state.question_scores.get(question))
                            st.text(taxonomy_details)
                    
                    # Use radio buttons for selection
                    with cols[1]:
                        selected_option = st.radio(
                            f"Select an option for Q{idx}", 
                            ["Accept", "Discard"], 
                            key=f"radio_{idx}", 
                            index=1
                        )
                    
                    # Handle radio button state changes
                    if selected_option == "Accept":
                        if question not in st.session_state.accepted_questions:
                            st.session_state.accepted_questions.append(question)
                    else:
                        if question in st.session_state.accepted_questions:
                            st.session_state.accepted_questions.remove(question)

                # Submit button for question selection
                submit_questions = st.form_submit_button("Update Accepted Questions")

            # Show accepted questions
            if st.session_state.accepted_questions:
                st.header("Accepted Questions")
                for q in st.session_state.accepted_questions:
                    st.write(q)

                # Download button for accepted questions
                if st.button("Download Accepted Questions as PDF"):
                    filename = generate_pdf(st.session_state.accepted_questions, filename="accepted_questions.pdf")
                    if filename:
                        with open(filename, "rb") as pdf_file:
                            st.download_button(
                                label="Click to Download PDF",
                                data=pdf_file,
                                file_name="accepted_questions.pdf",
                                mime="application/pdf"
                            )
                        st.success("PDF generated successfully!")
            else:
                st.info("No questions selected yet.")

        # Add some footer information
        st.markdown("---")
        st.markdown("""
        ### About this Tool
        - Generate academic paper questions using Bloom's Taxonomy
        - Customize question generation weights
        - Select and refine generated questions
        - Support for PDF via URL or local upload
        """)
    with tab2:
        st.markdown("<h1 style='font-size: 28px;'>📄 Academic Paper Scorer</h1>", unsafe_allow_html=True)
        st.markdown("Evaluate the Quality of Your Academic Paper")
            
            # Create a styled container for the upload section
        st.markdown("""
        <style>
        .upload-container {
            background-color: #f0f2f6;
            border-radius: 10px;
            padding: 20px;
            border: 2px dashed #4a6cf7;
            text-align: center;
        }
        .score-breakdown {
            background-color: #f8f9fa;
            border-radius: 8px;
            padding: 15px;
            margin-bottom: 15px;
        }
        .score-header {
            font-weight: bold;
            color: #4a6cf7;
            margin-bottom: 10px;
        }
        </style>
        """, unsafe_allow_html=True)
        
        with st.form(key='paper_scorer_form'):
            st.header("Upload Your Academic Paper")
            uploaded_file = st.file_uploader(
                "Choose a PDF file", 
                type=['pdf','jpg','png','jpeg'],
                label_visibility="collapsed"
            )

            st.markdown("<div style='text-align: center; margin-top: 20px;'><strong>OR</strong></div>", unsafe_allow_html=True)
            if 'question_typed' not in st.session_state:
                st.session_state.question_typed = ""
            st.text_area("Paste your question here", value=st.session_state.question_typed, key="question_typed")
            question_typed = st.session_state.question_typed
            submit_button = st.form_submit_button(
                "Score Paper", 
                use_container_width=True,
                type="primary"
            )

        if submit_button:
            # Calculate total score
            pdf_path = save_uploaded_file(uploaded_file)
            dummydata = sendtogemini(inputpath=pdf_path, question=st.session_state.question_typed)
            #print(dummydata)
            total_score = {'Remembering': 0, 'Understanding': 0, 'Applying': 0, 'Analyzing': 0, 'Evaluating': 0, 'Creating': 0}
            for item in dummydata:
                for category in total_score:
                    total_score[category] += item['score'][category]

            # average_score = total_score / (len(dummydata) * 6 * 10) * 100
            
            # Score display columns
            categories = ['Remembering', 'Understanding', 'Applying', 'Analyzing', 'Evaluating', 'Creating']

            # Create 6 columns in a single row
            cols = st.columns(6)

            # Iterate through categories and populate columns
            for i, category in enumerate(categories):
                with cols[i]:
                    score = round(total_score[category] / (len(dummydata) ),ndigits=3)
                    color = 'green' if score > .7 else 'orange' if score > .4 else 'red'
                    st.markdown(f"""
                    <div class="score-breakdown">
                        <div class="score-header" style="color: {color}">{category}</div>
                        <div style="font-size: 24px; color: {color};">{score}/{len(dummydata)}</div>
                    </div>
                    """, unsafe_allow_html=True)
            
            with st.expander("Show Detailed Scores", expanded=True):
                for idx, item in enumerate(dummydata, 1):
                    
                    # Question header
                    st.markdown(f'<div class="score-header">Question {idx}: {item["question"]}</div>', unsafe_allow_html=True)
                    
                    # Create columns for score display
                    score_cols = st.columns(6)
                    
                    # Scoring categories
                    categories = ['Remembering', 'Understanding', 'Applying', 'Analyzing', 'Evaluating', 'Creating']
                    
                    for col, category in zip(score_cols, categories):
                        with col:
                            # Determine color based on score
                            score = round(item['score'][category],ndigits=3)
                            color = 'green' if score > .7 else 'orange' if score > .3 else 'red'
                            
                            st.markdown(f"""
                            <div style="text-align: center; 
                                        background-color: #f1f1f1; 
                                        border-radius: 5px; 
                                        padding: 5px; 
                                        margin-bottom: 5px;">
                                <div style="font-weight: bold; color: {color};">{category}</div>
                                <div style="font-size: 18px; color: {color};">{score}/1</div>
                            </div>
                            """, unsafe_allow_html=True)
                    
                    st.markdown('</div>', unsafe_allow_html=True)
                    
                    # Add a separator between questions
                    if idx < len(dummydata):
                        st.markdown('---') 

# Run Streamlit app
if __name__ == "__main__":
    main()