Spaces:

AruniAnkur
/

BloomScore

Running

App Files Files Community

AruniAnkur commited on Dec 21, 2024

Commit

5aca58a

verified ·

1 Parent(s): 05b526e

Upload 2 files

Browse files

Files changed (2) hide show

functionbloom.py +9 -4
main.py +476 -0

functionbloom.py CHANGED Viewed

@@ -106,7 +106,7 @@ def get_bloom_taxonomy_scores(question: str) -> Dict[str, float]:
         return default_scores
-def generate_ai_response(api_key, assistant_context, user_query, role_description, response_instructions, bloom_taxonomy_weights, num_questions, question_length, include_numericals):
     try:
         url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={api_key}"
@@ -123,6 +123,8 @@ def generate_ai_response(api_key, assistant_context, user_query, role_descriptio
         **Role**: {role_description}
         **Context**: {assistant_context}
         **Instructions**: {response_instructions}
         Question Length Requirement: {length_guidelines[question_length]}
@@ -221,7 +223,7 @@ def generate_pdf(questions, filename="questions.pdf"):
         st.error(f"Error generating PDF: {e}")
         return None
-def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_description, response_instructions, bloom_taxonomy_weights, num_questions, question_length, include_numericals):
     try:
         pdf_path = get_pdf_path(pdf_source, uploaded_file)
@@ -245,7 +247,8 @@ def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_
             normalized_bloom_weights,
             num_questions,
             question_length,
-            include_numericals
         )
         # Clean up temporary PDF file
@@ -385,4 +388,6 @@ def sendtogemini(inputpath, question):
         d['question'] = i
         d['score'] = predict_with_loaded_model(i)
         data.append(d)
-    return data

         return default_scores
+def generate_ai_response(api_key, assistant_context, user_query, role_description, response_instructions, bloom_taxonomy_weights, num_questions, question_length, include_numericals, user_input):
     try:
         url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={api_key}"
         **Role**: {role_description}
         **Context**: {assistant_context}
+        **User Query**: {user_input}
         **Instructions**: {response_instructions}
         Question Length Requirement: {length_guidelines[question_length]}
         st.error(f"Error generating PDF: {e}")
         return None
+def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_description, response_instructions, bloom_taxonomy_weights, num_questions, question_length, include_numericals, user_input):
     try:
         pdf_path = get_pdf_path(pdf_source, uploaded_file)
             normalized_bloom_weights,
             num_questions,
             question_length,
+            include_numericals,
+            user_input
         )
         # Clean up temporary PDF file
         d['question'] = i
         d['score'] = predict_with_loaded_model(i)
         data.append(d)
+    return data

main.py ADDED Viewed

	@@ -0,0 +1,476 @@

+from typing import Optional, Dict
+import streamlit as st
+import os
+from dotenv import load_dotenv
+import torch
+from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
+from torch.nn.functional import softmax
+from doctr.models import ocr_predictor
+from doctr.io import DocumentFile
+from functionbloom import save_uploaded_file, get_pdf_path, extract_text_pymupdf, get_bloom_taxonomy_scores,generate_ai_response,normalize_bloom_weights, generate_pdf,process_pdf_and_generate_questions,get_bloom_taxonomy_details
+from functionbloom import predict_with_loaded_model, process_document, sendtogemini
+load_dotenv()
+model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')
+tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+mapping = {"Remembering": 0, "Understanding": 1, "Applying": 2, "Analyzing": 3, "Evaluating": 4, "Creating": 5}
+reverse_mapping = {v: k for k, v in mapping.items()}
+modelocr = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
+def main():
+    st.set_page_config(page_title="Academic Paper Tool", page_icon="📝", layout="wide")
+    # Tabs for different functionalities
+    st.markdown("""
+    <style>
+        .stTabs [data-baseweb="tab"] {
+            margin-bottom: 1rem;
+            flex: 1;
+            justify-content: center;
+        }
+        .stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
+            font-size: 2rem;
+            padding: 0 2rem;
+            font-weight: bold;
+            margin: 0;
+        }
+        /* Information Button Styling */
+        .info-button {
+            background-color: #f0f2f6;
+            border: 1px solid #4a6cf7;
+            border-radius: 50%;
+            width: 24px;
+            height: 24px;
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            cursor: pointer;
+            margin-left: 8px;
+            font-weight: bold;
+            color: #4a6cf7;
+        }
+        /* Modal Styling */
+        .modal {
+            display: none;
+            position: fixed;
+            z-index: 1000;
+            left: 0;
+            top: 0;
+            width: 100%;
+            height: 100%;
+            overflow: auto;
+            background-color: rgba(0,0,0,0.4);
+        }
+        .modal-content {
+            background-color: #fefefe;
+            margin: 15% auto;
+            padding: 20px;
+            border: 1px solid #888;
+            width: 80%;
+            max-width: 500px;
+            border-radius: 10px;
+            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+        }
+        .close-button {
+            color: #aaa;
+            float: right;
+            font-size: 28px;
+            font-weight: bold;
+            cursor: pointer;
+        }
+        .close-button:hover,
+        .close-button:focus {
+            color: black;
+            text-decoration: none;
+            cursor: pointer;
+        }
+        /* Question Container Styling */
+        .question-container {
+            display: flex;
+            align-items: start;
+            gap: 10px;
+            margin-bottom: 10px;
+        }
+        /* Info Button Styling */
+        .info-button {
+            background-color: #f0f2f6;
+            border: 1px solid #4a6cf7;
+            border-radius: 50%;
+            width: 24px;
+            height: 24px;
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            cursor: pointer;
+            font-weight: bold;
+            color: #4a6cf7;
+            flex-shrink: 0;
+            font-size: 14px;
+        }
+        .info-button:hover {
+            background-color: #4a6cf7;
+            color: white;
+        }
+        /* Modal Styling */
+        .modal {
+            display: none;
+            position: fixed;
+            z-index: 9999;
+            left: 0;
+            top: 0;
+            width: 100%;
+            height: 100%;
+            background-color: rgba(0,0,0,0.4);
+        }
+        .modal-content {
+            background-color: #fefefe;
+            margin: 15% auto;
+            padding: 20px;
+            border: 1px solid #888;
+            width: 80%;
+            max-width: 500px;
+            border-radius: 10px;
+            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+            position: relative;
+        }
+        .close-button {
+            position: absolute;
+            right: 10px;
+            top: 5px;
+            color: #aaa;
+            font-size: 28px;
+            font-weight: bold;
+            cursor: pointer;
+        }
+        .close-button:hover,
+        .close-button:focus {
+            color: black;
+            text-decoration: none;
+            cursor: pointer;
+        }
+    </style>
+    """, unsafe_allow_html=True)
+    tab1, tab2 = st.tabs(["Question Generator", "Paper Scorer"])
+    if 'totalscore' not in st.session_state:
+        st.session_state.totalscore = None
+    if 'show_details' not in st.session_state:
+        st.session_state.show_details = False
+    if 'question_scores' not in st.session_state:
+        st.session_state.question_scores = {}
+    # Question Generator Tab
+    with tab1:
+        st.markdown("<h1 style='font-size: 28px;'>🎓 Academic Paper Question Generator</h1>", unsafe_allow_html=True)
+        st.markdown("Generate insightful questions from academic papers using Bloom's Taxonomy")
+        # Initialize session state variables with defaults
+        if 'pdf_source_type' not in st.session_state:
+            st.session_state.pdf_source_type = "URL"
+        if 'pdf_url' not in st.session_state:
+            st.session_state.pdf_url = ""
+        if 'uploaded_file' not in st.session_state:
+            st.session_state.uploaded_file = None
+        if 'questions' not in st.session_state:
+            st.session_state.questions = []
+        if 'accepted_questions' not in st.session_state:
+            st.session_state.accepted_questions = []
+        # API Configuration
+        api_key = os.getenv('GEMINI_API_KEY')
+        # Main form for PDF and question generation
+        with st.form(key='pdf_generation_form'):
+            st.subheader("PDF Source")
+            st.session_state.pdf_url = st.text_input(
+                    "Enter the URL of the PDF",
+                    value=st.session_state.pdf_url,
+                    key="pdf_url_input"
+                )
+            st.markdown("<h4 style='text-align: center;'>OR</h4>", unsafe_allow_html=True)
+            st.session_state.uploaded_file = st.file_uploader(
+                    "Upload a PDF file",
+                    type=['pdf'],
+                    key="pdf_file_upload"
+            )
+            st.session_state.user_input=st.text_area("Enter your query here", key="input", height=100)
+            # Question Length Selection
+            question_length = st.select_slider(
+                "Select Question Length",
+                options=["Short", "Medium", "Long"],
+                value="Medium",
+                help="Short: 10-15 words, Medium: 20-25 words, Long: 30-40 words"
+            )
+            st.session_state.include_numericals = st.checkbox("Include Numericals", key="include_numericals_checkbox")
+            # Bloom's Taxonomy Weights
+            st.subheader("Adjust Bloom's Taxonomy Weights")
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                knowledge = st.slider("Knowledge: Remembering", 0, 100, 20, key='knowledge_slider')
+                application = st.slider("Applying: Using abstractions in concrete situations", 0, 100, 20, key='application_slider')
+            with col2:
+                comprehension = st.slider("Understanding: Explaining the meaning of information", 0, 100, 20, key='comprehension_slider')
+                analysis = st.slider("Analyzing: Breaking down a whole into component parts", 0, 100, 20, key='analysis_slider')
+            with col3:
+                synthesis = st.slider("Creating: Putting parts together to form a new and integrated whole", 0, 100, 10, key='synthesis_slider')
+                evaluation = st.slider("Evaluation: Making and defending judgments based on internal evidence or external criteria", 0, 100, 10, key='evaluation_slider')
+            # Collect the Bloom's Taxonomy weights
+            bloom_taxonomy_weights = {
+                "Knowledge": knowledge,
+                "Comprehension": comprehension,
+                "Application": application,
+                "Analysis": analysis,
+                "Synthesis": synthesis,
+                "Evaluation": evaluation
+            }
+            # Number of questions
+            num_questions = st.slider("How many questions would you like to generate?", min_value=1, max_value=20, value=5, key='num_questions_slider')
+            # Submit button within the form
+            submit_button = st.form_submit_button(label='Generate Questions')
+        # Process form submission
+        if submit_button:
+            # Validate API key
+            if not api_key:
+                st.error("Please enter a valid Gemini API key.")
+            # Validate PDF source
+            elif not st.session_state.pdf_url and not st.session_state.uploaded_file:
+                st.error("Please enter a PDF URL or upload a PDF file.")
+            else:
+                # Normalize the Bloom's weights
+                normalized_bloom_weights = normalize_bloom_weights(bloom_taxonomy_weights)
+                st.info("Normalized Bloom's Taxonomy Weights:")
+                st.json(normalized_bloom_weights)
+                # Role and instructions for the AI
+                role_description = "You are a question-generating AI agent, given context and instruction, you need to generate questions from the context."
+                response_instructions = "Please generate questions that are clear and relevant to the content of the paper. Generate questions which are separated by new lines, without any numbering or additional context."
+                # Generate questions
+                with st.spinner('Generating questions...'):
+                    st.session_state.questions = process_pdf_and_generate_questions(
+                        pdf_source=st.session_state.pdf_url if st.session_state.pdf_url else None,
+                        uploaded_file=st.session_state.uploaded_file if st.session_state.uploaded_file else None,
+                        api_key=api_key,
+                        role_description=role_description,
+                        response_instructions=response_instructions,
+                        bloom_taxonomy_weights=normalized_bloom_weights,
+                        num_questions=num_questions,
+                        question_length=question_length,
+                        include_numericals=st.session_state.include_numericals,
+                        user_input=st.session_state.user_input
+                    )
+        if st.session_state.questions:
+            st.header("Generated Questions")
+            # Create a form for question management to prevent reload
+            with st.form(key='questions_form'):
+                for idx, question in enumerate(st.session_state.questions, 1):
+                    cols = st.columns([4, 1])  # Create two columns
+                    with cols[0]:
+                        # Display the question
+                        st.write(f"Q{idx}: {question}")
+                        # Add info button using Streamlit's expander
+                        with st.expander("Show Bloom's Taxonomy Details"):
+                            taxonomy_details = get_bloom_taxonomy_details(st.session_state.question_scores.get(question))
+                            st.text(taxonomy_details)
+                    # Use radio buttons for selection
+                    with cols[1]:
+                        selected_option = st.radio(
+                            f"Select an option for Q{idx}",
+                            ["Accept", "Discard"],
+                            key=f"radio_{idx}",
+                            index=1
+                        )
+                    # Handle radio button state changes
+                    if selected_option == "Accept":
+                        if question not in st.session_state.accepted_questions:
+                            st.session_state.accepted_questions.append(question)
+                    else:
+                        if question in st.session_state.accepted_questions:
+                            st.session_state.accepted_questions.remove(question)
+                # Submit button for question selection
+                submit_questions = st.form_submit_button("Update Accepted Questions")
+            # Show accepted questions
+            if st.session_state.accepted_questions:
+                st.header("Accepted Questions")
+                for q in st.session_state.accepted_questions:
+                    st.write(q)
+                # Download button for accepted questions
+                if st.button("Download Accepted Questions as PDF"):
+                    filename = generate_pdf(st.session_state.accepted_questions, filename="accepted_questions.pdf")
+                    if filename:
+                        with open(filename, "rb") as pdf_file:
+                            st.download_button(
+                                label="Click to Download PDF",
+                                data=pdf_file,
+                                file_name="accepted_questions.pdf",
+                                mime="application/pdf"
+                            )
+                        st.success("PDF generated successfully!")
+            else:
+                st.info("No questions selected yet.")
+        # Add some footer information
+        st.markdown("---")
+        st.markdown("""
+        ### About this Tool
+        - Generate academic paper questions using Bloom's Taxonomy
+        - Customize question generation weights
+        - Select and refine generated questions
+        - Support for PDF via URL or local upload
+        """)
+    with tab2:
+        st.markdown("<h1 style='font-size: 28px;'>📄 Academic Paper Scorer</h1>", unsafe_allow_html=True)
+        st.markdown("Evaluate the Quality of Your Academic Paper")
+            # Create a styled container for the upload section
+        st.markdown("""
+        <style>
+        .upload-container {
+            background-color: #f0f2f6;
+            border-radius: 10px;
+            padding: 20px;
+            border: 2px dashed #4a6cf7;
+            text-align: center;
+        }
+        .score-breakdown {
+            background-color: #f8f9fa;
+            border-radius: 8px;
+            padding: 15px;
+            margin-bottom: 15px;
+        }
+        .score-header {
+            font-weight: bold;
+            color: #4a6cf7;
+            margin-bottom: 10px;
+        }
+        </style>
+        """, unsafe_allow_html=True)
+        with st.form(key='paper_scorer_form'):
+            st.header("Upload Your Academic Paper")
+            uploaded_file = st.file_uploader(
+                "Choose a PDF file",
+                type=['pdf','jpg','png','jpeg'],
+                label_visibility="collapsed"
+            )
+            st.markdown("<div style='text-align: center; margin-top: 20px;'><strong>OR</strong></div>", unsafe_allow_html=True)
+            if 'question_typed' not in st.session_state:
+                st.session_state.question_typed = ""
+            st.text_area("Paste your question here", value=st.session_state.question_typed, key="question_typed")
+            question_typed = st.session_state.question_typed
+            submit_button = st.form_submit_button(
+                "Score Paper",
+                use_container_width=True,
+                type="primary"
+            )
+        if submit_button:
+            # Calculate total score
+            pdf_path = save_uploaded_file(uploaded_file)
+            dummydata = sendtogemini(inputpath=pdf_path, question=st.session_state.question_typed)
+            #print(dummydata)
+            total_score = {'Remembering': 0, 'Understanding': 0, 'Applying': 0, 'Analyzing': 0, 'Evaluating': 0, 'Creating': 0}
+            for item in dummydata:
+                for category in total_score:
+                    total_score[category] += item['score'][category]
+            # average_score = total_score / (len(dummydata) * 6 * 10) * 100
+            # Score display columns
+            categories = ['Remembering', 'Understanding', 'Applying', 'Analyzing', 'Evaluating', 'Creating']
+            # Create 6 columns in a single row
+            cols = st.columns(6)
+            # Iterate through categories and populate columns
+            for i, category in enumerate(categories):
+                with cols[i]:
+                    score = round(total_score[category] / (len(dummydata) ),ndigits=3)
+                    color = 'green' if score > .7 else 'orange' if score > .4 else 'red'
+                    st.markdown(f"""
+                    <div class="score-breakdown">
+                        <div class="score-header" style="color: {color}">{category}</div>
+                        <div style="font-size: 24px; color: {color};">{score}/{len(dummydata)}</div>
+                    </div>
+                    """, unsafe_allow_html=True)
+            with st.expander("Show Detailed Scores", expanded=True):
+                for idx, item in enumerate(dummydata, 1):
+                    # Question header
+                    st.markdown(f'<div class="score-header">Question {idx}: {item["question"]}</div>', unsafe_allow_html=True)
+                    # Create columns for score display
+                    score_cols = st.columns(6)
+                    # Scoring categories
+                    categories = ['Remembering', 'Understanding', 'Applying', 'Analyzing', 'Evaluating', 'Creating']
+                    for col, category in zip(score_cols, categories):
+                        with col:
+                            # Determine color based on score
+                            score = round(item['score'][category],ndigits=3)
+                            color = 'green' if score > .7 else 'orange' if score > .3 else 'red'
+                            st.markdown(f"""
+                            <div style="text-align: center;
+                                        background-color: #f1f1f1;
+                                        border-radius: 5px;
+                                        padding: 5px;
+                                        margin-bottom: 5px;">
+                                <div style="font-weight: bold; color: {color};">{category}</div>
+                                <div style="font-size: 18px; color: {color};">{score}/1</div>
+                            </div>
+                            """, unsafe_allow_html=True)
+                    st.markdown('</div>', unsafe_allow_html=True)
+                    # Add a separator between questions
+                    if idx < len(dummydata):
+                        st.markdown('---')
+# Run Streamlit app
+if __name__ == "__main__":
+    main()