added full code
app.py
CHANGED
@@ -8,9 +8,22 @@ import tempfile
 import base64
 import dotenv
 from dotenv import load_dotenv
+import torch
+from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
+from torch.nn.functional import softmax
+from doctr.models import ocr_predictor
+from doctr.io import DocumentFile
 
 load_dotenv()
 
+model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')
+tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+mapping = {"Remembering": 0, "Understanding": 1, "Applying": 2, "Analyzing": 3, "Evaluating": 4, "Creating": 5}
+reverse_mapping = {v: k for k, v in mapping.items()}
+modelocr = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
+
 # Previous functions from Question Generator
 def get_pdf_path(pdf_source=None, uploaded_file=None):
     try:
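For reference, the new module-level setup loads a fine-tuned DistilBERT classifier whose six labels follow Bloom's taxonomy (Remembering through Creating). A minimal sketch, outside the diff, of exercising that classifier directly; it assumes the ./fine_tuned_distilbert directory exists and was trained with the same label order as the mapping above:

# Sanity-check the fine-tuned classifier (sketch; assumes ./fine_tuned_distilbert).
import torch
from torch.nn.functional import softmax
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer

model = DistilBertForSequenceClassification.from_pretrained('./fine_tuned_distilbert')
tokenizer = DistilBertTokenizer.from_pretrained('./fine_tuned_distilbert')
labels = ["Remembering", "Understanding", "Applying", "Analyzing", "Evaluating", "Creating"]

model.eval()
inputs = tokenizer("What are the key findings of the paper?",
                   return_tensors='pt', padding=True, truncation=True, max_length=512)
with torch.no_grad():
    probs = softmax(model(**inputs).logits, dim=-1).squeeze()
print({label: round(float(p), 3) for label, p in zip(labels, probs)})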
@@ -183,63 +196,23 @@ def process_pdf_and_generate_questions(pdf_source, uploaded_file, api_key, role_
         st.error(f"Error processing PDF and generating questions: {e}")
         return []
 
-dummydata = [
-    {"question": "What is the main idea of the paper?", "score": {
-        "Knowledge": 10,
-        "Comprehension": 9,
-        "Application": 8,
-        "Analysis": 7,
-        "Synthesis": 6,
-        "Evaluation": 5
-    }},
-    {"question": "What are the key findings of the paper?", "score": {
-        "Knowledge": 9,
-        "Comprehension": 8,
-        "Application": 7,
-        "Analysis": 6,
-        "Synthesis": 5,
-        "Evaluation": 4
-    }},
-    {"question": "How does the paper contribute to the field?", "score": {
-        "Knowledge": 8,
-        "Comprehension": 7,
-        "Application": 6,
-        "Analysis": 5,
-        "Synthesis": 4,
-        "Evaluation": 3
-    }},
-    {"question": "What are the limitations of the paper?", "score": {
-        "Knowledge": 7,
-        "Comprehension": 6,
-        "Application": 5,
-        "Analysis": 4,
-        "Synthesis": 3,
-        "Evaluation": 2
-    }},
-    {"question": "What are the future research directions?", "score": {
-        "Knowledge": 6,
-        "Comprehension": 5,
-        "Application": 4,
-        "Analysis": 3,
-        "Synthesis": 2,
-        "Evaluation": 1
-    }},
-    {"question": "How does the paper compare to existing work?", "score": {
-        "Knowledge": 5,
-        "Comprehension": 4,
-        "Application": 3,
-        "Analysis": 2,
-        "Synthesis": 1,
-        "Evaluation": 0
-        }
-    }
-
-]
-
 def main():
     st.set_page_config(page_title="Academic Paper Tool", page_icon="π", layout="wide")
-
     # Tabs for different functionalities
+    st.markdown("""
+        <style>
+        .stTabs [data-baseweb="tab"] {
+            margin-bottom: 1rem;
+            flex: 1;
+            justify-content: center;
+        }
+        .stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
+            font-size:2rem;
+            padding: 0 2rem;
+            margin: 0;
+        }
+        </style>
+    """, unsafe_allow_html=True)
     tab1, tab2 = st.tabs(["Question Generator", "Paper Scorer"])
 
     if 'totalscore' not in st.session_state:
@@ -275,6 +248,7 @@ def main():
 
         st.session_state.pdf_url = st.text_input(
             "Enter the URL of the PDF",
+            value=st.session_state.pdf_url,
            key="pdf_url_input"
        )
 
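The added value=st.session_state.pdf_url pre-seeds the field from session state on each rerun. A small sketch of the same pattern in a hypothetical standalone app:

# Pre-seeded text input (sketch; standalone example, not from the diff).
import streamlit as st

if 'pdf_url' not in st.session_state:
    st.session_state.pdf_url = ""

# The widget redisplays the remembered value on every rerun; assigning the
# return value keeps st.session_state.pdf_url in sync with user edits.
st.session_state.pdf_url = st.text_input(
    "Enter the URL of the PDF",
    value=st.session_state.pdf_url,
    key="pdf_url_input"
)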
@@ -445,7 +419,6 @@ def main():
                 type=['pdf','jpg','png','jpeg'],
                 label_visibility="collapsed"
             )
-
             # Custom submit button with some styling
             submit_button = st.form_submit_button(
                 "Score Paper",
@@ -455,17 +428,33 @@
 
             if submit_button:
                 # Calculate total score
-
-
-
-
-
+                print(uploaded_file.name)
+                dummydata = sendtogemini(uploaded_file.name)
+                print(dummydata)
+                total_score = {'Remembering': 0, 'Understanding': 0, 'Applying': 0, 'Analyzing': 0, 'Evaluating': 0, 'Creating': 0}
+                for item in dummydata:
+                    for category in total_score:
+                        total_score[category] += item['score'][category]
+
+                # average_score = total_score / (len(dummydata) * 6 * 10) * 100
 
                 # Score display columns
-
-
-
-
+                categories = ['Remembering', 'Understanding', 'Applying', 'Analyzing', 'Evaluating', 'Creating']
+
+                # Create 6 columns in a single row
+                cols = st.columns(6)
+
+                # Iterate through categories and populate columns
+                for i, category in enumerate(categories):
+                    with cols[i]:
+                        score = round(total_score[category] / len(dummydata), ndigits=3)
+                        color = 'green' if score > .7 else 'orange' if score > .4 else 'red'
+                        st.markdown(f"""
+                            <div class="score-breakdown">
+                                <div class="score-header" style="color: {color}">{category}</div>
+                                <div style="font-size: 24px; color: {color};">{score}/1</div>
+                            </div>
+                        """, unsafe_allow_html=True)
 
                 with st.expander("Show Detailed Scores", expanded=True):
                     for idx, item in enumerate(dummydata, 1):
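Since predict_with_loaded_model (added further down) returns softmax probabilities, each per-category mean above stays in [0, 1]; that is why the color thresholds drop from 7/4 to .7/.4 and the display reads {score}/1. A small sketch of the aggregation with made-up probabilities:

# Per-category averaging over question scores (sketch; data is illustrative).
data = [
    {"score": {"Remembering": 0.62, "Understanding": 0.21, "Applying": 0.08,
               "Analyzing": 0.05, "Evaluating": 0.03, "Creating": 0.01}},
    {"score": {"Remembering": 0.10, "Understanding": 0.55, "Applying": 0.20,
               "Analyzing": 0.09, "Evaluating": 0.04, "Creating": 0.02}},
]
totals = {category: 0.0 for category in data[0]["score"]}
for item in data:
    for category in totals:
        totals[category] += item["score"][category]
averages = {category: round(total / len(data), 3) for category, total in totals.items()}
print(averages)  # {'Remembering': 0.36, 'Understanding': 0.38, 'Applying': 0.14, ...}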
@@ -477,13 +466,13 @@ def main():
                         score_cols = st.columns(6)
 
                         # Scoring categories
-                        categories = ['Knowledge', 'Comprehension', 'Application', 'Analysis', 'Synthesis', 'Evaluation']
+                        categories = ['Remembering', 'Understanding', 'Applying', 'Analyzing', 'Evaluating', 'Creating']
 
                         for col, category in zip(score_cols, categories):
                             with col:
                                 # Determine color based on score
-                                score = item['score'][category]
-                                color = 'green' if score > 7 else 'orange' if score > 4 else 'red'
+                                score = round(item['score'][category], ndigits=3)
+                                color = 'green' if score > .7 else 'orange' if score > .4 else 'red'
 
                                 st.markdown(f"""
                                     <div style="text-align: center;
@@ -492,7 +481,7 @@ def main():
                                         padding: 5px;
                                         margin-bottom: 5px;">
                                         <div style="font-weight: bold; color: {color};">{category}</div>
-                                        <div style="font-size: 18px; color: {color};">{score}/10</div>
+                                        <div style="font-size: 18px; color: {color};">{score}/1</div>
                                         </div>
                                 """, unsafe_allow_html=True)
 
@@ -501,12 +490,83 @@ def main():
                     # Add a separator between questions
                     if idx < len(dummydata):
                         st.markdown('---')
-
-
-
-
-
-
+
+def predict_with_loaded_model(text):
+    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
+    input_ids = inputs['input_ids'].to(device)
+    model.eval()
+    with torch.no_grad():
+        outputs = model(input_ids)
+    logits = outputs.logits
+    probabilities = softmax(logits, dim=-1)
+    probabilities = probabilities.squeeze().cpu().numpy()
+    # Convert to float and format to 3 decimal places
+    class_probabilities = {reverse_mapping[i]: float(f"{prob:.3f}") for i, prob in enumerate(probabilities)}
+    return class_probabilities
+
+# def process_document(input_path):
+#     return {'Avg_Confidence': 0.9397169561947093, 'String': ['What are the key differences between classification and regression tasks in', 'supervised learning, and how do you determine which algorithm to use for a', 'specific problem?', 'e How does clustering differ from dimensionality reduction, and can you', 'provide real-world examples of where each is applied?', 'What are common evaluation metrics for classification models, and how do', 'precision, recall, and F1-score relate to each other?', 'How do convolutional neural networks (CNNS) and recurrent neural networks', '(RNNS) differ in their architecture and applications?', 'What steps can be taken to identify and mitigate bias in machine learning', 'models, and why is this an important consideration?']}
+
+def process_document(input_path):
+    if input_path.lower().endswith(".pdf"):
+        doc = DocumentFile.from_pdf(input_path)
+        # print(f"Number of pages: {len(doc)}")
+    elif input_path.lower().endswith((".jpg", ".jpeg", ".png", ".bmp", ".tiff")):
+        doc = DocumentFile.from_images(input_path)
+    else:
+        raise ValueError("Unsupported file type. Please provide a PDF or an image file.")
+    result = modelocr(doc)
+    def calculate_average_confidence(result):
+        total_confidence = 0
+        word_count = 0
+        for page in result.pages:
+            for block in page.blocks:
+                for line in block.lines:
+                    for word in line.words:
+                        total_confidence += word.confidence
+                        word_count += 1
+        average_confidence = total_confidence / word_count if word_count > 0 else 0
+        return average_confidence
+    average_confidence = calculate_average_confidence(result)
+    string_result = result.render()
+    return {'Avg_Confidence': average_confidence, 'String': string_result.split('\n')}
+
+def sendtogemini(inputpath):
+    qw = process_document(inputpath)
+    questionset = str(qw['String'])
+    # send this prompt to gemini:
+    questionset += """You are given a list of text fragments containing question fragments extracted by an OCR model. Your task is to:
+    # only Merge the question fragments into complete and coherent questions. Don't answer them.
+    # Separate each question, start a new question with @ to make them easily distinguishable for further processing."""
+    url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?key={os.getenv('GEMINI_API_KEY')}"
+
+    payload = {
+        "contents": [
+            {
+                "parts": [
+                    {"text": questionset}
+                ]
+            }
+        ]
+    }
+    headers = {"Content-Type": "application/json"}
+
+    response = requests.post(url, headers=headers, data=json.dumps(payload), timeout=60)
+    result = response.json()
+    res1 = result.get("candidates", [{}])[0].get("content", {}).get("parts", [{}])[0].get("text", "")
+    question = []
+    for i in res1.split('\n'):
+        i = i.strip()
+        if len(i) > 0:
+            if i[0] == '@':
+                question.append(i[1:].strip())
+    data = []
+    for i in question:
+        d = {}
+        d['question'] = i
+        d['score'] = predict_with_loaded_model(i)
+        data.append(d)
+    return data
 
 # Run Streamlit app
 if __name__ == "__main__":
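The OCR half of the new pipeline can be tried in isolation. A sketch mirroring process_document above; 'sample.pdf' is a hypothetical input, and the detector/recognizer pair matches the diff:

# Average word-level OCR confidence with docTR (sketch; 'sample.pdf' is hypothetical).
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

predictor = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True)
result = predictor(DocumentFile.from_pdf('sample.pdf'))

# Flatten the page/block/line hierarchy to word level, as the diff does.
words = [word for page in result.pages for block in page.blocks
         for line in block.lines for word in line.words]
avg_confidence = sum(word.confidence for word in words) / len(words) if words else 0.0
print(f"Avg_Confidence: {avg_confidence:.3f}")
print(result.render().split('\n')[:5])  # first few recognized lines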