Harshal Vhatkar committed on
Commit cca73d9 · 1 Parent(s): 1586102

add course creation and other new features

Files changed (5)
  1. create_course.py +272 -0
  2. file_upload_vectorize.py +2 -2
  3. main.py +334 -58
  4. pre_class_analytics.py +850 -0
  5. session_page.py +257 -16
create_course.py ADDED
@@ -0,0 +1,272 @@
+from datetime import datetime, timedelta
+import os
+from typing import Dict, List, Any
+from pymongo import MongoClient
+import requests
+import uuid
+import openai
+from openai import OpenAI
+import streamlit as st
+from bson import ObjectId
+from dotenv import load_dotenv
+import json
+
+load_dotenv()
+MONGODB_URI = os.getenv("MONGO_URI")
+PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
+OPENAI_API_KEY = os.getenv("OPENAI_KEY")
+
+client = MongoClient(MONGODB_URI)
+db = client['novascholar_db']
+courses_collection = db['courses']
+
+def generate_perplexity_response(api_key, course_name):
+    headers = {
+        "accept": "application/json",
+        "content-type": "application/json",
+        "authorization": f"Bearer {api_key}"
+    }
+
+    prompt = f"""
+    You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate comprehensive, academically rigorous course structures for undergraduate-level education.
+
+    Please generate a detailed course structure for the course {course_name} in JSON format following these specifications:
+
+    1. The course structure should be appropriate for a full semester (14-16 weeks)
+    2. Each module should be designed for 2-4 weeks of instruction
+    3. Follow standard academic practices and nomenclature
+    4. Ensure progressive complexity from foundational to advanced concepts
+    5. The course_title should exactly match the course name provided in the prompt. No additional information should be included in the course_title field.
+    6. Ensure that property names are enclosed in double quotes (") and followed by a colon (:), and that values are enclosed in double quotes (").
+    7. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
+
+    The JSON response should follow this structure:
+    {{
+        "course_title": "string",
+        "course_description": "string",
+        "modules": [
+            {{
+                "module_title": "string",
+                "sub_modules": [
+                    {{
+                        "title": "string",
+                        "topics": ["string"]
+                    }}
+                ]
+            }}
+        ]
+    }}
+
+    Example response:
+    {{
+        "course_title": "Advanced Natural Language Processing",
+        "course_description": "An advanced course covering modern approaches to NLP using deep learning, with focus on transformer architectures and their applications.",
+        "modules": [
+            {{
+                "module_title": "Foundations of Modern NLP",
+                "sub_modules": [
+                    {{
+                        "title": "Attention Mechanism",
+                        "topics": [
+                            "Self-attention",
+                            "Multi-head attention",
+                            "Positional encoding"
+                        ]
+                    }}
+                ]
+            }}
+        ]
+    }}
+    """
+
+    messages = [
+        {
+            "role": "system",
+            "content": (
+                "You are an expert educational AI assistant specializing in course design and curriculum planning. "
+                "Your task is to generate accurate, detailed, and structured educational content for undergraduate-level and post-graduate-level courses. "
+                "Provide detailed and accurate information tailored to the user's prompt. "
+                "Ensure that the responses are logical, follow standard academic practices, and include realistic concepts relevant to the course."
+            ),
+        },
+        {
+            "role": "user",
+            "content": prompt
+        },
+    ]
+    try:
+        client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
+        response = client.chat.completions.create(
+            model="llama-3.1-sonar-small-128k-online",
+            messages=messages
+        )
+        content = response.choices[0].message.content
+        return content
+    except Exception as e:
+        st.error(f"Failed to fetch data from Perplexity API: {e}")
+        return ""
+
+def get_new_course_id():
+    """Generate a new course ID by incrementing the last course ID"""
+    last_course = courses_collection.find_one(sort=[("course_id", -1)])
+    if last_course:
+        last_course_id = int(last_course["course_id"][2:])
+        new_course_id = f"CS{last_course_id + 1}"
+    else:
+        new_course_id = "CS101"
+    return new_course_id
+
+
+def create_course(course_name, start_date, duration_weeks):
+    # Generate course overview
+    # overview_prompt = f"""Generate an overview for the undergraduate course {course_name}
+    # Include all relevant concepts and key topics covered in a typical curriculum.
+    # The response should be concise (300-400 words). Ensure that your response is in a valid JSON format."""
+
+    # overview_prompt2 = f"""Generate an overview for the undergraduate course {course_name}.
+    # The overview should include:
+    # The course title, a detailed course description,
+    # a division of all relevant concepts and key topics into 4-6 logical modules,
+    # capturing the flow and structure of a typical curriculum.
+    # Ensure the response adheres to the following JSON format:
+    # {{
+    #     'overview': 'string',
+    #     'modules': [
+    #         {{
+    #             'name': 'string',
+    #             'description': 'string'
+    #         }}
+    #     ]
+    # }}
+    # overview: A detailed description of the course.
+    # modules: An array of 4-6 objects, each representing a logical module with a name and a brief description.
+    # **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}"""
+
+    # course_overview = generate_perplexity_response(PERPLEXITY_API_KEY, overview_prompt2)
+    # # print(course_overview)
+    # course_overview_store = course_overview
+    # # print(course_overview_store)
+    # # Generate modules
+    # # modules_prompt = f"Based on this overview: {course_overview}\nCreate 4-6 logical modules for the course, each module should group related concepts and each module may include reference books if applicable"
+    # sub_modules_prompt = f"""Using the provided modules in the overview {course_overview_store}, generate 2-3 submodules for each module.
+    # Each submodule should represent a cohesive subset of the module's topics, logically organized for teaching purposes.
+    # Ensure the response adheres to the following JSON format:
+    # {
+    #     'modules': [
+    #         {
+    #             'name': 'string',
+    #             'sub_modules': [
+    #                 {
+    #                     'name': 'string',
+    #                     'description': 'string'
+    #                 }
+    #             ]
+    #         }
+    #     ]
+    # }
+    # modules: An array where each object contains the name of the module and its corresponding sub_modules.
+    # sub_modules: An array of 2-3 objects for each module, each having a name and a brief description.
+    # **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}
+    # """
+    # sub_modules = generate_perplexity_response(PERPLEXITY_API_KEY, sub_modules_prompt)
+
+    # # modules_response = generate_perplexity_response(modules_prompt)
+    # print(sub_modules)
+
+    # total_sessions = duration_weeks * sessions_per_week
+
+    course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
+    course_plan_json = json.loads(course_plan)
+
+    # Generate sessions for each module
+    all_sessions = []
+    for module in course_plan_json['modules']:
+        for sub_module in module['sub_modules']:
+            for topic in sub_module['topics']:
+                session = create_session(
+                    title=topic,
+                    date=start_date,
+                    module_name=module['module_title']
+                )
+                # print(session)
+                all_sessions.append(session)
+                start_date += timedelta(days=7)  # Next session after a week
+
+    # sample_sessions = [
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def25'), 'title': 'What is Generative AI?', 'date': datetime(2024, 12, 22, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def26'), 'title': 'History and Evolution of AI', 'date': datetime(2024, 12, 29, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def27'), 'title': 'Types of Generative AI (e.g., GANs, VAEs, LLMs)', 'date': datetime(2025, 1, 5, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 505626), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def28'), 'title': 'Overview of popular GenAI tools (e.g., ChatGPT, Claude, Google Gemini)', 'date': datetime(2025, 1, 12, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 506559), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def29'), 'title': 'Frameworks for building GenAI models (e.g., TensorFlow, PyTorch)', 'date': datetime(2025, 1, 19, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 506559), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def2a'), 'title': 'Integration with other AI technologies', 'date': datetime(2025, 1, 26, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 507612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def2b'), 'title': 'Text-to-text models (e.g., GPT-3, BERT)', 'date': datetime(2025, 2, 2, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 508512), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def2c'), 'title': 'Text generation for content creation and marketing', 'date': datetime(2025, 2, 9, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 508512), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def2d'), 'title': 'Chatbots and conversational interfaces', 'date': datetime(2025, 2, 16, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 509612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def2e'), 'title': 'Generative Adversarial Networks (GANs)', 'date': datetime(2025, 2, 23, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 509612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def2f'), 'title': 'Variational Autoencoders (VAEs)', 'date': datetime(2025, 3, 2, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 510612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def30'), 'title': 'Applications in art, design, and media', 'date': datetime(2025, 3, 9, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 511497), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def31'), 'title': 'Understanding prompt design principles', 'date': datetime(2025, 3, 16, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 511497), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def33'), 'title': 'Advanced techniques for fine-tuning models', 'date': datetime(2025, 3, 30, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 512514), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def34'), 'title': 'Ethical implications of AI-generated content', 'date': datetime(2025, 4, 6, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 513613), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def35'), 'title': 'Addressing bias in AI models', 'date': datetime(2025, 4, 13, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 514639), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def36'), 'title': 'Regulatory frameworks and guidelines', 'date': datetime(2025, 4, 20, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 514639), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def37'), 'title': 'Case studies from various industries (e.g., marketing, healthcare, finance)', 'date': datetime(2025, 4, 27, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 515610), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def38'), 'title': 'Success stories and challenges faced by companies using GenAI', 'date': datetime(2025, 5, 4, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 515610), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def39'), 'title': 'Guidelines for developing a GenAI project', 'date': datetime(2025, 5, 11, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 516614), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def3a'), 'title': 'Tools and resources for project implementation', 'date': datetime(2025, 5, 18, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 516614), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def3b'), 'title': 'Best practices for testing and deployment', 'date': datetime(2025, 5, 25, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 517563), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}}
+    # ]
+
+    # small_sample_sessions = [
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def25'), 'title': 'What is Generative AI?', 'date': datetime(2024, 12, 22, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    #     {'session_id': ObjectId('6767d0bbad8316ac358def26'), 'title': 'History and Evolution of AI', 'date': datetime(2024, 12, 29, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
+    # ]
+
+    # print(all_sessions)
+
+    print("Number of sessions:", len(all_sessions))
+    # Create course document (insertion currently disabled)
+    # course_description = course_plan_json['course_description']
+    # course_doc = {
+    #     "course_id": get_new_course_id(),
+    #     "title": course_name,
+    #     "description": course_description,
+    #     "faculty": faculty_name,
+    #     "faculty_id": faculty_id,
+    #     "duration": f"{duration_weeks} weeks",
+    #     "created_at": datetime.utcnow(),
+    #     "sessions": all_sessions
+    # }
+    # try:
+    #     courses_collection.insert_one(course_doc)
+    # except Exception as e:
+    #     st.error(f"Failed to insert course data into the database: {e}")
+
+    # print(course_plan)
+
+def create_session(title: str, date: datetime, module_name: str):
+    """Create a session document with pre-class, in-class, and post-class components."""
+    return {
+        "session_id": ObjectId(),
+        "title": title,
+        "date": date,
+        "status": "upcoming",
+        "module_name": module_name,  # record the parent module (the parameter was previously accepted but unused)
+        "created_at": datetime.utcnow(),
+        "pre_class": {
+            "resources": [],
+            "completion_required": True
+        },
+        "in_class": {
+            "quiz": [],
+            "polls": []
+        },
+        "post_class": {
+            "assignments": []
+        }
+    }
+
+# Usage example:
+if __name__ == "__main__":
+    create_course("Introduction to Data Analytics", datetime.now(), 2)
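
Review note: `create_course` hands the raw Perplexity reply straight to `json.loads`, so a stray Markdown fence or preamble in the model output raises `json.JSONDecodeError` despite the prompt's formatting rules. A minimal defensive parser might look like the sketch below; `parse_course_plan` is a hypothetical helper, not part of this commit.

```python
import json
import re

def parse_course_plan(raw: str) -> dict:
    """Best-effort extraction of the JSON course plan from a model reply."""
    text = raw.strip()
    # Unwrap a Markdown code fence if the model added one anyway
    fenced = re.match(r"^```(?:json)?\s*(.*?)\s*```$", text, re.DOTALL)
    if fenced:
        text = fenced.group(1)
    # Fall back to the outermost brace pair in case extra prose slipped in
    start, end = text.find("{"), text.rfind("}")
    if start == -1 or end == -1:
        raise ValueError("no JSON object found in model response")
    return json.loads(text[start:end + 1])
```

Calling `parse_course_plan(generate_perplexity_response(...))` would keep the happy path identical while tolerating fenced replies.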
file_upload_vectorize.py CHANGED
@@ -124,12 +124,12 @@ def get_embedding(text):
     return response.data[0].embedding
 
 def create_vector_store(text, resource_id):
-    resource_object_id = ObjectId(resource_id)
+    # resource_object_id = ObjectId(resource_id)
     document = Document(text=text)
     embedding = get_embedding(text)
 
     vector_data = {
-        "resource_id": resource_object_id,
+        "resource_id": resource_id,
         "vector": embedding,
         "text": text,
         "created_at": datetime.utcnow()
main.py CHANGED
@@ -1,5 +1,5 @@
 import streamlit as st
-from datetime import datetime, date, time
+from datetime import datetime, date, time, timedelta
 from pathlib import Path
 from utils.sample_data import SAMPLE_COURSES, SAMPLE_SESSIONS
 from session_page import display_session_content
@@ -14,9 +14,14 @@ from werkzeug.security import generate_password_hash, check_password_hash
 import os
 from openai import OpenAI
 from dotenv import load_dotenv
-
+from create_course import create_course, courses_collection, generate_perplexity_response, PERPLEXITY_API_KEY
+import json
+from bson import ObjectId
 client = OpenAI(api_key=os.getenv("OPENAI_KEY"))
+from dotenv import load_dotenv
 
+load_dotenv()
+# PERPLEXITY_API_KEY = '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'
 
 def get_research_papers(query):
     """Get research paper recommendations based on query"""
@@ -74,7 +79,12 @@ def init_session_state():
         st.session_state.selected_course = None
     if "show_create_course_form" not in st.session_state:
         st.session_state.show_create_course_form = False
-
+    if "show_create_session_form" not in st.session_state:
+        st.session_state.show_create_session_form = False
+    if "show_enroll_course_page" not in st.session_state:
+        st.session_state.show_enroll_course_page = False
+    if "course_to_enroll" not in st.session_state:
+        st.session_state.course_to_enroll = None
 
 def login_user(username, password, user_type):
     """Login user based on credentials"""
@@ -127,7 +137,18 @@ def get_courses(username, user_type):
         courses = courses_collection2.find(
             {"course_id": {"$in": enrolled_course_ids}}
         )
-        # course_titles = [course['title'] for course in courses]
+        # courses += courses_collection2.find(
+        #     {"course_id": {"$in": enrolled_course_ids}}
+        # )
+        # # course_titles = [course['title'] for course in courses]
+        # return list(courses)
+        # courses_cursor1 = courses_collection.find(
+        #     {"course_id": {"$in": enrolled_course_ids}}
+        # )
+        # courses_cursor2 = courses_collection2.find(
+        #     {"course_id": {"$in": enrolled_course_ids}}
+        # )
+        # courses = list(courses_cursor1) + list(courses_cursor2)
         return list(courses)
     elif user_type == "faculty":
         faculty = faculty_collection.find_one({"full_name": username})
@@ -497,63 +518,181 @@ def register_page():
 
 # Create Course feature
 def create_course_form(faculty_name, faculty_id):
-    """Display form to create a new course"""
+    """Display enhanced form to create a new course with AI-generated content"""
     st.title("Create New Course")
-    faculty = faculty_collection.find_one({"_id": faculty_id})
-    if not faculty:
-        st.error("Faculty not found")
-        return
-    faculty_str_id = faculty["TID"]
-
-    with st.form("create_course_form"):
-        course_title = st.text_input("Course Title")
-        course_description = st.text_area("Course Description")
-        start_date = st.date_input("Start Date")
-        end_date = st.date_input("End Date")
-        duration = -(
-            -((end_date - start_date).days) // 7
-        )  # Ceiling division to round up to the next week
-
-        if st.form_submit_button("Create Course"):
-            new_course_id = get_new_course_id()
-            course = {
-                "course_id": new_course_id,
-                "title": course_title,
-                "description": course_description,
-                "faculty": faculty_name,
-                "faculty_id": faculty_str_id,
-                # "start_date": start_date.isoformat(),
-                # "end_date": end_date.isoformat(),
-                "start_date": datetime.combine(
-                    start_date, datetime.min.time()
-                ),  # Store as datetime
-                "end_date": datetime.combine(
-                    end_date, datetime.min.time()
-                ),  # Store as datetime
-                "duration": f"{duration} weeks",
-                "created_at": datetime.utcnow(),
-                "sessions": [],
-            }
-
-            # Insert course into courses collection
-            courses_collection2.insert_one(course)
-
-            # Update faculty's courses_taught array
-            faculty_collection.update_one(
-                {"_id": st.session_state.user_id},
-                {
-                    "$push": {
-                        "courses_taught": {
-                            "course_id": new_course_id,
-                            "title": course_title,
-                        }
-                    }
-                },
-            )
-
-    st.success(f"Course created successfully with ID: {new_course_id}")
-    st.session_state.show_create_course_form = False
-    st.rerun()
+
+    if 'course_plan' not in st.session_state:
+        st.session_state.course_plan = None
+    if 'edit_mode' not in st.session_state:
+        st.session_state.edit_mode = False
+
+    # Initial Course Creation Form
+    if not st.session_state.course_plan:
+        with st.form("initial_course_form"):
+            col1, col2 = st.columns(2)
+            with col1:
+                course_name = st.text_input("Course Name", placeholder="e.g., Introduction to Computer Science")
+                faculty_info = st.text_input("Faculty", value=faculty_name, disabled=True)
+            with col2:
+                duration_weeks = st.number_input("Duration (weeks)", min_value=1, max_value=16, value=12)
+                start_date = st.date_input("Start Date")
+
+            generate_button = st.form_submit_button("Generate Course Structure", use_container_width=True)
+
+            if generate_button and course_name:
+                with st.spinner("Generating course structure..."):
+                    try:
+                        course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
+                        # print(course_plan)
+                        st.session_state.course_plan = json.loads(course_plan)
+                        st.session_state.start_date = start_date
+                        st.session_state.duration_weeks = duration_weeks
+                        st.rerun()
+                    except Exception as e:
+                        st.error(f"Error generating course structure: {e}")
+
+    # Display and Edit Generated Course Content
+    if st.session_state.course_plan:
+        with st.expander("Course Overview", expanded=True):
+            if not st.session_state.edit_mode:
+                st.subheader(st.session_state.course_plan['course_title'])
+                st.write(st.session_state.course_plan['course_description'])
+                edit_button = st.button("Edit Course Details", use_container_width=True)
+                if edit_button:
+                    st.session_state.edit_mode = True
+                    st.rerun()
+            else:
+                with st.form("edit_course_details"):
+                    st.session_state.course_plan['course_title'] = st.text_input(
+                        "Course Title",
+                        value=st.session_state.course_plan['course_title']
+                    )
+                    st.session_state.course_plan['course_description'] = st.text_area(
+                        "Course Description",
+                        value=st.session_state.course_plan['course_description']
+                    )
+                    if st.form_submit_button("Save Course Details"):
+                        st.session_state.edit_mode = False
+                        st.rerun()
+
+        # Display Modules and Sessions
+        st.subheader("Course Modules and Sessions")
+
+        start_date = st.session_state.start_date
+        current_date = start_date
+
+        all_sessions = []
+        for module_idx, module in enumerate(st.session_state.course_plan['modules']):
+            with st.expander(f"📚 Module {module_idx + 1}: {module['module_title']}", expanded=True):
+                # Edit module title
+                new_module_title = st.text_input(
+                    f"Module {module_idx + 1} Title",
+                    value=module['module_title'],
+                    key=f"module_{module_idx}"
+                )
+                module['module_title'] = new_module_title
+
+                for sub_idx, sub_module in enumerate(module['sub_modules']):
+                    st.markdown(f"### 📖 {sub_module['title']}")
+
+                    # Create sessions for each topic
+                    for topic_idx, topic in enumerate(sub_module['topics']):
+                        session_key = f"session_{module_idx}_{sub_idx}_{topic_idx}"
+
+                        with st.container():
+                            col1, col2, col3 = st.columns([3, 2, 1])
+                            with col1:
+                                new_topic = st.text_input(
+                                    "Topic",
+                                    value=topic,
+                                    key=f"{session_key}_topic"
+                                )
+                                sub_module['topics'][topic_idx] = new_topic
+
+                            with col2:
+                                session_date = st.date_input(
+                                    "Session Date",
+                                    value=current_date,
+                                    key=f"{session_key}_date"
+                                )
+
+                            with col3:
+                                session_status = st.selectbox(
+                                    "Status",
+                                    options=["upcoming", "in-progress", "completed"],
+                                    key=f"{session_key}_status"
+                                )
+
+                            # Create session object
+                            session = {
+                                "session_id": str(ObjectId()),
+                                "title": new_topic,
+                                "date": datetime.combine(session_date, datetime.min.time()),
+                                "status": session_status,
+                                "module_name": module['module_title'],
+                                "created_at": datetime.utcnow(),
+                                "pre_class": {
+                                    "resources": [],
+                                    "completion_required": True
+                                },
+                                "in_class": {
+                                    "quiz": [],
+                                    "polls": []
+                                },
+                                "post_class": {
+                                    "assignments": []
+                                }
+                            }
+                            all_sessions.append(session)
+                            current_date = session_date + timedelta(days=7)
+
+        new_course_id = get_new_course_id()
+        course_title = st.session_state.course_plan['course_title']
+        # Final Save Button
+        if st.button("Save Course", type="primary", use_container_width=True):
+            try:
+                course_doc = {
+                    "course_id": new_course_id,
+                    "title": course_title,
+                    "description": st.session_state.course_plan['course_description'],
+                    "faculty": faculty_name,
+                    "faculty_id": faculty_id,
+                    "duration": f"{st.session_state.duration_weeks} weeks",
+                    "start_date": datetime.combine(st.session_state.start_date, datetime.min.time()),
+                    "created_at": datetime.utcnow(),
+                    "sessions": all_sessions
+                }
+
+                # Insert into database
+                courses_collection.insert_one(course_doc)
+
+                st.success("Course successfully created!")
+
+                # Update faculty collection
+                faculty_collection.update_one(
+                    {"_id": st.session_state.user_id},
+                    {
+                        "$push": {
+                            "courses_taught": {
+                                "course_id": new_course_id,
+                                "title": course_title,
+                            }
+                        }
+                    },
+                )
+
+                # Clear session state
+                st.session_state.course_plan = None
+                st.session_state.edit_mode = False
+
+                # Optional: Add a button to view the created course
+                if st.button("View Course"):
+                    # Add navigation logic here
+                    pass
+
+            except Exception as e:
+                st.error(f"Error saving course: {e}")
+
 
 
 from research_assistant_dashboard import display_research_assistant_dashboard
@@ -561,6 +700,127 @@ from research_assistant_dashboard import display_research_assistant_dashboard
 from goals2 import display_analyst_dashboard
 
 
+def enroll_in_course(course_id, course_title, student):
+    """Enroll a student in a course"""
+    if student:
+        courses = student.get("enrolled_courses", [])
+        if course_id not in [course["course_id"] for course in courses]:
+            course = courses_collection.find_one({"course_id": course_id})
+            if course:
+                courses.append(
+                    {
+                        "course_id": course["course_id"],
+                        "title": course["title"],
+                    }
+                )
+                students_collection.update_one(
+                    {"_id": st.session_state.user_id},
+                    {"$set": {"enrolled_courses": courses}},
+                )
+                st.success(f"Enrolled in course {course_title}")
+            else:
+                st.error("Course not found")
+        else:
+            st.warning("Already enrolled in this course")
+
+# def enroll_in_course_page(course_id):
+#     """Enroll a student in a course"""
+#     student = students_collection.find_one({"_id": st.session_state.user_id})
+#     course_title = courses_collection.find_one({"course_id": course_id})["title"]
+
+#     course = courses_collection.find_one({"course_id": course_id})
+#     if course:
+#         st.title(course["title"])
+#         st.subheader("Course Description:")
+#         st.write(course["description"])
+#         st.write(f"Faculty: {course['faculty']}")
+#         st.write(f"Duration: {course['duration']}")

+#         st.title("Course Sessions")
+#         for session in course["sessions"]:
+#             st.write(f"Session: {session['title']}")
+#             st.write(f"Date: {session['date']}")
+#             st.write(f"Status: {session['status']}")
+#             st.write("----")
+#     else:
+#         st.error("Course not found")

+#     enroll_button = st.button("Enroll in Course", key="enroll_button", use_container_width=True)
+#     if enroll_button:
+#         enroll_in_course(course_id, course_title, student)
+def enroll_in_course_page(course_id):
+    """Display an aesthetically pleasing course enrollment page"""
+    student = students_collection.find_one({"_id": st.session_state.user_id})
+    course = courses_collection.find_one({"course_id": course_id})
+
+    if not course:
+        st.error("Course not found")
+        return
+
+    # Create two columns for layout
+    col1, col2 = st.columns([2, 1])
+
+    with col1:
+        # Course header section
+        st.title(course["title"])
+        st.markdown(f"*{course['description']}*")
+
+        # Course details in an expander
+        with st.expander("Course Details", expanded=True):
+            st.markdown(f"👨‍🏫 **Faculty:** {course['faculty']}")
+            st.markdown(f"⏱️ **Duration:** {course['duration']}")
+
+        # Sessions in a clean card-like format
+        st.subheader("📚 Course Sessions")
+        for idx, session in enumerate(course["sessions"], 1):
+            with st.container():
+                st.markdown(f"""
+                ---
+                ### Session {idx}: {session['title']}
+                🗓️ **Date:** {session['date']}
+                📌 **Status:** {session['status']}
+                """)
+
+    with col2:
+        with st.container():
+            st.markdown("### Ready to Learn?")
+            st.markdown("Click below to enroll in this course")
+
+            # Check if already enrolled
+            courses = student.get("enrolled_courses", [])
+            is_enrolled = course_id in [c["course_id"] for c in courses]
+
+            if is_enrolled:
+                st.info("✅ You are already enrolled in this course")
+            else:
+                enroll_button = st.button(
+                    "🎓 Enroll Now",
+                    key="enroll_button",
+                    use_container_width=True
+                )
+                if enroll_button:
+                    enroll_in_course(course_id, course["title"], student)
+
+def show_available_courses(username, user_type, user_id):
+    """Display available courses for enrollment"""
+    st.title("Available Courses")
+
+    courses = list(courses_collection2.find({}, {"course_id": 1, "title": 1}))
+    course_options = [
+        f"{course['title']} ({course['course_id']})" for course in courses
+    ]
+
+    selected_course = st.selectbox("Select a Course to Enroll", course_options)
+    # if selected_courses:
+    #     for course in selected_courses:
+    #         course_id = course.split("(")[-1][:-1]
+    #         course_title = course.split(" (")[0]
+    #         enroll_in_course(course_id, course_title, user_id)
+    #     st.success("Courses enrolled successfully!")
+    if selected_course:
+        course_id = selected_course.split("(")[-1][:-1]
+        enroll_in_course_page(course_id)
+
 def main_dashboard():
     if st.session_state.user_type == "research_assistant":
         display_research_assistant_dashboard()
@@ -581,6 +841,20 @@ def main_dashboard():
         st.session_state.username, st.session_state.user_type
     )
 
+    # Enroll in Courses
+    if st.session_state.user_type == "student":
+        if st.button(
+            "Enroll in a New Course", key="enroll_course", use_container_width=True
+        ):
+            st.session_state.show_enroll_course_page = True
+
+    # if st.session_state.show_enroll_course_form:
+    #     courses = list(courses_collection.find({}, {"course_id": 1, "title": 1}))
+    #     courses += list(courses_collection2.find({}, {"course_id": 1, "title": 1}))
+    #     course_options = [f"{course['title']} ({course['course_id']})" for course in courses]
+    #     course_to_enroll = st.selectbox("Available Courses", course_options)
+    #     st.session_state.course_to_enroll = course_to_enroll
+
     if st.session_state.user_type == "faculty":
         if st.button(
             "Create New Course", key="create_course", use_container_width=True
@@ -631,6 +905,8 @@ def main_dashboard():
         create_course_form(st.session_state.username, st.session_state.user_id)
     elif st.session_state.get("show_create_session_form"):
         create_session_form(selected_course_id)
+    elif st.session_state.get("show_enroll_course_page"):
+        show_available_courses(st.session_state.username, st.session_state.user_type, st.session_state.user_id)
     else:
        # Main content
        if "selected_session" in st.session_state:
pre_class_analytics.py ADDED
@@ -0,0 +1,850 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from bson import ObjectId
3
+ from pymongo import MongoClient
4
+ import pandas as pd
5
+ import numpy as np
6
+ from datetime import datetime
7
+ from dotenv import load_dotenv
8
+ import os
9
+ from typing import List, Dict, Any
10
+ from transformers import pipeline
11
+ from textstat import flesch_reading_ease
12
+ from collections import Counter
13
+ import logging
14
+ import spacy
15
+ import json
16
+
17
+ # Load chat histories from JSON file
18
+ all_chat_histories = []
19
+ with open(r'D:\ML_Projects\CSR Project\NOVAScholarProject\NovaScholar\all_chat_histories2.json', 'r') as file:
20
+ all_chat_histories = json.load(file)
21
+
22
+ load_dotenv()
23
+ MONGO_URI = os.getenv("MONGO_URI")
24
+ client = MongoClient(MONGO_URI)
25
+ db = client['novascholar_db']
26
+
27
+ chat_history_collection = db['chat_history']
28
+
29
+ # def get_chat_history(user_id, session_id):
30
+ # query = {
31
+ # "user_id": ObjectId(user_id),
32
+ # "session_id": session_id,
33
+ # "timestamp": {"$lte": datetime.utcnow()}
34
+ # }
35
+ # result = chat_history_collection.find(query)
36
+ # return list(result)
37
+
38
+ # if __name__ == "__main__":
39
+ # user_ids = ["6738b70cc97dffb641c7d158", "6738b7b33f648a9224f7aa69"]
40
+ # session_ids = ["S104"]
41
+ # for user_id in user_ids:
42
+ # for session_id in session_ids:
43
+ # result = get_chat_history(user_id, session_id)
44
+ # print(result)
45
+
46
+ # Configure logging
47
+ logging.basicConfig(level=logging.INFO)
48
+ logger = logging.getLogger(__name__)
49
+
50
+ class NovaScholarAnalytics:
51
+ def __init__(self):
52
+ # Initialize NLP components
53
+ self.nlp = spacy.load("en_core_web_sm")
54
+ self.sentiment_analyzer = pipeline("sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis", top_k=None)
55
+
56
+ # Define question words for detecting questions
57
+ self.question_words = {"what", "why", "how", "when", "where", "which", "who", "whose", "whom"}
58
+
59
+ # Define question categories
60
+ self.question_categories = {
61
+ 'conceptual': {'what', 'define', 'describe', 'explain'},
62
+ 'procedural': {'how', 'steps', 'procedure', 'process'},
63
+ 'reasoning': {'why', 'reason', 'cause', 'effect'},
64
+ 'clarification': {'clarify', 'mean', 'difference', 'between'}
65
+ }
66
+
67
+ def _categorize_questions(self, questions_df: pd.DataFrame) -> Dict[str, int]:
68
+ """
69
+ Categorize questions into different types based on their content.
70
+
71
+ Args:
72
+ questions_df: DataFrame containing questions
73
+
74
+ Returns:
75
+ Dictionary with question categories and their counts
76
+ """
77
+ categories_count = {
78
+ 'conceptual': 0,
79
+ 'procedural': 0,
80
+ 'reasoning': 0,
81
+ 'clarification': 0,
82
+ 'other': 0
83
+ }
84
+
85
+ for _, row in questions_df.iterrows():
86
+ prompt_lower = row['prompt'].lower()
87
+ categorized = False
88
+
89
+ for category, keywords in self.question_categories.items():
90
+ if any(keyword in prompt_lower for keyword in keywords):
91
+ categories_count[category] += 1
92
+ categorized = True
93
+ break
94
+
95
+ if not categorized:
96
+ categories_count['other'] += 1
97
+
98
+ return categories_count
99
+
100
+
101
+ def _identify_frustration(self, df: pd.DataFrame) -> List[str]:
102
+ """
103
+ Identify signs of frustration in student messages.
104
+
105
+ Args:
106
+ df: DataFrame containing messages
107
+
108
+ Returns:
109
+ List of topics/areas where frustration was detected
110
+ """
111
+ frustration_indicators = [
112
+ "don't understand", "confused", "difficult", "hard to",
113
+ "not clear", "stuck", "help", "can't figure"
114
+ ]
115
+
116
+ frustrated_messages = df[
117
+ df['prompt'].str.lower().str.contains('|'.join(frustration_indicators), na=False)
118
+ ]
119
+
120
+ if len(frustrated_messages) == 0:
121
+ return []
122
+
123
+ # Extract topics from frustrated messages
124
+ frustrated_topics = self._extract_topics(frustrated_messages)
125
+ return list(set(frustrated_topics)) # Unique topic
126
+
127
+ def _calculate_resolution_times(self, df: pd.DataFrame) -> Dict[str, float]:
128
+ """
129
+ Calculate average time taken to resolve questions for different topics.
130
+
131
+ Args:
132
+ df: DataFrame containing messages
133
+
134
+ Returns:
135
+ Dictionary with topics and their average resolution times in minutes
136
+ """
137
+ resolution_times = {}
138
+
139
+ # Group messages by topic
140
+ topics = self._extract_topics(df)
141
+ for topic in set(topics):
142
+ escaped_topic = re.escape(topic)
143
+ topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]
144
+ if len(topic_msgs) >= 2:
145
+ # Calculate time difference between first and last message
146
+ start_time = pd.to_datetime(topic_msgs['timestamp'].iloc[0])
147
+ end_time = pd.to_datetime(topic_msgs['timestamp'].iloc[-1])
148
+ duration = (end_time - start_time).total_seconds() / 60 # Convert to minutes
149
+ resolution_times[topic] = duration
150
+
151
+ return resolution_times
152
+
153
+ def _calculate_completion_rates(self, df: pd.DataFrame) -> Dict[str, float]:
154
+ """
155
+ Calculate completion rates for different topics.
156
+
157
+ Args:
158
+ df: DataFrame containing messages
159
+
160
+ Returns:
161
+ Dictionary with topics and their completion rates
162
+ """
163
+ completion_rates = {}
164
+ topics = self._extract_topics(df)
165
+
166
+ for topic in set(topics):
167
+ escaped_topic = re.escape(topic)
168
+ topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]
169
+ if len(topic_msgs) > 0:
170
+ # Consider a topic completed if there are no frustrated messages in the last 2 messages
171
+ last_msgs = topic_msgs.tail(2)
172
+ frustrated = self._identify_frustration(last_msgs)
173
+ completion_rates[topic] = 0.0 if frustrated else 1.0
174
+
175
+ return completion_rates
176
+
177
+ def _analyze_time_distribution(self, df: pd.DataFrame) -> Dict[str, Dict[str, float]]:
178
+ """
179
+ Analyze time spent on different topics.
180
+
181
+ Args:
182
+ df: DataFrame containing messages
183
+
184
+ Returns:
185
+ Dictionary with time distribution statistics per topic
186
+ """
187
+ time_stats = {}
188
+ topics = self._extract_topics(df)
189
+
190
+ for topic in set(topics):
191
+ escaped_topic = re.escape(topic)
192
+ topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]
193
+ if len(topic_msgs) >= 2:
194
+ times = pd.to_datetime(topic_msgs['timestamp'])
195
+ duration = (times.max() - times.min()).total_seconds() / 60
196
+
197
+ time_stats[topic] = {
198
+ 'total_minutes': duration,
199
+ 'avg_minutes_per_message': duration / len(topic_msgs),
200
+ 'message_count': len(topic_msgs)
201
+ }
202
+
203
+ return time_stats
204
+
205
+ def _identify_coverage_gaps(self, df: pd.DataFrame) -> List[str]:
206
+ """
207
+ Identify topics with potential coverage gaps.
208
+
209
+ Args:
210
+ df: DataFrame containing messages
211
+
212
+ Returns:
213
+ List of topics with coverage gaps
214
+ """
215
+ gaps = []
216
+ topics = self._extract_topics(df)
217
+ topic_stats = self._analyze_time_distribution(df)
218
+
219
+ for topic in set(topics):
220
+ if topic in topic_stats:
221
+ stats = topic_stats[topic]
222
+ # Flag topics with very short interaction times or few messages
223
+ if stats['total_minutes'] < 5 or stats['message_count'] < 3:
224
+ gaps.append(topic)
225
+
226
+ return gaps
227
+
228
+ def _calculate_student_metrics(self, df: pd.DataFrame) -> Dict[str, Dict[str, float]]:
229
+ """
230
+ Calculate various metrics for each student.
231
+
232
+ Args:
233
+ df: DataFrame containing messages
234
+
235
+ Returns:
236
+ Dictionary with student metrics
237
+ """
238
+ student_metrics = {}
239
+
240
+ for user_id in df['user_id'].unique():
241
+ user_msgs = df[df['user_id'] == user_id]
242
+
243
+ metrics = {
244
+ 'message_count': len(user_msgs),
245
+ 'question_count': len(user_msgs[user_msgs['prompt'].str.contains('|'.join(self.question_words), case=False)]),
246
+ 'avg_response_length': user_msgs['response'].str.len().mean(),
247
+ 'unique_topics': len(set(self._extract_topics(user_msgs))),
248
+ 'frustration_count': len(self._identify_frustration(user_msgs))
249
+ }
250
+
251
+ student_metrics[user_id] = metrics
252
+
253
+ return student_metrics
254
+
255
+ def _determine_student_cluster(self, metrics: Dict[str, float]) -> str:
256
+ """
257
+ Determine which cluster a student belongs to based on their metrics.
258
+
259
+ Args:
260
+ metrics: Dictionary containing student metrics
261
+
262
+ Returns:
263
+ Cluster label ('confident', 'engaged', or 'struggling')
264
+ """
265
+ # Simple rule-based clustering
266
+ if metrics['frustration_count'] > 2 or metrics['question_count'] / metrics['message_count'] > 0.7:
267
+ return 'struggling'
268
+ elif metrics['message_count'] > 10 and metrics['unique_topics'] > 3:
269
+ return 'engaged'
270
+ else:
271
+ return 'confident'
272
+
273
+ def _identify_abandon_points(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
274
+ """
275
+ Identify points where students abandoned topics.
276
+
277
+ Args:
278
+ df: DataFrame containing messages
279
+
280
+ Returns:
281
+ List of dictionaries containing abandon point information
282
+ """
283
+ abandon_points = []
284
+ topics = self._extract_topics(df)
285
+
286
+ for topic in set(topics):
287
+ escaped_topic = re.escape(topic)
288
+ topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]
289
+ if len(topic_msgs) >= 2:
290
+ # Check for large time gaps between messages
291
+ times = pd.to_datetime(topic_msgs['timestamp'])
292
+ time_gaps = times.diff()
293
+
294
+ for idx, gap in enumerate(time_gaps):
295
+ if gap and gap.total_seconds() > 600: # 10 minutes threshold
296
+ abandon_points.append({
297
+ 'topic': topic,
298
+ 'message_before': topic_msgs.iloc[idx-1]['prompt'],
299
+ 'time_gap': gap.total_seconds() / 60, # Convert to minutes
300
+ 'resumed': idx < len(topic_msgs) - 1
301
+ })
302
+
303
+ return abandon_points
304
+
305
+ def process_chat_history(self, chat_history: List[Dict[Any, Any]]) -> Dict[str, Any]:
306
+ """
307
+ Process chat history data and generate comprehensive analytics.
308
+
309
+ Args:
310
+ chat_history: List of chat history documents
311
+ session_info: Dictionary containing session metadata (topic, duration, etc.)
312
+
313
+ Returns:
314
+ Dictionary containing all analytics results
315
+ """
316
+ try:
317
+ # Convert chat history to DataFrame for easier processing
318
+ messages_data = []
319
+ for chat in chat_history:
320
+ for msg in chat['messages']:
321
+ messages_data.append({
322
+ 'user_id': chat['user_id'],
323
+ 'session_id': chat['session_id'],
324
+ 'timestamp': msg['timestamp'],
325
+ 'prompt': msg['prompt'],
326
+ 'response': msg['response']
327
+ })
328
+
329
+ df = pd.DataFrame(messages_data)
330
+
331
+ # Generate all analytics
332
+ analytics_results = {
333
+ 'topic_interaction': self._analyze_topic_interaction(df),
334
+ 'question_patterns': self._analyze_question_patterns(df),
335
+ 'sentiment_analysis': self._analyze_sentiment(df),
336
+ 'completion_trends': self._analyze_completion_trends(df),
337
+ 'student_clustering': self._cluster_students(df),
338
+ 'abandoned_conversations': self._analyze_abandoned_conversations(df)
339
+ }
340
+
341
+ return analytics_results
342
+
343
+ except Exception as e:
344
+ logger.error(f"Error processing chat history: {str(e)}")
345
+ raise
346
+
347
+ def _analyze_topic_interaction(self, df: pd.DataFrame) -> Dict[str, Any]:
348
+ """Analyze topic interaction frequency and patterns."""
349
+ topics = self._extract_topics(df)
350
+
351
+ topic_stats = {
352
+ 'interaction_counts': Counter(topics),
353
+ 'revisit_patterns': self._calculate_topic_revisits(df, topics),
354
+ 'avg_time_per_topic': self._calculate_avg_time_per_topic(df, topics)
355
+ }
356
+
357
+ return topic_stats
358
+
359
+ def _analyze_question_patterns(self, df: pd.DataFrame) -> Dict[str, Any]:
360
+ """Analyze question patterns and identify difficult topics."""
361
+ questions = df[df['prompt'].str.lower().str.split().apply(
362
+ lambda x: any(word.lower() in self.question_words for word in x)
363
+ )]
364
+
365
+ question_stats = {
366
+ 'total_questions': len(questions),
367
+ 'question_types': self._categorize_questions(questions),
368
+ 'complex_chains': self._identify_complex_chains(df)
369
+ }
370
+
371
+ return question_stats
372
+
373
+ def _analyze_sentiment(self, df: pd.DataFrame) -> Dict[str, Any]:
374
+ """Perform sentiment analysis on messages."""
375
+ sentiments = []
376
+ for prompt in df['prompt']:
377
+ try:
378
+ sentiment = self.sentiment_analyzer(prompt)[0]
379
+ sentiments.append(sentiment['label'])
380
+ except Exception as e:
381
+ logger.warning(f"Error in sentiment analysis: {str(e)}")
382
+ sentiments.append('NEUTRAL')
383
+
384
+ sentiment_stats = {
385
+ 'overall_sentiment': Counter(sentiments),
386
+ 'frustration_indicators': self._identify_frustration(df),
387
+ 'resolution_times': self._calculate_resolution_times(df)
388
+ }
389
+
390
+ return sentiment_stats
391
+
392
+ def _analyze_completion_trends(self, df: pd.DataFrame) -> Dict[str, Any]:
393
+ """Analyze topic completion trends and coverage."""
394
+ completion_stats = {
395
+ 'completion_rates': self._calculate_completion_rates(df),
396
+ 'time_distribution': self._analyze_time_distribution(df),
397
+ 'coverage_gaps': self._identify_coverage_gaps(df)
398
+ }
399
+
400
+ return completion_stats
401
+
402
+ def _cluster_students(self, df: pd.DataFrame) -> Dict[str, Any]:
403
+ """Cluster students based on interaction patterns."""
404
+ student_metrics = self._calculate_student_metrics(df)
405
+
406
+ clusters = {
407
+ 'confident': [],
408
+ 'engaged': [],
409
+ 'struggling': []
410
+ }
411
+
412
+ for student_id, metrics in student_metrics.items():
413
+ cluster = self._determine_student_cluster(metrics)
414
+ clusters[cluster].append(student_id)
415
+
416
+ return clusters
417
+
418
+ def _analyze_abandoned_conversations(self, df: pd.DataFrame) -> Dict[str, Any]:
419
+ """Identify and analyze abandoned conversations."""
420
+ abandoned_stats = {
421
+ 'abandon_points': self._identify_abandon_points(df),
422
+ 'incomplete_topics': self._identify_incomplete_topics(df),
423
+ 'dropout_patterns': self._analyze_dropout_patterns(df)
424
+ }
425
+
426
+ return abandoned_stats
427
+
428
+    def _identify_incomplete_topics(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
+        """
+        Identify topics that were started but not completed by students.
+
+        Args:
+            df: DataFrame containing messages
+
+        Returns:
+            List of dictionaries containing incomplete topic information
+        """
+        incomplete_topics = []
+        topics = self._extract_topics(df)
+
+        for topic in set(topics):
+            escaped_topic = re.escape(topic)
+            topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]
+
+            if len(topic_msgs) > 0:
+                # Check for completion indicators
+                last_msgs = topic_msgs.tail(3)  # Look at last 3 messages
+
+                # Consider a topic incomplete if:
+                # 1. There are unanswered questions
+                # 2. Contains frustration indicators
+                # 3. No positive confirmation/understanding indicators
+                has_questions = last_msgs['prompt'].str.contains('|'.join(self.question_words), case=False).any()
+                has_frustration = bool(self._identify_frustration(last_msgs))
+
+                completion_indicators = ['understand', 'got it', 'makes sense', 'thank you', 'clear now']
+                has_completion = last_msgs['prompt'].str.contains('|'.join(completion_indicators), case=False).any()
+
+                if (has_questions or has_frustration) and not has_completion:
+                    incomplete_topics.append({
+                        'topic': topic,
+                        'last_interaction': topic_msgs.iloc[-1]['timestamp'],
+                        'message_count': len(topic_msgs),
+                        'has_pending_questions': has_questions,
+                        'shows_frustration': has_frustration
+                    })
+
+        return incomplete_topics
+
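+    # Illustrative example of the incompleteness heuristic (assumed messages):
+    # if a topic's last messages are "Why does recursion need a base case?" and
+    # "Still confused about this", has_questions is True ("why") and none of the
+    # completion indicators ('understand', 'got it', ...) appear, so the topic
+    # is recorded as incomplete.
+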
+    def _analyze_dropout_patterns(self, df: pd.DataFrame) -> Dict[str, Any]:
+        """
+        Analyze patterns in where and why students tend to drop out of conversations.
+
+        Args:
+            df: DataFrame containing messages
+
+        Returns:
+            Dictionary containing dropout pattern analysis
+        """
+        dropout_analysis = {
+            'timing_patterns': {},
+            'topic_patterns': {},
+            'complexity_indicators': {},
+            'engagement_metrics': {}
+        }
+
+        # Analyze timing of dropouts
+        timestamps = pd.to_datetime(df['timestamp'])
+        time_gaps = timestamps.diff()
+        dropout_points = time_gaps[time_gaps > pd.Timedelta(minutes=30)].index
+
+        for point in dropout_points:
+            # Get context before dropout
+            context_msgs = df.loc[max(0, point - 5):point]
+
+            # Analyze timing
+            time_of_day = timestamps[point].hour
+            dropout_analysis['timing_patterns'][time_of_day] = \
+                dropout_analysis['timing_patterns'].get(time_of_day, 0) + 1
+
+            # Analyze topics at dropout points
+            dropout_topics = self._extract_topics(context_msgs)
+            for topic in dropout_topics:
+                dropout_analysis['topic_patterns'][topic] = \
+                    dropout_analysis['topic_patterns'].get(topic, 0) + 1
+
+            # Analyze complexity
+            msg_lengths = context_msgs['prompt'].str.len().mean()
+            question_density = len(context_msgs[context_msgs['prompt'].str.contains(
+                '|'.join(self.question_words), case=False)]) / len(context_msgs)
+
+            dropout_analysis['complexity_indicators'][point] = {
+                'message_length': msg_lengths,
+                'question_density': question_density
+            }
+
+            # Analyze engagement
+            dropout_analysis['engagement_metrics'][point] = {
+                'messages_before_dropout': len(context_msgs),
+                'response_times': time_gaps[max(0, point - 5):point].mean().total_seconds() / 60
+            }
+
+        return dropout_analysis
+
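+    # Illustrative example of the 30-minute gap rule (assumed timestamps): for
+    # messages at 10:00, 10:05 and 10:50, timestamps.diff() yields gaps of 5 and
+    # 45 minutes, so only the 10:50 message's index becomes a dropout point.
+    # Note the context-window slicing (point - 5) assumes an integer index.
+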
+    def _rank_topics_by_difficulty(self, analytics_results: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Rank topics by their difficulty based on various metrics from analytics results.
+
+        Args:
+            analytics_results: Dictionary containing all analytics data
+
+        Returns:
+            List of dictionaries containing topic difficulty rankings and scores
+        """
+        topic_difficulty = []
+
+        # Compute a difficulty score for every topic that has interaction data
+        for topic in analytics_results['topic_interaction']['interaction_counts'].keys():
+            # Calculate difficulty score based on multiple factors
+            difficulty_score = 0
+
+            # Factor 1: Question frequency (30% weight)
+            question_count = sum(1 for chain in analytics_results['question_patterns']['complex_chains']
+                                 if chain['topic'] == topic)
+            difficulty_score += question_count * 0.3
+
+            # Factor 2: Frustration indicators (25% weight)
+            frustration_count = sum(1 for indicator in analytics_results['sentiment_analysis']['frustration_indicators']
+                                    if topic.lower() in indicator.lower())
+            difficulty_score += frustration_count * 0.25
+
+            # Factor 3: Completion rate (inverse relationship, 25% weight)
+            completion_rate = analytics_results['completion_trends']['completion_rates'].get(topic, 1.0)
+            difficulty_score += (1 - completion_rate) * 0.25
+
+            # Factor 4: Time spent (normalized, 20% weight)
+            avg_time = analytics_results['topic_interaction']['avg_time_per_topic'].get(topic, 0)
+            max_time = max(analytics_results['topic_interaction']['avg_time_per_topic'].values(), default=0)
+            normalized_time = avg_time / max_time if max_time > 0 else 0
+            difficulty_score += normalized_time * 0.2
+
+            topic_difficulty.append({
+                'topic': topic,
+                'difficulty_score': round(difficulty_score, 2),
+                'metrics': {
+                    'question_frequency': question_count,
+                    'frustration_indicators': frustration_count,
+                    'completion_rate': completion_rate,
+                    'avg_time_spent': avg_time
+                }
+            })
+
+        # Sort topics by difficulty score
+        return sorted(topic_difficulty, key=lambda x: x['difficulty_score'], reverse=True)
+
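+    # Worked example of the weighting above (assumed inputs): question_count=2,
+    # frustration_count=1, completion_rate=0.5 and normalized_time=0.5 give
+    #   2*0.3 + 1*0.25 + (1 - 0.5)*0.25 + 0.5*0.2 = 0.6 + 0.25 + 0.125 + 0.1 = 1.075
+    # which is then rounded to two decimals for the report.
+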
+    def _identify_support_needs(self, analytics_results: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
+        """
+        Identify specific support needs for students based on analytics results.
+
+        Args:
+            analytics_results: Dictionary containing all analytics data
+
+        Returns:
+            Dictionary containing support needs categorized by urgency
+        """
+        support_needs = {
+            'immediate_attention': [],
+            'monitoring_needed': [],
+            'general_support': []
+        }
+
+        # Analyze struggling students
+        for student_id in analytics_results['student_clustering']['struggling']:
+            # Get student-specific metrics
+            student_msgs = analytics_results.get('sentiment_analysis', {}).get('messages', [])
+            frustration_topics = [topic for topic in analytics_results['sentiment_analysis']['frustration_indicators']
+                                  if any(msg['user_id'] == student_id for msg in student_msgs)]
+
+            # Calculate engagement metrics
+            engagement_level = len([chain for chain in analytics_results['question_patterns']['complex_chains']
+                                    if any(msg['user_id'] == student_id for msg in chain['messages'])])
+
+            # Identify immediate attention needs
+            if len(frustration_topics) >= 3 or engagement_level < 2:
+                support_needs['immediate_attention'].append({
+                    'student_id': student_id,
+                    'issues': frustration_topics,
+                    'engagement_level': engagement_level,
+                    'recommended_actions': [
+                        'Schedule one-on-one session',
+                        'Review difficult topics',
+                        'Provide additional resources'
+                    ]
+                })
+
+            # Identify monitoring needs
+            elif len(frustration_topics) >= 1 or engagement_level < 4:
+                support_needs['monitoring_needed'].append({
+                    'student_id': student_id,
+                    'areas_of_concern': frustration_topics,
+                    'engagement_level': engagement_level,
+                    'recommended_actions': [
+                        'Regular progress checks',
+                        'Provide supplementary materials'
+                    ]
+                })
+
+            # General support needs
+            else:
+                support_needs['general_support'].append({
+                    'student_id': student_id,
+                    'areas_for_improvement': frustration_topics,
+                    'engagement_level': engagement_level,
+                    'recommended_actions': [
+                        'Maintain regular communication',
+                        'Encourage participation'
+                    ]
+                })
+
+        return support_needs
+
+
+    def _extract_topics(self, df: pd.DataFrame) -> List[str]:
+        """Extract topics from messages using spaCy."""
+        topics = []
+        for doc in self.nlp.pipe(df['prompt']):
+            # Extract noun phrases as potential topics
+            noun_phrases = [chunk.text for chunk in doc.noun_chunks]
+            topics.extend(noun_phrases)
+        return topics
+
+    def _calculate_topic_revisits(self, df: pd.DataFrame, topics: List[str]) -> Dict[str, int]:
+        """Calculate how often topics are revisited."""
+        topic_visits = Counter(topics)
+        return {topic: count for topic, count in topic_visits.items() if count > 1}
+
+    def _calculate_avg_time_per_topic(self, df: pd.DataFrame, topics: List[str]) -> Dict[str, float]:
+        """Calculate average time spent per topic."""
+        topic_times = {}
+        for topic in set(topics):
+            escaped_topic = re.escape(topic)
+            topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]
+            if len(topic_msgs) > 1:
+                time_diffs = pd.to_datetime(topic_msgs['timestamp']).diff()
+                avg_time = time_diffs.mean().total_seconds() / 60  # Convert to minutes
+                topic_times[topic] = avg_time
+        return topic_times
+
+    def _identify_complex_chains(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
+        """Identify complex conversation chains."""
+        chains = []
+        current_chain = []
+
+        for idx, row in df.iterrows():
+            if self._is_followup_question(row['prompt']):
+                current_chain.append(row)
+            else:
+                if len(current_chain) >= 3:  # Consider 3+ related questions as a complex chain
+                    # _extract_topics expects a DataFrame, so wrap the first prompt accordingly
+                    chain_topics = self._extract_topics(pd.DataFrame({'prompt': [current_chain[0]['prompt']]}))
+                    chains.append({
+                        'messages': current_chain,
+                        'topic': chain_topics[0] if chain_topics else '',
+                        'length': len(current_chain)
+                    })
+                current_chain = []
+
+        return chains
+
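+    # Illustrative example (assumed spaCy model behavior): for the prompt
+    # "What is a binary search tree?", doc.noun_chunks typically yields the
+    # noun phrase "a binary search tree", which becomes a candidate topic.
+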
+    def _generate_topic_priority_list(self, analytics_results: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Generate a prioritized list of topics for the upcoming session.
+
+        Args:
+            analytics_results: Dictionary containing all analytics data
+
+        Returns:
+            List of dictionaries containing topics and their priority scores
+        """
+        topic_priorities = []
+
+        # Get difficulty rankings
+        difficulty_ranking = self._rank_topics_by_difficulty(analytics_results)
+
+        for topic_data in difficulty_ranking:
+            topic = topic_data['topic']
+
+            # Calculate priority score based on multiple factors
+            priority_score = 0
+
+            # Factor 1: Difficulty score (40% weight)
+            priority_score += topic_data['difficulty_score'] * 0.4
+
+            # Factor 2: Student frustration (25% weight)
+            frustration_count = sum(1 for indicator in
+                                    analytics_results['sentiment_analysis']['frustration_indicators']
+                                    if topic.lower() in indicator.lower())
+            normalized_frustration = min(frustration_count / 5, 1)  # Cap at 5 frustrations
+            priority_score += normalized_frustration * 0.25
+
+            # Factor 3: Incomplete understanding (20% weight)
+            incomplete_topics = analytics_results.get('abandoned_conversations', {}).get('incomplete_topics', [])
+            if any(t['topic'] == topic for t in incomplete_topics):
+                priority_score += 0.2
+
+            # Factor 4: Coverage gaps (15% weight)
+            if topic in analytics_results['completion_trends']['coverage_gaps']:
+                priority_score += 0.15
+
+            topic_priorities.append({
+                'topic': topic,
+                'priority_score': round(priority_score, 2),
+                'reasons': {
+                    'difficulty_level': topic_data['difficulty_score'],
+                    'frustration_indicators': frustration_count,
+                    'has_incomplete_understanding': any(t['topic'] == topic for t in incomplete_topics),
+                    'has_coverage_gaps': topic in analytics_results['completion_trends']['coverage_gaps']
+                },
+                'recommended_focus_areas': self._generate_focus_recommendations(topic_data, analytics_results)
+            })
+
+        # Sort by priority score
+        return sorted(topic_priorities, key=lambda x: x['priority_score'], reverse=True)
+
+    def _generate_focus_recommendations(self, topic_data: Dict[str, Any],
+                                        analytics_results: Dict[str, Any]) -> List[str]:
+        """Generate specific focus recommendations for a topic."""
+        recommendations = []
+
+        if topic_data['metrics']['question_frequency'] > 3:
+            recommendations.append("Provide more detailed explanations and examples")
+
+        if topic_data['metrics']['completion_rate'] < 0.7:
+            recommendations.append("Break down complex concepts into smaller segments")
+
+        if topic_data['metrics']['frustration_indicators'] > 2:
+            recommendations.append("Review prerequisite concepts and provide additional context")
+
+        return recommendations
+
+    def _is_followup_question(self, prompt: str) -> bool:
+        """Determine if a prompt is a follow-up question."""
+        followup_indicators = {'also', 'then', 'additionally', 'furthermore', 'related to that'}
+        return any(indicator in prompt.lower() for indicator in followup_indicators)
+
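+    # Illustrative example (assumed prompt): "Also, how does Docker fit into
+    # CI/CD?" counts as a follow-up because it contains "also". Being a plain
+    # substring check, it can also fire on words that merely contain an
+    # indicator, which is why the indicator set is kept small.
+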
+    def generate_faculty_report(self, analytics_results: Dict[str, Any]) -> Dict[str, Any]:
+        """Generate a comprehensive report for faculty."""
+        report = {
+            'key_findings': self._generate_key_findings(analytics_results),
+            'recommended_actions': self._generate_recommendations(analytics_results),
+            'topic_difficulty_ranking': self._rank_topics_by_difficulty(analytics_results),
+            'student_support_needs': self._identify_support_needs(analytics_results),
+            'topic_priorities': self._generate_topic_priority_list(analytics_results)
+        }
+
+        return report
+
+    def _generate_key_findings(self, analytics_results: Dict[str, Any]) -> List[str]:
+        """Generate key findings from analytics results."""
+        findings = []
+
+        # Analyze topic interaction patterns
+        topic_stats = analytics_results['topic_interaction']
+        low_interaction_topics = [topic for topic, count in topic_stats['interaction_counts'].items()
+                                  if count < 3]  # Arbitrary threshold
+        if low_interaction_topics:
+            findings.append(f"Low engagement detected in topics: {', '.join(low_interaction_topics)}")
+
+        # Analyze sentiment patterns
+        sentiment_stats = analytics_results['sentiment_analysis']
+        if sentiment_stats['frustration_indicators']:
+            findings.append("Significant frustration detected in the following areas: " +
+                            ', '.join(sentiment_stats['frustration_indicators']))
+
+        # Analyze student clustering
+        student_clusters = analytics_results['student_clustering']
+        if len(student_clusters['struggling']) > 0:
+            findings.append(f"{len(student_clusters['struggling'])} students showing signs of difficulty")
+
+        return findings
+
+    def _generate_recommendations(self, analytics_results: Dict[str, Any]) -> List[str]:
+        """Generate actionable recommendations for faculty."""
+        recommendations = []
+
+        # Analyze complex chains
+        question_patterns = analytics_results['question_patterns']
+        if question_patterns['complex_chains']:
+            topics_needing_clarity = set(chain['topic'] for chain in question_patterns['complex_chains'])
+            recommendations.append(f"Consider providing additional examples for: {', '.join(topics_needing_clarity)}")
+
+        # Analyze completion trends
+        completion_trends = analytics_results['completion_trends']
+        low_completion_topics = [topic for topic, rate in completion_trends['completion_rates'].items()
+                                 if rate < 0.7]  # 70% threshold
+        if low_completion_topics:
+            recommendations.append(f"Review and possibly simplify material for: {', '.join(low_completion_topics)}")
+
+        return recommendations
+
+# Example usage
+if __name__ == "__main__":
+    # Initialize analytics engine
+    analytics_engine = NovaScholarAnalytics()
+
+    # Sample usage with dummy data
+    sample_chat_history = [
+        {
+            "user_id": "123",
+            "session_id": "S101",
+            "messages": [
+                {
+                    "prompt": "What is DevOps?",
+                    "response": "DevOps is a software engineering practice...",
+                    "timestamp": datetime.now()
+                }
+            ]
+        }
+    ]
+
+    # Process analytics on the sample defined above
+    results = analytics_engine.process_chat_history(sample_chat_history)
+
+    # Generate faculty report
+    faculty_report = analytics_engine.generate_faculty_report(results)
+    print(faculty_report)
+    # Print results
+    # logger.info("Analytics processing completed")
+    # logger.info(f"Key findings: {faculty_report['key_findings']}")
+    # logger.info(f"Recommendations: {faculty_report['recommended_actions']}")
session_page.py CHANGED
@@ -1,3 +1,5 @@
+from collections import defaultdict
+import json
 import random
 import streamlit as st
 from datetime import datetime
@@ -13,13 +15,21 @@ from dotenv import load_dotenv
 import os
 from pymongo import MongoClient
 from gen_mcqs import generate_mcqs, save_quiz, quizzes_collection, get_student_quiz_score, submit_quiz_answers
+from create_course import courses_collection
+from pre_class_analytics import NovaScholarAnalytics
+import openai
+from openai import OpenAI
+
+
 
 load_dotenv()
 MONGO_URI = os.getenv('MONGO_URI')
+OPENAI_KEY = os.getenv('OPENAI_KEY')
 client = MongoClient(MONGO_URI)
 db = client["novascholar_db"]
 polls_collection = db["polls"]
 
+
 def get_current_user():
     if 'current_user' not in st.session_state:
         return None
@@ -133,14 +143,18 @@ def display_preclass_content(session, student_id, course_id):
         if st.button("Mark PDF as Read", key=f"pdf_{material['file_name']}"):
             create_notification("PDF marked as read!", "success")
 
+    # Initialize 'messages' in session_state if it doesn't exist
+    if 'messages' not in st.session_state:
+        st.session_state.messages = []
+
     # Chat input
     # Add a check, if materials are available, only then show the chat input
     if(st.session_state.user_type == "student"):
         if materials:
             if prompt := st.chat_input("Ask a question about Pre-class Materials"):
-                if len(st.session_state.messages) >= 20:
-                    st.warning("Message limit (20) reached for this session.")
-                    return
+                # if len(st.session_state.messages) >= 20:
+                #     st.warning("Message limit (20) reached for this session.")
+                #     return
 
                 st.session_state.messages.append({"role": "user", "content": prompt})
 
@@ -150,14 +164,20 @@ def display_preclass_content(session, student_id, course_id):
 
                 # Get document context
                 context = ""
+                print(session['session_id'])
                 materials = resources_collection.find({"session_id": session['session_id']})
+                print(materials)
                 context = ""
                 vector_data = None
 
+                # for material in materials:
+                #     print(material)
                 context = ""
                 for material in materials:
                     resource_id = material['_id']
+                    print(resource_id)
                     vector_data = vectors_collection.find_one({"resource_id": resource_id})
+                    # print(vector_data)
                     if vector_data and 'text' in vector_data:
                         context += vector_data['text'] + "\n"
 
@@ -167,15 +187,33 @@ def display_preclass_content(session, student_id, course_id):
 
                 try:
                     # Generate response using Gemini
-                    context_prompt = f"""
-                    Based on the following context, answer the user's question:
+                    # context_prompt = f"""
+                    # Based on the following context, answer the user's question:
 
+                    # Context:
+                    # {context}
+
+                    # Question: {prompt}
+
+                    # Please provide a clear and concise answer based only on the information provided in the context.
+                    # """
+                    context_prompt = f"""
+                    You are a highly intelligent and resourceful assistant capable of synthesizing information from the provided context.
+
                     Context:
                     {context}
-
+
+                    Instructions:
+                    1. Base your answers primarily on the given context.
+                    2. If the answer to the user's question is not explicitly in the context but can be inferred or synthesized from the information provided, do so thoughtfully.
+                    3. Only use external knowledge or web assistance when:
+                    - The context lacks sufficient information, and
+                    - The question requires knowledge beyond what can be reasonably inferred from the context.
+                    4. Clearly state if you are relying on web assistance for any part of your answer.
+
                     Question: {prompt}
-
-                    Please provide a clear and concise answer based only on the information provided in the context.
+
+                    Please provide a clear and comprehensive answer based on the above instructions.
                     """
 
                     response = model.generate_content(context_prompt)
@@ -229,10 +267,10 @@ def display_preclass_content(session, student_id, course_id):
     if file_content:
         material_type = st.selectbox("Select Material Type", ["pdf", "docx", "txt"])
         if st.button("Upload Material"):
-            upload_resource(course_id, session['session_id'], file_name, uploaded_file, material_type)
+            resource_id = upload_resource(course_id, session['session_id'], file_name, uploaded_file, material_type)
 
            # Search for the newly uploaded resource's _id in resources_collection
-            resource_id = resources_collection.find_one({"file_name": file_name})["_id"]
+            # resource_id = resources_collection.find_one({"file_name": file_name})["_id"]
             create_vector_store(file_content, resource_id)
             st.success("Material uploaded successfully!")
 
@@ -979,6 +1017,205 @@ def display_postclass_analytics(session, course_id):
     for student in pending_students:
         st.markdown(f"- {student.get('full_name', 'Unknown Student')} (SID: {student.get('SID', 'Unknown SID')})")
 
+def get_chat_history(user_id, session_id):
+    query = {
+        "user_id": ObjectId(user_id),
+        "session_id": session_id,
+        "timestamp": {"$lte": datetime.utcnow()}
+    }
+    result = chat_history_collection.find(query)
+    return list(result)
+
+def get_response_from_llm(raw_data):
+    messages = [
+        {
+            "role": "system",
+            "content": "You are an AI that refines raw analytics data into actionable insights for faculty reports."
+        },
+        {
+            "role": "user",
+            "content": f"""
+            Based on the following analytics data, refine and summarize the insights:
+
+            Raw Data:
+            {raw_data}
+
+            Instructions:
+            1. Group similar topics together under appropriate categories.
+            2. Remove irrelevant or repetitive entries.
+            3. Summarize the findings into actionable insights.
+            4. Provide concise recommendations for improvement based on the findings.
+
+            Output:
+            Provide a structured response with the following format:
+            {{
+                "Low Engagement Topics": ["List of Topics"],
+                "Frustration Areas": ["List of areas"],
+                "Recommendations": ["Actionable recommendations"],
+            }}
+            """
+        }
+    ]
+    try:
+        client = OpenAI(api_key=OPENAI_KEY)
+        response = client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=messages,
+            temperature=0.2
+        )
+        content = response.choices[0].message.content
+        return json.loads(content)
+
+    except Exception as e:
+        st.error(f"Error generating response: {str(e)}")
+        return None
+
+def get_preclass_analytics(session):
+    """Get all user_ids from chat_history collection where session_id matches"""
+    user_ids = chat_history_collection.distinct("user_id", {"session_id": session['session_id']})
+    print(user_ids)
+    session_id = session['session_id']
+
+    all_chat_histories = []
+
+    for user_id in user_ids:
+        result = get_chat_history(user_id, session_id)
+        if result:
+            for record in result:
+                chat_history = {
+                    "user_id": record["user_id"],
+                    "session_id": record["session_id"],
+                    "messages": record["messages"]
+                }
+                all_chat_histories.append(chat_history)
+        else:
+            st.warning("No chat history found for this session.")
+
+    # Use the analytics engine
+    analytics_engine = NovaScholarAnalytics()
+    results = analytics_engine.process_chat_history(all_chat_histories)
+    faculty_report = analytics_engine.generate_faculty_report(results)
+
+    # Pass this Faculty Report to an LLM model for refinements and clarity
+    refined_report = get_response_from_llm(faculty_report)
+    return refined_report
+
+def display_preclass_analytics2(session, course_id):
+    refined_report = get_preclass_analytics(session)
+    st.subheader("Pre-class Analytics")
+    if refined_report:
+        # Custom CSS to improve the look and feel
+        st.markdown("""
+            <style>
+            .metric-card {
+                background-color: #f8f9fa;
+                border-radius: 10px;
+                padding: 20px;
+                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            }
+            .header-text {
+                color: #1f77b4;
+                font-size: 24px;
+                font-weight: bold;
+                margin-bottom: 20px;
+            }
+            .subheader {
+                color: #2c3e50;
+                font-size: 17px;
+                font-weight: 500;
+                margin-bottom: 10px;
+            }
+            .insight-text {
+                color: #34495e;
+                font-size: 16px;
+                line-height: 1.6;
+            }
+            .glossary-card {
+                padding: 15px;
+                margin-top: 40px;
+            }
+            </style>
+        """, unsafe_allow_html=True)
+
+        # Header
+        # st.markdown("<h1 style='text-align: center; color: #2c3e50;'>Pre-Class Analytics Dashboard</h1>", unsafe_allow_html=True)
+        # st.markdown("<p style='text-align: center; color: #7f8c8d;'>Insights from Student Interactions</p>", unsafe_allow_html=True)
+
+        # Create three columns for metrics
+        col1, col2, col3 = st.columns(3)
+
+        with col1:
+            st.markdown("<p class='header-text'>🎯 Low Engagement Topics</p>", unsafe_allow_html=True)
+
+            # Group topics by category
+            topics = refined_report["Low Engagement Topics"]
+            # categories = defaultdict(list)
+            for i, topic in enumerate(topics):
+                st.markdown(f"{i + 1}. <p class='subheader'>{topic}</p>", unsafe_allow_html=True)
+
+            # # Categorize topics (you can modify these categories based on your needs)
+            # for topic in topics:
+            #     if "Data" in topic and ("Type" in topic or "Structure" in topic):
+            #         categories["Data Types"].append(topic)
+            #     elif "Analytics" in topic:
+            #         categories["Analytics Concepts"].append(topic)
+            #     else:
+            #         categories["General Concepts"].append(topic)
+
+            # Display categorized topics
+            # for category, items in categories.items():
+            #     st.markdown(f"<p class='subheader'>{category}</p>", unsafe_allow_html=True)
+            #     i = 0
+            #     for i, item in items:
+            #         st.markdown(f"{i + 1} {item}", unsafe_allow_html=True)
+
+        with col2:
+            st.markdown("<p class='header-text'>⚠️ Frustration Areas</p>", unsafe_allow_html=True)
+            for i, area in enumerate(refined_report["Frustration Areas"]):
+                st.markdown(f"{i + 1}. <p class='subheader'>{area}</p>", unsafe_allow_html=True)
+
+        with col3:
+            st.markdown("<p class='header-text'>💡 Recommendations</p>", unsafe_allow_html=True)
+            for i, rec in enumerate(refined_report["Recommendations"]):
+                st.markdown(f"{i + 1}. <p class='subheader'>{rec}</p>", unsafe_allow_html=True)
+
+        # Glossary section
+        st.markdown("<div class='glossary-card'>", unsafe_allow_html=True)
+        # st.markdown("<h3 style='color: #2c3e50;'>Understanding the Metrics</h3>", unsafe_allow_html=True)
+
+        explanations = {
+            "Low Engagement Topics": "Topics where students showed minimal interaction or understanding during their chat sessions. These areas may require additional focus during classroom instruction.",
+            "Frustration Areas": "Specific concepts or topics where students expressed difficulty or confusion during their interactions with the chatbot. These areas may need immediate attention or alternative teaching approaches.",
+            "Recommendations": "AI-generated suggestions for improving student engagement and understanding, based on the analyzed chat interactions and identified patterns."
+        }
+
+        st.subheader("Understanding the Metrics")
+
+        for metric, explanation in explanations.items():
+            # st.markdown(f"<p class='subheader'>{metric}</p>", unsafe_allow_html=True)
+            # st.markdown(f"<p class='insight-text'>{explanation}</p>", unsafe_allow_html=True)
+            st.markdown(f"<span class='subheader'>**{metric}**</span>: <span class='subheader'>{explanation}</span>", unsafe_allow_html=True)
+
+        st.markdown("</div>", unsafe_allow_html=True)
+
+
+def display_session_analytics(session, course_id):
+    """Display session analytics for faculty"""
+    st.header("Session Analytics")
+
+    # Display Pre-class Analytics
+    display_preclass_analytics2(session, course_id)
+
+    # Display In-class Analytics
+    display_inclass_analytics(session, course_id)
+
+    # Display Post-class Analytics
+    display_postclass_analytics(session, course_id)
+
+
+
+
+
 def upload_preclass_materials(session_id, course_id):
     """Upload pre-class materials for a session"""
     st.subheader("Upload Pre-class Materials")
@@ -1069,7 +1306,7 @@ def display_quiz_tab(student_id, course_id, session_id):
             st.error("Error submitting quiz. Please try again.")
 
 def display_session_content(student_id, course_id, session, username, user_type):
-    st.title(f"Session {session['session_id']}: {session['title']}")
+    st.title(f"{session['title']}")
 
     # Check if the date is a string or a datetime object
     if isinstance(session['date'], str):
@@ -1078,21 +1315,23 @@ def display_session_content(student_id, course_id, session, username, user_type):
     else:
         session_date = session['date']
 
-    st.markdown(f"**Date:** {format_datetime(session_date)}")
-    st.markdown(f"**Status:** {session['status'].replace('_', ' ').title()}")
+    course_name = courses_collection2.find_one({"course_id": course_id})['title']
 
+    st.markdown(f"**Date:** {format_datetime(session_date)}")
+    st.markdown(f"**Course Name:** {course_name}")
+
     # Find the course_id of the session in
 
     if st.session_state.user_type == 'student':
         tabs = (["Pre-class Work", "In-class Work", "Post-class Work"])
     else:
-        tabs = (["Pre-class Analytics", "In-class Analytics", "Post-class Analytics"])
+        tabs = (["Session Analytics"])
 
     if st.session_state.user_type == 'student':
         pre_class_tab, in_class_tab, post_class_tab, quiz_tab = st.tabs(["Pre-class Work", "In-class Work", "Post-class Work", "Quizzes"])
     else:
         pre_class_work, in_class_work, post_class_work, preclass_analytics, inclass_analytics, postclass_analytics = st.tabs(["Pre-class Work", "In-class Work", "Post-class Work", "Pre-class Analytics", "In-class Analytics", "Post-class Analytics"])
-
+        # pre_class_work, in_class_work, post_class_work, session_analytics = st.tabs(["Pre-class Work", "In-class Work", "Post-class Work", "Session Analytics"])
     # Display pre-class materials
     if st.session_state.user_type == 'student':
         with pre_class_tab:
@@ -1114,8 +1353,10 @@ def display_session_content(student_id, course_id, session, username, user_type):
         with post_class_work:
             display_post_class_content(session, student_id, course_id)
         with preclass_analytics:
-            display_preclass_analytics(session, course_id)
+            display_preclass_analytics2(session, course_id)
         with inclass_analytics:
             display_inclass_analytics(session, course_id)
         with postclass_analytics:
             display_postclass_analytics(session, course_id)
+        # with session_analytics:
+        #     display_session_analytics(session, course_id)
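
Finally, a minimal sketch of the session document that display_session_content expects after this change, with keys inferred from the fields the function actually reads; an illustration, not the real schema:

from datetime import datetime

session = {
    "session_id": "S101",                # used to fetch materials and chat history
    "title": "Introduction to DevOps",   # rendered via st.title
    "date": datetime(2024, 1, 15, 9, 0)  # a string is also handled by the isinstance branch
}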