Spaces · Build error

Harshal Vhatkar committed · Commit b8a1cb6
1 Parent(s): f39b63e

update pre-class-analytics

Browse files:
- .gitignore              +5   -1
- app.py                  +5   -4
- pre_class_analytics.py  +0   -850
- pre_class_analytics2.py +677 -0
- session_page.py         +467 -106
.gitignore CHANGED

@@ -14,4 +14,8 @@ all_chat_histories2.json
 analytics.ipynb
 chat_history.csv
 harshal.py
-course_creation.py
+course_creation.py
+topics.json
+new_analytics.json
+new_analytics2.json
+pre_class_analytics.py
app.py CHANGED

@@ -135,10 +135,11 @@ def login_form():
     col1, col2 = st.columns(2)
 
     with col1:
-
+        user_option = st.selectbox(
             "Please select your Role",
-            ["
+            ["Student", "Faculty", "Research Assistant", "Analyst"]
         )
+        user_type = user_option.lower()
         username = st.text_input("Username or Email")
 
     with col2:

@@ -159,7 +160,7 @@ def login_form():
 
 def get_courses(username, user_type):
     if user_type == "student":
-        student = students_collection.find_one({"full_name": username})
+        student = students_collection.find_one({"$or": [{"full_name": username}, {"username": username}]})
         if student:
             enrolled_course_ids = [
                 course["course_id"] for course in student.get("enrolled_courses", [])

@@ -855,7 +856,7 @@ def enroll_in_course(course_id, course_title, student):
             {"$set": {"enrolled_courses": courses}},
         )
         st.success(f"Enrolled in course {course_title}")
-        st.experimental_rerun()
+        # st.experimental_rerun()
     else:
         st.error("Course not found")
 else:
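Two notes on the app.py changes above. First, the get_courses() lookup now uses a Mongo $or query so that either the stored full name or the username/email matches; a minimal standalone sketch of the same pattern (the connection URI and the "students" collection name are illustrative assumptions, not from this commit):

    from pymongo import MongoClient

    client = MongoClient("mongodb://localhost:27017")  # placeholder URI
    students = client["novascholar_db"]["students"]    # collection name assumed

    def find_student(identifier: str):
        # Matches on either field; returns None when no document matches.
        return students.find_one(
            {"$or": [{"full_name": identifier}, {"username": identifier}]}
        )

Second, the st.experimental_rerun() call is commented out rather than replaced; if a rerun is still wanted, recent Streamlit releases expose the supported equivalent st.rerun().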
pre_class_analytics.py DELETED

@@ -1,850 +0,0 @@
Entire module removed. Its contents were:

import re
from bson import ObjectId
from pymongo import MongoClient
import pandas as pd
import numpy as np
from datetime import datetime
from dotenv import load_dotenv
import os
from typing import List, Dict, Any
from transformers import pipeline
from textstat import flesch_reading_ease
from collections import Counter
import logging
import spacy
import json

# Load chat histories from JSON file
# all_chat_histories = []
# with open(r'all_chat_histories2.json', 'r') as file:
#     all_chat_histories = json.load(file)

load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")
client = MongoClient(MONGO_URI)
db = client['novascholar_db']

chat_history_collection = db['chat_history']

# def get_chat_history(user_id, session_id):
#     query = {
#         "user_id": ObjectId(user_id),
#         "session_id": session_id,
#         "timestamp": {"$lte": datetime.utcnow()}
#     }
#     result = chat_history_collection.find(query)
#     return list(result)

# if __name__ == "__main__":
#     user_ids = ["6738b70cc97dffb641c7d158", "6738b7b33f648a9224f7aa69"]
#     session_ids = ["S104"]
#     for user_id in user_ids:
#         for session_id in session_ids:
#             result = get_chat_history(user_id, session_id)
#             print(result)

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class NovaScholarAnalytics:
    def __init__(self):
        # Initialize NLP components
        self.nlp = spacy.load("en_core_web_sm")
        self.sentiment_analyzer = pipeline("sentiment-analysis", model="finiteautomata/bertweet-base-sentiment-analysis", top_k=None)

        # Define question words for detecting questions
        self.question_words = {"what", "why", "how", "when", "where", "which", "who", "whose", "whom"}

        # Define question categories
        self.question_categories = {
            'conceptual': {'what', 'define', 'describe', 'explain'},
            'procedural': {'how', 'steps', 'procedure', 'process'},
            'reasoning': {'why', 'reason', 'cause', 'effect'},
            'clarification': {'clarify', 'mean', 'difference', 'between'}
        }

    def _categorize_questions(self, questions_df: pd.DataFrame) -> Dict[str, int]:
        """
        Categorize questions into different types based on their content.

        Args:
            questions_df: DataFrame containing questions

        Returns:
            Dictionary with question categories and their counts
        """
        categories_count = {
            'conceptual': 0,
            'procedural': 0,
            'reasoning': 0,
            'clarification': 0,
            'other': 0
        }

        for _, row in questions_df.iterrows():
            prompt_lower = row['prompt'].lower()
            categorized = False

            for category, keywords in self.question_categories.items():
                if any(keyword in prompt_lower for keyword in keywords):
                    categories_count[category] += 1
                    categorized = True
                    break

            if not categorized:
                categories_count['other'] += 1

        return categories_count

    def _identify_frustration(self, df: pd.DataFrame) -> List[str]:
        """
        Identify signs of frustration in student messages.

        Args:
            df: DataFrame containing messages

        Returns:
            List of topics/areas where frustration was detected
        """
        frustration_indicators = [
            "don't understand", "confused", "difficult", "hard to",
            "not clear", "stuck", "help", "can't figure"
        ]

        frustrated_messages = df[
            df['prompt'].str.lower().str.contains('|'.join(frustration_indicators), na=False)
        ]

        if len(frustrated_messages) == 0:
            return []

        # Extract topics from frustrated messages
        frustrated_topics = self._extract_topics(frustrated_messages)
        return list(set(frustrated_topics))  # Unique topics

    def _calculate_resolution_times(self, df: pd.DataFrame) -> Dict[str, float]:
        """
        Calculate average time taken to resolve questions for different topics.

        Args:
            df: DataFrame containing messages

        Returns:
            Dictionary with topics and their average resolution times in minutes
        """
        resolution_times = {}

        # Group messages by topic
        topics = self._extract_topics(df)
        for topic in set(topics):
            escaped_topic = re.escape(topic)
            topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]
            if len(topic_msgs) >= 2:
                # Calculate time difference between first and last message
                start_time = pd.to_datetime(topic_msgs['timestamp'].iloc[0])
                end_time = pd.to_datetime(topic_msgs['timestamp'].iloc[-1])
                duration = (end_time - start_time).total_seconds() / 60  # Convert to minutes
                resolution_times[topic] = duration

        return resolution_times

    def _calculate_completion_rates(self, df: pd.DataFrame) -> Dict[str, float]:
        """
        Calculate completion rates for different topics.

        Args:
            df: DataFrame containing messages

        Returns:
            Dictionary with topics and their completion rates
        """
        completion_rates = {}
        topics = self._extract_topics(df)

        for topic in set(topics):
            escaped_topic = re.escape(topic)
            topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]
            if len(topic_msgs) > 0:
                # Consider a topic completed if there are no frustrated messages in the last 2 messages
                last_msgs = topic_msgs.tail(2)
                frustrated = self._identify_frustration(last_msgs)
                completion_rates[topic] = 0.0 if frustrated else 1.0

        return completion_rates

    def _analyze_time_distribution(self, df: pd.DataFrame) -> Dict[str, Dict[str, float]]:
        """
        Analyze time spent on different topics.

        Args:
            df: DataFrame containing messages

        Returns:
            Dictionary with time distribution statistics per topic
        """
        time_stats = {}
        topics = self._extract_topics(df)

        for topic in set(topics):
            escaped_topic = re.escape(topic)
            topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]
            if len(topic_msgs) >= 2:
                times = pd.to_datetime(topic_msgs['timestamp'])
                duration = (times.max() - times.min()).total_seconds() / 60

                time_stats[topic] = {
                    'total_minutes': duration,
                    'avg_minutes_per_message': duration / len(topic_msgs),
                    'message_count': len(topic_msgs)
                }

        return time_stats

    def _identify_coverage_gaps(self, df: pd.DataFrame) -> List[str]:
        """
        Identify topics with potential coverage gaps.

        Args:
            df: DataFrame containing messages

        Returns:
            List of topics with coverage gaps
        """
        gaps = []
        topics = self._extract_topics(df)
        topic_stats = self._analyze_time_distribution(df)

        for topic in set(topics):
            if topic in topic_stats:
                stats = topic_stats[topic]
                # Flag topics with very short interaction times or few messages
                if stats['total_minutes'] < 5 or stats['message_count'] < 3:
                    gaps.append(topic)

        return gaps

    def _calculate_student_metrics(self, df: pd.DataFrame) -> Dict[str, Dict[str, float]]:
        """
        Calculate various metrics for each student.

        Args:
            df: DataFrame containing messages

        Returns:
            Dictionary with student metrics
        """
        student_metrics = {}

        for user_id in df['user_id'].unique():
            user_msgs = df[df['user_id'] == user_id]

            metrics = {
                'message_count': len(user_msgs),
                'question_count': len(user_msgs[user_msgs['prompt'].str.contains('|'.join(self.question_words), case=False)]),
                'avg_response_length': user_msgs['response'].str.len().mean(),
                'unique_topics': len(set(self._extract_topics(user_msgs))),
                'frustration_count': len(self._identify_frustration(user_msgs))
            }

            student_metrics[user_id] = metrics

        return student_metrics

    def _determine_student_cluster(self, metrics: Dict[str, float]) -> str:
        """
        Determine which cluster a student belongs to based on their metrics.

        Args:
            metrics: Dictionary containing student metrics

        Returns:
            Cluster label ('confident', 'engaged', or 'struggling')
        """
        # Simple rule-based clustering
        if metrics['frustration_count'] > 2 or metrics['question_count'] / metrics['message_count'] > 0.7:
            return 'struggling'
        elif metrics['message_count'] > 10 and metrics['unique_topics'] > 3:
            return 'engaged'
        else:
            return 'confident'

    def _identify_abandon_points(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
        """
        Identify points where students abandoned topics.

        Args:
            df: DataFrame containing messages

        Returns:
            List of dictionaries containing abandon point information
        """
        abandon_points = []
        topics = self._extract_topics(df)

        for topic in set(topics):
            escaped_topic = re.escape(topic)
            topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]
            if len(topic_msgs) >= 2:
                # Check for large time gaps between messages
                times = pd.to_datetime(topic_msgs['timestamp'])
                time_gaps = times.diff()

                for idx, gap in enumerate(time_gaps):
                    if gap and gap.total_seconds() > 600:  # 10 minutes threshold
                        abandon_points.append({
                            'topic': topic,
                            'message_before': topic_msgs.iloc[idx-1]['prompt'],
                            'time_gap': gap.total_seconds() / 60,  # Convert to minutes
                            'resumed': idx < len(topic_msgs) - 1
                        })

        return abandon_points

    def process_chat_history(self, chat_history: List[Dict[Any, Any]]) -> Dict[str, Any]:
        """
        Process chat history data and generate comprehensive analytics.

        Args:
            chat_history: List of chat history documents
            session_info: Dictionary containing session metadata (topic, duration, etc.)

        Returns:
            Dictionary containing all analytics results
        """
        try:
            # Convert chat history to DataFrame for easier processing
            messages_data = []
            for chat in chat_history:
                for msg in chat['messages']:
                    messages_data.append({
                        'user_id': chat['user_id'],
                        'session_id': chat['session_id'],
                        'timestamp': msg['timestamp'],
                        'prompt': msg['prompt'],
                        'response': msg['response']
                    })

            df = pd.DataFrame(messages_data)

            # Generate all analytics
            analytics_results = {
                'topic_interaction': self._analyze_topic_interaction(df),
                'question_patterns': self._analyze_question_patterns(df),
                'sentiment_analysis': self._analyze_sentiment(df),
                'completion_trends': self._analyze_completion_trends(df),
                'student_clustering': self._cluster_students(df),
                'abandoned_conversations': self._analyze_abandoned_conversations(df)
            }

            return analytics_results

        except Exception as e:
            logger.error(f"Error processing chat history: {str(e)}")
            raise

    def _analyze_topic_interaction(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Analyze topic interaction frequency and patterns."""
        topics = self._extract_topics(df)

        topic_stats = {
            'interaction_counts': Counter(topics),
            'revisit_patterns': self._calculate_topic_revisits(df, topics),
            'avg_time_per_topic': self._calculate_avg_time_per_topic(df, topics)
        }

        return topic_stats

    def _analyze_question_patterns(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Analyze question patterns and identify difficult topics."""
        questions = df[df['prompt'].str.lower().str.split().apply(
            lambda x: any(word.lower() in self.question_words for word in x)
        )]

        question_stats = {
            'total_questions': len(questions),
            'question_types': self._categorize_questions(questions),
            'complex_chains': self._identify_complex_chains(df)
        }

        return question_stats

    def _analyze_sentiment(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Perform sentiment analysis on messages."""
        sentiments = []
        for prompt in df['prompt']:
            try:
                sentiment = self.sentiment_analyzer(prompt)[0]
                sentiments.append(sentiment['label'])
            except Exception as e:
                logger.warning(f"Error in sentiment analysis: {str(e)}")
                sentiments.append('NEUTRAL')

        sentiment_stats = {
            'overall_sentiment': Counter(sentiments),
            'frustration_indicators': self._identify_frustration(df),
            'resolution_times': self._calculate_resolution_times(df)
        }

        return sentiment_stats

    def _analyze_completion_trends(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Analyze topic completion trends and coverage."""
        completion_stats = {
            'completion_rates': self._calculate_completion_rates(df),
            'time_distribution': self._analyze_time_distribution(df),
            'coverage_gaps': self._identify_coverage_gaps(df)
        }

        return completion_stats

    def _cluster_students(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Cluster students based on interaction patterns."""
        student_metrics = self._calculate_student_metrics(df)

        clusters = {
            'confident': [],
            'engaged': [],
            'struggling': []
        }

        for student_id, metrics in student_metrics.items():
            cluster = self._determine_student_cluster(metrics)
            clusters[cluster].append(student_id)

        return clusters

    def _analyze_abandoned_conversations(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Identify and analyze abandoned conversations."""
        abandoned_stats = {
            'abandon_points': self._identify_abandon_points(df),
            'incomplete_topics': self._identify_incomplete_topics(df),
            'dropout_patterns': self._analyze_dropout_patterns(df)
        }

        return abandoned_stats

    def _identify_incomplete_topics(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
        """
        Identify topics that were started but not completed by students.

        Args:
            df: DataFrame containing messages

        Returns:
            List of dictionaries containing incomplete topic information
        """
        incomplete_topics = []
        topics = self._extract_topics(df)

        for topic in set(topics):
            escaped_topic = re.escape(topic)
            topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]

            if len(topic_msgs) > 0:
                # Check for completion indicators
                last_msgs = topic_msgs.tail(3)  # Look at last 3 messages

                # Consider a topic incomplete if:
                # 1. There are unanswered questions
                # 2. Contains frustration indicators
                # 3. No positive confirmation/understanding indicators
                has_questions = last_msgs['prompt'].str.contains('|'.join(self.question_words), case=False).any()
                has_frustration = bool(self._identify_frustration(last_msgs))

                completion_indicators = ['understand', 'got it', 'makes sense', 'thank you', 'clear now']
                has_completion = last_msgs['prompt'].str.contains('|'.join(completion_indicators), case=False).any()

                if (has_questions or has_frustration) and not has_completion:
                    incomplete_topics.append({
                        'topic': topic,
                        'last_interaction': topic_msgs.iloc[-1]['timestamp'],
                        'message_count': len(topic_msgs),
                        'has_pending_questions': has_questions,
                        'shows_frustration': has_frustration
                    })

        return incomplete_topics

    def _analyze_dropout_patterns(self, df: pd.DataFrame) -> Dict[str, Any]:
        """
        Analyze patterns in where and why students tend to drop out of conversations.

        Args:
            df: DataFrame containing messages

        Returns:
            Dictionary containing dropout pattern analysis
        """
        dropout_analysis = {
            'timing_patterns': {},
            'topic_patterns': {},
            'complexity_indicators': {},
            'engagement_metrics': {}
        }

        # Analyze timing of dropouts
        timestamps = pd.to_datetime(df['timestamp'])
        time_gaps = timestamps.diff()
        dropout_points = time_gaps[time_gaps > pd.Timedelta(minutes=30)].index

        for point in dropout_points:
            # Get context before dropout
            context_msgs = df.loc[max(0, point-5):point]

            # Analyze timing
            time_of_day = timestamps[point].hour
            dropout_analysis['timing_patterns'][time_of_day] = \
                dropout_analysis['timing_patterns'].get(time_of_day, 0) + 1

            # Analyze topics at dropout points
            dropout_topics = self._extract_topics(context_msgs)
            for topic in dropout_topics:
                dropout_analysis['topic_patterns'][topic] = \
                    dropout_analysis['topic_patterns'].get(topic, 0) + 1

            # Analyze complexity
            msg_lengths = context_msgs['prompt'].str.len().mean()
            question_density = len(context_msgs[context_msgs['prompt'].str.contains(
                '|'.join(self.question_words), case=False)]) / len(context_msgs)

            dropout_analysis['complexity_indicators'][point] = {
                'message_length': msg_lengths,
                'question_density': question_density
            }

            # Analyze engagement
            dropout_analysis['engagement_metrics'][point] = {
                'messages_before_dropout': len(context_msgs),
                'response_times': time_gaps[max(0, point-5):point].mean().total_seconds() / 60
            }

        return dropout_analysis

    def _rank_topics_by_difficulty(self, analytics_results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Rank topics by their difficulty based on various metrics from analytics results.

        Args:
            analytics_results: Dictionary containing all analytics data

        Returns:
            List of dictionaries containing topic difficulty rankings and scores
        """
        topic_difficulty = []

        # Extract relevant metrics for each topic
        topics = set()
        for topic in analytics_results['topic_interaction']['interaction_counts'].keys():

            # Calculate difficulty score based on multiple factors
            difficulty_score = 0

            # Factor 1: Question frequency
            question_count = sum(1 for chain in analytics_results['question_patterns']['complex_chains']
                                 if chain['topic'] == topic)
            difficulty_score += question_count * 0.3

            # Factor 2: Frustration indicators
            frustration_count = sum(1 for indicator in analytics_results['sentiment_analysis']['frustration_indicators']
                                    if topic.lower() in indicator.lower())
            difficulty_score += frustration_count * 0.25

            # Factor 3: Completion rate (inverse relationship)
            completion_rate = analytics_results['completion_trends']['completion_rates'].get(topic, 1.0)
            difficulty_score += (1 - completion_rate) * 0.25

            # Factor 4: Time spent (normalized)
            avg_time = analytics_results['topic_interaction']['avg_time_per_topic'].get(topic, 0)
            max_time = max(analytics_results['topic_interaction']['avg_time_per_topic'].values())
            normalized_time = avg_time / max_time if max_time > 0 else 0
            difficulty_score += normalized_time * 0.2

            topic_difficulty.append({
                'topic': topic,
                'difficulty_score': round(difficulty_score, 2),
                'metrics': {
                    'question_frequency': question_count,
                    'frustration_indicators': frustration_count,
                    'completion_rate': completion_rate,
                    'avg_time_spent': avg_time
                }
            })

        # Sort topics by difficulty score
        return sorted(topic_difficulty, key=lambda x: x['difficulty_score'], reverse=True)

    def _identify_support_needs(self, analytics_results: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
        """
        Identify specific support needs for students based on analytics results.

        Args:
            analytics_results: Dictionary containing all analytics data

        Returns:
            Dictionary containing support needs categorized by urgency
        """
        support_needs = {
            'immediate_attention': [],
            'monitoring_needed': [],
            'general_support': []
        }

        # Analyze struggling students
        for student_id in analytics_results['student_clustering']['struggling']:
            # Get student-specific metrics
            student_msgs = analytics_results.get('sentiment_analysis', {}).get('messages', [])
            frustration_topics = [topic for topic in analytics_results['sentiment_analysis']['frustration_indicators']
                                  if any(msg['user_id'] == student_id for msg in student_msgs)]

            # Calculate engagement metrics
            engagement_level = len([chain for chain in analytics_results['question_patterns']['complex_chains']
                                    if any(msg['user_id'] == student_id for msg in chain['messages'])])

            # Identify immediate attention needs
            if len(frustration_topics) >= 3 or engagement_level < 2:
                support_needs['immediate_attention'].append({
                    'student_id': student_id,
                    'issues': frustration_topics,
                    'engagement_level': engagement_level,
                    'recommended_actions': [
                        'Schedule one-on-one session',
                        'Review difficult topics',
                        'Provide additional resources'
                    ]
                })

            # Identify monitoring needs
            elif len(frustration_topics) >= 1 or engagement_level < 4:
                support_needs['monitoring_needed'].append({
                    'student_id': student_id,
                    'areas_of_concern': frustration_topics,
                    'engagement_level': engagement_level,
                    'recommended_actions': [
                        'Regular progress checks',
                        'Provide supplementary materials'
                    ]
                })

            # General support needs
            else:
                support_needs['general_support'].append({
                    'student_id': student_id,
                    'areas_for_improvement': frustration_topics,
                    'engagement_level': engagement_level,
                    'recommended_actions': [
                        'Maintain regular communication',
                        'Encourage participation'
                    ]
                })

        return support_needs

    def _extract_topics(self, df: pd.DataFrame) -> List[str]:
        """Extract topics from messages using spaCy."""
        topics = []
        for doc in self.nlp.pipe(df['prompt']):
            # Extract noun phrases as potential topics
            noun_phrases = [chunk.text for chunk in doc.noun_chunks]
            topics.extend(noun_phrases)
        return topics

    def _calculate_topic_revisits(self, df: pd.DataFrame, topics: List[str]) -> Dict[str, int]:
        """Calculate how often topics are revisited."""
        topic_visits = Counter(topics)
        return {topic: count for topic, count in topic_visits.items() if count > 1}

    def _calculate_avg_time_per_topic(self, df: pd.DataFrame, topics: List[str]) -> Dict[str, float]:
        """Calculate average time spent per topic."""
        topic_times = {}
        for topic in set(topics):
            escaped_topic = re.escape(topic)
            topic_msgs = df[df['prompt'].str.contains(escaped_topic, case=False)]
            if len(topic_msgs) > 1:
                time_diffs = pd.to_datetime(topic_msgs['timestamp']).diff()
                avg_time = time_diffs.mean().total_seconds() / 60  # Convert to minutes
                topic_times[topic] = avg_time
        return topic_times

    def _identify_complex_chains(self, df: pd.DataFrame) -> List[Dict[str, Any]]:
        """Identify complex conversation chains."""
        chains = []
        current_chain = []

        for idx, row in df.iterrows():
            if self._is_followup_question(row['prompt']):
                current_chain.append(row)
            else:
                if len(current_chain) >= 3:  # Consider 3+ related questions as complex chain
                    chains.append({
                        'messages': current_chain,
                        'topic': self._extract_topics([current_chain[0]['prompt']])[0],
                        'length': len(current_chain)
                    })
                current_chain = []

        return chains

    def _generate_topic_priority_list(self, analytics_results: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Generate a prioritized list of topics for the upcoming session.

        Args:
            analytics_results: Dictionary containing all analytics data

        Returns:
            List of dictionaries containing topics and their priority scores
        """
        topic_priorities = []

        # Get difficulty rankings
        difficulty_ranking = self._rank_topics_by_difficulty(analytics_results)

        for topic_data in difficulty_ranking:
            topic = topic_data['topic']

            # Calculate priority score based on multiple factors
            priority_score = 0

            # Factor 1: Difficulty score (40% weight)
            priority_score += topic_data['difficulty_score'] * 0.4

            # Factor 2: Student frustration (25% weight)
            frustration_count = sum(1 for indicator in
                                    analytics_results['sentiment_analysis']['frustration_indicators']
                                    if topic.lower() in indicator.lower())
            normalized_frustration = min(frustration_count / 5, 1)  # Cap at 5 frustrations
            priority_score += normalized_frustration * 0.25

            # Factor 3: Incomplete understanding (20% weight)
            incomplete_topics = analytics_results.get('abandoned_conversations', {}).get('incomplete_topics', [])
            if any(t['topic'] == topic for t in incomplete_topics):
                priority_score += 0.2

            # Factor 4: Coverage gaps (15% weight)
            if topic in analytics_results['completion_trends']['coverage_gaps']:
                priority_score += 0.15

            topic_priorities.append({
                'topic': topic,
                'priority_score': round(priority_score, 2),
                'reasons': {
                    'difficulty_level': topic_data['difficulty_score'],
                    'frustration_indicators': frustration_count,
                    'has_incomplete_understanding': any(t['topic'] == topic for t in incomplete_topics),
                    'has_coverage_gaps': topic in analytics_results['completion_trends']['coverage_gaps']
                },
                'recommended_focus_areas': self._generate_focus_recommendations(topic_data, analytics_results)
            })

        # Sort by priority score
        return sorted(topic_priorities, key=lambda x: x['priority_score'], reverse=True)

    def _generate_focus_recommendations(self, topic_data: Dict[str, Any],
                                        analytics_results: Dict[str, Any]) -> List[str]:
        """Generate specific focus recommendations for a topic."""
        recommendations = []

        if topic_data['metrics']['question_frequency'] > 3:
            recommendations.append("Provide more detailed explanations and examples")

        if topic_data['metrics']['completion_rate'] < 0.7:
            recommendations.append("Break down complex concepts into smaller segments")

        if topic_data['metrics']['frustration_indicators'] > 2:
            recommendations.append("Review prerequisite concepts and provide additional context")

        return recommendations

    def _is_followup_question(self, prompt: str) -> bool:
        """Determine if a prompt is a follow-up question."""
        followup_indicators = {'also', 'then', 'additionally', 'furthermore', 'related to that'}
        return any(indicator in prompt.lower() for indicator in followup_indicators)

    def generate_faculty_report(self, analytics_results: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a comprehensive report for faculty."""
        report = {
            'key_findings': self._generate_key_findings(analytics_results),
            'recommended_actions': self._generate_recommendations(analytics_results),
            'topic_difficulty_ranking': self._rank_topics_by_difficulty(analytics_results),
            'student_support_needs': self._identify_support_needs(analytics_results),
            'topic_priorities': self._generate_topic_priority_list(analytics_results)
        }

        return report

    def _generate_key_findings(self, analytics_results: Dict[str, Any]) -> List[str]:
        """Generate key findings from analytics results."""
        findings = []

        # Analyze topic interaction patterns
        topic_stats = analytics_results['topic_interaction']
        low_interaction_topics = [topic for topic, count in topic_stats['interaction_counts'].items()
                                  if count < 3]  # Arbitrary threshold
        if low_interaction_topics:
            findings.append(f"Low engagement detected in topics: {', '.join(low_interaction_topics)}")

        # Analyze sentiment patterns
        sentiment_stats = analytics_results['sentiment_analysis']
        if sentiment_stats['frustration_indicators']:
            findings.append("Significant frustration detected in the following areas: " +
                            ', '.join(sentiment_stats['frustration_indicators']))

        # Analyze student clustering
        student_clusters = analytics_results['student_clustering']
        if len(student_clusters['struggling']) > 0:
            findings.append(f"{len(student_clusters['struggling'])} students showing signs of difficulty")

        return findings

    def _generate_recommendations(self, analytics_results: Dict[str, Any]) -> List[str]:
        """Generate actionable recommendations for faculty."""
        recommendations = []

        # Analyze complex chains
        question_patterns = analytics_results['question_patterns']
        if question_patterns['complex_chains']:
            topics_needing_clarity = set(chain['topic'] for chain in question_patterns['complex_chains'])
            recommendations.append(f"Consider providing additional examples for: {', '.join(topics_needing_clarity)}")

        # Analyze completion trends
        completion_trends = analytics_results['completion_trends']
        low_completion_topics = [topic for topic, rate in completion_trends['completion_rates'].items()
                                 if rate < 0.7]  # 70% threshold
        if low_completion_topics:
            recommendations.append(f"Review and possibly simplify material for: {', '.join(low_completion_topics)}")

        return recommendations

# Example usage
if __name__ == "__main__":
    # Initialize analytics engine
    analytics_engine = NovaScholarAnalytics()

    # Sample usage with dummy data
    sample_chat_history = [
        {
            "user_id": "123",
            "session_id": "S101",
            "messages": [
                {
                    "prompt": "What is DevOps?",
                    "response": "DevOps is a software engineering practice...",
                    "timestamp": datetime.now()
                }
            ]
        }
    ]

    # Process analytics
    # results = analytics_engine.process_chat_history(all_chat_histories)

    # Generate faculty report
    # faculty_report = analytics_engine.generate_faculty_report(results)
    # print(faculty_report)
    # Print results
    # logger.info("Analytics processing completed")
    # logger.info(f"Key findings: {faculty_report['key_findings']}")
    # logger.info(f"Recommendations: {faculty_report['recommended_actions']}")
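The deleted engine above computed everything locally: spaCy noun-chunk topic extraction, a transformers sentiment pipeline, and rule-based student clustering. For illustration, its clustering rule restated as a standalone function (the sample metrics are invented, not from the repository):

    def cluster(metrics: dict) -> str:
        # Thresholds copied from the deleted _determine_student_cluster().
        if metrics['frustration_count'] > 2 or metrics['question_count'] / metrics['message_count'] > 0.7:
            return 'struggling'
        elif metrics['message_count'] > 10 and metrics['unique_topics'] > 3:
            return 'engaged'
        return 'confident'

    print(cluster({'frustration_count': 3, 'question_count': 4,
                   'message_count': 12, 'unique_topics': 5}))  # -> struggling

The commit replaces this heuristic pipeline with the Gemini-driven pre_class_analytics2.py below.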
pre_class_analytics2.py ADDED

@@ -0,0 +1,677 @@
New file; its contents:
1 |
+
import json
|
2 |
+
import typing_extensions as typing
|
3 |
+
import google.generativeai as genai
|
4 |
+
from typing import List, Dict, Any
|
5 |
+
import numpy as np
|
6 |
+
from collections import defaultdict
|
7 |
+
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
import os
|
10 |
+
import pymongo
|
11 |
+
from pymongo import MongoClient
|
12 |
+
|
13 |
+
load_dotenv()
|
14 |
+
GEMINI_API_KEY = os.getenv('GEMINI_KEY')
|
15 |
+
|
16 |
+
class EngagementMetrics(typing.TypedDict):
|
17 |
+
participation_level: str # "high" | "medium" | "low"
|
18 |
+
question_quality: str # "advanced" | "intermediate" | "basic"
|
19 |
+
concept_understanding: str # "strong" | "moderate" | "needs_improvement"
|
20 |
+
|
21 |
+
class StudentInsight(typing.TypedDict):
|
22 |
+
student_id: str
|
23 |
+
performance_level: str # "high_performer" | "average" | "at_risk"
|
24 |
+
struggling_topics: list[str]
|
25 |
+
engagement_metrics: EngagementMetrics
|
26 |
+
|
27 |
+
class TopicInsight(typing.TypedDict):
|
28 |
+
topic: str
|
29 |
+
difficulty_level: float # 0 to 1
|
30 |
+
student_count: int
|
31 |
+
common_issues: list[str]
|
32 |
+
key_misconceptions: list[str]
|
33 |
+
|
34 |
+
class RecommendedAction(typing.TypedDict):
|
35 |
+
action: str
|
36 |
+
priority: str # "high" | "medium" | "low"
|
37 |
+
target_group: str # "all_students" | "specific_students" | "faculty"
|
38 |
+
reasoning: str
|
39 |
+
expected_impact: str
|
40 |
+
|
41 |
+
class ClassDistribution(typing.TypedDict):
|
42 |
+
high_performers: float
|
43 |
+
average_performers: float
|
44 |
+
at_risk: float
|
45 |
+
|
46 |
+
class CourseHealth(typing.TypedDict):
|
47 |
+
overall_engagement: float # 0 to 1
|
48 |
+
critical_topics: list[str]
|
49 |
+
class_distribution: ClassDistribution
|
50 |
+
|
51 |
+
class InterventionMetrics(typing.TypedDict):
|
52 |
+
immediate_attention_needed: list[str] # student_ids
|
53 |
+
monitoring_required: list[str] # student_ids
|
54 |
+
|
55 |
+
class AnalyticsResponse(typing.TypedDict):
|
56 |
+
topic_insights: list[TopicInsight]
|
57 |
+
student_insights: list[StudentInsight]
|
58 |
+
recommended_actions: list[RecommendedAction]
|
59 |
+
course_health: CourseHealth
|
60 |
+
intervention_metrics: InterventionMetrics
|
61 |
+
|
62 |
+
|
63 |
+
|
64 |
+
class NovaScholarAnalytics:
|
65 |
+
def __init__(self, model_name: str = "gemini-1.5-flash"):
|
66 |
+
genai.configure(api_key=GEMINI_API_KEY)
|
67 |
+
self.model = genai.GenerativeModel(model_name)
|
68 |
+
|
69 |
+
def _create_analytics_prompt(self, chat_histories: List[Dict], all_topics: List[str]) -> str:
|
70 |
+
"""Creates a structured prompt for Gemini to analyze chat histories."""
|
71 |
+
# Prompt 1:
|
72 |
+
# return f"""Analyze these student chat histories for a university course and provide detailed analytics.
|
73 |
+
|
74 |
+
# Context:
|
75 |
+
# - These are pre-class chat interactions between students and an AI tutor
|
76 |
+
# - Topics covered: {', '.join(all_topics)}
|
77 |
+
|
78 |
+
# Chat histories: {json.dumps(chat_histories, indent=2)}
|
79 |
+
|
80 |
+
# Return the analysis in JSON format matching this exact schema:
|
81 |
+
# {AnalyticsResponse.__annotations__}
|
82 |
+
|
83 |
+
# Ensure all numeric values are between 0 and 1 (accuracy upto 3 decimal places) where applicable.
|
84 |
+
|
85 |
+
# Important analysis guidelines:
|
86 |
+
# 1. Identify topics where students show confusion or ask multiple follow-up questions
|
87 |
+
# 2. Look for patterns in question types and complexity
|
88 |
+
# 3. Analyze response understanding based on follow-up questions
|
89 |
+
# 4. Consider both explicit and implicit signs of difficulty
|
90 |
+
# 5. Focus on concept relationships and prerequisite understanding"""
|
91 |
+
|
92 |
+
# Prompt 2:
|
93 |
+
# return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
|
94 |
+
|
95 |
+
# Context:
|
96 |
+
# - Chat histories: {json.dumps(chat_histories, indent=2)}
|
97 |
+
# - These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
|
98 |
+
# - Topics covered: {', '.join(all_topics)}.
|
99 |
+
|
100 |
+
# Your task is to extract key insights that will help faculty address challenges effectively and enhance learning outcomes.
|
101 |
+
|
102 |
+
# Output Format:
|
103 |
+
# 1. Topics where students face significant difficulties:
|
104 |
+
# - Provide a ranked list of topics where the majority of students are struggling, based on the frequency and nature of their questions or misconceptions.
|
105 |
+
# - Include the percentage of students who found each topic challenging.
|
106 |
+
|
107 |
+
# 2. AI-recommended actions for faculty:
|
108 |
+
# - Suggest actionable steps to address the difficulties identified in each critical topic.
|
109 |
+
# - Specify the priority of each action (high, medium, low) based on the urgency and impact.
|
110 |
+
# - Explain the reasoning behind each recommendation and its expected impact on student outcomes.
|
111 |
+
|
112 |
+
# 3. Student-specific analytics (focusing on at-risk students):
|
113 |
+
# - Identify students categorized as "at-risk" based on their engagement levels, question complexity, and recurring struggles.
|
114 |
+
# - For each at-risk student, list their top 3 struggling topics and their engagement metrics (participation level, concept understanding).
|
115 |
+
# - Provide personalized recommendations for improving their understanding.
|
116 |
+
|
117 |
+
# Guidelines for Analysis:
|
118 |
+
# - Focus on actionable and concise insights rather than exhaustive details.
|
119 |
+
# - Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
|
120 |
+
# - Prioritize topics with higher difficulty scores or more students struggling.
|
121 |
+
# - Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
|
122 |
+
|
123 |
+
# The response must be well-structured, concise, and highly actionable for faculty to implement improvements effectively."""
|
124 |
+
|
125 |
+
# Prompt 3:
|
126 |
+
return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
|
127 |
+
Context:
|
128 |
+
- Chat histories: {json.dumps(chat_histories, indent=2)}
|
129 |
+
- These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
|
130 |
+
- Topics covered: {', '.join(all_topics)}.
|
131 |
+
|
132 |
+
Your task is to provide detailed analytics that will help faculty address challenges effectively and enhance learning outcomes.
|
133 |
+
|
134 |
+
Output Format (strictly follow this JSON structure):
|
135 |
+
{{
|
136 |
+
"topic_wise_insights": [
|
137 |
+
{{
|
138 |
+
"topic": "<string>",
|
139 |
+
"struggling_percentage": <number between 0 and 1>,
|
140 |
+
"key_issues": ["<string>", "<string>", ...],
|
141 |
+
"key_misconceptions": ["<string>", "<string>", ...],
|
142 |
+
"recommended_actions": {{
|
143 |
+
"description": "<string>",
|
144 |
+
"priority": "high|medium|low",
|
145 |
+
"expected_outcome": "<string>"
|
146 |
+
}}
|
147 |
+
}}
|
148 |
+
],
|
149 |
+
"ai_recommended_actions": [
|
150 |
+
{{
|
151 |
+
"action": "<string>",
|
152 |
+
"priority": "high|medium|low",
|
153 |
+
"reasoning": "<string>",
|
154 |
+
"expected_outcome": "<string>",
|
155 |
+
"pedagogy_recommendations": {{
|
156 |
+
"methods": ["<string>", "<string>", ...],
|
157 |
+
"resources": ["<string>", "<string>", ...],
|
158 |
+
"expected_impact": "<string>"
|
159 |
+
}}
|
160 |
+
}}
|
161 |
+
],
|
162 |
+
"student_analytics": [
|
163 |
+
{{
|
164 |
+
"student_id": "<string>",
|
165 |
+
"engagement_metrics": {{
|
166 |
+
"participation_level": <number between 0 and 1>,
|
167 |
+
"concept_understanding": "strong|moderate|needs_improvement",
|
168 |
+
"question_quality": "advanced|intermediate|basic"
|
169 |
+
}},
|
170 |
+
"struggling_topics": ["<string>", "<string>", ...],
|
171 |
+
"personalized_recommendation": "<string>"
|
172 |
+
}}
|
173 |
+
]
|
174 |
+
}}
|
175 |
+
|
176 |
+
Guidelines for Analysis:
|
177 |
+
- Focus on actionable and concise insights rather than exhaustive details.
|
178 |
+
- Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
|
179 |
+
- Prioritize topics with higher difficulty scores or more students struggling.
|
180 |
+
- Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
|
181 |
+
- Make sure to include All** students in the analysis, not just a subset.
|
182 |
+
- for the ai_recommended_actions:
|
183 |
+
- Prioritize pedagogy recommendations for critical topics with the high difficulty scores or struggling percentages.
|
184 |
+
- For each action:
|
185 |
+
- Include specific teaching methods (e.g., interactive discussions or quizzes, problem-based learning, practical examples etc).
|
186 |
+
- Recommend supporting resources (e.g., videos, handouts, simulations).
|
187 |
+
- Provide reasoning for the recommendation and the expected outcomes for student learning.
|
188 |
+
- Example:
|
189 |
+
- **Action:** Conduct an interactive problem-solving session on "<Topic Name>".
|
190 |
+
- **Reasoning:** Students showed difficulty in applying concepts to practical problems.
|
191 |
+
- **Expected Outcome:** Improved practical understanding and application of the topic.
|
192 |
+
- **Pedagogy Recommendations:**
|
193 |
+
- **Methods:** Group discussions, real-world case studies.
|
194 |
+
- **Resources:** Online interactive tools, relevant case studies, video walkthroughs.
|
195 |
+
- **Expected Impact:** Enhance conceptual clarity by 40% and practical application by 30%.
|
196 |
+
|
197 |
+
The response must adhere strictly to the above JSON structure, with all fields populated appropriately."""
|
198 |
+
|
199 |
+
|
200 |
+
def _calculate_class_distribution(self, analytics: Dict) -> Dict:
|
201 |
+
"""Calculate the distribution of students across performance levels."""
|
202 |
+
try:
|
203 |
+
total_students = len(analytics.get("student_insights", []))
|
204 |
+
if total_students == 0:
|
205 |
+
return {
|
206 |
+
"high_performers": 0,
|
207 |
+
"average_performers": 0,
|
208 |
+
"at_risk": 0
|
209 |
+
}
|
210 |
+
|
211 |
+
distribution = defaultdict(int)
|
212 |
+
|
213 |
+
for student in analytics.get("student_insights", []):
|
214 |
+
performance_level = student.get("performance_level", "average")
|
215 |
+
# Map performance levels to our three categories
|
216 |
+
if performance_level in ["excellent", "high", "high_performer"]:
|
217 |
+
distribution["high_performers"] += 1
|
218 |
+
elif performance_level in ["struggling", "low", "at_risk"]:
|
219 |
+
distribution["at_risk"] += 1
|
220 |
+
else:
|
221 |
+
distribution["average_performers"] += 1
|
222 |
+
|
223 |
+
# Convert to percentages
|
224 |
+
return {
|
225 |
+
level: count/total_students
|
226 |
+
for level, count in distribution.items()
|
227 |
+
}
|
228 |
+
except Exception as e:
|
229 |
+
print(f"Error calculating class distribution: {str(e)}")
|
230 |
+
return {
|
231 |
+
"high_performers": 0,
|
232 |
+
"average_performers": 0,
|
233 |
+
"at_risk": 0
|
234 |
+
}
|
235 |
+
|
236 |
+
def _identify_urgent_cases(self, analytics: Dict) -> List[str]:
|
237 |
+
"""Identify students needing immediate attention."""
|
238 |
+
try:
|
239 |
+
urgent_cases = []
|
240 |
+
for student in analytics.get("student_insights", []):
|
241 |
+
student_id = student.get("student_id")
|
242 |
+
if not student_id:
|
243 |
+
continue
|
244 |
+
|
245 |
+
# Check multiple risk factors
|
246 |
+
risk_factors = 0
|
247 |
+
|
248 |
+
# Factor 1: Performance level
|
249 |
+
if student.get("performance_level") in ["struggling", "at_risk", "low"]:
|
250 |
+
risk_factors += 1
|
251 |
+
|
252 |
+
# Factor 2: Number of struggling topics
|
253 |
+
if len(student.get("struggling_topics", [])) >= 2:
|
254 |
+
risk_factors += 1
|
255 |
+
|
256 |
+
# Factor 3: Engagement metrics
|
257 |
+
engagement = student.get("engagement_metrics", {})
|
258 |
+
if (engagement.get("participation_level") == "low" or
|
259 |
+
engagement.get("concept_understanding") == "needs_improvement"):
|
260 |
+
risk_factors += 1
|
261 |
+
|
262 |
+
# If student has multiple risk factors, add to urgent cases
|
263 |
+
if risk_factors >= 2:
|
264 |
+
urgent_cases.append(student_id)
|
265 |
+
|
266 |
+
return urgent_cases
|
267 |
+
except Exception as e:
|
268 |
+
print(f"Error identifying urgent cases: {str(e)}")
|
269 |
+
return []
|
270 |
+
|
271 |
+
+    def _identify_monitoring_cases(self, analytics: Dict) -> List[str]:
+        """Identify students who need monitoring but aren't urgent cases."""
+        try:
+            monitoring_cases = []
+            urgent_cases = set(self._identify_urgent_cases(analytics))
+
+            for student in analytics.get("student_insights", []):
+                student_id = student.get("student_id")
+                if not student_id or student_id in urgent_cases:
+                    continue
+
+                # Check monitoring criteria
+                monitoring_needed = False
+
+                # Criterion 1: Has one struggling topic (not enough to be urgent)
+                if len(student.get("struggling_topics", [])) == 1:
+                    monitoring_needed = True
+
+                # Criterion 2: Medium participation
+                engagement = student.get("engagement_metrics", {})
+                if engagement.get("participation_level") == "medium":
+                    monitoring_needed = True
+
+                # Criterion 3: Average performance (watch for decline)
+                if student.get("performance_level") == "average":
+                    monitoring_needed = True
+
+                if monitoring_needed:
+                    monitoring_cases.append(student_id)
+
+            return monitoring_cases
+        except Exception as e:
+            print(f"Error identifying monitoring cases: {str(e)}")
+            return []
+
+    def _identify_critical_topics(self, analytics: Dict) -> List[str]:
+        """
+        Identify critical topics that need attention based on multiple factors.
+        Returns a list of topic names that are considered critical.
+        """
+        try:
+            critical_topics = []
+            topics = analytics.get("topic_insights", [])
+
+            for topic in topics:
+                if not isinstance(topic, dict):
+                    continue
+
+                # Initialize score for topic criticality
+                critical_score = 0
+
+                # Factor 1: High difficulty level
+                difficulty_level = topic.get("difficulty_level", 0)
+                if difficulty_level > 0.7:
+                    critical_score += 2
+                elif difficulty_level > 0.5:
+                    critical_score += 1
+
+                # Factor 2: Number of students struggling
+                student_count = topic.get("student_count", 0)
+                total_students = len(analytics.get("student_insights", []))
+                if total_students > 0:
+                    struggle_ratio = student_count / total_students
+                    if struggle_ratio > 0.5:
+                        critical_score += 2
+                    elif struggle_ratio > 0.3:
+                        critical_score += 1
+
+                # Factor 3: Number of common issues
+                if len(topic.get("common_issues", [])) > 2:
+                    critical_score += 1
+
+                # Factor 4: Number of key misconceptions
+                if len(topic.get("key_misconceptions", [])) > 1:
+                    critical_score += 1
+
+                # If the topic exceeds the threshold, mark it as critical
+                if critical_score >= 3:
+                    critical_topics.append(topic.get("topic", "Unknown Topic"))
+
+            return critical_topics
+
+        except Exception as e:
+            print(f"Error identifying critical topics: {str(e)}")
+            return []
+
+    def _calculate_engagement(self, analytics: Dict) -> Dict:
+        """
+        Calculate detailed engagement metrics across all students.
+        Returns a dictionary with engagement statistics.
+        """
+        try:
+            total_students = len(analytics.get("student_insights", []))
+            if total_students == 0:
+                return {
+                    "total_students": 0,
+                    "overall_score": 0,
+                    "engagement_distribution": {
+                        "high": 0,
+                        "medium": 0,
+                        "low": 0
+                    },
+                    "participation_metrics": {
+                        "average_topics_per_student": 0,
+                        "active_participants_ratio": 0
+                    }
+                }
+
+            engagement_levels = defaultdict(int)
+            total_topics_engaged = 0
+            active_participants = 0
+
+            for student in analytics.get("student_insights", []):
+                # Get engagement metrics
+                metrics = student.get("engagement_metrics", {})
+
+                # Tally the participation level
+                participation = metrics.get("participation_level", "low").lower()
+                engagement_levels[participation] += 1
+
+                # Count topics the student is engaged with
+                topics_count = len(student.get("struggling_topics", []))
+                total_topics_engaged += topics_count
+
+                # Count active participants (students engaging with any topics)
+                if topics_count > 0:
+                    active_participants += 1
+
+            # Calculate overall engagement score (0-1)
+            weighted_score = (
+                (engagement_levels["high"] * 1.0 +
+                 engagement_levels["medium"] * 0.6 +
+                 engagement_levels["low"] * 0.2) / total_students
+            )
+
+            return {
+                "total_students": total_students,
+                "overall_score": round(weighted_score, 2),
+                "engagement_distribution": {
+                    level: count / total_students
+                    for level, count in engagement_levels.items()
+                },
+                "participation_metrics": {
+                    "average_topics_per_student": round(total_topics_engaged / total_students, 2),
+                    "active_participants_ratio": round(active_participants / total_students, 2)
+                }
+            }
+
+        except Exception as e:
+            print(f"Error calculating engagement: {str(e)}")
+            return {
+                "total_students": 0,
+                "overall_score": 0,
+                "engagement_distribution": {
+                    "high": 0,
+                    "medium": 0,
+                    "low": 0
+                },
+                "participation_metrics": {
+                    "average_topics_per_student": 0,
+                    "active_participants_ratio": 0
+                }
+            }
+
+    def _process_gemini_response(self, response: str) -> Dict:
+        """Process and validate Gemini's response."""
+        try:
+            # Parse the JSON response
+            analytics = json.loads(response)
+
+            # Ensure all required fields exist, with default values
+            required_fields = {
+                "topic_insights": [],
+                "student_insights": [],
+                "recommended_actions": []
+            }
+            for field, default_value in required_fields.items():
+                if field not in analytics or not analytics[field]:
+                    analytics[field] = default_value
+
+            # Now enrich the validated analytics
+            return self._enrich_analytics(analytics)
+
+        except (json.JSONDecodeError, KeyError, TypeError) as e:
+            print(f"Error processing Gemini response: {str(e)}")
+            print(f"Raw response: {response}")
+            return self._fallback_analytics()
+
+    def _enrich_analytics(self, analytics: Dict) -> Dict:
+        """Add derived insights and metrics to the analytics."""
+        # Add overall course health metrics
+        analytics["course_health"] = {
+            "overall_engagement": self._calculate_engagement(analytics),
+            "critical_topics": self._identify_critical_topics(analytics),
+            "class_distribution": self._calculate_class_distribution(analytics)
+        }
+
+        # Add intervention urgency scores
+        analytics["intervention_metrics"] = {
+            "immediate_attention_needed": self._identify_urgent_cases(analytics),
+            "monitoring_required": self._identify_monitoring_cases(analytics)
+        }
+
+        return analytics
+
+    def generate_analytics(self, chat_histories: List[Dict], all_topics: List[str]) -> Dict:
+        """Generate analytics from chat histories, with error handling.
+
+        An earlier version (Method 1) passed response_schema=AnalyticsResponse
+        to Gemini and cast the parsed result, but that caused a KeyError on
+        'student_insights'; this version (Method 2) requests plain JSON and
+        parses it directly.
+        """
+        try:
+            processed_histories = self._preprocess_chat_histories(chat_histories)
+            prompt = self._create_analytics_prompt(processed_histories, all_topics)
+
+            response = self.model.generate_content(
+                prompt,
+                generation_config=genai.GenerationConfig(
+                    response_mime_type="application/json"
+                    # response_schema=AnalyticsResponse  # disabled, see docstring
+                )
+            )
+
+            if not response.text:
+                print("Empty response from Gemini")
+                return self._fallback_analytics()
+
+            # Alternative: validate and enrich via self._process_gemini_response(response.text)
+            analytics = json.loads(response.text)
+            return analytics
+
+        except Exception as e:
+            print(f"Error generating analytics: {str(e)}")
+            return self._fallback_analytics()
+
+    def _preprocess_chat_histories(self, chat_histories: List[Dict]) -> List[Dict]:
+        """Preprocess chat histories to focus on relevant information."""
+        processed = []
+
+        for chat in chat_histories:
+            print(str(chat["user_id"]))
+            processed_chat = {
+                "user_id": str(chat["user_id"]),
+                "messages": [
+                    {
+                        "prompt": msg["prompt"],
+                        "response": msg["response"]
+                    }
+                    for msg in chat["messages"]
+                ]
+            }
+            processed.append(processed_chat)
+
+        return processed
+
+    def _fallback_analytics(self) -> Dict:
+        """Provide comprehensive fallback analytics that match the schema."""
+        return {
+            "topic_insights": [],
+            "student_insights": [],
+            "recommended_actions": [
+                {
+                    "action": "Review analytics generation process",
+                    "priority": "high",
+                    "target_group": "system_administrators",
+                    "reasoning": "Analytics generation failed",
+                    "expected_impact": "Restore analytics functionality"
+                }
+            ],
+            "course_health": {
+                "overall_engagement": 0,
+                "critical_topics": [],
+                "class_distribution": {
+                    "high_performers": 0,
+                    "average_performers": 0,
+                    "at_risk": 0
+                }
+            },
+            "intervention_metrics": {
+                "immediate_attention_needed": [],
+                "monitoring_required": []
+            }
+        }
+
+# if __name__ == "__main__":
+#     # Example usage
+#     analytics_generator = NovaScholarAnalytics()
+#     analytics = analytics_generator.generate_analytics(chat_histories, all_topics)
+#     print(json.dumps(analytics, indent=2))
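
Note: the commented-out __main__ block above leaves `chat_histories` and `all_topics` undefined. A minimal offline driver might look like the sketch below; it assumes the gitignored `topics.json` plus a hypothetical chat-history export (`chat_histories.json` is an invented path), and that API credentials are configured inside NovaScholarAnalytics.__init__, which is not visible in this view.

import json
from pre_class_analytics2 import NovaScholarAnalytics

# Hypothetical input files; both paths are illustrative, not part of the commit
with open("topics.json", "r") as f:
    all_topics = json.load(f)
with open("chat_histories.json", "r") as f:
    chat_histories = json.load(f)  # each record needs "user_id" and "messages" [{prompt, response}]

engine = NovaScholarAnalytics()
analytics = engine.generate_analytics(chat_histories, all_topics)
print(json.dumps(analytics, indent=2))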
session_page.py
CHANGED
@@ -16,11 +16,12 @@ import os
 from pymongo import MongoClient
 from gen_mcqs import generate_mcqs, save_quiz, quizzes_collection, get_student_quiz_score, submit_quiz_answers
 from create_course import courses_collection
-from pre_class_analytics import NovaScholarAnalytics
+# from pre_class_analytics import NovaScholarAnalytics
+from pre_class_analytics2 import NovaScholarAnalytics
 import openai
 from openai import OpenAI
 
-
+import google.generativeai as genai
 from goals2 import GoalAnalyzer
 from openai import OpenAI
 import asyncio
@@ -242,19 +243,37 @@ def display_preclass_content(session, student_id, course_id):
 
 #     Please provide a clear and concise answer based only on the information provided in the context.
 #     """
+        # context_prompt = f"""
+        # You are a highly intelligent and resourceful assistant capable of synthesizing information from the provided context.
+
+        # Context:
+        # {context}
+
+        # Instructions:
+        # 1. Base your answers primarily on the given context.
+        # 2. If the answer to the user's question is not explicitly in the context but can be inferred or synthesized from the information provided, do so thoughtfully.
+        # 3. Only use external knowledge or web assistance when:
+        #    - The context lacks sufficient information, and
+        #    - The question requires knowledge beyond what can be reasonably inferred from the context.
+        # 4. Clearly state if you are relying on web assistance for any part of your answer.
+        # 5. Do not respond with a negative. If the answer is not in the context, provide a thoughtful response based on the information available on the web about it.
+
+        # Question: {prompt}
+
+        # Please provide a clear and comprehensive answer based on the above instructions.
+        # """
         context_prompt = f"""
-        You are a highly intelligent and resourceful assistant capable of synthesizing information from the provided context.
+        You are a highly intelligent and resourceful assistant capable of synthesizing information from the provided context and external sources.
 
         Context:
         {context}
 
         Instructions:
-        1. Base your answers primarily on the given context.
-        2. If the answer to the user's question is not explicitly in the context but can be inferred or synthesized from the information provided, do so thoughtfully.
-        3. Only use external knowledge or web assistance when:
-           - The context lacks sufficient information, and
-           - The question requires knowledge beyond what can be reasonably inferred from the context.
-        4. Clearly state if you are relying on web assistance for any part of your answer.
+        1. Base your answers on the provided context wherever possible.
+        2. If the answer to the user's question is not explicitly in the context:
+           - Use external knowledge or web assistance to provide a clear and accurate response.
+        3. Do not respond negatively. If the answer is not in the context, use web assistance or your knowledge to generate a thoughtful response.
+        4. Clearly state if part of your response relies on web assistance.
 
         Question: {prompt}
 
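
Note: the rewritten prompt above is consumed the same way as before — interpolated with the session's retrieved `context` and the student's `prompt`, then sent to the chat model. A minimal sketch of that call path using the google.generativeai package imported in the first hunk; the model name, API-key variable, and placeholder strings are assumptions, not part of the commit.

import google.generativeai as genai

genai.configure(api_key=GEMINI_API_KEY)            # key assumed to be loaded from the environment
model = genai.GenerativeModel("gemini-1.5-flash")  # model name is an assumption

context = "...retrieved pre-class material text..."  # placeholder
prompt = "What is the main idea of this reading?"    # placeholder
response = model.generate_content(f"Context:\n{context}\n\nQuestion: {prompt}")
print(response.text)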
@@ -1147,6 +1166,88 @@ def get_response_from_llm(raw_data):
         st.error(f"Error generating response: {str(e)}")
         return None
 
+import typing_extensions as typing
+from typing import Union, List, Dict
+
+# class Topics(typing.TypedDict):
+#     overarching_theme: List[Dict[str, Union[str, List[Dict[str, Union[str, List[str]]]]]]]
+#     indirect_topics: List[Dict[str, str]]
+
+def extract_topics_from_materials(session):
+    """Extract topics from pre-class materials"""
+    materials = resources_collection.find({"session_id": session['session_id']})
+    texts = ""
+    if materials:
+        for material in materials:
+            if 'text_content' in material:
+                text = material['text_content']
+                texts += text + "\n"
+            else:
+                st.warning("No text content found in the material.")
+                return
+    else:
+        st.error("No pre-class materials found for this session.")
+        return
+
+    if texts:
+        context_prompt = f"""
+        Task: Extract Comprehensive Topics in a List Format
+        You are tasked with analyzing the provided text content and extracting a detailed, flat list of topics.
+
+        Instructions:
+        Identify All Topics: Extract a comprehensive list of all topics, subtopics, and indirect topics present in the provided text content. This list should include:
+
+        Overarching themes
+        Main topics
+        Subtopics and their sub-subtopics
+        Indirectly related topics
+        Flat List Format: Provide a flat list where each item is a topic. Ensure topics at all levels (overarching, main, sub, sub-sub, indirect) are represented as individual entries in the list.
+
+        Be Exhaustive: Ensure the response captures every topic, subtopic, and indirectly related concept comprehensively.
+
+        Output Requirements:
+        Use this structure:
+        {{
+          "topics": [
+            "Topic 1",
+            "Topic 2",
+            "Topic 3",
+            ...
+          ]
+        }}
+        Do Not Include: Do not include backticks, hierarchical structures, or the word 'json' in your response.
+
+        Content to Analyze:
+        {texts}
+        """
+        try:
+            # response = model.generate_content(context_prompt, generation_config=genai.GenerationConfig(response_mime_type="application/json", response_schema=list[Topics]))
+            response = model.generate_content(context_prompt)
+            if not response or not response.text:
+                st.error("Error extracting topics from materials.")
+                return
+
+            topics = response.text
+            return topics
+        except Exception as e:
+            st.error(f"Error extracting topics: {str(e)}")
+            return None
+    else:
+        st.error("No text content found in the pre-class materials.")
+        return None
+
+def convert_json_to_dict(json_str):
+    try:
+        return json.loads(json_str)
+    except Exception as e:
+        st.error(f"Error converting JSON to dictionary. {str(e)}")
+        return None
+
+# Load topics from a JSON file
+topics = []
+with open(r'topics.json', 'r') as file:
+    topics = json.load(file)
+
 def get_preclass_analytics(session):
     """Get all user_ids from chat_history collection where session_id matches"""
     user_ids = chat_history_collection.distinct("user_id", {"session_id": session['session_id']})
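
Note: the module-level `topics.json` load above assumes the file already exists on disk; how it gets produced is left implicit in this commit (the path is gitignored). One plausible one-time export, mirroring the commented-out `extract_topics_from_materials` / `convert_json_to_dict` calls in the next hunk — a sketch, not part of the commit:

# Assumes a Streamlit context where `session` is available
raw = extract_topics_from_materials(session)   # model returns text shaped like {"topics": [...]}
parsed = convert_json_to_dict(raw)             # dict on success, None on malformed JSON
if parsed is not None:
    with open("topics.json", "w") as f:
        json.dump(parsed.get("topics", parsed), f, indent=2)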
@@ -1168,120 +1269,380 @@ def get_preclass_analytics(session):
     else:
         st.warning("No chat history found for this session.")
 
-    # Use the analytics engine
-    analytics_engine = NovaScholarAnalytics()
-    results = analytics_engine.process_chat_history(all_chat_histories)
-    faculty_report = analytics_engine.generate_faculty_report(results)
-
-    # Pass this Faculty Report to an LLM model for refinements and clarity
-    refined_report = get_response_from_llm(faculty_report)
-    return refined_report
-
-    [... old pre-class analytics display code removed here; only the fragments below are recoverable from this view ...]
-            .glossary-card {
-                padding: 15px;
-                margin-top: 40px;
-            }
-            </style>
-        """, unsafe_allow_html=True)
-
-        with col3:
-            st.markdown("<p class='header-text'>💡 Recommendations</p>", unsafe_allow_html=True)
-            for i, rec in enumerate(refined_report["Recommendations"]):
-                st.markdown(f"{i + 1}. <p class='subheader'>{rec}</p>", unsafe_allow_html=True)
 
+    # Pass the pre-class materials content to the analytics engine
+    # topics = extract_topics_from_materials(session)
+    # dict_topics = convert_json_to_dict(topics)
+    print(topics)
+
+    # # Use the 1st analytics engine
+    # analytics_engine = NovaScholarAnalytics(all_topics_list=topics)
+    # # extracted_topics = analytics_engine._extract_topics(None, topics)
+    # # print(extracted_topics)
+
+    # results = analytics_engine.process_chat_history(all_chat_histories)
+    # faculty_report = analytics_engine.generate_faculty_report(results)
+    # print(faculty_report)
+    # # Pass this Faculty Report to an LLM model for refinements and clarity
+    # refined_report = get_response_from_llm(faculty_report)
+    # return refined_report
+
+    # Use the 2nd analytics engine (LLM-based):
+    analytics_generator = NovaScholarAnalytics()
+    analytics2 = analytics_generator.generate_analytics(all_chat_histories, topics)
+    # enriched_analytics = analytics_generator._enrich_analytics(analytics2)
+    print("Analytics is: ", analytics2)
+    return analytics2
+    # print(json.dumps(analytics, indent=2))
+
+
+# Load analytics from a JSON file
+# analytics = []
+# with open(r'new_analytics2.json', 'r') as file:
+#     analytics = json.load(file)
 
+def display_preclass_analytics(session, course_id):
+    # Initialize or get analytics data from session state
+    if 'analytics_data' not in st.session_state:
+        st.session_state.analytics_data = get_preclass_analytics(session)
+
+    analytics = st.session_state.analytics_data
+
+    # Enhanced CSS for better styling and interactivity
+    st.markdown("""
+        <style>
+        /* General styles */
+        .section-title {
+            color: #1a237e;
+            font-size: 1.5rem;
+            font-weight: 600;
+            margin: 1rem 0;
+        }
+
+        /* Topic list styles */
+        .topic-list {
+            max-width: 800px;
+            margin: 0 auto;
+        }
+        .topic-header {
+            background-color: #ffffff;
+            border: 1px solid #e0e0e0;
+            border-radius: 8px;
+            padding: 1rem 1.25rem;
+            margin: 0.5rem 0;
+            cursor: pointer;
+            display: flex;
+            align-items: center;
+            justify-content: space-between;
+            transition: all 0.2s ease;
+        }
+        .topic-header:hover {
+            background-color: #f8fafc;
+            transform: translateX(5px);
+        }
+        .topic-header h3 {
+            color: #1e3a8a;
+            font-size: 1.1rem;
+            font-weight: 500;
+            margin: 0;
+        }
+        .topic-struggling-rate {
+            background-color: #dbeafe;
+            padding: 0.25rem 0.75rem;
+            border-radius: 16px;
+            font-size: 0.85rem;
+            color: #1e40af;
+        }
+        .topic-content {
+            background-color: #ffffff;
+            border: 1px solid #e0e0e0;
+            border-top: none;
+            border-radius: 0 0 8px 8px;
+            padding: 1.25rem;
+            margin-top: -0.5rem;
+            margin-bottom: 1rem;
+        }
+        .topic-content .section-heading {
+            color: #2c5282;
+            font-size: 1rem;
+            font-weight: 600;
+            margin: 1rem 0 0.5rem 0;
+        }
+        .topic-content ul {
+            margin: 0;
+            padding-left: 1.25rem;
+            font-size: 0.85rem;
+            color: #4a5568;
+        }
+
+        /* Recommendation card styles */
+        .recommendation-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
+            gap: 1rem;
+            margin: 1rem 0;
+        }
+        .recommendation-card {
+            background-color: #f8fafc;
+            border-radius: 8px;
+            padding: 1.25rem;
+            border-left: 4px solid #3b82f6;
+            margin-bottom: 1rem;
+        }
+        .recommendation-card h4 {
+            color: #1e40af;
+            font-size: 1rem;
+            font-weight: 600;
+            margin-bottom: 0;
+            display: flex;
+            align-items: center;
+            gap: 0.5rem;
+        }
+        .recommendation-card .priority-badge {
+            font-size: 0.75rem;
+            padding: 0.25rem 0.5rem;
+            border-radius: 4px;
+            background-color: #dbeafe;
+            color: #1e40af;
+            text-transform: uppercase;
+        }
+
+        /* Student analytics styles */
+        .student-filters {
+            background-color: #f8fafc;
+            padding: 1rem;
+            border-radius: 8px;
+            margin-bottom: 1rem;
+        }
+        .analytics-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+            gap: 1rem;
+            margin-top: 1rem;
+        }
+        .student-metrics-card {
+            background-color: #ffffff;
+            border-radius: 8px;
+            padding: 1rem;
+            border: 1px solid #e5e7eb;
+            margin-bottom: 1rem;
+        }
+        .student-metrics-card .header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 0.75rem;
+        }
+        .student-metrics-card .student-id {
+            color: #1e40af;
+            font-size: 1rem;
+            font-weight: 600;
+        }
+        .student-metrics-card .metrics-grid {
+            display: grid;
+            grid-template-columns: repeat(2, 1fr);
+            gap: 0.75rem;
+        }
+        .metric-box {
+            background-color: #f8fafc;
+            padding: 0.75rem;
+            border-radius: 6px;
+        }
+        .metric-box .label {
+            font-size: 0.9rem;
+            color: #6b7280;
+            margin-bottom: 0.25rem;
+            font-weight: 500;
+        }
+        .metric-box .value {
+            font-size: 0.9rem;
+            color: #1f2937;
+            font-weight: 600;
+        }
+        .struggling-topics {
+            grid-column: span 2;
+            margin-top: 0.5rem;
+        }
+        .struggling-topics .label {
+            font-size: 0.9rem;
+            font-weight: 600;
+        }
+        .struggling-topics .value {
+            font-size: 0.9rem;
+            font-weight: 500;
+        }
+        .recommendation-text {
+            grid-column: span 2;
+            font-size: 0.95rem;
+            color: #4b5563;
+            margin-top: 0.75rem;
+            padding-top: 0.75rem;
+            border-top: 1px solid #e5e7eb;
+        }
+        .reason {
+            font-size: 1rem;
+            font-weight: 600;
+        }
+        </style>
+    """, unsafe_allow_html=True)
+
+    # Topic-wise Analytics Section
+    st.markdown('<h2 class="section-title">Topic-wise Analytics</h2>', unsafe_allow_html=True)
+
+    # Initialize session state for topic expansion
+    if 'expanded_topic' not in st.session_state:
+        st.session_state.expanded_topic = None
+
+    # Store topic indices in session state if not already done
+    if 'topic_indices' not in st.session_state:
+        st.session_state.topic_indices = list(range(len(analytics["topic_wise_insights"])))
+
+    st.markdown('<div class="topic-list">', unsafe_allow_html=True)
+    for idx in st.session_state.topic_indices:
+        topic = analytics["topic_wise_insights"][idx]
+        topic_id = f"topic_{idx}"
+
+        # Create clickable header
+        col1, col2 = st.columns([3, 1])
+        with col1:
+            if st.button(
+                topic["topic"],
+                key=f"topic_button_{idx}",
+                use_container_width=True,
+                type="secondary"
+            ):
+                st.session_state.expanded_topic = topic_id if st.session_state.expanded_topic != topic_id else None
+
+        with col2:
+            st.markdown(f"""
+                <div style="text-align: right;">
+                    <span class="topic-struggling-rate">{topic["struggling_percentage"]*100:.1f}% Struggling</span>
+                </div>
+            """, unsafe_allow_html=True)
+
+        # Show content if topic is expanded
+        if st.session_state.expanded_topic == topic_id:
+            st.markdown(f"""
+                <div class="topic-content">
+                    <div class="section-heading">Key Issues</div>
+                    <ul>
+                        {"".join([f"<li>{issue}</li>" for issue in topic["key_issues"]])}
+                    </ul>
+                    <div class="section-heading">Key Misconceptions</div>
+                    <ul>
+                        {"".join([f"<li>{misc}</li>" for misc in topic["key_misconceptions"]])}
+                    </ul>
+                </div>
+            """, unsafe_allow_html=True)
+    st.markdown('</div>', unsafe_allow_html=True)
+
+    # AI Recommendations Section
+    st.markdown('<h2 class="section-title">AI-Powered Recommendations</h2>', unsafe_allow_html=True)
+    st.markdown('<div class="recommendation-grid">', unsafe_allow_html=True)
+    for idx, rec in enumerate(analytics["ai_recommended_actions"]):
+        st.markdown(f"""
+            <div class="recommendation-card">
+                <h4>
+                    <span>Recommendation {idx + 1}</span>
+                    <span class="priority-badge">{rec["priority"]}</span>
+                </h4>
+                <p>{rec["action"]}</p>
+                <p><span class="reason">Reason:</span> {rec["reasoning"]}</p>
+                <p><span class="reason">Expected Outcome:</span> {rec["expected_outcome"]}</p>
+            </div>
+        """, unsafe_allow_html=True)
+    st.markdown('</div>', unsafe_allow_html=True)
+
+    # Student Analytics Section
+    st.markdown('<h2 class="section-title">Student Analytics</h2>', unsafe_allow_html=True)
+
+    # Filters
+    with st.container():
+        # st.markdown('<div class="student-filters">', unsafe_allow_html=True)
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            concept_understanding = st.selectbox(
+                "Filter by Understanding",
+                ["All", "Strong", "Moderate", "Needs Improvement"]
+            )
+        with col2:
+            participation_level = st.selectbox(
+                "Filter by Participation",
+                ["All", "High (>80%)", "Medium (50-80%)", "Low (<50%)"]
+            )
+        with col3:
+            struggling_topic = st.selectbox(
+                "Filter by Struggling Topic",
+                ["All"] + list(set([topic for student in analytics["student_analytics"]
+                                    for topic in student["struggling_topics"]]))
+            )
+        # st.markdown('</div>', unsafe_allow_html=True)
+
+    # Display student metrics in a grid
+    st.markdown('<div class="analytics-grid">', unsafe_allow_html=True)
+    for student in analytics["student_analytics"]:
+        # Apply filters
+        if (concept_understanding != "All" and
+                student["engagement_metrics"]["concept_understanding"].replace("_", " ").title() != concept_understanding):
+            continue
+
+        participation = student["engagement_metrics"]["participation_level"] * 100
+        if participation_level != "All":
+            if participation_level == "High (>80%)" and participation <= 80:
+                continue
+            elif participation_level == "Medium (50-80%)" and (participation < 50 or participation > 80):
+                continue
+            elif participation_level == "Low (<50%)" and participation >= 50:
+                continue
+
+        if struggling_topic != "All" and struggling_topic not in student["struggling_topics"]:
+            continue
+
+        st.markdown(f"""
+            <div class="student-metrics-card">
+                <div class="header">
+                    <span class="student-id">Student {student["student_id"][-6:]}</span>
+                </div>
+                <div class="metrics-grid">
+                    <div class="metric-box">
+                        <div class="label">Participation</div>
+                        <div class="value">{student["engagement_metrics"]["participation_level"]*100:.1f}%</div>
+                    </div>
+                    <div class="metric-box">
+                        <div class="label">Understanding</div>
+                        <div class="value">{student["engagement_metrics"]["concept_understanding"].replace('_', ' ').title()}</div>
+                    </div>
+                    <div class="struggling-topics">
+                        <div class="label">Struggling Topics: </div>
+                        <div class="value">{", ".join(student["struggling_topics"]) if student["struggling_topics"] else "None"}</div>
+                    </div>
+                    <div class="recommendation-text">
+                        {student["personalized_recommendation"]}
+                    </div>
+                </div>
+            </div>
+        """, unsafe_allow_html=True)
+    st.markdown('</div>', unsafe_allow_html=True)
+
+def reset_analytics_state():
+    """
+    Helper function to reset the analytics state when needed
+    (e.g., when loading a new session or when data needs to be refreshed)
+    """
+    if 'analytics_data' in st.session_state:
+        del st.session_state.analytics_data
+    if 'expanded_topic' in st.session_state:
+        del st.session_state.expanded_topic
+    if 'topic_indices' in st.session_state:
+        del st.session_state.topic_indices
 
 def display_session_analytics(session, course_id):
     """Display session analytics for faculty"""
     st.header("Session Analytics")
 
     # Display Pre-class Analytics
-
+    display_preclass_analytics(session, course_id)
 
     # Display In-class Analytics
     display_inclass_analytics(session, course_id)