omkar-surve126 committed on
Commit
b91146d
·
verified ·
1 Parent(s): 6a0f0b9

Upload 38 files

Browse files
.gitignore CHANGED
@@ -1,22 +1,22 @@
1
- # Ignore .env file
2
- .env
3
- __pycache__/
4
- newenv
5
- backupgoal.py
6
- backupgoal2.py
7
- backupresearch.py
8
- goals.py
9
- goals3.py
10
- research_assistant_dashboard2.py
11
- tempCodeRunnerFile.py
12
- all_chat_histories.json
13
- all_chat_histories2.json
14
- analytics.ipynb
15
- chat_history.csv
16
- harshal.py
17
- course_creation.py
18
- topics.json
19
- new_analytics.json
20
- new_analytics2.json
21
- pre_class_analytics.py
22
  sample_files/
 
1
+ # Ignore .env file
2
+ .env
3
+ __pycache__/
4
+ newenv
5
+ backupgoal.py
6
+ backupgoal2.py
7
+ backupresearch.py
8
+ goals.py
9
+ goals3.py
10
+ research_assistant_dashboard2.py
11
+ tempCodeRunnerFile.py
12
+ all_chat_histories.json
13
+ all_chat_histories2.json
14
+ analytics.ipynb
15
+ chat_history.csv
16
+ harshal.py
17
+ course_creation.py
18
+ topics.json
19
+ new_analytics.json
20
+ new_analytics2.json
21
+ pre_class_analytics.py
22
  sample_files/
Columns.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import PyPDF2
5
+ import io
6
+ import os
7
+ from dotenv import load_dotenv
8
+ import requests
9
+ import time
10
+ from mistralai import Mistral
11
+ from typing import List, Dict
12
+ from fpdf import FPDF
13
+
14
+ load_dotenv()
15
+ MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
16
+ MISTRAL_API_URL = "https://api.mistral.ai/v1/completions"
17
+
18
+ # Initialize the Mistral client
19
+ client = Mistral(api_key=MISTRAL_API_KEY)
20
+
21
def call_mistral_api(prompt: str) -> str:
    """Send ``prompt`` to the Mistral chat endpoint and return the reply text.

    The request uses the module-level ``client`` with the
    ``mistral-large-latest`` model and a fixed system message.

    Args:
        prompt: The user message to send.

    Returns:
        The content of the first choice, or an empty string when the call
        raises or the reply carries no text content.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    try:
        # No tools are offered, so neither `tools` nor `tool_choice` is sent.
        # The previous request combined an empty tool list with
        # tool_choice="any", i.e. it asked the model to call a tool while
        # providing none to call.
        response = client.chat.complete(
            model="mistral-large-latest",
            messages=messages,
        )
        # Normalise a missing content field so callers always receive a str.
        return response.choices[0].message.content or ""
    except Exception as e:
        # Best-effort by design: callers treat "" as "no answer".
        print(f"API Error: {str(e)}")
        return ""
41
+
42
def process_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Analyse every column from the third onward and collect the results.

    For each such column the non-null cells are flattened into one text
    blob, each cell tagged with that row's first- and second-column values.
    The blob is sent to ``call_mistral_api`` once for analysis and a second
    time for a fact-check pass; only the second answer is kept.

    Args:
        df: Input table; columns 0 and 1 are used as row tags.

    Returns:
        A DataFrame with one row per analysed column, holding the keys
        ``Column`` and ``Result``.
    """
    print("Processing DataFrame...")
    analysis_rows = []

    for column in df.columns[2:]:
        print(f"Processing column: {column}")

        # Tag each non-null cell with the values of the first two columns.
        fragments = []
        for _, record in df.iterrows():
            value = record[column]
            if pd.notna(value):
                fragments.append(
                    f"Column1-{record[df.columns[0]]}, Column2-{record[df.columns[1]]}, {value}"
                )
        text = " ".join(fragments)

        # First pass: trend/pattern analysis of the column.
        prompt = f"You are a Professional Researcher and Analyser with 10 yrs of Experience. Find details and Elaborate on Top Trends,Patterns ,Highlight Theories and Method in this topic.Support your answer with rightful evidence of corresponding DOI/SrNo and Frequency(how many times same topic repeated and in which papers):Make sure to limit the answer within 400 words ({column}):\n\n{text}"
        result1 = call_mistral_api(prompt)

        # Second pass: fact-check the first answer against the same data.
        prompt1 = f"""This result was the reponse of an earlier prompt Result -{result1}, Fact check the result with my original data -({column}):\n\n{text}. Return the refined Result(after careful fact checking and finding adequate evidence within the original data) , Make sure the meaning/structure of the Result doesnt change,only false/low evidence statements get eliminated.Limit the response to 400 words.MAKE SURE THERE IS NO CONTEXT CHANGE AND MEANING REMAINS SAME JUST WITH GOOD EVIDENCE AND REFINED RESULT. """
        refined = call_mistral_api(prompt1)

        analysis_rows.append({"Column": column, "Result": refined})

    results_df = pd.DataFrame(analysis_rows)
    print("DataFrame processing complete.")
    return results_df
72
+
73
def split_dataframe(df: pd.DataFrame, max_rows: int = 52) -> List[pd.DataFrame]:
    """Split a DataFrame into consecutive chunks of at most ``max_rows`` rows.

    Args:
        df: The DataFrame to split.
        max_rows: Maximum number of rows per chunk; must be at least 1.

    Returns:
        The chunks in original row order, each with its index reset to
        start at 0. An empty ``df`` yields an empty list.

    Raises:
        ValueError: If ``max_rows`` is smaller than 1.
    """
    # Zero would divide by zero and a negative size would silently return
    # no chunks at all, so reject both up front.
    if max_rows < 1:
        raise ValueError(f"max_rows must be at least 1, got {max_rows}")

    print("Splitting DataFrame...")
    split_dfs = [
        df.iloc[start:start + max_rows].reset_index(drop=True)
        for start in range(0, len(df), max_rows)
    ]
    print(f"DataFrame split into {len(split_dfs)} parts.")
    return split_dfs
92
+
93
def generate_professional_review(df1: pd.DataFrame) -> str:
    """Ask the model for a literature review of one analysed table.

    The whole DataFrame is rendered to text (index included) and embedded
    in a single prompt that is sent through ``call_mistral_api``.

    Args:
        df1: The table to review.

    Returns:
        Whatever ``call_mistral_api`` returns for the prompt.
    """
    print("Generating professional review...")

    # Render the table, index included, as the prompt context.
    context = df1.to_string(index=True)

    # Continuation lines are flush-left so that no code indentation ends up
    # inside the prompt text.
    prompt = f"""Generate a professional literature review, trends analysis, TCM ADO (Theories,Context,Method ,Ancedents,Decisions,Outcomes), gaps, theories, and frameworks
based on the following data , If you find evidence as proper DOI make sure you analyze the whole
table with more DOI,Serial No and find more evidence.Always give supporting evidence for your literature review,TCM ADO analysis,trends ,frameworks,
check DOIs and find more evidence as inference again.Make sure the review is as professional as possible.Limit the answer to 500 words and only highlight the most imp trends with supporting evidence of DOI/SrNo and frequency(how many papers used that and top 2 DOI of that),Limit it to 500 words.Make sure all important details/frequently repeating trends/methods are highlighted.:\n\n{context}."""

    review_text = call_mistral_api(prompt)
    print("Professional review generated.")
    return review_text
123
+
124
+
125
def _build_final_prompt(reviews: str) -> str:
    """Return the final synthesis prompt with ``reviews`` appended as context."""
    instructions = """
Given is a consolidated research review of a huge number of research papers (evidence is DOI, Serial No). Perform this:
Given as a context is a table of analyzing trends/frameworks analysis of a huge corpus of papers specific to the columns.
Analyze the table properly and create a professional and accurate literature review (Ensure to cite DOI as evidence).

Subheadings for Literature Review :
1. Introduction
○ Overview of the main topic or concept.
○ Key research questions or objectives.
2. Theoretical Foundations
○ Exploration of dominant theories related to the topic.
○ Domain-specific theoretical applications.
3. Contextual Analysis
○ Geographic contexts and challenges.
○ Sectoral applications and digital infrastructure readiness.
4. Methodological Approaches
○ Qualitative, quantitative, and mixed-methods approaches used in research.
5. Discussion and Future Research
○ Current challenges and limitations.
○ Potential areas for future study.
6. Conclusion
○ Summary of findings.
○ Implications and future directions.

TCM-ADO Framework in Research Analysis and Literature Review:
Theory
Theoretical foundations driving the research.
● Focus on identifying and analyzing the conceptual models or frameworks that underpin the study.
● Establish the intellectual basis and rationale for the research direction.
Context
Situational and environmental factors shaping the research.
● Emphasis on geographic, sectoral, cultural, and infrastructural dimensions influencing the implementation or findings.
● Examples include urban versus rural settings, digital infrastructure readiness, or policy landscapes.
● Objective: To understand how external conditions impact the dynamics and applicability of the research.
Method
Research methodologies and analytical approaches utilized.
● Covers the selection of qualitative, quantitative, or mixed-method approaches, along with tools and techniques employed.
● Objective: To ensure methodological rigor and the validity of findings.
Antecedents
Pre-existing conditions enabling or constraining research or implementation.
● Includes factors such as technological infrastructure, stakeholder preparedness, and
regulatory frameworks.
● To identify critical prerequisites that influence the starting point of the research or
initiative.
Decisions
Strategic choices made throughout the implementation or research process.
● Involves critical decision points in areas like technology adoption, governance
frameworks, and operational strategies.
● analyze how informed decision-making shapes the trajectory and success of the project.
Outcomes
Results and impacts observed as a consequence of the initiative or study.
● Evaluates direct and indirect contributions to the research objectives or broader societal
goals.
● assess the effectiveness and long-term implications of the research or project outcomes.
"""
    # The instructions refer to "a context"; without this section the model
    # is asked to analyse data it never receives.
    return f"{instructions}\nConsolidated reviews (context):\n{reviews}"


def main():
    """Run the Streamlit page for the research corpus synthesis tool.

    Flow: an optional logout that clears the session state and reruns, a
    CSV upload, then on "Process CSV" the table is split into parts of at
    most 52 rows. Each part is analysed and reviewed, the per-part reviews
    are shown as they arrive, and a final analysis is generated from all
    of them together.
    """
    st.title("Research Corpus Synthesis Tool")

    # Logout button
    if st.button("Logout", use_container_width=True):
        for key in st.session_state.keys():
            del st.session_state[key]
        st.rerun()

    # File uploader
    uploaded_file = st.file_uploader("Upload CSV file", type="csv")
    if not uploaded_file:
        return
    if not st.button("Process CSV"):
        return

    print("CSV file uploaded.")
    progress_bar = st.progress(0)
    status_text = st.empty()

    df = pd.read_csv(uploaded_file)
    print("CSV file read into DataFrame.")

    split_dfs = split_dataframe(df, max_rows=52)

    # Collect the per-part reviews; joined once after the loop.
    reviews = []
    for i, split_df in enumerate(split_dfs):
        status_text.text(f"Processing part {i + 1} of {len(split_dfs)}")
        print(f"Processing part {i + 1} of {len(split_dfs)}")

        processed_df = process_dataframe(split_df)
        review = generate_professional_review(processed_df)
        reviews.append(review + "\n\n")

        progress_bar.progress((i + 1) / len(split_dfs))
        st.write(i)
        st.write(review)

    concatenated_reviews = "".join(reviews)

    # The final analysis is generated from the concatenated reviews, which
    # must therefore be part of the prompt.
    final_prompt = _build_final_prompt(concatenated_reviews)
    final_result = call_mistral_api(final_prompt)
    print("Final analysis generated.")

    st.subheader("Final Analysis")
    st.write(final_result)

    status_text.text("Processing complete!")
    progress_bar.progress(1.0)
    print("Processing complete.")
245
+
246
+ if __name__ == "__main__":
247
+ main()
Research Paper Attributes.txt ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Review Based Paper
2
+ Title TEXT,
3
+ Publication TEXT,
4
+ Journal_Conference TEXT,
5
+ Abstract TEXT,
6
+ Keywords TEXT,
7
+ Author TEXT,
8
+ Date_of_Publication TEXT,
9
+ Intro TEXT,
10
+ Literature_Review TEXT,
11
+ Body: TEXT
12
+ Protocol: TEXT
13
+ Search String: TEXT
14
+ Included Studies: TEXT
15
+ Data Collection and Analysis Methods: TEXT
16
+ Data Extraction Table: TEXT
17
+ Synthesis and Analysis: TEXT
18
+ Conclusion
19
+ Limitations
20
+ Results
21
+ References
22
+
23
+ Risk of Bias Assessment:Opinion/Perspective Based Paper
24
+ Title TEXT,
25
+ Publication TEXT,
26
+ Journal_Conference TEXT,
27
+ Abstract TEXT,
28
+ Keywords TEXT,
29
+ Author TEXT,
30
+ Date_of_Publication TEXT,
31
+ Intro TEXT,
32
+ Literature_Review TEXT
33
+ Introduction: TEXT
34
+ Body: TEXT
35
+ Results and Discussion:TEXT
36
+ Conclusion: TEXT
37
+ References: TEXT
38
+
39
+
40
+
41
+
42
+
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+
53
+
54
+ Empirical Research Paper
55
+ Title TEXT,
56
+ Publication TEXT,
57
+ Journal_Conference TEXT,
58
+ Abstract TEXT,
59
+ Keywords TEXT,
60
+ Author TEXT,
61
+ Date_of_Publication TEXT,
62
+ Intro TEXT,
63
+ Literature_Review TEXT
64
+ Introduction: TEXT
65
+ Body: TEXT
66
+ Methodology: TEXT
67
+ Participants: TEXT - Describes the sample and the sampling methods used.
68
+ Survey Instrument: TEXT - Describes the design and development of the survey questionnaire.
69
+ Data Collection: TEXT - Explains how the survey data was collected.
70
+ Data Analysis: TEXT - Details the statistical techniques used to analyze the data.
71
+
72
+
73
+ Results and Discussion:TEXT
74
+ Conclusion: TEXT
75
+ References: TEXT
76
+ Research Paper (Other)
77
+ Title TEXT,
78
+ Publication TEXT,
79
+ Journal_Conference TEXT,
80
+ Abstract TEXT,
81
+ Keywords TEXT,
82
+ Author TEXT,
83
+ Date_of_Publication TEXT,
84
+ Intro TEXT,
85
+ Literature_Review TEXT,
86
+ Research_Models_Used TEXT,
87
+ Methodology TEXT,
88
+ Discussion TEXT,
89
+ Future_Scope TEXT,
90
+ Theory TEXT,
91
+ Independent_Variables TEXT,
92
+ nof_Independent_Variables INTEGER,
93
+ Dependent_Variables TEXT,
94
+ nof_Dependent_Variables INTEGER,
95
+ Control_Variables TEXT,
96
+ Extraneous_Variables TEXT,
97
+ nof_Control_Variables INTEGER,
98
+ nof_Extraneous_Variables INTEGER
analytics.py CHANGED
@@ -1,97 +1,97 @@
1
- import os
2
- import pandas as pd
3
- import numpy as np
4
- from numpy.linalg import norm
5
- from pymongo import MongoClient
6
- import openai
7
- from openai import OpenAI
8
- import streamlit as st
9
- from datetime import datetime
10
-
11
- # MongoDB connection
12
- MONGO_URI = os.getenv('MONGO_URI')
13
-
14
- client = MongoClient(MONGO_URI)
15
- db = client['digital_nova']
16
- themes_collection = db['themes']
17
- corpus_collection = db['corpus']
18
- vectors_collection = db['vectors'] # Reference to 'vectors' collection
19
- users_collection = db['users']
20
-
21
- # Function to create embeddings
22
- def create_embeddings(text, openai_api_key):
23
- client = OpenAI(api_key=openai_api_key)
24
- response = client.embeddings.create(
25
- input=text,
26
- model="text-embedding-3-small"
27
- )
28
- return response.data[0].embedding
29
-
30
- # Function to calculate cosine similarity
31
- def cosine_similarity(v1, v2):
32
- v1 = np.array(v1)
33
- v2 = np.array(v2)
34
- dot_product = np.dot(v1, v2)
35
- norm_product = norm(v1) * norm(v2)
36
- return dot_product / norm_product if norm_product != 0 else 0
37
-
38
- def derive_analytics(goal, reference_text, openai_api_key, context=None, synoptic=None):
39
- """
40
- Analyze subjective answers with respect to pre-class materials and synoptic, and provide detailed feedback
41
-
42
- Args:
43
- goal (str): Analysis objective
44
- reference_text (str): Student's answer text
45
- openai_api_key (str): OpenAI API key
46
- context (str, optional): Pre-class material content for comparison
47
- synoptic (str, optional): Synoptic content for evaluation
48
- """
49
- template = f"""Given a student's answer to a subjective question, analyze it following these specific guidelines. Compare it with the provided pre-class materials and synoptic (if available) to assess correctness and completeness.
50
-
51
- 1. Analyze the text as an experienced educational assessor, considering:
52
- - Conceptual understanding
53
- - Factual accuracy
54
- - Completeness of response
55
- - Use of relevant terminology
56
- - Application of concepts
57
-
58
- 2. Structure the output in markdown with two sections:
59
-
60
- **Correctness Assessment**
61
- - Rate overall correctness on a scale of 1-10
62
-
63
- **Evidence-Based Feedback**
64
- - Provide specific evidence from the student's answer to justify the score reduction
65
- - Highlight the exact lines or phrases that need improvement
66
-
67
- Pre-class Materials Context:
68
- {context if context else "No reference materials provided"}
69
-
70
- Synoptic:
71
- {synoptic if synoptic else "No synoptic provided"}
72
-
73
- Student's Answer:
74
- {reference_text}
75
-
76
- Rules:
77
- - Base assessment strictly on provided content
78
- - Be specific in feedback and suggestions
79
- """
80
-
81
- # Initialize OpenAI client
82
- client = OpenAI(api_key=openai_api_key)
83
-
84
- try:
85
- response = client.chat.completions.create(
86
- model="gpt-4-0125-preview",
87
- messages=[
88
- {"role": "system", "content": "You are an educational assessment expert."},
89
- {"role": "user", "content": template}
90
- ],
91
- temperature=0.7
92
- )
93
- analysis = response.choices[0].message.content
94
- return analysis
95
- except Exception as e:
96
- print(f"Error in generating analysis with OpenAI: {str(e)}")
97
- return "Error generating analysis"
 
1
+ import os
2
+ import pandas as pd
3
+ import numpy as np
4
+ from numpy.linalg import norm
5
+ from pymongo import MongoClient
6
+ import openai
7
+ from openai import OpenAI
8
+ import streamlit as st
9
+ from datetime import datetime
10
+
11
+ # MongoDB connection
12
+ MONGO_URI = os.getenv('MONGO_URI')
13
+
14
+ client = MongoClient(MONGO_URI)
15
+ db = client['digital_nova']
16
+ themes_collection = db['themes']
17
+ corpus_collection = db['corpus']
18
+ vectors_collection = db['vectors'] # Reference to 'vectors' collection
19
+ users_collection = db['users']
20
+
21
+ # Function to create embeddings
22
def create_embeddings(text, openai_api_key):
    """Return the ``text-embedding-3-small`` embedding of ``text``.

    A new OpenAI client is created from ``openai_api_key`` on every call;
    the embedding of the first item in the response is returned.
    """
    embedding_client = OpenAI(api_key=openai_api_key)
    result = embedding_client.embeddings.create(
        input=text,
        model="text-embedding-3-small",
    )
    first_item = result.data[0]
    return first_item.embedding
29
+
30
+ # Function to calculate cosine similarity
31
def cosine_similarity(v1, v2):
    """Return the cosine of the angle between ``v1`` and ``v2``.

    Both inputs are converted to NumPy arrays. When either vector has zero
    norm the result is 0 rather than a division by zero.
    """
    a = np.array(v1)
    b = np.array(v2)
    numerator = np.dot(a, b)
    denominator = norm(a) * norm(b)
    if denominator == 0:
        return 0
    return numerator / denominator
37
+
38
def derive_analytics(goal, reference_text, openai_api_key, context=None, synoptic=None):
    """Assess a student's subjective answer and return markdown feedback.

    The answer is embedded in a prompt together with the pre-class
    material and the synoptic (each replaced by a placeholder sentence
    when falsy) and sent to ``gpt-4-0125-preview``.

    Args:
        goal (str): Analysis objective. Not referenced by the body.
        reference_text (str): Student's answer text.
        openai_api_key (str): OpenAI API key.
        context (str, optional): Pre-class material content for comparison.
        synoptic (str, optional): Synoptic content for evaluation.

    Returns:
        The model's reply text, or the string "Error generating analysis"
        when the chat call raises.
    """
    materials = context or "No reference materials provided"
    synoptic_text = synoptic or "No synoptic provided"

    # Continuation lines are flush-left so that no code indentation ends up
    # inside the prompt text.
    template = f"""Given a student's answer to a subjective question, analyze it following these specific guidelines. Compare it with the provided pre-class materials and synoptic (if available) to assess correctness and completeness.

1. Analyze the text as an experienced educational assessor, considering:
- Conceptual understanding
- Factual accuracy
- Completeness of response
- Use of relevant terminology
- Application of concepts

2. Structure the output in markdown with two sections:

**Correctness Assessment**
- Rate overall correctness on a scale of 1-10

**Evidence-Based Feedback**
- Provide specific evidence from the student's answer to justify the score reduction
- Highlight the exact lines or phrases that need improvement

Pre-class Materials Context:
{materials}

Synoptic:
{synoptic_text}

Student's Answer:
{reference_text}

Rules:
- Base assessment strictly on provided content
- Be specific in feedback and suggestions
"""

    # Local OpenAI client; created outside the try so that construction
    # errors propagate to the caller.
    client = OpenAI(api_key=openai_api_key)

    conversation = [
        {"role": "system", "content": "You are an educational assessment expert."},
        {"role": "user", "content": template},
    ]
    try:
        response = client.chat.completions.create(
            model="gpt-4-0125-preview",
            messages=conversation,
            temperature=0.7,
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error in generating analysis with OpenAI: {str(e)}")
        return "Error generating analysis"
app.py CHANGED
The diff for this file is too large to render. See raw diff
 
assignment_evaluation.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # assignment_evaluation.py
2
+
3
+ import streamlit as st
4
+ from datetime import datetime
5
+ from pymongo import MongoClient
6
+ import os
7
+ from openai import OpenAI
8
+ from dotenv import load_dotenv
9
+ from bson import ObjectId
10
+
11
+ load_dotenv()
12
+
13
+ # MongoDB setup
14
+ MONGO_URI = os.getenv('MONGO_URI')
15
+ client = MongoClient(MONGO_URI)
16
+ db = client["novascholar_db"]
17
+ assignments_collection = db["assignments"]
18
+ assignment_evaluation_collection = db["assignment_evaluation"]
19
+ resources_collection = db["resources"]
20
+ students_collection = db["students"]
21
+
22
def evaluate_assignment(session_id, student_id, assignment_id):
    """Generate and store an AI evaluation of one student's submission.

    The assignment is loaded by ``_id``, the submission whose
    ``student_id`` equals ``str(student_id)`` is located, and a rubric
    based prompt is sent to ``gpt-4o-mini``. The result is written to the
    evaluation collection as an upsert keyed on assignment, student and
    session, so regenerating replaces the stored evaluation instead of
    adding a second document next to it.

    Args:
        session_id: Session identifier, stored as given.
        student_id: Student identifier; compared and stored as a string.
        assignment_id: ``_id`` of the assignment document.

    Returns:
        The stored evaluation document (dict), or ``None`` when the
        assignment or submission is missing or any step raises.
    """
    try:
        # Fetch assignment and student submission
        assignment = assignments_collection.find_one({"_id": assignment_id})
        if not assignment:
            return None

        # Submissions carry string ids, and the result readers query by
        # string as well, so one normalised key is used throughout.
        student_key = str(student_id)
        submission = next(
            (sub for sub in assignment.get('submissions', [])
             if sub['student_id'] == student_key),
            None
        )
        if not submission:
            return None

        # Default rubric for assignment evaluation
        default_rubric = """
1. Understanding & Implementation (1-4):
- Demonstrates understanding of assignment requirements
- Implements required components correctly
- Shows attention to detail

2. Quality & Completeness (1-4):
- Work is complete and thorough
- Meets all assignment objectives
- Shows evidence of effort and care

3. Presentation & Organization (1-4):
- Clear and professional presentation
- Well-structured and organized
- Follows required format and guidelines
"""

        # Named distinctly so the module-level Mongo `client` is not shadowed.
        openai_client = OpenAI(api_key=os.getenv('OPENAI_KEY'))

        # `due_date` is optional elsewhere in this module (the faculty view
        # checks for it), so a missing value must not abort the evaluation.
        due_date = assignment.get('due_date', 'Not specified')

        prompt_template = f"""As an assignment evaluator, assess this student's submission based on the provided rubric criteria. Follow these guidelines:

1. Evaluation Process:
- Use each rubric criterion (scored 1-4)
- Evaluate completeness and quality
- Check alignment with assignment requirements
- Calculate final score: sum of criteria scores converted to 10-point scale

Assignment Title: {assignment['title']}
Due Date: {due_date}

Submission Content:
{submission.get('text_content', 'No text content available')}

Rubric Criteria:
{default_rubric}

Provide your assessment in the following format:

**Overall Score and Summary**
- Score: [X]/10
- Overall Assessment: [2-3 sentence summary]

**Strengths**
- [Key strength 1]
- [Key strength 2]
- [Key strength 3]

**Areas for Improvement**
- [Improvement point 1]
- [Improvement point 2]
- [Improvement point 3]

**Specific Recommendations**
[2-3 sentences with actionable feedback]
"""

        # Generate evaluation using OpenAI
        response = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt_template}],
            max_tokens=1000,
            temperature=0.4
        )

        evaluation_doc = {
            "assignment_id": assignment_id,
            "student_id": student_key,
            "session_id": session_id,
            "evaluation": response.choices[0].message.content,
            "evaluated_at": datetime.utcnow()
        }

        # Upsert rather than insert: readers use find_one, which would keep
        # returning the first document if duplicates were inserted.
        assignment_evaluation_collection.update_one(
            {
                "assignment_id": assignment_id,
                "student_id": student_key,
                "session_id": session_id
            },
            {"$set": evaluation_doc},
            upsert=True
        )
        return evaluation_doc

    except Exception as e:
        print(f"Error in evaluate_assignment: {str(e)}")
        return None
123
+
124
def display_evaluation_to_faculty(session_id, student_id, course_id):
    """Render the faculty panel for generating and viewing evaluations.

    The faculty member picks an assignment of the session/course, then one
    of its submissions. The submission details and a download button are
    shown, followed by either the stored evaluation (with a regenerate
    button) or a button to generate one.

    Args:
        session_id: Session identifier; compared as a string.
        student_id: Ignored on entry; replaced by the id of the selected
            submission.
        course_id: Course identifier used to filter assignments.
    """
    st.header("Evaluate Assignments")

    try:
        # Fetch available assignments
        assignments = list(assignments_collection.find({
            "session_id": str(session_id),
            "course_id": course_id
        }))

        if not assignments:
            st.info("No assignments found for this session.")
            return

        # Label -> assignment _id; the due date is shown only when present.
        assignment_options = {
            (f"{assignment['title']} (Due: {assignment['due_date'].strftime('%Y-%m-%d')})"
             if 'due_date' in assignment else assignment['title']): assignment['_id']
            for assignment in assignments
        }

        selected_assignment = st.selectbox(
            "Select Assignment to Evaluate",
            options=list(assignment_options.keys())
        )
        if not selected_assignment:
            return

        assignment_id = assignment_options[selected_assignment]
        assignment = assignments_collection.find_one({"_id": assignment_id})
        if not assignment:
            return

        submissions = assignment.get('submissions', [])
        if not submissions:
            st.warning("No submissions found for this assignment.")
            return

        # Label -> student id. A submission whose student record is missing
        # (or has no name) is still listed instead of failing the panel.
        student_options = {}
        for sub in submissions:
            student = students_collection.find_one({'_id': ObjectId(sub['student_id'])})
            full_name = student.get('full_name', 'Unknown student') if student else 'Unknown student'
            label = f"{full_name} (Submitted: {sub['submitted_at'].strftime('%Y-%m-%d %H:%M')})"
            student_options[label] = sub['student_id']

        selected_student = st.selectbox(
            "Select Student Submission",
            options=list(student_options.keys())
        )
        if not selected_student:
            return

        student_id = student_options[selected_student]
        submission = next(sub for sub in submissions if sub['student_id'] == student_id)

        # Display submission details
        st.subheader("Submission Details")
        st.markdown(f"**Submitted:** {submission['submitted_at'].strftime('%Y-%m-%d %H:%M')}")
        st.markdown(f"**File Name:** {submission['file_name']}")

        # Add download button for submitted file
        if 'file_content' in submission:
            st.download_button(
                label="Download Submission",
                data=submission['file_content'],
                file_name=submission['file_name'],
                mime=submission['file_type']
            )

        # Check for existing evaluation
        existing_eval = assignment_evaluation_collection.find_one({
            "assignment_id": assignment_id,
            "student_id": student_id,
            "session_id": str(session_id)
        })

        if existing_eval:
            st.subheader("Evaluation Results")
            st.markdown(existing_eval['evaluation'])
            st.success("✓ Evaluation completed")

            if st.button("Regenerate Evaluation"):
                with st.spinner("Regenerating evaluation..."):
                    evaluation = evaluate_assignment(
                        str(session_id),
                        student_id,
                        assignment_id
                    )
                    if evaluation:
                        st.success("Evaluation regenerated successfully!")
                        st.rerun()
                    else:
                        st.error("Error regenerating evaluation.")
        else:
            if st.button("Generate Evaluation"):
                with st.spinner("Generating evaluation..."):
                    evaluation = evaluate_assignment(
                        str(session_id),
                        student_id,
                        assignment_id
                    )
                    if evaluation:
                        st.success("Evaluation generated successfully!")
                        st.markdown("### Generated Evaluation")
                        st.markdown(evaluation['evaluation'])
                        # The rerun redraws the page from the stored evaluation.
                        st.rerun()
                    else:
                        st.error("Error generating evaluation.")

    except Exception as e:
        st.error(f"An error occurred while loading the evaluations: {str(e)}")
        print(f"Error in display_evaluation_to_faculty: {str(e)}")
235
+
236
def display_assignment_results(assignment_id, student_id):
    """Show a student the stored evaluation of one assignment.

    Looks up the evaluation by ``assignment_id`` and ``str(student_id)``.
    When none exists an informational note is shown; otherwise the
    evaluation text and its generation timestamp are rendered. Any error
    is reported to the user generically and printed to the console.
    """
    try:
        lookup = {
            "assignment_id": assignment_id,
            "student_id": str(student_id)
        }
        evaluation_record = assignment_evaluation_collection.find_one(lookup)

        if not evaluation_record:
            st.info("Evaluation will be available soon. Please check back later.")
            return

        st.header("Assignment Evaluation")
        st.markdown(evaluation_record["evaluation"])

        generated_at = evaluation_record['evaluated_at'].strftime('%Y-%m-%d %H:%M:%S UTC')
        st.caption(f"Evaluation generated on: {generated_at}")

    except Exception as e:
        st.error("An error occurred while loading the evaluation. Please try again later.")
        print(f"Error in display_assignment_results: {str(e)}")
chatbot.py CHANGED
@@ -1,67 +1,67 @@
1
- import streamlit as st
2
- import datetime
3
- from db import courses_collection2, faculty_collection, students_collection, vectors_collection, chat_history_collection
4
- from PIL import Image
5
- from dotenv import load_dotenv
6
- import os
7
- from datetime import datetime
8
- from bson import ObjectId
9
- from file_upload_vectorize import model
10
- from gen_mcqs import generate_mcqs, quizzes_collection
11
-
12
- load_dotenv()
13
- MONGO_URI = os.getenv('MONGO_URI')
14
- OPENAI_KEY = os.getenv('OPENAI_KEY')
15
- GEMINI_KEY = os.getenv('GEMINI_KEY')
16
-
17
def insert_chat_message(user_id, session_id, role, content):
    """
    Append one message to the chat history of a user/session pair.

    The history document is created on first use (upsert) and its top-level
    ``timestamp`` is refreshed on every call.

    Args:
        user_id: User identifier; converted to ``ObjectId`` for the query.
        session_id: Session identifier; used as-is in the query.
        role: Author of the message (e.g. "assistant").
        content: Text of the message.
    """
    entry = {
        "role": role,
        "content": content,
        "timestamp": datetime.utcnow(),
    }

    # NOTE(review): session_id is stored as passed, while user_id is cast to
    # ObjectId -- confirm callers pass a consistent session_id type.
    selector = {"user_id": ObjectId(user_id), "session_id": session_id}
    changes = {
        "$push": {"messages": entry},
        "$set": {"timestamp": datetime.utcnow()},
    }
    chat_history_collection.update_one(selector, changes, upsert=True)
29
-
30
def give_chat_response(user_id, session_id, question, title, description, context):
    """
    Answer a user's question about a session and record the answer.

    Builds a prompt from the session title, description and context, sends
    it to ``model.generate_content`` and stores the stripped reply in the
    chat history under the "assistant" role.

    Args:
        user_id: User the chat history belongs to.
        session_id: Session the chat history belongs to.
        question: The user's question.
        title: Session title.
        description: Session description.
        context: Additional context text for the model.

    Returns:
        The model's reply with surrounding whitespace removed, or a fixed
        fallback message when the model returns nothing (in which case
        nothing is stored).
    """
    context_prompt = f"""
    Based on the following session title, description, and context, answer the user's question in 3-4 lines:

    Title: {title}
    Description: {description}
    Context: {context}

    Question: {question}

    Please provide a clear and concise answer based on the information provided.
    """

    result = model.generate_content(context_prompt)
    reply_text = result.text if result else None
    if not reply_text:
        return "No response received from the model"

    answer = reply_text.strip()

    # Only the assistant's side is persisted here.
    insert_chat_message(user_id, session_id, "assistant", answer)
    return answer
53
-
54
def create_quiz_by_context(user_id, session_id, context, length, session_title, session_description):
    """
    Generate a multiple-choice quiz from the given context and store it.

    Args:
        user_id: Owner of the quiz; converted to ``ObjectId`` before storing.
        session_id: Session the quiz belongs to; converted to ``ObjectId``.
        context: Source text passed to ``generate_mcqs``.
        length: Passed through to ``generate_mcqs``.
        session_title: Passed through to ``generate_mcqs``.
        session_description: Passed through to ``generate_mcqs``.

    Returns:
        A status string: "Quiz created successfully" once the quiz has been
        inserted, or "No quiz generated" when ``generate_mcqs`` returned
        nothing.
    """
    questions = generate_mcqs(context, length, session_title, session_description)
    if questions:
        # Persist the generated questions together with their owner/session.
        quiz_doc = {
            "user_id": ObjectId(user_id),
            "session_id": ObjectId(session_id),
            "questions": questions,
            "timestamp": datetime.utcnow(),
        }
        quizzes_collection.insert_one(quiz_doc)
        return "Quiz created successfully"

    return "No quiz generated"
 
1
+ import streamlit as st
2
+ import datetime
3
+ from db import courses_collection2, faculty_collection, students_collection, vectors_collection, chat_history_collection
4
+ from PIL import Image
5
+ from dotenv import load_dotenv
6
+ import os
7
+ from datetime import datetime
8
+ from bson import ObjectId
9
+ from file_upload_vectorize import model
10
+ from gen_mcqs import generate_mcqs, quizzes_collection
11
+
12
+ load_dotenv()
13
+ MONGO_URI = os.getenv('MONGO_URI')
14
+ OPENAI_KEY = os.getenv('OPENAI_KEY')
15
+ GEMINI_KEY = os.getenv('GEMINI_KEY')
16
+
17
def insert_chat_message(user_id, session_id, role, content):
    """
    Append one message to the chat history of a user/session pair.

    The history document is created on first use (upsert) and its top-level
    ``timestamp`` is refreshed on every call.

    Args:
        user_id: User identifier; converted to ``ObjectId`` for the query.
        session_id: Session identifier; used as-is in the query.
        role: Author of the message (e.g. "assistant").
        content: Text of the message.
    """
    entry = {
        "role": role,
        "content": content,
        "timestamp": datetime.utcnow(),
    }

    # NOTE(review): session_id is stored as passed, while user_id is cast to
    # ObjectId -- confirm callers pass a consistent session_id type.
    selector = {"user_id": ObjectId(user_id), "session_id": session_id}
    changes = {
        "$push": {"messages": entry},
        "$set": {"timestamp": datetime.utcnow()},
    }
    chat_history_collection.update_one(selector, changes, upsert=True)
29
+
30
def give_chat_response(user_id, session_id, question, title, description, context):
    """
    Answer a user's question about a session and record the answer.

    Builds a prompt from the session title, description and context, sends
    it to ``model.generate_content`` and stores the stripped reply in the
    chat history under the "assistant" role.

    Args:
        user_id: User the chat history belongs to.
        session_id: Session the chat history belongs to.
        question: The user's question.
        title: Session title.
        description: Session description.
        context: Additional context text for the model.

    Returns:
        The model's reply with surrounding whitespace removed, or a fixed
        fallback message when the model returns nothing (in which case
        nothing is stored).
    """
    context_prompt = f"""
    Based on the following session title, description, and context, answer the user's question in 3-4 lines:

    Title: {title}
    Description: {description}
    Context: {context}

    Question: {question}

    Please provide a clear and concise answer based on the information provided.
    """

    result = model.generate_content(context_prompt)
    reply_text = result.text if result else None
    if not reply_text:
        return "No response received from the model"

    answer = reply_text.strip()

    # Only the assistant's side is persisted here.
    insert_chat_message(user_id, session_id, "assistant", answer)
    return answer
53
+
54
def create_quiz_by_context(user_id, session_id, context, length, session_title, session_description):
    """
    Generate a multiple-choice quiz from the given context and store it.

    Args:
        user_id: Owner of the quiz; converted to ``ObjectId`` before storing.
        session_id: Session the quiz belongs to; converted to ``ObjectId``.
        context: Source text passed to ``generate_mcqs``.
        length: Passed through to ``generate_mcqs``.
        session_title: Passed through to ``generate_mcqs``.
        session_description: Passed through to ``generate_mcqs``.

    Returns:
        A status string: "Quiz created successfully" once the quiz has been
        inserted, or "No quiz generated" when ``generate_mcqs`` returned
        nothing.
    """
    questions = generate_mcqs(context, length, session_title, session_description)
    if questions:
        # Persist the generated questions together with their owner/session.
        quiz_doc = {
            "user_id": ObjectId(user_id),
            "session_id": ObjectId(session_id),
            "questions": questions,
            "timestamp": datetime.utcnow(),
        }
        quizzes_collection.insert_one(quiz_doc)
        return "Quiz created successfully"

    return "No quiz generated"
create_course.py CHANGED
@@ -1,272 +1,272 @@
1
- from datetime import datetime, timedelta
2
- import os
3
- from typing import Dict, List, Any
4
- from pymongo import MongoClient
5
- import requests
6
- import uuid
7
- import openai
8
- from openai import OpenAI
9
- import streamlit as st
10
- from bson import ObjectId
11
- from dotenv import load_dotenv
12
- import json
13
-
14
- load_dotenv()
15
- MONGODB_URI = os.getenv("MONGO_URI")
16
- PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
17
- OPENAI_API_KEY = os.getenv("OPENAI_KEY")
18
-
19
- client = MongoClient(MONGODB_URI)
20
- db = client['novascholar_db']
21
- courses_collection = db['courses']
22
-
23
def generate_perplexity_response(api_key, course_name, model="llama-3.1-sonar-small-128k-online"):
    """
    Ask the Perplexity chat API for a course structure and return the raw reply.

    Args:
        api_key: Perplexity API key used to authenticate the request.
        course_name: Name of the course; interpolated into the prompt.
        model: Name of the Perplexity model to query. Defaults to the model
            this function has always used.

    Returns:
        The reply text. The prompt asks for a bare JSON object, but the text
        is not validated here. An empty string is returned when the request
        fails or the reply has no content; failures are reported through
        ``st.error``.
    """
    # The schema and the example below must both be valid JSON and use the
    # same key names, otherwise the model is nudged towards output that the
    # caller's ``json.loads`` / key lookups cannot handle.
    prompt = f"""
    You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate comprehensive, academically rigorous course structures for undergraduate level education.

    Please generate a detailed course structure for the course {course_name} in JSON format following these specifications:

    1. The course structure should be appropriate for a full semester (14-16 weeks)
    2. Each module should be designed for 2-4 weeks of instruction
    3. Follow standard academic practices and nomenclature
    4. Ensure progressive complexity from foundational to advanced concepts
    5. The course_title should exactly match the course name provided in the prompt. No additional information should be included in the course_title field.
    6. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
    7. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**


    The JSON response should follow this structure:
    {{
        "course_title": "string",
        "course_description": "string",
        "modules": [
            {{
                "module_title": "string",
                "sub_modules": [
                    {{
                        "title": "string",
                        "topics": ["string"]
                    }}
                ]
            }}
        ]
    }}

    Example response:
    {{
        "course_title": "Advanced Natural Language Processing",
        "course_description": "An advanced course covering modern approaches to NLP using deep learning, with focus on transformer architectures and their applications.",
        "modules": [
            {{
                "module_title": "Foundations of Modern NLP",
                "sub_modules": [
                    {{
                        "title": "Attention Mechanism",
                        "topics": [
                            "Self-attention",
                            "Multi-head attention",
                            "Positional encoding"
                        ]
                    }}
                ]
            }}
        ]
    }}
    """

    messages = [
        {
            "role": "system",
            "content": (
                "You are an expert educational AI assistant specializing in course design and curriculum planning. "
                "Your task is to generate accurate, detailed, and structured educational content for undergraduate-level and post-graduate-level courses. "
                "Provide detailed and accurate information tailored to the user's prompt. "
                "Ensure that the responses are logical, follow standard academic practices, and include realistic concepts relevant to the course."
            ),
        },
        {
            "role": "user",
            "content": prompt
        },
    ]
    try:
        # Perplexity is reached through the OpenAI client with a custom base
        # URL. Named distinctly so it is not confused with the module-level
        # MongoDB ``client``.
        perplexity_client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        response = perplexity_client.chat.completions.create(
            model=model,
            messages=messages
        )
        # Normalise a missing message body to "" so callers always get a str.
        return response.choices[0].message.content or ""
    except Exception as e:
        st.error(f"Failed to fetch data from Perplexity API: {e}")
        return ""
109
-
110
def get_new_course_id():
    """
    Generate a new course ID one above the highest existing numeric ID.

    Course IDs have the form ``CS<number>``. The numeric part of every stored
    ``course_id`` is compared as an integer. Sorting the string field in the
    database would order "CS999" above "CS1000" and hand out the same ID
    again once IDs reach four digits.

    Returns:
        ``"CS101"`` when no course has a usable ID, otherwise
        ``"CS<highest + 1>"``.
    """
    highest = None
    for course in courses_collection.find({}, {"course_id": 1}):
        course_id = course.get("course_id")
        if not isinstance(course_id, str):
            continue
        try:
            # The first two characters are the prefix; the rest is the number.
            number = int(course_id[2:])
        except ValueError:
            # Skip IDs that do not follow the <prefix><number> pattern.
            continue
        if highest is None or number > highest:
            highest = number

    # NOTE(review): two concurrent callers can still obtain the same ID;
    # a unique index on course_id would be needed to rule that out.
    if highest is None:
        return "CS101"
    return f"CS{highest + 1}"
119
-
120
-
121
def create_course(course_name, start_date, duration_weeks):
    """
    Build one session document per topic of an AI-generated course plan.

    The plan is requested from ``generate_perplexity_response`` and parsed as
    JSON. Every topic of every sub-module becomes a session; the first
    session is dated ``start_date`` and each following one is 7 days later.

    Args:
        course_name: Name of the course to plan.
        start_date: Date of the first session.
        duration_weeks: Currently unused; kept for interface compatibility.

    Returns:
        None. The number of sessions built is printed. When the plan is
        missing or malformed the function returns early after reporting the
        problem through ``st.error``.
    """
    course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
    if not course_plan:
        # The helper returns "" on failure and has already reported the error;
        # json.loads("") would only raise a confusing JSONDecodeError.
        return

    try:
        course_plan_json = json.loads(course_plan)
    except json.JSONDecodeError as e:
        st.error(f"Course plan returned by the API is not valid JSON: {e}")
        return

    if not isinstance(course_plan_json, dict):
        st.error("Course plan returned by the API is not a JSON object.")
        return

    # Generate sessions for each module
    all_sessions = []
    session_date = start_date
    for module in course_plan_json.get('modules', []):
        for sub_module in module.get('sub_modules', []):
            for topic in sub_module.get('topics', []):
                all_sessions.append(
                    create_session(
                        title=topic,
                        date=session_date,
                        module_name=module.get('module_title', '')
                    )
                )
                session_date += timedelta(days=7)  # Next session after a week

    print("Number of sessions:", len(all_sessions))

    # NOTE: the sessions are not persisted; inserting a course document into
    # ``courses_collection`` is not implemented in this function.
248
-
249
def create_session(title: str, date: datetime, module_name: str):
    """
    Build a new session document with empty pre-, in- and post-class sections.

    Args:
        title: Title of the session.
        date: Scheduled date of the session.
        module_name: Accepted but not stored in the returned document.

    Returns:
        A dict with a fresh ``session_id``, status "upcoming", the creation
        time, and empty containers for resources, quiz, polls and assignments.
    """
    session = {
        "session_id": ObjectId(),
        "title": title,
        "date": date,
        "status": "upcoming",
        "created_at": datetime.utcnow(),
    }
    # Per-phase containers start empty and are filled in later.
    session["pre_class"] = {"resources": [], "completion_required": True}
    session["in_class"] = {"quiz": [], "polls": []}
    session["post_class"] = {"assignments": []}
    return session
269
-
270
- # Usage example:
271
- if __name__ == "__main__":
272
  create_course("Introduction to Data Analytics", datetime.now(), 2)
 
1
+ from datetime import datetime, timedelta
2
+ import os
3
+ from typing import Dict, List, Any
4
+ from pymongo import MongoClient
5
+ import requests
6
+ import uuid
7
+ import openai
8
+ from openai import OpenAI
9
+ import streamlit as st
10
+ from bson import ObjectId
11
+ from dotenv import load_dotenv
12
+ import json
13
+
14
+ load_dotenv()
15
+ MONGODB_URI = os.getenv("MONGO_URI")
16
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
17
+ OPENAI_API_KEY = os.getenv("OPENAI_KEY")
18
+
19
+ client = MongoClient(MONGODB_URI)
20
+ db = client['novascholar_db']
21
+ courses_collection = db['courses']
22
+
23
def generate_perplexity_response(api_key, course_name, model="llama-3.1-sonar-small-128k-online"):
    """
    Ask the Perplexity chat API for a course structure and return the raw reply.

    Args:
        api_key: Perplexity API key used to authenticate the request.
        course_name: Name of the course; interpolated into the prompt.
        model: Name of the Perplexity model to query. Defaults to the model
            this function has always used.

    Returns:
        The reply text. The prompt asks for a bare JSON object, but the text
        is not validated here. An empty string is returned when the request
        fails or the reply has no content; failures are reported through
        ``st.error``.
    """
    # The schema and the example below must both be valid JSON and use the
    # same key names, otherwise the model is nudged towards output that the
    # caller's ``json.loads`` / key lookups cannot handle.
    prompt = f"""
    You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate comprehensive, academically rigorous course structures for undergraduate level education.

    Please generate a detailed course structure for the course {course_name} in JSON format following these specifications:

    1. The course structure should be appropriate for a full semester (14-16 weeks)
    2. Each module should be designed for 2-4 weeks of instruction
    3. Follow standard academic practices and nomenclature
    4. Ensure progressive complexity from foundational to advanced concepts
    5. The course_title should exactly match the course name provided in the prompt. No additional information should be included in the course_title field.
    6. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
    7. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**


    The JSON response should follow this structure:
    {{
        "course_title": "string",
        "course_description": "string",
        "modules": [
            {{
                "module_title": "string",
                "sub_modules": [
                    {{
                        "title": "string",
                        "topics": ["string"]
                    }}
                ]
            }}
        ]
    }}

    Example response:
    {{
        "course_title": "Advanced Natural Language Processing",
        "course_description": "An advanced course covering modern approaches to NLP using deep learning, with focus on transformer architectures and their applications.",
        "modules": [
            {{
                "module_title": "Foundations of Modern NLP",
                "sub_modules": [
                    {{
                        "title": "Attention Mechanism",
                        "topics": [
                            "Self-attention",
                            "Multi-head attention",
                            "Positional encoding"
                        ]
                    }}
                ]
            }}
        ]
    }}
    """

    messages = [
        {
            "role": "system",
            "content": (
                "You are an expert educational AI assistant specializing in course design and curriculum planning. "
                "Your task is to generate accurate, detailed, and structured educational content for undergraduate-level and post-graduate-level courses. "
                "Provide detailed and accurate information tailored to the user's prompt. "
                "Ensure that the responses are logical, follow standard academic practices, and include realistic concepts relevant to the course."
            ),
        },
        {
            "role": "user",
            "content": prompt
        },
    ]
    try:
        # Perplexity is reached through the OpenAI client with a custom base
        # URL. Named distinctly so it is not confused with the module-level
        # MongoDB ``client``.
        perplexity_client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        response = perplexity_client.chat.completions.create(
            model=model,
            messages=messages
        )
        # Normalise a missing message body to "" so callers always get a str.
        return response.choices[0].message.content or ""
    except Exception as e:
        st.error(f"Failed to fetch data from Perplexity API: {e}")
        return ""
109
+
110
def get_new_course_id():
    """
    Generate a new course ID one above the highest existing numeric ID.

    Course IDs have the form ``CS<number>``. The numeric part of every stored
    ``course_id`` is compared as an integer. Sorting the string field in the
    database would order "CS999" above "CS1000" and hand out the same ID
    again once IDs reach four digits.

    Returns:
        ``"CS101"`` when no course has a usable ID, otherwise
        ``"CS<highest + 1>"``.
    """
    highest = None
    for course in courses_collection.find({}, {"course_id": 1}):
        course_id = course.get("course_id")
        if not isinstance(course_id, str):
            continue
        try:
            # The first two characters are the prefix; the rest is the number.
            number = int(course_id[2:])
        except ValueError:
            # Skip IDs that do not follow the <prefix><number> pattern.
            continue
        if highest is None or number > highest:
            highest = number

    # NOTE(review): two concurrent callers can still obtain the same ID;
    # a unique index on course_id would be needed to rule that out.
    if highest is None:
        return "CS101"
    return f"CS{highest + 1}"
119
+
120
+
121
+ def create_course(course_name, start_date, duration_weeks):
122
+ # Generate course overview
123
+ # overview_prompt = f"""Generate an overview for the undergraduate course {course_name}
124
+ # Include all relevant concepts and key topics covered in a typical curriculum.
125
+ # The response should be concise (300-400 words). Ensure that your response is in a valid JSON format."""
126
+
127
+ # overview_prompt2 = f"""Generate an overview for the undergraduate course {course_name}.
128
+ # The overview should include:
129
+ # The course title, a detailed course description,
130
+ # a division of all relevant concepts and key topics into 4-6 logical modules,
131
+ # capturing the flow and structure of a typical curriculum.
132
+ # Ensure the response adheres to the following JSON format:
133
+ # {{
134
+ # 'overview': 'string',
135
+ # 'modules': [
136
+ # {{
137
+ # 'name': 'string',
138
+ # 'description': 'string'
139
+ # }}
140
+ # ]
141
+ # }}
142
+ # overview: A detailed description of the course.
143
+ # modules: An array of 4-6 objects, each representing a logical module with a name and a brief description
144
+ # **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}"""
145
+
146
+ # course_overview = generate_perplexity_response(PERPLEXITY_API_KEY, overview_prompt2)
147
+ # # print(course_overview)
148
+ # course_overview_store = course_overview
149
+ # # print(course_overview_store)
150
+ # # Generate modules
151
+ # # modules_prompt = f"Based on this overview: {course_overview}\nCreate 4-6 logical modules for the course, each module should group related concepts and each module may include reference books if applicable"
152
+ # sub_modules_prompt = f"""Using the provided modules in the overview {course_overview_store}, generate 2-3 submodules for each module.
153
+ # Each submodule should represent a cohesive subset of the module's topics, logically organized for teaching purposes.
154
+ # Ensure the response adheres to the following JSON format:
155
+ # {
156
+ # 'modules': [
157
+ # {
158
+ # 'name': 'string',
159
+ # 'sub_modules': [
160
+ # {
161
+ # 'name': 'string',
162
+ # 'description': 'string'
163
+ # }
164
+ # ]
165
+ # }
166
+ # ]
167
+ # }
168
+ # modules: An array where each object contains the name of the module and its corresponding sub_modules.
169
+ # sub_modules: An array of 2-3 objects for each module, each having a name and a brief description."
170
+ # **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}
171
+ # """
172
+ # sub_modules = generate_perplexity_response(PERPLEXITY_API_KEY, sub_modules_prompt)
173
+
174
+ # # modules_response = generate_perplexity_response(modules_prompt)
175
+ # print(sub_modules)
176
+
177
+ # total_sessions = duration_weeks * sessions_per_week
178
+
179
+ course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
180
+ course_plan_json = json.loads(course_plan)
181
+
182
+ # Generate sessions for each module
183
+ all_sessions = []
184
+ for module in course_plan_json['modules']:
185
+ for sub_module in module['sub_modules']:
186
+ for topic in sub_module['topics']:
187
+ session = create_session(
188
+ title=topic,
189
+ date=start_date,
190
+ module_name=module['module_title']
191
+ )
192
+ # print(session)
193
+ all_sessions.append(session)
194
+ start_date += timedelta(days=7) # Next session after a week
195
+
196
+ # sample_sessions = [
197
+ # {'session_id': ObjectId('6767d0bbad8316ac358def25'), 'title': 'What is Generative AI?', 'date': datetime(2024, 12, 22, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
198
+ # {'session_id': ObjectId('6767d0bbad8316ac358def26'), 'title': 'History and Evolution of AI', 'date': datetime(2024, 12, 29, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
199
+ # {'session_id': ObjectId('6767d0bbad8316ac358def27'), 'title': 'Types of Generative AI (e.g., GANs, VAEs, LLMs)', 'date': datetime(2025, 1, 5, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 505626), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
200
+ # {'session_id': ObjectId('6767d0bbad8316ac358def28'), 'title': 'Overview of popular GenAI tools (e.g., ChatGPT, Claude, Google Gemini)', 'date': datetime(2025, 1, 12, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 506559), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
201
+ # {'session_id': ObjectId('6767d0bbad8316ac358def29'), 'title': 'Frameworks for building GenAI models (e.g., TensorFlow, PyTorch)', 'date': datetime(2025, 1, 19, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 506559), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
202
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2a'), 'title': 'Integration with other AI technologies', 'date': datetime(2025, 1, 26, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 507612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
203
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2b'), 'title': 'Text-to-text models (e.g., GPT-3, BERT)', 'date': datetime(2025, 2, 2, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 508512), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
204
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2c'), 'title': 'Text generation for content creation and marketing', 'date': datetime(2025, 2, 9, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 508512), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
205
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2d'), 'title': 'Chatbots and conversational interfaces', 'date': datetime(2025, 2, 16, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 509612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
206
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2e'), 'title': 'Generative Adversarial Networks (GANs)', 'date': datetime(2025, 2, 23, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 509612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
207
+ # {'session_id': ObjectId('6767d0bbad8316ac358def2f'), 'title': 'Variational Autoencoders (VAEs)', 'date': datetime(2025, 3, 2, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 510612), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
208
+ # {'session_id': ObjectId('6767d0bbad8316ac358def30'), 'title': 'Applications in art, design, and media', 'date': datetime(2025, 3, 9, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 511497), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
209
+ # {'session_id': ObjectId('6767d0bbad8316ac358def31'), 'title': 'Understanding prompt design principles', 'date': datetime(2025, 3, 16, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 511497), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
210
+ # {'session_id': ObjectId('6767d0bbad8316ac358def33'), 'title': 'Advanced techniques for fine-tuning models', 'date': datetime(2025, 3, 30, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 512514), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
211
+ # {'session_id': ObjectId('6767d0bbad8316ac358def34'), 'title': 'Ethical implications of AI-generated content', 'date': datetime(2025, 4, 6, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 513613), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
212
+ # {'session_id': ObjectId('6767d0bbad8316ac358def35'), 'title': 'Addressing bias in AI models', 'date': datetime(2025, 4, 13, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 514639), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
213
+ # {'session_id': ObjectId('6767d0bbad8316ac358def36'), 'title': 'Regulatory frameworks and guidelines', 'date': datetime(2025, 4, 20, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 514639), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
214
+ # {'session_id': ObjectId('6767d0bbad8316ac358def37'), 'title': 'Case studies from various industries (e.g., marketing, healthcare, finance)', 'date': datetime(2025, 4, 27, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 515610), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
215
+ # {'session_id': ObjectId('6767d0bbad8316ac358def38'), 'title': 'Success stories and challenges faced by companies using GenAI', 'date': datetime(2025, 5, 4, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 515610), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
216
+ # {'session_id': ObjectId('6767d0bbad8316ac358def39'), 'title': 'Guidelines for developing a GenAI project', 'date': datetime(2025, 5, 11, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 516614), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
217
+ # {'session_id': ObjectId('6767d0bbad8316ac358def3a'), 'title': 'Tools and resources for project implementation', 'date': datetime(2025, 5, 18, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 516614), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
218
+ # {'session_id': ObjectId('6767d0bbad8316ac358def3b'), 'title': 'Best practices for testing and deployment', 'date': datetime(2025, 5, 25, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 517563), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}}
219
+ # ]
220
+
221
+ # small_sample_sessions = [
222
+ # {'session_id': ObjectId('6767d0bbad8316ac358def25'), 'title': 'What is Generative AI?', 'date': datetime(2024, 12, 22, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
223
+ # {'session_id': ObjectId('6767d0bbad8316ac358def26'), 'title': 'History and Evolution of AI', 'date': datetime(2024, 12, 29, 14, 11, 27, 153899), 'status': 'upcoming', 'created_at': datetime(2024, 12, 22, 8, 41, 31, 504599), 'pre_class': {'resources': [], 'completion_required': True}, 'in_class': {'quiz': [], 'polls': []}, 'post_class': {'assignments': []}},
224
+ # ]
225
+
226
+
227
+ # print(all_sessions)
228
+
229
+ print("Number of sessions:", len(all_sessions))
230
+ # Create course document
231
+ # course_description = course_plan_json['course_description']
232
+ # course_doc = {
233
+ # "course_id": get_new_course_id(),
234
+ # "title": course_name,
235
+ # "description": course_description,
236
+ # "faculty": faculty_name,
237
+ # "faculty_id": faculty_id,
238
+ # "duration": f"{duration_weeks} weeks",
239
+ # "created_at": datetime.utcnow(),
240
+ # "sessions": all_sessions
241
+ # }
242
+ # try:
243
+ # courses_collection.insert_one(course_doc)
244
+ # except Exception as e:
245
+ # st.error(f"Failed to insert course data into the database: {e}")
246
+
247
+ # print(course_plan)
248
+
249
def create_session(title: str, date: datetime, module_name: str):
    """Create a session document with pre-class, in-class, and post-class components.

    Args:
        title: Title of the session (the topic being taught).
        date: Scheduled date of the session.
        module_name: Title of the module the session belongs to.

    Returns:
        A dict describing the session, with a freshly generated ``ObjectId``
        as ``session_id`` and empty pre-class / in-class / post-class
        containers.
    """
    return {
        "session_id": ObjectId(),
        "title": title,
        "date": date,
        "status": "upcoming",
        "created_at": datetime.utcnow(),
        # The module name used to be accepted but dropped; store it so the
        # session can be traced back to its module.
        "module_name": module_name,
        "pre_class": {
            "resources": [],
            "completion_required": True,
        },
        "in_class": {
            "quiz": [],
            "polls": [],
        },
        "post_class": {
            "assignments": [],
        },
    }
269
+
270
+ # Usage example:
271
+ if __name__ == "__main__":
272
  create_course("Introduction to Data Analytics", datetime.now(), 2)
create_course2.py CHANGED
@@ -1,331 +1,331 @@
1
- from datetime import datetime, timedelta
2
- import os
3
- from typing import Dict, List, Any
4
- from pymongo import MongoClient
5
- import requests
6
- import uuid
7
- import openai
8
- from openai import OpenAI
9
- import streamlit as st
10
- from bson import ObjectId
11
- from dotenv import load_dotenv
12
- import json
13
-
14
- load_dotenv()
15
- MONGODB_URI = os.getenv("MONGO_URI")
16
- PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
17
- OPENAI_API_KEY = os.getenv("OPENAI_KEY")
18
-
19
- client = MongoClient(MONGODB_URI)
20
- db = client['novascholar_db']
21
- courses_collection = db['courses']
22
-
23
- def generate_perplexity_response(api_key, course_name, duration_weeks, sessions_per_week):
24
- headers = {
25
- "accept": "application/json",
26
- "content-type": "application/json",
27
- "authorization": f"Bearer {api_key}"
28
- }
29
-
30
- # Calculate sessions based on duration
31
- total_sessions = duration_weeks * sessions_per_week # Assuming 2 sessions per week
32
-
33
- prompt = f"""
34
- You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate a comprehensive, academically rigorous course structure for the course {course_name} that fits exactly within {duration_weeks} weeks with {total_sessions} total sessions ({sessions_per_week} sessions per week).
35
-
36
- Please generate a detailed course structure in JSON format following these specifications:
37
-
38
- 1. The course structure must be designed for exactly {duration_weeks} weeks with {total_sessions} total sessions
39
- 2. Each module should contain an appropriate number of sessions that sum up to exactly {total_sessions}
40
- 3. Each session should be designed for a 1-1.5-hour class duration
41
- 4. Follow standard academic practices and nomenclature
42
- 5. Ensure progressive complexity from foundational to advanced concepts
43
- 6. The course_title should exactly match the course name provided
44
- 7. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
45
- 8. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
46
-
47
- The JSON response should follow this structure:
48
- {{
49
- "course_title": "string",
50
- "course_description": "string",
51
- "total_duration_weeks": {duration_weeks},
52
- "sessions_per_week": {sessions_per_week},
53
- "total_sessions": {total_sessions},
54
- "modules": [
55
- {{
56
- "module_title": "string",
57
- "module_duration_sessions": number,
58
- "sub_modules": [
59
- {{
60
- "title": "string",
61
- "topics": [
62
- {{
63
- "title": "string",
64
- "short_description": "string",
65
- "concise_learning_objectives": ["string"]
66
- }}
67
- ]
68
- }}
69
- ]
70
- }}
71
- ]
72
- }}
73
-
74
- Ensure that:
75
- 1. The sum of all module_duration_sessions equals exactly {total_sessions}
76
- 2. Each topic has clear learning objectives
77
- 3. Topics build upon each other logically
78
- 4. Content is distributed evenly across the available sessions
79
- 5. **This Instruction is Strictly followed: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.****
80
-
81
- """
82
-
83
- messages = [
84
- {
85
- "role": "system",
86
- "content": (
87
- "You are an expert educational AI assistant specializing in course design and curriculum planning. "
88
- "Your task is to generate accurate, detailed, and structured educational content that precisely fits "
89
- "the specified duration."
90
- ),
91
- },
92
- {
93
- "role": "user",
94
- "content": prompt
95
- },
96
- ]
97
-
98
- try:
99
- client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
100
- response = client.chat.completions.create(
101
- model="llama-3.1-sonar-small-128k-online",
102
- messages=messages
103
- )
104
- content = response.choices[0].message.content
105
-
106
- # Validate session count
107
- course_plan = json.loads(content)
108
- total_planned_sessions = sum(
109
- module.get('module_duration_sessions', 0)
110
- for module in course_plan.get('modules', [])
111
- )
112
-
113
- if abs(total_planned_sessions - total_sessions) > 5:
114
- raise ValueError(f"Generated plan has {total_planned_sessions} sessions, but {total_sessions} were requested")
115
-
116
- return content
117
- except Exception as e:
118
- st.error(f"Failed to fetch data from Perplexity API: {e}")
119
- return ""
120
-
121
- def generate_session_resources(api_key, session_titles: List[str]):
122
- """
123
- Generate relevant resources for each session title separately
124
- """
125
- resources_prompt = f"""
126
- You are an expert educational content curator. For each session title provided, suggest highly relevant and accurate learning resources.
127
- Please provide resources for these sessions: {session_titles}
128
-
129
- For each session, provide resources in this JSON format:
130
- {{
131
- "session_resources": [
132
- {{
133
- "session_title": "string",
134
- "resources": {{
135
- "readings": [
136
- {{
137
- "title": "string",
138
- "url": "string",
139
- "type": "string",
140
- "estimated_read_time": "string"
141
- }}
142
- ],
143
- "books": [
144
- {{
145
- "title": "string",
146
- "author": "string",
147
- "isbn": "string",
148
- "chapters": "string"
149
- }}
150
- ],
151
- "additional_resources": [
152
- {{
153
- "title": "string",
154
- "url": "string",
155
- "type": "string",
156
- "description": "string"
157
- }}
158
- ]
159
- }}
160
- }}
161
- ]
162
- }}
163
-
164
- Guidelines:
165
- 1. Ensure all URLs are real and currently active
166
- 2. Prioritize high-quality, authoritative sources
167
- 3. Include 1-2 resources of each type
168
- 5. For readings, include a mix of academic and practical resources. It can exceed to 3-4 readings
169
- 6. Book references should be real, recently published works
170
- 7. Additional resources can include tools, documentation, or practice platforms
171
- 8. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
172
- 9. ***NOTE: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
173
- """
174
-
175
- messages = [
176
- {
177
- "role": "system",
178
- "content": "You are an expert educational content curator, focused on providing accurate and relevant learning resources.",
179
- },
180
- {
181
- "role": "user",
182
- "content": resources_prompt
183
- },
184
- ]
185
-
186
- try:
187
- client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
188
- response = client.chat.completions.create(
189
- model="llama-3.1-sonar-small-128k-online",
190
- messages=messages
191
- )
192
- print("Response is: \n", response.choices[0].message.content)
193
- # try:
194
- # return json.loads(response.choices[0].message.content)
195
- # except json.JSONDecodeError as e:
196
- # st.error(f"Failed to decode JSON response: {e}")
197
- # return None
198
- return response.choices[0].message.content
199
- except Exception as e:
200
- st.error(f"Failed to generate resources: {e}")
201
- return None
202
-
203
- def validate_course_plan(course_plan):
204
- required_fields = ['course_title', 'course_description', 'modules']
205
- if not all(field in course_plan for field in required_fields):
206
- raise ValueError("Invalid course plan structure")
207
-
208
- for module in course_plan['modules']:
209
- if 'module_title' not in module or 'sub_modules' not in module:
210
- raise ValueError("Invalid module structure")
211
-
212
- def create_session(title: str, date: datetime, module_name: str, resources: dict):
213
- """Create a session document with pre-class, in-class, and post-class components."""
214
- return {
215
- "session_id": ObjectId(),
216
- "title": title,
217
- "date": date,
218
- "status": "upcoming",
219
- "created_at": datetime.utcnow(),
220
- "module_name": module_name,
221
- "pre_class": {
222
- "resources": [],
223
- "completion_required": True
224
- },
225
- "in_class": {
226
- "quiz": [],
227
- "polls": []
228
- },
229
- "post_class": {
230
- "assignments": []
231
- },
232
- "external_resources": {
233
- "readings": resources.get("readings", []),
234
- "books": resources.get("books", []),
235
- "additional_resources": resources.get("additional_resources", [])
236
- }
237
- }
238
-
239
- def create_course(course_name: str, start_date: datetime, duration_weeks: int, sessions_per_week: int):
240
- # First generate a course plan using Perplexity API
241
- # course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name, duration_weeks, sessions_per_week)
242
- # course_plan_json = json.loads(course_plan)
243
-
244
- # print("Course Structure is: \n", course_plan_json);
245
-
246
- # Earlier Code:
247
- # Generate sessions for each module with resources
248
- # all_sessions = []
249
- # current_date = start_date
250
-
251
- # for module in course_plan_json['modules']:
252
- # for sub_module in module['sub_modules']:
253
- # for topic in sub_module['topics']:
254
- # session = create_session(
255
- # title=topic['title'],
256
- # date=current_date,
257
- # module_name=module['module_title'],
258
- # resources=topic['resources']
259
- # )
260
- # all_sessions.append(session)
261
- # current_date += timedelta(days=3.5) # Spacing sessions evenly across the week
262
-
263
- # return course_plan_json, all_sessions
264
-
265
- # New Code:
266
- # Extract all session titles
267
- session_titles = []
268
- # Load the course plan JSON
269
- course_plan_json = {}
270
- with open('sample_files/sample_course.json', 'r') as file:
271
- course_plan_json = json.load(file)
272
-
273
- for module in course_plan_json['modules']:
274
- for sub_module in module['sub_modules']:
275
- for topic in sub_module['topics']:
276
- session_titles.append(topic['title'])
277
-
278
- # Generate resources for all sessions
279
- session_resources = generate_session_resources(PERPLEXITY_API_KEY, session_titles)
280
- # print("Session Resources are: \n", session_resources)
281
- resources = json.loads(session_resources)
282
- # print("Resources JSON is: \n", resources_json)
283
-
284
- # print("Session Resources are: \n", session_resources)
285
-
286
- # Create a mapping of session titles to their resources
287
-
288
- # Import Resources JSON
289
- # resources = {}
290
- # with open('sample_files/sample_course_resources.json', 'r') as file:
291
- # resources = json.load(file)
292
-
293
- resources_map = {
294
- resource['session_title']: resource['resources']
295
- for resource in resources['session_resources']
296
- }
297
- print("Resources Map is: \n", resources_map)
298
- # print("Sample is: ", resources_map.get('Overview of ML Concepts, History, and Applications'));
299
- # Generate sessions with their corresponding resources
300
- all_sessions = []
301
- current_date = start_date
302
-
303
- for module in course_plan_json['modules']:
304
- for sub_module in module['sub_modules']:
305
- for topic in sub_module['topics']:
306
- session = create_session(
307
- title=topic['title'],
308
- date=current_date,
309
- module_name=module['module_title'],
310
- resources=resources_map.get(topic['title'], {})
311
- )
312
- all_sessions.append(session)
313
- current_date += timedelta(days=3.5)
314
-
315
- print("All Sessions are: \n", all_sessions)
316
-
317
- def get_new_course_id():
318
- """Generate a new course ID by incrementing the last course ID"""
319
- last_course = courses_collection.find_one(sort=[("course_id", -1)])
320
- if last_course:
321
- last_course_id = int(last_course["course_id"][2:])
322
- new_course_id = f"CS{last_course_id + 1}"
323
- else:
324
- new_course_id = "CS101"
325
- return new_course_id
326
-
327
- # if __name__ == "__main__":
328
- # course_name = "Introduction to Machine Learning"
329
- # start_date = datetime(2022, 9, 1)
330
- # duration_weeks = 4
331
  # create_course(course_name, start_date, duration_weeks, 3)
 
1
+ from datetime import datetime, timedelta
2
+ import os
3
+ from typing import Dict, List, Any
4
+ from pymongo import MongoClient
5
+ import requests
6
+ import uuid
7
+ import openai
8
+ from openai import OpenAI
9
+ import streamlit as st
10
+ from bson import ObjectId
11
+ from dotenv import load_dotenv
12
+ import json
13
+
14
+ load_dotenv()
15
+ MONGODB_URI = os.getenv("MONGO_URI")
16
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
17
+ OPENAI_API_KEY = os.getenv("OPENAI_KEY")
18
+
19
+ client = MongoClient(MONGODB_URI)
20
+ db = client['novascholar_db']
21
+ courses_collection = db['courses']
22
+
23
def generate_perplexity_response(api_key, course_name, duration_weeks, sessions_per_week):
    """Ask the Perplexity chat API for a JSON course plan sized to the schedule.

    Args:
        api_key: Perplexity API key passed to the OpenAI-compatible client.
        course_name: Course title inserted into the prompt.
        duration_weeks: Number of weeks the course runs.
        sessions_per_week: Number of sessions held each week.

    Returns:
        The JSON text of the generated plan (with any surrounding Markdown
        code fence removed). Returns "" after reporting the problem through
        ``st.error`` when the request, the JSON parsing, or the session-count
        check fails.
    """
    # Largest accepted gap between the requested session count and the sum
    # of module_duration_sessions in the generated plan.
    max_session_drift = 5

    total_sessions = duration_weeks * sessions_per_week

    prompt = f"""
    You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate a comprehensive, academically rigorous course structure for the course {course_name} that fits exactly within {duration_weeks} weeks with {total_sessions} total sessions ({sessions_per_week} sessions per week).

    Please generate a detailed course structure in JSON format following these specifications:

    1. The course structure must be designed for exactly {duration_weeks} weeks with {total_sessions} total sessions
    2. Each module should contain an appropriate number of sessions that sum up to exactly {total_sessions}
    3. Each session should be designed for a 1-1.5-hour class duration
    4. Follow standard academic practices and nomenclature
    5. Ensure progressive complexity from foundational to advanced concepts
    6. The course_title should exactly match the course name provided
    7. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
    8. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**

    The JSON response should follow this structure:
    {{
        "course_title": "string",
        "course_description": "string",
        "total_duration_weeks": {duration_weeks},
        "sessions_per_week": {sessions_per_week},
        "total_sessions": {total_sessions},
        "modules": [
            {{
                "module_title": "string",
                "module_duration_sessions": number,
                "sub_modules": [
                    {{
                        "title": "string",
                        "topics": [
                            {{
                                "title": "string",
                                "short_description": "string",
                                "concise_learning_objectives": ["string"]
                            }}
                        ]
                    }}
                ]
            }}
        ]
    }}

    Ensure that:
    1. The sum of all module_duration_sessions equals exactly {total_sessions}
    2. Each topic has clear learning objectives
    3. Topics build upon each other logically
    4. Content is distributed evenly across the available sessions
    5. **This Instruction is Strictly followed: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.****

    """

    messages = [
        {
            "role": "system",
            "content": (
                "You are an expert educational AI assistant specializing in course design and curriculum planning. "
                "Your task is to generate accurate, detailed, and structured educational content that precisely fits "
                "the specified duration."
            ),
        },
        {
            "role": "user",
            "content": prompt,
        },
    ]

    try:
        # Named distinctly so it cannot be confused with the module-level
        # MongoDB `client`.
        perplexity_client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        response = perplexity_client.chat.completions.create(
            model="llama-3.1-sonar-small-128k-online",
            messages=messages,
        )
        content = response.choices[0].message.content.strip()

        # The model sometimes wraps its answer in a Markdown fence despite
        # the prompt; remove it so the text is parseable JSON.
        if content.startswith("```"):
            content = content[3:]
            if content[:4].lower() == "json":
                content = content[4:]
            if content.rstrip().endswith("```"):
                content = content.rstrip()[:-3]
            content = content.strip()

        # Validate session count
        course_plan = json.loads(content)
        total_planned_sessions = sum(
            module.get('module_duration_sessions', 0)
            for module in course_plan.get('modules', [])
        )

        if abs(total_planned_sessions - total_sessions) > max_session_drift:
            raise ValueError(f"Generated plan has {total_planned_sessions} sessions, but {total_sessions} were requested")

        return content
    except Exception as e:
        # Boundary with the UI: report any failure and return an empty plan.
        st.error(f"Failed to fetch data from Perplexity API: {e}")
        return ""
120
+
121
def generate_session_resources(api_key, session_titles: List[str]):
    """Ask the Perplexity chat API for learning resources for each session title.

    Args:
        api_key: Perplexity API key passed to the OpenAI-compatible client.
        session_titles: Titles of the sessions to find resources for.

    Returns:
        The raw response text, expected to be a JSON object with a
        ``session_resources`` list (any surrounding Markdown code fence is
        removed). Returns ``None`` after reporting the problem through
        ``st.error`` when the request fails. The text is not parsed here;
        callers are expected to ``json.loads`` it.
    """
    resources_prompt = f"""
    You are an expert educational content curator. For each session title provided, suggest highly relevant and accurate learning resources.
    Please provide resources for these sessions: {session_titles}

    For each session, provide resources in this JSON format:
    {{
        "session_resources": [
            {{
                "session_title": "string",
                "resources": {{
                    "readings": [
                        {{
                            "title": "string",
                            "url": "string",
                            "type": "string",
                            "estimated_read_time": "string"
                        }}
                    ],
                    "books": [
                        {{
                            "title": "string",
                            "author": "string",
                            "isbn": "string",
                            "chapters": "string"
                        }}
                    ],
                    "additional_resources": [
                        {{
                            "title": "string",
                            "url": "string",
                            "type": "string",
                            "description": "string"
                        }}
                    ]
                }}
            }}
        ]
    }}

    Guidelines:
    1. Ensure all URLs are real and currently active
    2. Prioritize high-quality, authoritative sources
    3. Include 1-2 resources of each type
    4. For readings, include a mix of academic and practical resources; readings may extend to 3-4 items
    5. Book references should be real, recently published works
    6. Additional resources can include tools, documentation, or practice platforms
    7. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
    8. ***NOTE: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
    """

    messages = [
        {
            "role": "system",
            "content": "You are an expert educational content curator, focused on providing accurate and relevant learning resources.",
        },
        {
            "role": "user",
            "content": resources_prompt,
        },
    ]

    try:
        # Named distinctly so it cannot be confused with the module-level
        # MongoDB `client`.
        perplexity_client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        response = perplexity_client.chat.completions.create(
            model="llama-3.1-sonar-small-128k-online",
            messages=messages,
        )
        content = response.choices[0].message.content
        if content is None:
            return None

        # The model sometimes wraps its answer in a Markdown fence despite
        # the prompt; remove it so callers can parse the text as JSON.
        content = content.strip()
        if content.startswith("```"):
            content = content[3:]
            if content[:4].lower() == "json":
                content = content[4:]
            if content.rstrip().endswith("```"):
                content = content.rstrip()[:-3]
            content = content.strip()
        return content
    except Exception as e:
        # Boundary with the UI: report any failure and signal it with None.
        st.error(f"Failed to generate resources: {e}")
        return None
202
+
203
def validate_course_plan(course_plan):
    """Check that a parsed course plan has the minimum expected structure.

    Args:
        course_plan: The decoded course plan, expected to be a dict with
            ``course_title``, ``course_description`` and a ``modules`` list
            whose items each carry ``module_title`` and ``sub_modules``.

    Returns:
        None. The function only raises on invalid input.

    Raises:
        ValueError: If the plan is not a dict, a required top-level field is
            missing, ``modules`` is not a list, or a module is not a dict
            with both ``module_title`` and ``sub_modules``.
    """
    required_fields = ('course_title', 'course_description', 'modules')
    # A non-dict plan (None, str, list) must fail with ValueError too, not
    # with a TypeError or an accidental substring match.
    if not isinstance(course_plan, dict):
        raise ValueError("Invalid course plan structure")
    if not all(field in course_plan for field in required_fields):
        raise ValueError("Invalid course plan structure")

    modules = course_plan['modules']
    if not isinstance(modules, list):
        raise ValueError("Invalid course plan structure")

    for module in modules:
        if not isinstance(module, dict):
            raise ValueError("Invalid module structure")
        if 'module_title' not in module or 'sub_modules' not in module:
            raise ValueError("Invalid module structure")
211
+
212
def create_session(title: str, date: datetime, module_name: str, resources: dict):
    """Create a session document with pre-class, in-class, and post-class components.

    Args:
        title: Title of the session (the topic being taught).
        date: Scheduled date of the session.
        module_name: Title of the module the session belongs to.
        resources: External resources for the session, with optional
            ``readings``, ``books`` and ``additional_resources`` lists.
            ``None`` or a non-dict value is treated as "no resources".

    Returns:
        A dict describing the session, with a freshly generated ``ObjectId``
        as ``session_id``, empty pre-class / in-class / post-class
        containers, and the given resources under ``external_resources``.
    """
    # Resources come from model output and may be null or malformed; fall
    # back to an empty mapping instead of crashing on `.get`.
    if not isinstance(resources, dict):
        resources = {}

    return {
        "session_id": ObjectId(),
        "title": title,
        "date": date,
        "status": "upcoming",
        "created_at": datetime.utcnow(),
        "module_name": module_name,
        "pre_class": {
            "resources": [],
            "completion_required": True,
        },
        "in_class": {
            "quiz": [],
            "polls": [],
        },
        "post_class": {
            "assignments": [],
        },
        "external_resources": {
            # `or []` keeps an explicit null from being stored as None.
            "readings": resources.get("readings") or [],
            "books": resources.get("books") or [],
            "additional_resources": resources.get("additional_resources") or [],
        },
    }
238
+
239
def create_course(course_name: str, start_date: datetime, duration_weeks: int, sessions_per_week: int):
    """Build the session list for a course from the sample plan plus generated resources.

    The course plan is currently read from ``sample_files/sample_course.json``
    rather than generated, so ``course_name`` and ``duration_weeks`` are not
    used yet. NOTE(review): the Perplexity-based plan generation appears to
    be disabled on purpose; confirm before re-enabling.

    Args:
        course_name: Name of the course (currently unused, see above).
        start_date: Date of the first session.
        duration_weeks: Length of the course in weeks (currently unused).
        sessions_per_week: Sessions held per week; sets the spacing between
            consecutive sessions (7 / sessions_per_week days). A missing or
            non-positive value falls back to 3.5 days.

    Returns:
        A ``(course_plan_json, all_sessions)`` tuple: the loaded plan and
        the list of session documents, one per topic, in plan order.
    """
    with open('sample_files/sample_course.json', 'r', encoding='utf-8') as file:
        course_plan_json = json.load(file)

    # Collect every topic title so resources can be requested in one call.
    session_titles = []
    for module in course_plan_json['modules']:
        for sub_module in module['sub_modules']:
            for topic in sub_module['topics']:
                session_titles.append(topic['title'])

    session_resources = generate_session_resources(PERPLEXITY_API_KEY, session_titles)

    # Map each session title to its resources. A failed request (None/"")
    # or a malformed response leaves the map empty, so sessions are still
    # created, just without external resources.
    resources_map = {}
    if session_resources:
        try:
            resources = json.loads(session_resources)
            resources_map = {
                resource['session_title']: resource['resources']
                for resource in resources['session_resources']
            }
        except (json.JSONDecodeError, KeyError, TypeError) as e:
            st.error(f"Failed to parse session resources: {e}")
            resources_map = {}
    print("Resources Map is: \n", resources_map)

    # Spread the sessions evenly across each week.
    if sessions_per_week and sessions_per_week > 0:
        session_gap = timedelta(days=7 / sessions_per_week)
    else:
        session_gap = timedelta(days=3.5)

    all_sessions = []
    current_date = start_date
    for module in course_plan_json['modules']:
        for sub_module in module['sub_modules']:
            for topic in sub_module['topics']:
                session = create_session(
                    title=topic['title'],
                    date=current_date,
                    module_name=module['module_title'],
                    resources=resources_map.get(topic['title'], {}),
                )
                all_sessions.append(session)
                current_date += session_gap

    print("All Sessions are: \n", all_sessions)
    return course_plan_json, all_sessions
316
+
317
def get_new_course_id():
    """Generate a new course ID one above the highest existing numeric ID.

    Course IDs have the form ``CS<number>``. The highest number is found by
    comparing the numeric suffixes, not the strings: a string sort would
    rank "CS999" above "CS1000" and hand out an ID that already exists.

    Returns:
        ``"CS101"`` when no course has a usable ID, otherwise ``"CS<n + 1>"``
        where ``n`` is the largest numeric suffix found. IDs whose suffix is
        not a plain number are ignored.
    """
    highest = None
    # Only the course_id field is needed; the projection keeps the scan light.
    for course in courses_collection.find({}, {"course_id": 1}):
        suffix = str(course.get("course_id", ""))[2:]
        if not suffix.isdecimal():
            continue
        number = int(suffix)
        if highest is None or number > highest:
            highest = number

    if highest is None:
        return "CS101"
    return f"CS{highest + 1}"
326
+
327
+ # if __name__ == "__main__":
328
+ # course_name = "Introduction to Machine Learning"
329
+ # start_date = datetime(2022, 9, 1)
330
+ # duration_weeks = 4
331
  # create_course(course_name, start_date, duration_weeks, 3)
create_course3.py ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta
2
+ import os
3
+ from typing import Dict, List, Any
4
+ from pymongo import MongoClient
5
+ import requests
6
+ import uuid
7
+ import openai
8
+ from openai import OpenAI
9
+ import streamlit as st
10
+ from bson import ObjectId
11
+ from dotenv import load_dotenv
12
+ import json
13
+ import google.generativeai as genai
14
+ from mistralai import Mistral
15
+
16
+ load_dotenv()
17
+ MONGODB_URI = os.getenv("MONGODB_URI")
18
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
19
+ OPENAI_API_KEY = os.getenv("OPENAI_KEY")
20
+ GEMINI_API_KEY = os.getenv("GEMINI_KEY")
21
+ MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
22
+
23
+ client = MongoClient(MONGODB_URI)
24
+ db = client['novascholar_db']
25
+ courses_collection = db['courses']
26
+
27
+ genai.configure(api_key=GEMINI_API_KEY)
28
+ model = genai.GenerativeModel("gemini-1.5-flash")
29
+
30
+
31
def generate_course_outcomes(api_key, course_name, duration_weeks, sessions_per_week):
    """Ask the Gemini model for the Course Learning Outcomes (CLOs) of a course.

    Args:
        api_key: Unused by this function; the module-level ``model`` is
            already configured. Kept so existing callers keep working.
        course_name (str): Course name interpolated into the prompt.
        duration_weeks: Unused by this function; kept for interface
            compatibility.
        sessions_per_week: Unused by this function; kept for interface
            compatibility.

    Returns:
        str or None: The raw response text when it is valid JSON, otherwise
        ``None`` (the decode error is printed).
    """
    prompt = f"""
You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate a comprehensive, academically rigorous set of Course Learning Outcomes (CLOs) for the course {course_name}. These CLOs will serve as a foundation for instructional design and assessment planning.

Please generate a detailed list of CLOs in JSON format following these specifications:

1. The CLOs should be clear, concise, and aligned with Bloom's Taxonomy, progressively covering lower-order to higher-order cognitive skills.
2. Each CLO must explicitly define the skills, knowledge, or abilities the student is expected to acquire upon completing the course.
3. The CLOs must align with the overall course objective and encompass foundational to advanced concepts.
4. Use academic language appropriate for higher education or professional training.
5. Ensure the CLOs are measurable and actionable.
6. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**

The JSON response should follow this structure:
{{
"course_title": "string",
"course_description": "string",
"learning_outcomes": [
{{
"outcome_number": CO + number,
"outcome_description": "string",
"aligned_blooms_taxonomy_level": "string"
}}
]
}}

Ensure that:
1. Each outcome has a unique outcome_number starting from 1.
2. The aligned_blooms_taxonomy_level must be one of the following: "Remember", "Understand", "Apply", "Analyze", "Evaluate", or "Create".
3. The total number of CLOs should appropriately cover the breadth and depth of the course content, typically 5-7 CLOs.
4. **This Instruction is Strictly followed: DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**

"""

    response = model.generate_content(
        prompt,
        generation_config=genai.GenerationConfig(
            response_mime_type="application/json"
        )
    )
    try:
        # Parsed only to validate; callers receive the raw JSON text.
        json.loads(response.text)
    except json.JSONDecodeError as e:
        print("Error decoding COs JSON response:", e)
        return None
    return response.text
90
+
91
def generate_module_outcomes(course_name, course_outcomes, duration_weeks, sessions_per_week):
    """Ask the Gemini model to split the course outcomes into modules with MLOs.

    Args:
        course_name (str): Course name interpolated into the prompt.
        course_outcomes: Course Learning Outcomes, interpolated into the
            prompt as-is.
        duration_weeks (int): Course length in weeks.
        sessions_per_week (int): Sessions held each week; multiplied by
            ``duration_weeks`` to obtain the total session count.

    Returns:
        str or None: The raw response text when it is valid JSON, otherwise
        ``None`` (the decode error is printed).
    """
    total_sessions = duration_weeks * sessions_per_week
    prompt = f"""
You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to break down the provided Course Learning Outcomes (CLOs) for the course {course_name} into logically structured modules and corresponding Module Learning Outcomes (MLOs). The structure must fit exactly within {duration_weeks} weeks with {total_sessions} total sessions ({sessions_per_week} sessions per week). Each module will be designed to align with specific CLOs and distribute content evenly across the available sessions.

Here are the Course Learning Outcomes (CLOs) for the course {course_name} in JSON format:
{course_outcomes}

Please generate the module structure in JSON format following these specifications:

1. Break the CLOs into logically grouped **modules**, ensuring that each module has a clear focus and progresses from foundational to advanced concepts.
2. Each module must include:
- A **module title** summarizing its focus.
- A list of aligned CLOs that are covered within the module.
- Module Learning Outcomes (MLOs) that are measurable and actionable, aligned with the CLOs.
- The number of sessions allocated to the module (module_duration_sessions), such that the total sessions across all modules sum up to {total_sessions}.
3. Ensure that the module_duration_sessions are evenly distributed while allowing for some variation based on the complexity of the module.
4. Progressively distribute content so that earlier modules cover foundational concepts, and later modules cover advanced topics.
5. The number of sessions allocated to each module must reflect the relative depth and complexity of its content.
6. Ensure all modules fit within {duration_weeks} weeks and {sessions_per_week} sessions per week.

The JSON response should follow this structure:
{{
"course_title": "string",
"course_description": "string",
"total_duration_weeks": {duration_weeks},
"sessions_per_week": {sessions_per_week},
"total_sessions": {total_sessions},
"modules": [
{{
"module_title": "string",
"module_duration_sessions": number,
"aligned_CLOs": ["CLO1", "CLO2", ...],
"module_learning_outcomes": [
{{
"outcome_number": "MLO + number",
"outcome_description": "string",
"aligned_blooms_taxonomy_level": "string"
}}
]
}}
]
}}

Ensure that:
1. The sum of all module_duration_sessions equals exactly {total_sessions}.
2. Each MLO is aligned with its respective CLOs and measurable within the allocated sessions.
3. Modules are well-distributed and follow a logical progression of topics.
4. **This Instruction is Strictly followed: DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
"""
    json_only_config = genai.GenerationConfig(response_mime_type="application/json")
    reply = model.generate_content(prompt, generation_config=json_only_config)
    try:
        # Validation only: the caller gets the raw text back.
        json.loads(reply.text)
    except json.JSONDecodeError as e:
        print("Error decoding Modules JSON response:", e)
        return None
    else:
        return reply.text
152
+
153
def generate_submodule_outcomes(course_name, course_outcomes, module_outcomes, duration_weeks, sessions_per_week):
    """Ask the Gemini model to break each module into submodules with SMLOs.

    Args:
        course_name (str): Course name interpolated into the prompt.
        course_outcomes: Course Learning Outcomes, interpolated as-is.
        module_outcomes: Module Learning Outcomes, interpolated as-is.
        duration_weeks: Unused by this function; kept for interface
            compatibility.
        sessions_per_week: Unused by this function; kept for interface
            compatibility.

    Returns:
        str or None: The raw response text when ``parse_model_response`` can
        parse it, otherwise ``None`` (the parse error is printed).
    """
    prompt = f"""
You are an expert educational AI assistant specializing in instructional design. Your task is to further break down each module from the given course structure into submodules. Each submodule will cover specific concepts or topics within the module, along with corresponding Submodule Learning Outcomes (SMLOs).

Here are the Course Learning Outcomes (CLOs) for the course {course_name}:
{course_outcomes}
and the Module Learning Outcomes (MLOs) for each module:
{module_outcomes}

Please follow these guidelines for creating submodules and SMLOs:

1. For each module, create 2-3 submodules depending on its scope and duration.
2. Assign each submodule a clear, concise title summarizing its focus.
3. Each submodule must align with at least one Module Learning Outcome (MLO) and, by extension, its parent CLO(s).
4. For each submodule, define 1-2 Submodule Learning Outcomes (SMLOs) that are measurable, actionable, and aligned with Bloom's Taxonomy.
5. Distribute the total allocated sessions (module_duration_sessions) evenly among submodules, allowing slight variations for complex topics.
6. Ensure that submodules progress logically within the module, starting with foundational concepts and advancing to more complex topics.
7. Align the submodules with the total sessions allocated to the module to ensure they fit within the course timeline.

The JSON response should follow this structure:
{{
"module_title": "string",
"submodules": [
{{
"submodule_title": "string",
"submodule_duration_sessions": number,
"aligned_MLOs": ["MLO1", "MLO2", ...],
"submodule_learning_outcomes": [
{{
"outcome_number": "SMLO + number",
"outcome_description": "string",
"aligned_blooms_taxonomy_level": "string"
}}
]
}}
]
}}

Ensure that:
1. The sum of all submodule_duration_sessions within a module equals the module's allocated sessions (module_duration_sessions).
2. SMLOs are specific, measurable, and actionable, aligning with their respective MLOs and CLOs.
3. Submodules are logically ordered, with earlier submodules focusing on foundational concepts and later ones covering advanced topics.
4. **This Instruction is Strictly followed: DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
"""
    response = model.generate_content(
        prompt,
        generation_config=genai.GenerationConfig(
            response_mime_type="application/json"
        )
    )
    try:
        # Validation only: the caller gets the raw text back.
        parse_model_response(response.text)
    except ValueError as e:
        # parse_model_response reports failure with a plain ValueError, which
        # an `except json.JSONDecodeError` clause would not catch
        # (JSONDecodeError is a subclass of ValueError, not the reverse).
        print("Error decoding Submodules JSON response:", e)
        return None
    return response.text
208
+
209
+ import json
210
+ import ast
211
+ import re
212
+ import time
213
def parse_model_response(response_text):
    """Parse a model reply into a Python object, tolerating formatting noise.

    Markdown code fences are stripped first. The cleaned text is then tried,
    in order, as: strict JSON, a Python literal, the outermost ``{...}`` span
    as JSON, the outermost ``[...]`` span as JSON, and finally JSON after
    swapping single quotes for double quotes. The first non-empty result wins.

    Args:
        response_text (str): Raw response text from the model

    Returns:
        dict or list: Parsed response object (any other non-empty JSON or
        literal value is returned unchanged if that is what the text holds)

    Raises:
        ValueError: If ``response_text`` is not a string, if no strategy can
            parse it, or if it only parses to an empty value
    """
    if not isinstance(response_text, str):
        raise ValueError(
            "Could not parse the model's response: expected a string, "
            f"got {type(response_text).__name__}"
        )

    # Remove markdown formatting and whitespace
    cleaned_text = re.sub(r'```[a-zA-Z]*\n', '', response_text)
    cleaned_text = cleaned_text.replace('```', '').strip()

    def _candidates():
        """Yield (parser, text) pairs in the order they should be attempted."""
        # Method 1: Direct JSON parsing
        yield json.loads, cleaned_text
        # Method 2: AST literal evaluation (literals only, never executes code)
        yield ast.literal_eval, cleaned_text
        # Methods 3 and 4: content between curly braces, then square brackets.
        # A missing match is skipped instead of relying on an AttributeError.
        for pattern in (r'\{.*\}', r'\[.*\]'):
            match = re.search(pattern, cleaned_text, re.DOTALL)
            if match is not None:
                yield json.loads, match.group()
        # Method 5: Try to fix common JSON formatting issues and parse
        yield json.loads, cleaned_text.replace("'", '"').replace('\n', '\\n')

    last_error = None
    parsed_but_empty = False
    for parser, candidate_text in _candidates():
        try:
            result = parser(candidate_text)
        except (ValueError, SyntaxError, TypeError, RecursionError) as e:
            last_error = e
            continue
        if result:  # Ensure we have actual content
            return result
        parsed_but_empty = True

    if parsed_but_empty:
        # Report the real cause rather than an unrelated error from a later
        # strategy.
        raise ValueError(
            "Could not parse the model's response: it parsed to an empty value"
        )
    raise ValueError(f"Could not parse the model's response: {last_error}")
259
+
260
def extract_session_titles_concepts(session_data):
    """Extracts session titles and key concepts from the session data.

    Missing or null containers are treated as empty, so a ``None`` payload or
    a submodule with ``"sessions": null`` yields no entries instead of raising.

    Args:
        session_data (dict): Parsed JSON data with a "submodules" list whose
            items each hold a "sessions" list

    Returns:
        list: One ``{"title": ..., "key_concepts": ...}`` dict per session, in
        document order; "title" defaults to "" and "key_concepts" to []
    """
    submodules = (session_data or {}).get("submodules") or []
    return [
        {
            "title": session.get("session_title", ""),
            "key_concepts": session.get("key_concepts", []),
        }
        for submodule in submodules
        for session in (submodule.get("sessions") or [])
    ]
277
+
278
def generate_session_outcomes(course_name, course_outcomes, module_outcomes, submodule_outcomes, duration_weeks, sessions_per_week):
    """Ask the Gemini model for Session-Level Learning Outcomes (SLOs).

    Args:
        course_name (str): Course name interpolated into the prompt.
        course_outcomes: Course Learning Outcomes, interpolated as-is.
        module_outcomes: Module Learning Outcomes, interpolated as-is.
        submodule_outcomes: Submodule Learning Outcomes, interpolated as-is.
        duration_weeks: Unused by this function; kept for interface
            compatibility.
        sessions_per_week: Unused by this function; kept for interface
            compatibility.

    Returns:
        str or None: The raw response text when ``parse_model_response`` can
        parse it, otherwise ``None`` (the parse error is printed).
    """
    prompt = f"""
You are an expert educational AI assistant specializing in instructional design and assessment. Your task is to create highly focused and measurable Session-Level Learning Outcomes (SLOs) that are aligned with their parent Submodule Learning Outcomes (SMLOs) and ready to serve as the foundation for rubric-based evaluations.

### Context:
Course Name: {course_name}
Course Outcomes (CLOs): {course_outcomes}
Module Outcomes (MLOs): {module_outcomes}
Submodule Outcomes (SMLOs): {submodule_outcomes}

### Instructions for SLO Generation:
1. **For Each Submodule**: Break down its allocated sessions (submodule_duration_sessions) into Session-Level Learning Outcomes (SLOs) that:
- Are immediately actionable, measurable, and achievable within a single session (60-90 minutes).
- Are directly aligned with their parent SMLO, contributing to its achievement.
- Include observable behaviors or outputs using **action-oriented verbs** from Bloom's Taxonomy (e.g., analyze, demonstrate, create, justify).
- Are specific and detailed enough to support rubric development.

2. **Structure for Each Session**:
- **Session Title**: A concise and clear session title that captures its focus.
- **Prerequisites**: Any prerequisite knowledge or skills required.
- **Key Concepts**: Specific concepts or skills that will be covered.
- **Session Learning Outcomes (SLOs)**: Include 2-3 outcomes that:
- Define precise tasks or objectives for the session.
- Specify the expected level of understanding, skill, or performance.
- Directly support the parent SMLO while promoting progressive learning.

3. **Progressive Learning**: Ensure that earlier sessions address foundational knowledge, while later sessions build on this foundation, leading to higher-order skills and integration of concepts.

4. **Output Format**: Ensure the output follows this strict JSON structure:

"submodules": [
{{
"submodule_title": "string",
"sessions": [
{{
"session_number": number,
"session_title": "string",
"prerequisites": ["string"],
"key_concepts": ["string"],
"session_learning_outcomes": [
{{
"outcome_number": "SLO + number",
"outcome_description": "string",
"aligned_smlo": "SMLO + number",
"bloom_taxonomy_level": "string"
}}
]
}}
]
}}
]

### Example of Rubric-Ready SLOs:
- **Poor Example**: "Understand agile methodologies."
- **Good Example**: "Identify and describe the key principles of the Agile Manifesto, providing examples of how each principle applies to software development."

**Important Instructions**:
1. Ensure every SLO can be directly translated into rubric criteria (e.g., clarity, accuracy, application).
2. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT USE BACKTICKS (```), AND DO NOT INCLUDE ANY OTHER TEXT EXCEPT THE JSON RESPONSE. START WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
3. Make sure every SLO is relevant to its parent SMLO.
"""

    response = model.generate_content(
        prompt,
        generation_config=genai.GenerationConfig(
            response_mime_type="application/json"
        )
    )
    try:
        # Validation only: the caller gets the raw text back.
        parse_model_response(response.text)
    except ValueError as e:
        # parse_model_response reports failure with a plain ValueError, which
        # an `except json.JSONDecodeError` clause would not catch
        # (JSONDecodeError is a subclass of ValueError, not the reverse).
        print("Error decoding Session Learning Outcomes JSON response:", e)
        return None
    return response.text
351
+
352
+
353
def merge_course_structure(cos, module_outcomes, submodules_los, sample_sessions_se):
    """Merge outcomes, modules, submodules and sessions into one course dict.

    Each argument may be an already-parsed dict or a JSON string (the
    ``generate_*`` helpers in this module return raw JSON text). Dict
    arguments are updated in place, and the returned object is the ``cos``
    dict itself when ``cos`` was passed as a dict.

    Args:
        cos (dict or str): Course-level data; becomes the returned structure.
        module_outcomes (dict or str): Has a "modules" list of dicts keyed by
            "module_title".
        submodules_los (dict or str): Has a "modules" list whose items carry
            "module_title" and "submodules".
        sample_sessions_se (dict or str): Has a "submodules" list whose items
            carry "submodule_title" and "sessions".

    Returns:
        dict: ``cos`` with a "modules" key. Modules matched by title gain
        their "submodules"; submodules matched by title gain "sessions".

    Raises:
        json.JSONDecodeError: If a string argument is not valid JSON.
        KeyError: If a required key listed above is missing.
    """
    def _load(data):
        """Return parsed JSON for string input, or the object unchanged."""
        return json.loads(data) if isinstance(data, str) else data

    # Load JSON data
    cos_data = _load(cos)
    module_outcomes_data = _load(module_outcomes)
    submodules_los_data = _load(submodules_los)
    sample_sessions_se_data = _load(sample_sessions_se)

    # Create a mapping of submodule titles to their sessions
    submodule_sessions_map = {
        submodule['submodule_title']: submodule['sessions']
        for submodule in sample_sessions_se_data['submodules']
    }

    # Create a mapping of module titles to their submodules
    module_submodules_map = {
        module['module_title']: module['submodules']
        for module in submodules_los_data['modules']
    }

    # Merge submodules into modules
    for module in module_outcomes_data['modules']:
        module_title = module['module_title']
        if module_title not in module_submodules_map:
            # No submodule breakdown for this module; leave it untouched.
            continue
        submodules = module_submodules_map[module_title]
        for submodule in submodules:
            submodule_title = submodule['submodule_title']
            if submodule_title in submodule_sessions_map:
                submodule['sessions'] = submodule_sessions_map[submodule_title]
        module['submodules'] = submodules

    # Merge modules into course structure
    course_structure = cos_data
    course_structure['modules'] = module_outcomes_data['modules']

    return course_structure
386
+
387
def generate_session_resources(api_key, course_title, session_titles: List[str]):
    """Request learning resources for a batch of sessions from Perplexity.

    Args:
        api_key: API key handed to the OpenAI-compatible client.
        course_title: Course title interpolated into the prompt.
        session_titles: Session titles interpolated into the prompt.

    Returns:
        The message content of the first completion choice, or ``None`` when
        the request fails (the error is shown through ``st.error``).
    """
    resources_prompt = f"""
You are an expert educational content curator with deep knowledge of instructional design and high-quality resource selection. Your task is to provide session-specific learning resources and course-level reference books for the course: {course_title}.

Guidelines for Resource Curation:
1. For each session, suggest **highly relevant and accurate learning resources** based on the session title and key concepts provided.
2. For the course as a whole, provide at most two **top reference books** that comprehensively cover the course objectives, including both academic and practical perspectives.
3. Resources can include:
- **Web articles or blogs** (ensure they are from authoritative and credible sources)
- **Videos** (e.g., YouTube or other educational platforms)
- **PDFs, PPTs, or other downloadable formats**
- **Official documentation** for tools, platforms, or technologies
4. Provide **multiple resources per session**, tailored to the topic's depth and complexity. Collectively, the number should not exceed 3.
5. **IMPORTANT: MAKE SURE READINGS AND VIDEOS ARE GIVEN SEPARATELY. READINGS SHOULD NOT CONTAIN VIDEOS, IT SHOULD ONLY CONTAIN READING MATERIAL AND VICE-VERSA FOR VIDEOS**
6. Ensure all URLs are **active and accessible**. Resources must be up-to-date, and links should work reliably.
7. Reference books for the course should be **real, recently published works** and relevant to the course-level outcomes.

Output Format:
{{
"course_reference_books": [
{{
"title": "string",
"author": "string",
"publisher": "string",
"year": number,
"description": "string"
}}
],
"session_resources": [
{{
"session_title": "string",
"resources": {{
"readings": [
{{
"title": "string",
"url": "string",
"type": "string",
"estimated_read_time": "string"
}}
],
"videos": [
{{
"title": "string",
"url": "string",
"type": "string",
"duration": "string"
}}
]
}}
}}
]
}}

Additional Instructions:
- Ensure **property names are enclosed in double quotes (")** and values are properly formatted.
- Reference books should include **a brief description** to explain why they are relevant to the course.
- Responses should be concise, structured, and focused exclusively on the requested information.
- ***IMPORTANT: DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.***

Here are the session titles and key concepts for which you need to generate resources: {session_titles}.
"""

    system_message = {
        "role": "system",
        "content": "You are an expert educational content curator, focused on providing accurate and relevant learning resources.",
    }
    user_message = {
        "role": "user",
        "content": resources_prompt
    }

    try:
        # Perplexity exposes an OpenAI-compatible endpoint, hence the base_url.
        perplexity_client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        completion = perplexity_client.chat.completions.create(
            model="llama-3.1-sonar-small-128k-online",
            messages=[system_message, user_message]
        )
        content = completion.choices[0].message.content
        print("Response is: \n", content)
        # The raw text is returned; JSON decoding is left to the caller.
        return content
    except Exception as e:
        st.error(f"Failed to generate resources: {e}")
        return None
479
+
480
def generate_resources_by_titles_chunking(session_titles, course_title):
    """Generate session resources in batches of 10 titles and combine them.

    Each batch is sent to ``generate_session_resources``. A batch that fails,
    cannot be parsed, or lacks "session_resources" is reported and skipped
    without aborting the remaining batches. The combined result is also
    written to ``sample_files/session_resources2.json`` on a best-effort
    basis.

    Args:
        session_titles (list): Session titles to fetch resources for.
        course_title (str): Course title forwarded to the resource generator.

    Returns:
        dict: ``{"course_reference_books": ..., "session_resources": [...]}``.
        "course_reference_books" comes from the first batch only and is
        ``None`` when that batch did not provide it.
    """
    def chunk_list(lst, chunk_size):
        for i in range(0, len(lst), chunk_size):
            yield lst[i:i + chunk_size]

    all_session_resources = []
    course_reference_books = None  # Initialize this variable

    # Process each chunk of session titles
    chunks = list(chunk_list(session_titles, 10))
    for i, chunk in enumerate(chunks):
        raw_chunk = generate_session_resources(PERPLEXITY_API_KEY, course_title, chunk)

        # Normalize the reply to a dict, or None when it cannot be used.
        parsed_chunk = None
        if isinstance(raw_chunk, str):
            try:
                # parse_model_response also copes with code fences and text
                # around the JSON, which a bare json.loads would reject.
                parsed_chunk = parse_model_response(raw_chunk)
            except ValueError as e:
                print(f"Failed to parse session resources chunk: {e}")
        elif isinstance(raw_chunk, dict):
            parsed_chunk = raw_chunk
        if not isinstance(parsed_chunk, dict):
            parsed_chunk = None

        if parsed_chunk is not None and "session_resources" in parsed_chunk:
            all_session_resources.extend(parsed_chunk["session_resources"])
        else:
            print("Some problem occured. Session resources chunk:", raw_chunk)

        # Only take the course_reference_books from the first chunk
        if i == 0 and parsed_chunk is not None and "course_reference_books" in parsed_chunk:
            course_reference_books = parsed_chunk.get("course_reference_books", [])

        # Pause between requests; there is nothing to wait for after the last.
        if i < len(chunks) - 1:
            time.sleep(2)

    # Combine all session resources into a single dictionary
    complete_session_resources = {
        "course_reference_books": course_reference_books,
        "session_resources": all_session_resources
    }
    # Save the complete session resources to a JSON file. Creating the
    # directory and opening the file are inside the try so that a missing or
    # unwritable target does not discard the generated resources.
    output_file_path = 'sample_files/session_resources2.json'
    try:
        os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
        with open(output_file_path, 'w') as outfile:
            json.dump(complete_session_resources, outfile, indent=4)
    except (OSError, TypeError, ValueError) as e:
        print(f"Failed to save session resources to file: {e}")

    # Debug print before return
    print("Type of complete_session_resources:", type(complete_session_resources))
    print("Content of complete_session_resources:", complete_session_resources)

    return complete_session_resources
525
+
526
if __name__ == "__main__":
    # Manual smoke run: read previously generated session data from disk,
    # extract the session titles and fetch resources for them in batches.
    # The full pipeline, when run end to end, is:
    #   generate_course_outcomes -> generate_module_outcomes
    #   -> generate_submodule_outcomes -> generate_session_outcomes
    #   -> merge_course_structure
    # Load Sessions from JSON file:
    with open('sample_files/sample_sessions_se.json', 'r', encoding='utf-8') as file:
        session_data = json.load(file)
    session_titles_concepts = extract_session_titles_concepts(session_data)
    print(session_titles_concepts)

    # Extract session titles from session_titles_concepts
    session_titles = [session["title"] for session in session_titles_concepts]

    resources = generate_resources_by_titles_chunking(session_titles, "Software Engineering")
    print(resources)
606
+
607
+
608
+
609
+
db.py CHANGED
@@ -1,696 +1,696 @@
1
- # Setup for MongoDB
2
- from pymongo import MongoClient
3
- from datetime import datetime
4
- from werkzeug.security import generate_password_hash
5
- import os
6
- from dotenv import load_dotenv
7
-
8
- load_dotenv()
9
- MONGO_URI = os.getenv("MONGO_URI")
10
-
11
- client = MongoClient(MONGO_URI)
12
- try:
13
- client.admin.command("ping")
14
- print("MongoDB connection successful")
15
- except Exception as e:
16
- print(f"MongoDB connection failed: {e}")
17
-
18
- db = client["novascholar_db"]
19
-
20
- ########
21
- # Research Assistant Schema
22
- research_assistant_schema = {
23
- "bsonType": "object",
24
- "required": ["full_name", "password", "email", "courses_assisted"],
25
- "properties": {
26
- "full_name": {
27
- "bsonType": "string",
28
- "description": "Full name of the research assistant",
29
- },
30
- "password": {
31
- "bsonType": "string",
32
- "description": "Hashed password of the research assistant",
33
- },
34
- "email": {
35
- "bsonType": "string",
36
- "description": "Email address of the research assistant",
37
- },
38
- "courses_assisted": {
39
- "bsonType": "array",
40
- "description": "List of courses the research assistant is assisting",
41
- "items": {
42
- "bsonType": "object",
43
- "required": ["course_id"],
44
- "properties": {
45
- "course_id": {
46
- "bsonType": "string",
47
- "description": "ID of the course",
48
- }
49
- },
50
- },
51
- },
52
- },
53
- }
54
-
55
- # Create research assistants collection
56
- research_assistants_collection = db["research_assistants"]
57
-
58
- # Create indexes
59
- research_assistants_collection.create_index("full_name", unique=True)
60
- research_assistants_collection.create_index("email", unique=True)
61
-
62
-
63
- # Optional: Sample data insertion function
64
- def insert_sample_research_assistants():
65
- sample_research_assistants = [
66
- {
67
- "full_name": "John Doe RA",
68
- "password": generate_password_hash("password123"),
69
- "email": "[email protected]",
70
- "courses_assisted": [{"course_id": "CS101"}, {"course_id": "CS102"}],
71
- }
72
- ]
73
-
74
- try:
75
- research_assistants_collection.insert_many(sample_research_assistants)
76
- print("Sample research assistants inserted successfully!")
77
- except Exception as e:
78
- print(f"Error inserting sample research assistants: {e}")
79
-
80
-
81
- ###########
82
-
83
- ###############
84
- # Add after research assistant schema
85
-
86
- # Analyst Schema
87
- analyst_schema = {
88
- "bsonType": "object",
89
- "required": ["full_name", "password", "email", "courses_analyzed"],
90
- "properties": {
91
- "full_name": {"bsonType": "string", "description": "Full name of the analyst"},
92
- "password": {
93
- "bsonType": "string",
94
- "description": "Hashed password of the analyst",
95
- },
96
- "email": {"bsonType": "string", "description": "Email address of the analyst"},
97
- "courses_analyzed": {
98
- "bsonType": "array",
99
- "description": "List of courses the analyst is analyzing",
100
- "items": {
101
- "bsonType": "object",
102
- "required": ["course_id"],
103
- "properties": {
104
- "course_id": {
105
- "bsonType": "string",
106
- "description": "ID of the course",
107
- }
108
- },
109
- },
110
- },
111
- },
112
- }
113
-
114
- # Create analysts collection
115
- analysts_collection = db["analysts"]
116
-
117
- # Create indexes for analysts
118
- analysts_collection.create_index("full_name", unique=True)
119
- analysts_collection.create_index("email", unique=True)
120
-
121
-
122
- def insert_sample_analysts():
123
- sample_analysts = [
124
- {
125
- "full_name": "jane",
126
- "password": generate_password_hash("jane"),
127
- "email": "[email protected]",
128
- "courses_analyzed": [{"course_id": "CS101"}, {"course_id": "CS102"}],
129
- }
130
- ]
131
-
132
- try:
133
- analysts_collection.insert_many(sample_analysts)
134
- print("Sample analysts inserted successfully!")
135
- except Exception as e:
136
- print(f"Error inserting sample analysts: {e}")
137
-
138
-
139
- ##############@
140
-
141
-
142
- # Define the course schema
143
- course_schema = {
144
- "bsonType": "object",
145
- "required": [
146
- "course_id",
147
- "title",
148
- "description",
149
- "faculty",
150
- "faculty_id",
151
- "duration",
152
- "created_at",
153
- ],
154
- "properties": {
155
- "course_id": {
156
- "bsonType": "string",
157
- "description": "Unique identifier for the course",
158
- },
159
- "title": {"bsonType": "string", "description": "Title of the course"},
160
- "description": {
161
- "bsonType": "string",
162
- "description": "Description of the course",
163
- },
164
- "faculty": {"bsonType": "string", "description": "Name of the faculty"},
165
- "duration": {"bsonType": "string", "description": "Duration of the course"},
166
- "created_at": {
167
- "bsonType": "date",
168
- "description": "Date when the course was created",
169
- },
170
- "sessions": {
171
- "bsonType": "array",
172
- "description": "List of sessions associated with the course",
173
- "items": {
174
- "bsonType": "object",
175
- "required": ["session_id", "title", "date", "status", "created_at"],
176
- "properties": {
177
- "session_id": {
178
- "bsonType": "string",
179
- "description": "Unique identifier for the session",
180
- },
181
- "title": {
182
- "bsonType": "string",
183
- "description": "Title of the session",
184
- },
185
- "date": {"bsonType": "date", "description": "Date of the session"},
186
- "status": {
187
- "bsonType": "string",
188
- "description": "Status of the session (e.g., completed, upcoming)",
189
- },
190
- "created_at": {
191
- "bsonType": "date",
192
- "description": "Date when the session was created",
193
- },
194
- "pre_class": {
195
- "bsonType": "object",
196
- "description": "Pre-class segment data",
197
- "properties": {
198
- "resources": {
199
- "bsonType": "array",
200
- "description": "List of pre-class resources",
201
- "items": {
202
- "bsonType": "object",
203
- "required": ["type", "title", "url"],
204
- "properties": {
205
- "type": {
206
- "bsonType": "string",
207
- "description": "Type of resource (e.g., pdf, video)",
208
- },
209
- "title": {
210
- "bsonType": "string",
211
- "description": "Title of the resource",
212
- },
213
- "url": {
214
- "bsonType": "string",
215
- "description": "URL of the resource",
216
- },
217
- "vector": {
218
- "bsonType": "array",
219
- "description": "Vector representation of the resource",
220
- "items": {"bsonType": "double"},
221
- },
222
- },
223
- },
224
- },
225
- "completion_required": {
226
- "bsonType": "bool",
227
- "description": "Indicates if completion of pre-class resources is required",
228
- },
229
- },
230
- },
231
- "in_class": {
232
- "bsonType": "object",
233
- "description": "In-class segment data",
234
- "properties": {
235
- "topics": {
236
- "bsonType": "array",
237
- "description": "List of topics covered in the session",
238
- "items": {"bsonType": "string"},
239
- },
240
- "quiz": {
241
- "bsonType": "object",
242
- "description": "Quiz data",
243
- "properties": {
244
- "title": {
245
- "bsonType": "string",
246
- "description": "Title of the quiz",
247
- },
248
- "questions": {
249
- "bsonType": "int",
250
- "description": "Number of questions in the quiz",
251
- },
252
- "duration": {
253
- "bsonType": "int",
254
- "description": "Duration of the quiz in minutes",
255
- },
256
- },
257
- },
258
- "polls": {
259
- "bsonType": "array",
260
- "description": "List of polls conducted during the session",
261
- "items": {
262
- "bsonType": "object",
263
- "required": ["question", "options"],
264
- "properties": {
265
- "question": {
266
- "bsonType": "string",
267
- "description": "Poll question",
268
- },
269
- "options": {
270
- "bsonType": "array",
271
- "description": "List of poll options",
272
- "items": {"bsonType": "string"},
273
- },
274
- "responses": {
275
- "bsonType": "object",
276
- "description": "Responses to the poll",
277
- "additionalProperties": {"bsonType": "int"},
278
- },
279
- },
280
- },
281
- },
282
- },
283
- },
284
- "post_class": {
285
- "bsonType": "object",
286
- "description": "Post-class segment data",
287
- "properties": {
288
- "assignments": {
289
- "bsonType": "array",
290
- "description": "List of assignments",
291
- "items": {
292
- "bsonType": "object",
293
- "required": ["id", "title", "due_date", "status"],
294
- "properties": {
295
- "id": {
296
- "bsonType": "int",
297
- "description": "Assignment ID",
298
- },
299
- "title": {
300
- "bsonType": "string",
301
- "description": "Title of the assignment",
302
- },
303
- "due_date": {
304
- "bsonType": "date",
305
- "description": "Due date of the assignment",
306
- },
307
- "status": {
308
- "bsonType": "string",
309
- "description": "Status of the assignment (e.g., pending, completed)",
310
- },
311
- "submissions": {
312
- "bsonType": "array",
313
- "description": "List of submissions",
314
- "items": {
315
- "bsonType": "object",
316
- "required": [
317
- "student_id",
318
- "file_url",
319
- "submitted_at",
320
- ],
321
- "properties": {
322
- "student_id": {
323
- "bsonType": "string",
324
- "description": "ID of the student who submitted the assignment",
325
- },
326
- "file_url": {
327
- "bsonType": "string",
328
- "description": "URL of the submitted file",
329
- },
330
- "submitted_at": {
331
- "bsonType": "date",
332
- "description": "Date when the assignment was submitted",
333
- },
334
- },
335
- },
336
- },
337
- },
338
- },
339
- }
340
- },
341
- },
342
- },
343
- },
344
- },
345
- },
346
- }
347
-
348
- # Create the collection with the schema
349
- # db.create_collection("courses_collection2", validator={"$jsonSchema": course_schema})
350
-
351
- # sample_course = {
352
- # "course_id": "CS101",
353
- # "title": "Introduction to Computer Science",
354
- # "description": "This course covers the basics of computer science and programming.",
355
- # "faculty": "Dr. John Doe",
356
- # "faculty_id": "F101",
357
- # "duration": "10 weeks",
358
- # "created_at": datetime.utcnow(),
359
- # "sessions": [
360
- # {
361
- # "session_id": "S101",
362
- # "title": "Introduction to Programming Fundamentals",
363
- # "date": datetime.utcnow() - timedelta(days=7),
364
- # "status": "completed",
365
- # "created_at": datetime.utcnow() - timedelta(days=7),
366
- # "pre_class": {
367
- # "resources": [
368
- # {
369
- # "type": "pdf",
370
- # "title": "Introduction to Python Basics",
371
- # "url": "/assets/python_basics.pdf",
372
- # "vector": [0.1, 0.2, 0.3] # Example vector
373
- # }
374
- # ],
375
- # "completion_required": True
376
- # },
377
- # "in_class": {
378
- # "topics": ["Variables", "Data Types", "Basic Operations"],
379
- # "quiz": {
380
- # "title": "Python Basics Quiz",
381
- # "questions": 5,
382
- # "duration": 15
383
- # },
384
- # "polls": [
385
- # {
386
- # "question": "How comfortable are you with Python syntax?",
387
- # "options": ["Very", "Somewhat", "Not at all"],
388
- # "responses": {"Very": 10, "Somewhat": 5, "Not at all": 2}
389
- # }
390
- # ]
391
- # },
392
- # "post_class": {
393
- # "assignments": [
394
- # {
395
- # "id": 1,
396
- # "title": "Basic Python Programs",
397
- # "due_date": datetime.utcnow() + timedelta(days=2),
398
- # "status": "pending",
399
- # "submissions": []
400
- # }
401
- # ]
402
- # }
403
- # },
404
- # {
405
- # "session_id": "S102",
406
- # "title": "Control Flow and Functions",
407
- # "date": datetime.utcnow() - timedelta(days=3),
408
- # "status": "completed",
409
- # "created_at": datetime.utcnow() - timedelta(days=3),
410
- # "pre_class": {
411
- # "resources": [
412
- # {
413
- # "type": "pdf",
414
- # "title": "Control Flow in Python",
415
- # "url": "/assets/control_flow.pdf",
416
- # "vector": [0.4, 0.5, 0.6] # Example vector
417
- # }
418
- # ],
419
- # "completion_required": True
420
- # },
421
- # "in_class": {
422
- # "topics": ["If-else statements", "Loops", "Function definitions"],
423
- # "quiz": {
424
- # "title": "Control Flow Quiz",
425
- # "questions": 8,
426
- # "duration": 20
427
- # },
428
- # "polls": [
429
- # {
430
- # "question": "Which loop type do you find more intuitive?",
431
- # "options": ["For loops", "While loops", "Both"],
432
- # "responses": {"For loops": 12, "While loops": 8, "Both": 10}
433
- # }
434
- # ]
435
- # },
436
- # "post_class": {
437
- # "assignments": [
438
- # {
439
- # "id": 2,
440
- # "title": "Function Implementation Exercise",
441
- # "due_date": datetime.utcnow() + timedelta(days=4),
442
- # "status": "pending",
443
- # "submissions": []
444
- # }
445
- # ]
446
- # }
447
- # }
448
- # ]
449
- # }
450
- courses_collection2 = db["courses_collection2"]
451
-
452
-
453
- # Define the users schema
454
- users_schema = {
455
- "bsonType": "object",
456
- "required": ["user_id", "username", "password", "role", "created_at"],
457
- "properties": {
458
- "user_id": {
459
- "bsonType": "string",
460
- "description": "Unique identifier for the user",
461
- },
462
- "username": {"bsonType": "string", "description": "Name of the User"},
463
- "password": {"bsonType": "string", "description": "Password of the user"},
464
- "role": {
465
- "bsonType": "string",
466
- "description": "Type of user (e.g., student, faculty)",
467
- },
468
- "created_at": {
469
- "bsonType": "date",
470
- "description": "Date when the user was created",
471
- },
472
- },
473
- }
474
- # Create the collection with the schema
475
- # db.create_collection("users", validator={"$jsonSchema": users_schema})
476
- users_collection = db["users"]
477
-
478
-
479
- # Defining the Student Collection
480
- student_schema = {
481
- "bsonType": "object",
482
- "required": ["SID", "full_name", "password", "enrolled_courses", "created_at"],
483
- "properties": {
484
- "SID": {
485
- "bsonType": "string",
486
- "description": "Unique identifier for the student",
487
- },
488
- "full_name": {"bsonType": "string", "description": "Full name of the student"},
489
- "password": {
490
- "bsonType": "string",
491
- "description": "Hashed password of the student",
492
- },
493
- "enrolled_courses": {
494
- "bsonType": "array",
495
- "description": "List of courses the student is enrolled in",
496
- "items": {
497
- "bsonType": "object",
498
- "required": ["course_id", "title"],
499
- "properties": {
500
- "course_id": {
501
- "bsonType": "string",
502
- "description": "Unique identifier for the course",
503
- },
504
- "title": {
505
- "bsonType": "string",
506
- "description": "Title of the course",
507
- },
508
- },
509
- },
510
- },
511
- "created_at": {
512
- "bsonType": "date",
513
- "description": "Date when the student was created",
514
- },
515
- },
516
- }
517
- # Defining the Faculty Collection
518
- faculty_schema = {
519
- "bsonType": "object",
520
- "required": ["TID", "full_name", "password", "courses_taught", "created_at"],
521
- "properties": {
522
- "TID": {
523
- "bsonType": "string",
524
- "description": "Unique identifier for the faculty",
525
- },
526
- "full_name": {"bsonType": "string", "description": "Full name of the faculty"},
527
- "password": {
528
- "bsonType": "string",
529
- "description": "Hashed password of the faculty",
530
- },
531
- "courses_taught": {
532
- "bsonType": "array",
533
- "description": "List of courses the faculty is teaching",
534
- "items": {
535
- "bsonType": "object",
536
- "required": ["course_id", "title"],
537
- "properties": {
538
- "course_id": {
539
- "bsonType": "string",
540
- "description": "Unique identifier for the course",
541
- },
542
- "title": {
543
- "bsonType": "string",
544
- "description": "Title of the course",
545
- },
546
- },
547
- },
548
- },
549
- "created_at": {
550
- "bsonType": "date",
551
- "description": "Date when the faculty was created",
552
- },
553
- },
554
- }
555
- # Creating the Collections
556
- # db.create_collection("students", validator={"$jsonSchema": student_schema})
557
- # db.create_collection("faculty", validator={"$jsonSchema": faculty_schema})
558
-
559
- students_collection = db["students"]
560
- faculty_collection = db["faculty"]
561
-
562
- # Defining the Vector Collection Schema
563
- vector_schema = {
564
- "bsonType": "object",
565
- "required": ["resource_id", "vector"],
566
- "properties": {
567
- "resource_id": {
568
- "bsonType": "objectId",
569
- "description": "Unique identifier for the resource",
570
- },
571
- "vector": {
572
- "bsonType": "array",
573
- "description": "Vector representation of the resource",
574
- "items": {"bsonType": "double"},
575
- },
576
- "text": {"bsonType": "string", "description": "Text content of the resource"},
577
- "created_at": {
578
- "bsonType": "date",
579
- "description": "Date when the vector was created",
580
- },
581
- },
582
- }
583
- # Creating the Vector Collection
584
- # db.create_collection("vectors", validator={"$jsonSchema": vector_schema})
585
- vectors_collection = db["vectors"]
586
-
587
-
588
- # Creating a Chat-History Collection
589
- # Creating a Chat-History Collection
590
- chat_history_schema = {
591
- "bsonType": "object",
592
- "required": ["user_id", "session_id", "messages", "timestamp"],
593
- "properties": {
594
- "user_id": {
595
- "bsonType": "objectId",
596
- "description": "Unique identifier for the user",
597
- },
598
- "session_id": {
599
- "bsonType": "string",
600
- "description": "Identifier for the session",
601
- },
602
- "timestamp": {
603
- "bsonType": "date",
604
- "description": "Timestamp when the chat session started",
605
- },
606
- "messages": {
607
- "bsonType": "array",
608
- "description": "List of chat messages",
609
- "items": {
610
- "bsonType": "object",
611
- "properties": {
612
- "prompt": {
613
- "bsonType": "string",
614
- "description": "User's question or prompt",
615
- },
616
- "response": {
617
- "bsonType": "string",
618
- "description": "Assistant's response",
619
- },
620
- "timestamp": {
621
- "bsonType": "date",
622
- "description": "Timestamp of the message",
623
- },
624
- },
625
- },
626
- },
627
- },
628
- }
629
-
630
- # Create the collection with the schema
631
- # db.create_collection("chat_history", validator={"$jsonSchema": chat_history_schema})
632
- chat_history_collection = db["chat_history"]
633
-
634
-
635
- # Database setup for Research Assistant
636
- # Research Assistant Schema
637
- research_assistant_schema = {
638
- "bsonType": "object",
639
- "required": ["full_name", "password", "email", "courses_assisted"],
640
- "properties": {
641
- "full_name": {
642
- "bsonType": "string",
643
- "description": "Full name of the research assistant",
644
- },
645
- "password": {
646
- "bsonType": "string",
647
- "description": "Hashed password of the research assistant",
648
- },
649
- "email": {
650
- "bsonType": "string",
651
- "description": "Email address of the research assistant",
652
- },
653
- "courses_assisted": {
654
- "bsonType": "array",
655
- "description": "List of courses the research assistant is assisting",
656
- "items": {
657
- "bsonType": "object",
658
- "required": ["course_id"],
659
- "properties": {
660
- "course_id": {
661
- "bsonType": "string",
662
- "description": "ID of the course",
663
- }
664
- },
665
- },
666
- },
667
- },
668
- }
669
-
670
- # Create research assistants collection
671
- research_assistants_collection = db["research_assistants"]
672
-
673
- # Create indexes
674
- research_assistants_collection.create_index("full_name", unique=True)
675
- research_assistants_collection.create_index("email", unique=True)
676
-
677
-
678
- # Optional: Sample data insertion function
679
- # def insert_sample_research_assistants():
680
- # sample_research_assistants = [
681
- # {
682
- # "full_name": "John Doe RA",
683
- # "password": generate_password_hash("password123"),
684
- # "email": "[email protected]",
685
- # "courses_assisted": [{"course_id": "CS101"}, {"course_id": "CS102"}],
686
- # }
687
- # ]
688
-
689
- # try:
690
- # research_assistants_collection.insert_many(sample_research_assistants)
691
- # print("Sample research assistants inserted successfully!")
692
- # except Exception as e:
693
- # print(f"Error inserting sample research assistants: {e}")
694
-
695
- # if __name__ == "__main__":
696
- # insert_sample_analysts()
 
1
+ # Setup for MongoDB
2
+ from pymongo import MongoClient
3
+ from datetime import datetime
4
+ from werkzeug.security import generate_password_hash
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
# Load environment configuration first, then read the MongoDB connection
# string from the MONGO_URI environment variable.
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")

client = MongoClient(MONGO_URI)

# Connectivity probe: a failed ping is reported on stdout instead of being
# raised, so execution continues to the database handle below either way.
try:
    client.admin.command("ping")
except Exception as ping_error:
    print(f"MongoDB connection failed: {ping_error}")
else:
    print("MongoDB connection successful")

# Database handle shared by every collection defined in this module.
db = client["novascholar_db"]
19
+
20
+ ########
21
+ # Research Assistant Schema
22
# Shape of one entry in "courses_assisted": an object that must carry a
# string "course_id".
_assisted_course_entry = {
    "bsonType": "object",
    "required": ["course_id"],
    "properties": {
        "course_id": {"bsonType": "string", "description": "ID of the course"},
    },
}

# JSON-schema style description of a research-assistant document.
# This block only defines the dictionary; it does not attach it to the
# collection as a validator.
research_assistant_schema = {
    "bsonType": "object",
    "required": ["full_name", "password", "email", "courses_assisted"],
    "properties": {
        "full_name": {
            "bsonType": "string",
            "description": "Full name of the research assistant",
        },
        "password": {
            "bsonType": "string",
            "description": "Hashed password of the research assistant",
        },
        "email": {
            "bsonType": "string",
            "description": "Email address of the research assistant",
        },
        "courses_assisted": {
            "bsonType": "array",
            "description": "List of courses the research assistant is assisting",
            "items": _assisted_course_entry,
        },
    },
}

# Handle for the research assistants collection.
research_assistants_collection = db["research_assistants"]

# Unique indexes, created in the same order as before: full_name, then email.
for _unique_field in ("full_name", "email"):
    research_assistants_collection.create_index(_unique_field, unique=True)
61
+
62
+
63
+ # Optional: Sample data insertion function
64
def insert_sample_research_assistants():
    """Insert one hard-coded demo research assistant and report the outcome.

    The password is hashed with ``generate_password_hash`` before the insert.
    Any exception raised by the insert is caught and printed to stdout rather
    than propagated; the function returns ``None`` in both cases.
    """
    demo_assistant = {
        "full_name": "John Doe RA",
        "password": generate_password_hash("password123"),
        "email": "[email protected]",
        "courses_assisted": [{"course_id": code} for code in ("CS101", "CS102")],
    }

    try:
        research_assistants_collection.insert_many([demo_assistant])
    except Exception as insert_error:
        print(f"Error inserting sample research assistants: {insert_error}")
    else:
        print("Sample research assistants inserted successfully!")
79
+
80
+
81
+ ###########
82
+
83
+ ###############
84
+ # Add after research assistant schema
85
+
86
+ # Analyst Schema
87
# Shape of one entry in "courses_analyzed": an object that must carry a
# string "course_id".
_analyzed_course_entry = {
    "bsonType": "object",
    "required": ["course_id"],
    "properties": {
        "course_id": {"bsonType": "string", "description": "ID of the course"},
    },
}

# JSON-schema style description of an analyst document.
# This block only defines the dictionary; it does not attach it to the
# collection as a validator.
analyst_schema = {
    "bsonType": "object",
    "required": ["full_name", "password", "email", "courses_analyzed"],
    "properties": {
        "full_name": {
            "bsonType": "string",
            "description": "Full name of the analyst",
        },
        "password": {
            "bsonType": "string",
            "description": "Hashed password of the analyst",
        },
        "email": {
            "bsonType": "string",
            "description": "Email address of the analyst",
        },
        "courses_analyzed": {
            "bsonType": "array",
            "description": "List of courses the analyst is analyzing",
            "items": _analyzed_course_entry,
        },
    },
}

# Handle for the analysts collection.
analysts_collection = db["analysts"]

# Unique indexes, created in the same order as before: full_name, then email.
for _unique_field in ("full_name", "email"):
    analysts_collection.create_index(_unique_field, unique=True)
120
+
121
+
122
def insert_sample_analysts():
    """Insert one hard-coded demo analyst and report the outcome on stdout.

    The password is hashed with ``generate_password_hash`` before the insert.
    Any exception raised by the insert is caught and printed rather than
    propagated; the function returns ``None`` in both cases.
    """
    demo_analyst = {
        "full_name": "jane",
        "password": generate_password_hash("jane"),
        "email": "[email protected]",
        "courses_analyzed": [{"course_id": code} for code in ("CS101", "CS102")],
    }

    try:
        analysts_collection.insert_many([demo_analyst])
    except Exception as insert_error:
        print(f"Error inserting sample analysts: {insert_error}")
    else:
        print("Sample analysts inserted successfully!")
137
+
138
+
139
+ ##############@
140
+
141
+
142
+ # Define the course schema
143
# JSON-schema style description of a course document.
#
# Layout: course-level fields, then an optional "sessions" array whose items
# each carry three optional segments: "pre_class", "in_class" and
# "post_class".
#
# Every name listed under "required" is also declared under "properties".
# "faculty_id" was previously required but undeclared, so it had no type; it
# is declared as a string here, matching the "F101" style value used in the
# commented-out sample course.
course_schema = {
    "bsonType": "object",
    "required": [
        "course_id",
        "title",
        "description",
        "faculty",
        "faculty_id",
        "duration",
        "created_at",
    ],
    "properties": {
        "course_id": {
            "bsonType": "string",
            "description": "Unique identifier for the course",
        },
        "title": {"bsonType": "string", "description": "Title of the course"},
        "description": {
            "bsonType": "string",
            "description": "Description of the course",
        },
        "faculty": {"bsonType": "string", "description": "Name of the faculty"},
        "faculty_id": {
            "bsonType": "string",
            "description": "Unique identifier for the faculty",
        },
        "duration": {"bsonType": "string", "description": "Duration of the course"},
        "created_at": {
            "bsonType": "date",
            "description": "Date when the course was created",
        },
        "sessions": {
            "bsonType": "array",
            "description": "List of sessions associated with the course",
            "items": {
                "bsonType": "object",
                "required": ["session_id", "title", "date", "status", "created_at"],
                "properties": {
                    "session_id": {
                        "bsonType": "string",
                        "description": "Unique identifier for the session",
                    },
                    "title": {
                        "bsonType": "string",
                        "description": "Title of the session",
                    },
                    "date": {"bsonType": "date", "description": "Date of the session"},
                    "status": {
                        "bsonType": "string",
                        "description": "Status of the session (e.g., completed, upcoming)",
                    },
                    "created_at": {
                        "bsonType": "date",
                        "description": "Date when the session was created",
                    },
                    # Material students are given before the session.
                    "pre_class": {
                        "bsonType": "object",
                        "description": "Pre-class segment data",
                        "properties": {
                            "resources": {
                                "bsonType": "array",
                                "description": "List of pre-class resources",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["type", "title", "url"],
                                    "properties": {
                                        "type": {
                                            "bsonType": "string",
                                            "description": "Type of resource (e.g., pdf, video)",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the resource",
                                        },
                                        "url": {
                                            "bsonType": "string",
                                            "description": "URL of the resource",
                                        },
                                        "vector": {
                                            "bsonType": "array",
                                            "description": "Vector representation of the resource",
                                            "items": {"bsonType": "double"},
                                        },
                                    },
                                },
                            },
                            "completion_required": {
                                "bsonType": "bool",
                                "description": "Indicates if completion of pre-class resources is required",
                            },
                        },
                    },
                    # Topics, quiz metadata and polls for the live session.
                    "in_class": {
                        "bsonType": "object",
                        "description": "In-class segment data",
                        "properties": {
                            "topics": {
                                "bsonType": "array",
                                "description": "List of topics covered in the session",
                                "items": {"bsonType": "string"},
                            },
                            "quiz": {
                                "bsonType": "object",
                                "description": "Quiz data",
                                "properties": {
                                    "title": {
                                        "bsonType": "string",
                                        "description": "Title of the quiz",
                                    },
                                    "questions": {
                                        "bsonType": "int",
                                        "description": "Number of questions in the quiz",
                                    },
                                    "duration": {
                                        "bsonType": "int",
                                        "description": "Duration of the quiz in minutes",
                                    },
                                },
                            },
                            "polls": {
                                "bsonType": "array",
                                "description": "List of polls conducted during the session",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["question", "options"],
                                    "properties": {
                                        "question": {
                                            "bsonType": "string",
                                            "description": "Poll question",
                                        },
                                        "options": {
                                            "bsonType": "array",
                                            "description": "List of poll options",
                                            "items": {"bsonType": "string"},
                                        },
                                        # Arbitrary keys, each mapped to an int.
                                        "responses": {
                                            "bsonType": "object",
                                            "description": "Responses to the poll",
                                            "additionalProperties": {"bsonType": "int"},
                                        },
                                    },
                                },
                            },
                        },
                    },
                    # Assignments and their submissions after the session.
                    "post_class": {
                        "bsonType": "object",
                        "description": "Post-class segment data",
                        "properties": {
                            "assignments": {
                                "bsonType": "array",
                                "description": "List of assignments",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["id", "title", "due_date", "status"],
                                    "properties": {
                                        "id": {
                                            "bsonType": "int",
                                            "description": "Assignment ID",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the assignment",
                                        },
                                        "due_date": {
                                            "bsonType": "date",
                                            "description": "Due date of the assignment",
                                        },
                                        "status": {
                                            "bsonType": "string",
                                            "description": "Status of the assignment (e.g., pending, completed)",
                                        },
                                        "submissions": {
                                            "bsonType": "array",
                                            "description": "List of submissions",
                                            "items": {
                                                "bsonType": "object",
                                                "required": [
                                                    "student_id",
                                                    "file_url",
                                                    "submitted_at",
                                                ],
                                                "properties": {
                                                    "student_id": {
                                                        "bsonType": "string",
                                                        "description": "ID of the student who submitted the assignment",
                                                    },
                                                    "file_url": {
                                                        "bsonType": "string",
                                                        "description": "URL of the submitted file",
                                                    },
                                                    "submitted_at": {
                                                        "bsonType": "date",
                                                        "description": "Date when the assignment was submitted",
                                                    },
                                                },
                                            },
                                        },
                                    },
                                },
                            }
                        },
                    },
                },
            },
        },
    },
}
347
+
348
+ # Create the collection with the schema
349
+ # db.create_collection("courses_collection2", validator={"$jsonSchema": course_schema})
350
+
351
+ # sample_course = {
352
+ # "course_id": "CS101",
353
+ # "title": "Introduction to Computer Science",
354
+ # "description": "This course covers the basics of computer science and programming.",
355
+ # "faculty": "Dr. John Doe",
356
+ # "faculty_id": "F101",
357
+ # "duration": "10 weeks",
358
+ # "created_at": datetime.utcnow(),
359
+ # "sessions": [
360
+ # {
361
+ # "session_id": "S101",
362
+ # "title": "Introduction to Programming Fundamentals",
363
+ # "date": datetime.utcnow() - timedelta(days=7),
364
+ # "status": "completed",
365
+ # "created_at": datetime.utcnow() - timedelta(days=7),
366
+ # "pre_class": {
367
+ # "resources": [
368
+ # {
369
+ # "type": "pdf",
370
+ # "title": "Introduction to Python Basics",
371
+ # "url": "/assets/python_basics.pdf",
372
+ # "vector": [0.1, 0.2, 0.3] # Example vector
373
+ # }
374
+ # ],
375
+ # "completion_required": True
376
+ # },
377
+ # "in_class": {
378
+ # "topics": ["Variables", "Data Types", "Basic Operations"],
379
+ # "quiz": {
380
+ # "title": "Python Basics Quiz",
381
+ # "questions": 5,
382
+ # "duration": 15
383
+ # },
384
+ # "polls": [
385
+ # {
386
+ # "question": "How comfortable are you with Python syntax?",
387
+ # "options": ["Very", "Somewhat", "Not at all"],
388
+ # "responses": {"Very": 10, "Somewhat": 5, "Not at all": 2}
389
+ # }
390
+ # ]
391
+ # },
392
+ # "post_class": {
393
+ # "assignments": [
394
+ # {
395
+ # "id": 1,
396
+ # "title": "Basic Python Programs",
397
+ # "due_date": datetime.utcnow() + timedelta(days=2),
398
+ # "status": "pending",
399
+ # "submissions": []
400
+ # }
401
+ # ]
402
+ # }
403
+ # },
404
+ # {
405
+ # "session_id": "S102",
406
+ # "title": "Control Flow and Functions",
407
+ # "date": datetime.utcnow() - timedelta(days=3),
408
+ # "status": "completed",
409
+ # "created_at": datetime.utcnow() - timedelta(days=3),
410
+ # "pre_class": {
411
+ # "resources": [
412
+ # {
413
+ # "type": "pdf",
414
+ # "title": "Control Flow in Python",
415
+ # "url": "/assets/control_flow.pdf",
416
+ # "vector": [0.4, 0.5, 0.6] # Example vector
417
+ # }
418
+ # ],
419
+ # "completion_required": True
420
+ # },
421
+ # "in_class": {
422
+ # "topics": ["If-else statements", "Loops", "Function definitions"],
423
+ # "quiz": {
424
+ # "title": "Control Flow Quiz",
425
+ # "questions": 8,
426
+ # "duration": 20
427
+ # },
428
+ # "polls": [
429
+ # {
430
+ # "question": "Which loop type do you find more intuitive?",
431
+ # "options": ["For loops", "While loops", "Both"],
432
+ # "responses": {"For loops": 12, "While loops": 8, "Both": 10}
433
+ # }
434
+ # ]
435
+ # },
436
+ # "post_class": {
437
+ # "assignments": [
438
+ # {
439
+ # "id": 2,
440
+ # "title": "Function Implementation Exercise",
441
+ # "due_date": datetime.utcnow() + timedelta(days=4),
442
+ # "status": "pending",
443
+ # "submissions": []
444
+ # }
445
+ # ]
446
+ # }
447
+ # }
448
+ # ]
449
+ # }
450
+ courses_collection2 = db["courses_collection2"]
451
+
452
+
453
+ # Define the users schema
454
+ users_schema = {
455
+ "bsonType": "object",
456
+ "required": ["user_id", "username", "password", "role", "created_at"],
457
+ "properties": {
458
+ "user_id": {
459
+ "bsonType": "string",
460
+ "description": "Unique identifier for the user",
461
+ },
462
+ "username": {"bsonType": "string", "description": "Name of the User"},
463
+ "password": {"bsonType": "string", "description": "Password of the user"},
464
+ "role": {
465
+ "bsonType": "string",
466
+ "description": "Type of user (e.g., student, faculty)",
467
+ },
468
+ "created_at": {
469
+ "bsonType": "date",
470
+ "description": "Date when the user was created",
471
+ },
472
+ },
473
+ }
474
+ # Create the collection with the schema
475
+ # db.create_collection("users", validator={"$jsonSchema": users_schema})
476
+ users_collection = db["users"]
477
+
478
+
479
+ # Defining the Student Collection
480
+ student_schema = {
481
+ "bsonType": "object",
482
+ "required": ["SID", "full_name", "password", "enrolled_courses", "created_at"],
483
+ "properties": {
484
+ "SID": {
485
+ "bsonType": "string",
486
+ "description": "Unique identifier for the student",
487
+ },
488
+ "full_name": {"bsonType": "string", "description": "Full name of the student"},
489
+ "password": {
490
+ "bsonType": "string",
491
+ "description": "Hashed password of the student",
492
+ },
493
+ "enrolled_courses": {
494
+ "bsonType": "array",
495
+ "description": "List of courses the student is enrolled in",
496
+ "items": {
497
+ "bsonType": "object",
498
+ "required": ["course_id", "title"],
499
+ "properties": {
500
+ "course_id": {
501
+ "bsonType": "string",
502
+ "description": "Unique identifier for the course",
503
+ },
504
+ "title": {
505
+ "bsonType": "string",
506
+ "description": "Title of the course",
507
+ },
508
+ },
509
+ },
510
+ },
511
+ "created_at": {
512
+ "bsonType": "date",
513
+ "description": "Date when the student was created",
514
+ },
515
+ },
516
+ }
517
+ # Defining the Faculty Collection
518
+ faculty_schema = {
519
+ "bsonType": "object",
520
+ "required": ["TID", "full_name", "password", "courses_taught", "created_at"],
521
+ "properties": {
522
+ "TID": {
523
+ "bsonType": "string",
524
+ "description": "Unique identifier for the faculty",
525
+ },
526
+ "full_name": {"bsonType": "string", "description": "Full name of the faculty"},
527
+ "password": {
528
+ "bsonType": "string",
529
+ "description": "Hashed password of the faculty",
530
+ },
531
+ "courses_taught": {
532
+ "bsonType": "array",
533
+ "description": "List of courses the faculty is teaching",
534
+ "items": {
535
+ "bsonType": "object",
536
+ "required": ["course_id", "title"],
537
+ "properties": {
538
+ "course_id": {
539
+ "bsonType": "string",
540
+ "description": "Unique identifier for the course",
541
+ },
542
+ "title": {
543
+ "bsonType": "string",
544
+ "description": "Title of the course",
545
+ },
546
+ },
547
+ },
548
+ },
549
+ "created_at": {
550
+ "bsonType": "date",
551
+ "description": "Date when the faculty was created",
552
+ },
553
+ },
554
+ }
555
+ # Creating the Collections
556
+ # db.create_collection("students", validator={"$jsonSchema": student_schema})
557
+ # db.create_collection("faculty", validator={"$jsonSchema": faculty_schema})
558
+
559
+ students_collection = db["students"]
560
+ faculty_collection = db["faculty"]
561
+
562
+ # Defining the Vector Collection Schema
563
+ vector_schema = {
564
+ "bsonType": "object",
565
+ "required": ["resource_id", "vector"],
566
+ "properties": {
567
+ "resource_id": {
568
+ "bsonType": "objectId",
569
+ "description": "Unique identifier for the resource",
570
+ },
571
+ "vector": {
572
+ "bsonType": "array",
573
+ "description": "Vector representation of the resource",
574
+ "items": {"bsonType": "double"},
575
+ },
576
+ "text": {"bsonType": "string", "description": "Text content of the resource"},
577
+ "created_at": {
578
+ "bsonType": "date",
579
+ "description": "Date when the vector was created",
580
+ },
581
+ },
582
+ }
583
+ # Creating the Vector Collection
584
+ # db.create_collection("vectors", validator={"$jsonSchema": vector_schema})
585
+ vectors_collection = db["vectors"]
586
+
587
+
588
+ # Creating a Chat-History Collection
589
+ # Schema: one document per chat session, holding its list of prompt/response messages
590
+ chat_history_schema = {
591
+ "bsonType": "object",
592
+ "required": ["user_id", "session_id", "messages", "timestamp"],
593
+ "properties": {
594
+ "user_id": {
595
+ "bsonType": "objectId",
596
+ "description": "Unique identifier for the user",
597
+ },
598
+ "session_id": {
599
+ "bsonType": "string",
600
+ "description": "Identifier for the session",
601
+ },
602
+ "timestamp": {
603
+ "bsonType": "date",
604
+ "description": "Timestamp when the chat session started",
605
+ },
606
+ "messages": {
607
+ "bsonType": "array",
608
+ "description": "List of chat messages",
609
+ "items": {
610
+ "bsonType": "object",
611
+ "properties": {
612
+ "prompt": {
613
+ "bsonType": "string",
614
+ "description": "User's question or prompt",
615
+ },
616
+ "response": {
617
+ "bsonType": "string",
618
+ "description": "Assistant's response",
619
+ },
620
+ "timestamp": {
621
+ "bsonType": "date",
622
+ "description": "Timestamp of the message",
623
+ },
624
+ },
625
+ },
626
+ },
627
+ },
628
+ }
629
+
630
+ # Create the collection with the schema
631
+ # db.create_collection("chat_history", validator={"$jsonSchema": chat_history_schema})
632
+ chat_history_collection = db["chat_history"]
633
+
634
+
635
+ # Database setup for Research Assistant
636
+ # Research Assistant Schema
637
+ research_assistant_schema = {
638
+ "bsonType": "object",
639
+ "required": ["full_name", "password", "email", "courses_assisted"],
640
+ "properties": {
641
+ "full_name": {
642
+ "bsonType": "string",
643
+ "description": "Full name of the research assistant",
644
+ },
645
+ "password": {
646
+ "bsonType": "string",
647
+ "description": "Hashed password of the research assistant",
648
+ },
649
+ "email": {
650
+ "bsonType": "string",
651
+ "description": "Email address of the research assistant",
652
+ },
653
+ "courses_assisted": {
654
+ "bsonType": "array",
655
+ "description": "List of courses the research assistant is assisting",
656
+ "items": {
657
+ "bsonType": "object",
658
+ "required": ["course_id"],
659
+ "properties": {
660
+ "course_id": {
661
+ "bsonType": "string",
662
+ "description": "ID of the course",
663
+ }
664
+ },
665
+ },
666
+ },
667
+ },
668
+ }
669
+
670
+ # Create research assistants collection
671
+ research_assistants_collection = db["research_assistants"]
672
+
673
+ # Create indexes
674
+ research_assistants_collection.create_index("full_name", unique=True)
675
+ research_assistants_collection.create_index("email", unique=True)
676
+
677
+
678
+ # Optional: Sample data insertion function
679
+ # def insert_sample_research_assistants():
680
+ # sample_research_assistants = [
681
+ # {
682
+ # "full_name": "John Doe RA",
683
+ # "password": generate_password_hash("password123"),
684
+ # "email": "[email protected]",
685
+ # "courses_assisted": [{"course_id": "CS101"}, {"course_id": "CS102"}],
686
+ # }
687
+ # ]
688
+
689
+ # try:
690
+ # research_assistants_collection.insert_many(sample_research_assistants)
691
+ # print("Sample research assistants inserted successfully!")
692
+ # except Exception as e:
693
+ # print(f"Error inserting sample research assistants: {e}")
694
+
695
+ # if __name__ == "__main__":
696
+ # insert_sample_research_assistants()
entire_download.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from pymongo import MongoClient
4
+ from dotenv import load_dotenv
5
+ import os
6
+
7
# 1. Load environment variables
load_dotenv()

# The connection string must come from the environment; credentials do not
# belong in source control. "MONGODB_URI" is the intended name, "MONGO_URI"
# is the name the other modules of this project read, and "MONGODB_UR" is the
# misspelled name this script originally looked up (kept so that an existing
# .env relying on it keeps working).
MONGODB_URI = (
    os.getenv("MONGODB_URI")
    or os.getenv("MONGO_URI")
    or os.getenv("MONGODB_UR")
)
if not MONGODB_URI:
    raise RuntimeError(
        "MongoDB connection string is not configured; "
        "set MONGODB_URI (or MONGO_URI) in the environment or .env file."
    )

# 2. Create MongoDB connection
client = MongoClient(MONGODB_URI)
db = client["novascholar_db"]
collection = db["research_papers"]
18
+
19
+
20
def get_collection_data(paper_type: str):
    """
    Return every document stored for the given paper type.

    The collection is looked up under the paper type with spaces replaced by
    underscores and the result lower-cased. Each document's ``_id`` is
    converted to ``str`` so the result can be tabulated.

    Returns the list of documents, or ``None`` after reporting the problem
    through ``st.error`` when anything goes wrong.
    """
    try:
        target = db[paper_type.replace(" ", "_").lower()]
        records = list(target.find())
        # ObjectId values are replaced by their string form
        for record in records:
            record["_id"] = str(record["_id"])
    except Exception as e:
        st.error(f"Database Error: {str(e)}")
        return None
    return records
40
+
41
+
42
def main():
    """Render the download page: pick a paper type, fetch its documents, offer a CSV.

    The documents are fetched through ``get_collection_data`` when the
    "Fetch Data" button is pressed, shown as a table, and offered as a CSV
    download.
    """
    st.title("MongoDB Collection Download")
    st.write("Download all documents from the selected research paper collection")

    # Dropdown to select the type of research paper
    paper_type = st.selectbox(
        "Select type of research paper:",
        [
            "Review Based Paper",
            "Opinion/Perspective Based Paper",
            "Empirical Research Paper",
            "Research Paper (Other)",
        ],
    )

    if not st.button("Fetch Data"):
        return

    with st.spinner("Retrieving documents from MongoDB..."):
        docs = get_collection_data(paper_type)

    if docs is None:
        # The fetch failed and get_collection_data has already shown the
        # error; do not additionally claim that the collection is empty.
        return
    if not docs:
        st.warning(f"No documents found in the '{paper_type}' collection.")
        return

    df = pd.DataFrame(docs)
    # Convert lists to comma-separated strings for consistency
    for col in df.columns:
        if df[col].apply(lambda x: isinstance(x, list)).any():
            df[col] = df[col].apply(
                lambda x: ", ".join(map(str, x)) if isinstance(x, list) else x
            )
    st.success(
        f"Successfully retrieved {len(df)} documents from '{paper_type}' collection."
    )
    st.dataframe(df)

    # Paper types contain "/", "(" and ")", which are not safe in a file
    # name; keep only letters and digits, joined by single underscores.
    safe_name = "_".join(
        "".join(ch if ch.isalnum() else " " for ch in paper_type.lower()).split()
    )

    # Provide option to download the data as CSV
    csv = df.to_csv(index=False).encode("utf-8")
    st.download_button(
        label="Download CSV",
        data=csv,
        file_name=f"{safe_name}_papers.csv",
        mime="text/csv",
    )


if __name__ == "__main__":
    main()
extract.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import PyPDF2
4
+ import io
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import requests
8
+ import time
9
+
10
+ # Load environment variables
11
+ load_dotenv()
12
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
13
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
14
+
15
def call_perplexity_api(prompt: str) -> str:
    """Send a single-message chat request to Perplexity AI.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The content of the first choice in the response, or an empty string
        when the API key is missing or the request/response handling fails
        (the problem is reported through ``st.error``).
    """
    if not PERPLEXITY_API_KEY:
        # Without a key the request would be sent as "Bearer None".
        st.error("API Error: PERPLEXITY_API_KEY is not set")
        return ""

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }

    try:
        # requests waits indefinitely unless a timeout is given; bound the
        # call so one stalled request cannot hang the whole app run.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=60
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Best-effort by design: callers receive "" and carry on.
        st.error(f"API Error: {str(e)}")
        return ""
35
+
36
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of ``pdf_file``, each followed by a newline."""
    reader = PyPDF2.PdfReader(pdf_file)
    return "".join(page.extract_text() + "\n" for page in reader.pages)
43
+
44
def analyze_paper(text: str, category: str) -> str:
    """Ask the Perplexity API about one analysis category of a paper.

    The instruction for ``category`` is combined with the first 5000
    characters of ``text`` and sent through ``call_perplexity_api``; its
    return value is passed back unchanged. An unknown ``category`` raises
    ``KeyError``.
    """
    instructions = {
        "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
        "Results": "What are the main results and findings from this research paper:",
        "Summarized Introduction": "Summarize the introduction section of this research paper:",
        "Methods Used": "What are the main methods and methodologies used in this research:",
        "Literature Survey": "Summarize the literature review or related work from this paper:",
        "Limitations": "What are the limitations mentioned in this research:",
        "Contributions": "What are the main contributions of this research:",
        "Practical Implications": "What are the practical implications of this research:",
        "Objectives": "What are the main objectives of this research:",
        "Findings": "What are the key findings from this research:",
        "Future Research": "What future research directions are suggested in this paper:",
        "Dependent Variables": "What are the dependent variables studied in this research:",
        "Independent Variables": "What are the independent variables studied in this research:",
        "Dataset": "What dataset(s) were used in this research:",
        "Problem Statement": "What is the main problem statement or research question:",
        "Challenges": "What challenges were faced or addressed in this research:",
        "Applications": "What are the potential applications of this research:",
    }

    instruction = instructions[category]
    excerpt = text[:5000]  # Limit text to avoid token limits
    return call_perplexity_api(f"{instruction}\n\nPaper text: {excerpt}")
68
+
69
def main():
    """Streamlit page: upload PDFs, analyse each one per category, show and export the table."""
    st.title("Research Paper Analysis Tool")

    # File uploader
    uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)

    # Nothing to do until files are uploaded and the button is pressed
    if not uploaded_files:
        return
    if not st.button("Process Papers"):
        return

    progress_bar = st.progress(0)
    status_text = st.empty()

    categories = [
        "Summarized Abstract", "Results", "Summarized Introduction",
        "Methods Used", "Literature Survey", "Limitations",
        "Contributions", "Practical Implications", "Objectives",
        "Findings", "Future Research", "Dependent Variables",
        "Independent Variables", "Dataset", "Problem Statement",
        "Challenges", "Applications"
    ]

    total_steps = len(uploaded_files) * len(categories)
    completed_steps = 0
    rows = []

    for pdf in uploaded_files:
        status_text.text(f"Processing {pdf.name}...")
        paper_text = extract_text_from_pdf(pdf)

        # One row per paper: the file name plus one answer per category
        row = {"Filename": pdf.name}
        for category in categories:
            status_text.text(f"Processing {pdf.name} - {category}")
            row[category] = analyze_paper(paper_text, category)

            completed_steps += 1
            progress_bar.progress(completed_steps / total_steps)

            # Add small delay to avoid API rate limits
            time.sleep(1)

        rows.append(row)

    df = pd.DataFrame(rows)

    # Offer the table as a CSV download
    st.download_button(
        label="Download Results as CSV",
        data=df.to_csv(index=False),
        file_name="research_papers_analysis.csv",
        mime="text/csv"
    )

    # Display results in the app
    st.subheader("Analysis Results")
    st.dataframe(df)

    status_text.text("Processing complete!")
    progress_bar.progress(1.0)


if __name__ == "__main__":
    main()
file_upload_vectorize.py CHANGED
@@ -1,179 +1,179 @@
1
- from pymongo import MongoClient
2
- from datetime import datetime
3
- import openai
4
- import google.generativeai as genai
5
- import streamlit as st
6
- from db import courses_collection2, faculty_collection, students_collection, vectors_collection
7
- from PIL import Image
8
- import PyPDF2, docx, io
9
- from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
10
- from bson import ObjectId
11
- from dotenv import load_dotenv
12
- import os
13
- from create_course import courses_collection
14
-
15
- load_dotenv()
16
- MONGO_URI = os.getenv('MONGO_URI')
17
- OPENAI_KEY = os.getenv('OPENAI_KEY')
18
- GEMINI_KEY = os.getenv('GEMINI_KEY')
19
-
20
-
21
- client = MongoClient(MONGO_URI)
22
- db = client['novascholar_db']
23
- resources_collection = db['resources']
24
-
25
- # Configure APIs
26
- openai.api_key = OPENAI_KEY
27
- genai.configure(api_key=GEMINI_KEY)
28
- model = genai.GenerativeModel('gemini-pro')
29
-
30
- def upload_resource(course_id, session_id, file_name, file_content, material_type):
31
- # material_data = {
32
- # "session_id": session_id,
33
- # "course_id": course_id,
34
- # "file_name": file_name,
35
- # "file_content": file_content,
36
- # "material_type": material_type,
37
- # "uploaded_at": datetime.utcnow()
38
- # }
39
- # return resources_collection.insert_one(material_data)
40
- # resource_id = ObjectId()
41
-
42
- # Extract text content from the file
43
- text_content = extract_text_from_file(file_content)
44
-
45
- # Check if a resource with this file name already exists
46
- existing_resource = resources_collection.find_one({
47
- "session_id": session_id,
48
- "file_name": file_name
49
- })
50
-
51
- if existing_resource:
52
- return existing_resource["_id"]
53
-
54
- # Read the file content
55
- file_content.seek(0) # Reset the file pointer to the beginning
56
- original_file_content = file_content.read()
57
-
58
-
59
- resource_data = {
60
- "_id": ObjectId(),
61
- "course_id": course_id,
62
- "session_id": session_id,
63
- "file_name": file_name,
64
- "file_type": file_content.type,
65
- "text_content": text_content,
66
- "file_content": original_file_content, # Store the original file content
67
- "material_type": material_type,
68
- "uploaded_at": datetime.utcnow()
69
- }
70
-
71
- resources_collection.insert_one(resource_data)
72
- resource_id = resource_data["_id"]
73
-
74
- courses_collection.update_one(
75
- {
76
- "course_id": course_id,
77
- "sessions.session_id": session_id
78
- },
79
- {
80
- "$push": {"sessions.$.pre_class.resources": resource_id}
81
- }
82
- )
83
- # print("End of Upload Resource, Resource ID is: ", resource_id)
84
- # return resource_id
85
- if text_content:
86
- create_vector_store(text_content, resource_id)
87
- return resource_id
88
-
89
- def assignment_submit(student_id, course_id, session_id, assignment_id, file_name, file_content, text_content, material_type):
90
- # Read the file content
91
- file_content.seek(0) # Reset the file pointer to the beginning
92
- original_file_content = file_content.read()
93
-
94
- assignment_data = {
95
- "student_id": student_id,
96
- "course_id": course_id,
97
- "session_id": session_id,
98
- "assignment_id": assignment_id,
99
- "file_name": file_name,
100
- "file_type": file_content.type,
101
- "file_content": original_file_content, # Store the original file content
102
- "text_content": text_content,
103
- "material_type": material_type,
104
- "submitted_at": datetime.utcnow(),
105
- "file_url": "sample_url"
106
- }
107
- try:
108
- courses_collection2.update_one(
109
- {
110
- "course_id": course_id,
111
- "sessions.session_id": session_id,
112
- "sessions.post_class.assignments.id": assignment_id
113
- },
114
- {
115
- "$push": {"sessions.$.post_class.assignments.$[assignment].submissions": assignment_data}
116
- },
117
- array_filters=[{"assignment.id": assignment_id}]
118
- )
119
- return True
120
- except Exception as db_error:
121
- print(f"Error saving submission: {str(db_error)}")
122
- return False
123
-
124
- def extract_text_from_file(uploaded_file):
125
- text = ""
126
- file_type = uploaded_file.type
127
-
128
- try:
129
- if file_type == "text/plain":
130
- text = uploaded_file.getvalue().decode("utf-8")
131
- elif file_type == "application/pdf":
132
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue()))
133
- for page in pdf_reader.pages:
134
- text += page.extract_text() + "\n"
135
- elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
136
- doc = docx.Document(io.BytesIO(uploaded_file.getvalue()))
137
- for para in doc.paragraphs:
138
- text += para.text + "\n"
139
- return text
140
- except Exception as e:
141
- st.error(f"Error processing file: {str(e)}")
142
- return None
143
-
144
- def get_embedding(text):
145
- response = openai.embeddings.create(
146
- model="text-embedding-ada-002",
147
- input=text
148
- )
149
- return response.data[0].embedding
150
-
151
- def create_vector_store(text, resource_id):
152
- # resource_object_id = ObjectId(resource_id)
153
- # Ensure resource_id is an ObjectId
154
- # if not isinstance(resource_id, ObjectId):
155
- # resource_id = ObjectId(resource_id)
156
-
157
- existing_vector = vectors_collection.find_one({
158
- "resource_id": resource_id,
159
- "text": text
160
- })
161
-
162
- if existing_vector:
163
- print(f"Vector already exists for Resource ID: {resource_id}")
164
- return
165
-
166
- print(f"In Vector Store method, Resource ID is: {resource_id}")
167
- document = Document(text=text)
168
- embedding = get_embedding(text)
169
-
170
- vector_data = {
171
- "resource_id": resource_id,
172
- "vector": embedding,
173
- "text": text,
174
- "created_at": datetime.utcnow()
175
- }
176
-
177
- vectors_collection.insert_one(vector_data)
178
-
179
  # return VectorStoreIndex.from_documents([document])
 
1
+ from pymongo import MongoClient
2
+ from datetime import datetime
3
+ import openai
4
+ import google.generativeai as genai
5
+ import streamlit as st
6
+ from db import courses_collection2, faculty_collection, students_collection, vectors_collection
7
+ from PIL import Image
8
+ import PyPDF2, docx, io
9
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
10
+ from bson import ObjectId
11
+ from dotenv import load_dotenv
12
+ import os
13
+ from create_course import courses_collection
14
+
15
+ load_dotenv()
16
+ MONGO_URI = os.getenv('MONGO_URI')
17
+ OPENAI_KEY = os.getenv('OPENAI_KEY')
18
+ GEMINI_KEY = os.getenv('GEMINI_KEY')
19
+
20
+
21
+ client = MongoClient(MONGO_URI)
22
+ db = client['novascholar_db']
23
+ resources_collection = db['resources']
24
+
25
+ # Configure APIs
26
+ openai.api_key = OPENAI_KEY
27
+ genai.configure(api_key=GEMINI_KEY)
28
+ model = genai.GenerativeModel('gemini-pro')
29
+
30
def upload_resource(course_id, session_id, file_name, file_content, material_type):
    """Store an uploaded session resource and index its extracted text.

    If a resource with the same ``session_id`` and ``file_name`` already
    exists, its id is returned and nothing is written. Otherwise the file is
    saved in ``resources_collection``, its id is pushed onto the matching
    session's ``pre_class.resources`` in ``courses_collection``, and, when
    text could be extracted, ``create_vector_store`` is called for it.

    Args:
        course_id: Course the resource belongs to.
        session_id: Session the resource belongs to.
        file_name: Name used for the duplicate check and stored with the file.
        file_content: Uploaded file object; must provide ``type``, ``seek``,
            ``read`` and whatever ``extract_text_from_file`` needs.
        material_type: Stored as-is with the resource.

    Returns:
        The ``_id`` of the existing or newly inserted resource.
    """
    # Check for a duplicate before parsing the upload, and fetch only the id
    # so the stored file bytes are not transferred just to be discarded.
    existing_resource = resources_collection.find_one(
        {"session_id": session_id, "file_name": file_name},
        {"_id": 1},
    )
    if existing_resource:
        return existing_resource["_id"]

    # Extract text content from the file
    text_content = extract_text_from_file(file_content)

    # Read the raw bytes from the start of the upload
    file_content.seek(0)
    original_file_content = file_content.read()

    resource_id = ObjectId()
    resource_data = {
        "_id": resource_id,
        "course_id": course_id,
        "session_id": session_id,
        "file_name": file_name,
        "file_type": file_content.type,
        "text_content": text_content,
        "file_content": original_file_content,  # Store the original file content
        "material_type": material_type,
        "uploaded_at": datetime.utcnow(),
    }
    resources_collection.insert_one(resource_data)

    # Link the resource to its session inside the course document
    courses_collection.update_one(
        {"course_id": course_id, "sessions.session_id": session_id},
        {"$push": {"sessions.$.pre_class.resources": resource_id}},
    )

    # Only index when extraction produced some text (it may be "" or None)
    if text_content:
        create_vector_store(text_content, resource_id)
    return resource_id
88
+
89
def assignment_submit(student_id, course_id, session_id, assignment_id, file_name, file_content, text_content, material_type):
    """Append a student's submission to an assignment inside a course document.

    Args:
        student_id: Submitting student.
        course_id: Course containing the session.
        session_id: Session containing the assignment.
        assignment_id: ``id`` of the assignment within the session.
        file_name: Name stored with the submission.
        file_content: Uploaded file object; must provide ``type``, ``seek``
            and ``read``.
        text_content: Extracted text stored with the submission.
        material_type: Stored as-is with the submission.

    Returns:
        ``True`` when the submission was written to a matching assignment,
        ``False`` when no assignment matched or the database call failed.
    """
    # Read the raw bytes from the start of the upload
    file_content.seek(0)
    original_file_content = file_content.read()

    assignment_data = {
        "student_id": student_id,
        "course_id": course_id,
        "session_id": session_id,
        "assignment_id": assignment_id,
        "file_name": file_name,
        "file_type": file_content.type,
        "file_content": original_file_content,  # Store the original file content
        "text_content": text_content,
        "material_type": material_type,
        "submitted_at": datetime.utcnow(),
        "file_url": "sample_url",
    }
    try:
        result = courses_collection2.update_one(
            {
                "course_id": course_id,
                # Both conditions must hold for the SAME session element.
                "sessions": {
                    "$elemMatch": {
                        "session_id": session_id,
                        "post_class.assignments.id": assignment_id,
                    }
                },
            },
            {
                # Filtered positional operators pick the session and the
                # assignment explicitly; a plain "$" is ambiguous when the
                # query holds more than one condition on the array.
                "$push": {
                    "sessions.$[session].post_class.assignments.$[assignment].submissions": assignment_data
                }
            },
            array_filters=[
                {"session.session_id": session_id},
                {"assignment.id": assignment_id},
            ],
        )
        # Report success only if a document was actually changed.
        return result.modified_count > 0
    except Exception as db_error:
        print(f"Error saving submission: {str(db_error)}")
        return False
123
+
124
def extract_text_from_file(uploaded_file):
    """Extract plain text from an uploaded file based on its MIME type.

    ``text/plain`` is decoded as UTF-8; PDF pages and DOCX paragraphs are
    concatenated, each followed by a newline. Any other type yields an empty
    string. On failure the error is shown through ``st.error`` and ``None``
    is returned.
    """
    mime = uploaded_file.type

    try:
        if mime == "text/plain":
            return uploaded_file.getvalue().decode("utf-8")

        if mime == "application/pdf":
            reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue()))
            return "".join(page.extract_text() + "\n" for page in reader.pages)

        if mime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            word_doc = docx.Document(io.BytesIO(uploaded_file.getvalue()))
            return "".join(paragraph.text + "\n" for paragraph in word_doc.paragraphs)

        # Unrecognised types produce no text
        return ""
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None
143
+
144
def get_embedding(text):
    """Return the ``text-embedding-ada-002`` embedding of ``text`` from the OpenAI API."""
    result = openai.embeddings.create(input=text, model="text-embedding-ada-002")
    first_item = result.data[0]
    return first_item.embedding
150
+
151
def create_vector_store(text, resource_id):
    """Embed ``text`` and store the vector for ``resource_id``.

    Nothing is written when ``vectors_collection`` already holds an entry
    with the same ``resource_id`` and ``text``.

    Args:
        text: Text to embed; also stored alongside the vector.
        resource_id: Id of the resource the text was extracted from.

    Returns:
        None.
    """
    # Existence check only: project the id so the stored vector and text are
    # not fetched back.
    existing_vector = vectors_collection.find_one(
        {"resource_id": resource_id, "text": text},
        {"_id": 1},
    )
    if existing_vector:
        print(f"Vector already exists for Resource ID: {resource_id}")
        return

    print(f"In Vector Store method, Resource ID is: {resource_id}")
    embedding = get_embedding(text)

    vectors_collection.insert_one(
        {
            "resource_id": resource_id,
            "vector": embedding,
            "text": text,
            "created_at": datetime.utcnow(),
        }
    )
gen_mcqs.py CHANGED
@@ -1,206 +1,206 @@
1
- import ast
2
- from pymongo import MongoClient
3
- from datetime import datetime
4
- import openai
5
- import google.generativeai as genai
6
- from google.generativeai import GenerativeModel
7
- from dotenv import load_dotenv
8
- import os
9
- from file_upload_vectorize import resources_collection, vectors_collection, courses_collection2, faculty_collection
10
-
11
- # Load environment variables
12
- load_dotenv()
13
- MONGO_URI = os.getenv('MONGO_URI')
14
- OPENAI_KEY = os.getenv('OPENAI_KEY')
15
- GEMINI_KEY = os.getenv('GEMINI_KEY')
16
-
17
- # Configure APIs
18
- openai.api_key = OPENAI_KEY
19
- genai.configure(api_key=GEMINI_KEY)
20
- model = genai.GenerativeModel('gemini-pro')
21
-
22
- # Connect to MongoDB
23
- client = MongoClient(MONGO_URI)
24
- db = client['novascholar_db']
25
- quizzes_collection = db["quizzes"]
26
-
27
- def strip_code_markers(response_text):
28
- """Strip off the markers ``` and python from a LLM model's response"""
29
- if response_text.startswith("```python"):
30
- response_text = response_text[len("```python"):].strip()
31
- if response_text.startswith("```"):
32
- response_text = response_text[len("```"):].strip()
33
- if response_text.endswith("```"):
34
- response_text = response_text[:-len("```")].strip()
35
- return response_text
36
-
37
-
38
- # New function to generate MCQs using Gemini
39
- def generate_mcqs(context, num_questions, session_title, session_description):
40
- """Generate MCQs either from context or session details"""
41
- try:
42
- # Initialize Gemini model
43
- if context:
44
- prompt = f"""
45
- Based on the following content, generate {num_questions} multiple choice questions.
46
- Format each question as a Python dictionary with the following structure:
47
- {{
48
- "question": "Question text here",
49
- "options": ["A) option1", "B) option2", "C) option3", "D) option4"],
50
- "correct_option": "A) option1" or "B) option2" or "C) option3" or "D) option4"
51
- }}
52
-
53
- Content:
54
- {context}
55
-
56
- Generate challenging but clear questions that test understanding of key concepts.
57
- Return only the Python list of dictionaries.
58
- """
59
- else:
60
- prompt = f"""
61
- Generate {num_questions} multiple choice questions about the topic:
62
- Title: {session_title}
63
- Description: {session_description}
64
-
65
- Format each question as a Python dictionary with the following structure:
66
- {{
67
- "question": "Question text here",
68
- "options": ["A) option1", "B) option2", "C) option3", "D) option4"],
69
- "correct_option": "A" or "B" or "C" or "D"
70
- }}
71
-
72
- Generate challenging but clear questions.
73
- Return only the Python list of dictionaries without any additional formatting or markers
74
- Do not write any other text, do not start the response with (```python), do not end the response with backticks(```)
75
- A Sample response should look like this: Response Text: [
76
- {
77
- "question": "Which of the following is NOT a valid data type in C++?",
78
- "options": ["int", "double", "boolean", "char"],
79
- "correct_option": "C"
80
- }
81
- ] (Notice that there are no backticks(```) around the response and no (```python))
82
- .
83
- """
84
-
85
- response = model.generate_content(prompt)
86
- response_text = response.text.strip()
87
- print("Response Text:", response_text)
88
- modified_response_text = strip_code_markers(response_text)
89
- print("Response Text Modified to:", modified_response_text)
90
- # Extract and parse the response to get the list of MCQs
91
- mcqs = ast.literal_eval(modified_response_text) # Be careful with eval, consider using ast.literal_eval for production
92
- print(mcqs)
93
- if not mcqs:
94
- raise ValueError("No questions generated")
95
- return mcqs
96
- except Exception as e:
97
- print(f"Error generating MCQs: , error: {e}")
98
- return None
99
-
100
- # New function to save quiz to database
101
- def save_quiz(course_id, session_id, title, questions, user_id):
102
- """Save quiz to database"""
103
- try:
104
- quiz_data = {
105
- "user_id": user_id,
106
- "course_id": course_id,
107
- "session_id": session_id,
108
- "title": title,
109
- "questions": questions,
110
- "created_at": datetime.utcnow(),
111
- "status": "active",
112
- "submissions": []
113
- }
114
- result = quizzes_collection.insert_one(quiz_data)
115
- return result.inserted_id
116
- except Exception as e:
117
- print(f"Error saving quiz: {e}")
118
- return None
119
-
120
-
121
- def get_student_quiz_score(quiz_id, student_id):
122
- """Get student's score for a specific quiz"""
123
- quiz = quizzes_collection.find_one(
124
- {
125
- "_id": quiz_id,
126
- "submissions.student_id": student_id
127
- },
128
- {"submissions.$": 1}
129
- )
130
- if quiz and quiz.get('submissions'):
131
- return quiz['submissions'][0].get('score')
132
- return None
133
-
134
- # def submit_quiz_answers(quiz_id, student_id, student_answers):
135
- # """Submit and score student's quiz answers"""
136
- # quiz = quizzes_collection.find_one({"_id": quiz_id})
137
- # if not quiz:
138
- # return None
139
-
140
- # # Calculate score
141
- # correct_answers = 0
142
- # total_questions = len(quiz['questions'])
143
-
144
- # for q_idx, question in enumerate(quiz['questions']):
145
- # if student_answers.get(str(q_idx)) == question['correct_option']:
146
- # correct_answers += 1
147
-
148
- # score = (correct_answers / total_questions) * 100
149
-
150
- # # Store submission
151
- # submission_data = {
152
- # "student_id": student_id,
153
- # "answers": student_answers,
154
- # "score": score,
155
- # "submitted_at": datetime.utcnow()
156
- # }
157
-
158
- # # Update quiz with submission
159
- # quizzes_collection.update_one(
160
- # {"_id": quiz_id},
161
- # {
162
- # "$push": {"submissions": submission_data}
163
- # }
164
- # )
165
-
166
- # return score
167
- def submit_quiz_answers(quiz_id, student_id, student_answers):
168
- """Submit and score student's quiz answers"""
169
- try:
170
- quiz = quizzes_collection.find_one({"_id": quiz_id})
171
- if not quiz:
172
- return None
173
-
174
- # Calculate score
175
- correct_answers = 0
176
- total_questions = len(quiz['questions'])
177
-
178
- for q_idx, question in enumerate(quiz['questions']):
179
- student_answer = student_answers.get(str(q_idx))
180
- if student_answer: # Only check if answer was provided
181
- # Extract the option letter (A, B, C, D) from the full answer string
182
- answer_letter = student_answer.split(')')[0].strip()
183
- if answer_letter == question['correct_option']:
184
- correct_answers += 1
185
-
186
- score = (correct_answers / total_questions) * 100
187
-
188
- # Store submission
189
- submission_data = {
190
- "student_id": student_id,
191
- "answers": student_answers,
192
- "score": score,
193
- "submitted_at": datetime.utcnow()
194
- }
195
-
196
- # Update quiz with submission
197
- result = quizzes_collection.update_one(
198
- {"_id": quiz_id},
199
- {"$push": {"submissions": submission_data}}
200
- )
201
-
202
- return score if result.modified_count > 0 else None
203
-
204
- except Exception as e:
205
- print(f"Error submitting quiz: {e}")
206
  return None
 
1
+ import ast
2
+ from pymongo import MongoClient
3
+ from datetime import datetime
4
+ import openai
5
+ import google.generativeai as genai
6
+ from google.generativeai import GenerativeModel
7
+ from dotenv import load_dotenv
8
+ import os
9
+ from file_upload_vectorize import resources_collection, vectors_collection, courses_collection2, faculty_collection
10
+
11
+ # Load environment variables
12
+ load_dotenv()
13
+ MONGO_URI = os.getenv('MONGO_URI')
14
+ OPENAI_KEY = os.getenv('OPENAI_KEY')
15
+ GEMINI_KEY = os.getenv('GEMINI_KEY')
16
+
17
+ # Configure APIs
18
+ openai.api_key = OPENAI_KEY
19
+ genai.configure(api_key=GEMINI_KEY)
20
+ model = genai.GenerativeModel('gemini-pro')
21
+
22
+ # Connect to MongoDB
23
+ client = MongoClient(MONGO_URI)
24
+ db = client['novascholar_db']
25
+ quizzes_collection = db["quizzes"]
26
+
27
def strip_code_markers(response_text):
    """Remove a surrounding Markdown code fence from an LLM response.

    Handles a bare fence, a ``python`` fence, and fences tagged with any
    other language name (for example ``json``) when the tag is followed by
    whitespace. Whitespace around the fence is ignored.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The text without its opening/closing fence, stripped of surrounding
        whitespace. Text that carries no fence is returned unchanged.
    """
    import re  # local import: this module does not import re at file level

    stripped = response_text.strip()
    if not (stripped.startswith("```") or stripped.endswith("```")):
        return response_text

    # "python" is accepted even when glued to the payload (as before); any
    # other tag must be followed by whitespace so payloads such as
    # ```True``` are not mistaken for a language name.
    opening = re.match(r"```(?:python|[A-Za-z][\w+-]*(?=\s))?", stripped)
    if opening:
        stripped = stripped[opening.end():].strip()
    if stripped.endswith("```"):
        stripped = stripped[:-len("```")].strip()
    return stripped
36
+
37
+
38
+ # New function to generate MCQs using Gemini
39
def generate_mcqs(context, num_questions, session_title, session_description):
    """Generate multiple-choice questions with the Gemini model.

    When ``context`` is truthy the questions are based on it; otherwise they
    are based on the session title and description.

    Args:
        context: Source text for the questions, or a falsy value.
        num_questions: Number of questions requested in the prompt.
        session_title: Session title, used when no context is given.
        session_description: Session description, used when no context is
            given.

    Returns:
        The parsed, non-empty literal returned by the model (expected to be
        a list of question dictionaries), or ``None`` when generation or
        parsing fails (the error is printed).
    """
    try:
        if context:
            prompt = f"""
            Based on the following content, generate {num_questions} multiple choice questions.
            Format each question as a Python dictionary with the following structure:
            {{
            "question": "Question text here",
            "options": ["A) option1", "B) option2", "C) option3", "D) option4"],
            "correct_option": "A) option1" or "B) option2" or "C) option3" or "D) option4"
            }}

            Content:
            {context}

            Generate challenging but clear questions that test understanding of key concepts.
            Return only the Python list of dictionaries.
            """
        else:
            # Literal braces must be doubled inside an f-string. The sample
            # dictionary below used single braces, so it was parsed as a
            # replacement field with an invalid format spec and this branch
            # always failed.
            prompt = f"""
            Generate {num_questions} multiple choice questions about the topic:
            Title: {session_title}
            Description: {session_description}

            Format each question as a Python dictionary with the following structure:
            {{
            "question": "Question text here",
            "options": ["A) option1", "B) option2", "C) option3", "D) option4"],
            "correct_option": "A" or "B" or "C" or "D"
            }}

            Generate challenging but clear questions.
            Return only the Python list of dictionaries without any additional formatting or markers
            Do not write any other text, do not start the response with (```python), do not end the response with backticks(```)
            A Sample response should look like this: Response Text: [
            {{
            "question": "Which of the following is NOT a valid data type in C++?",
            "options": ["int", "double", "boolean", "char"],
            "correct_option": "C"
            }}
            ] (Notice that there are no backticks(```) around the response and no (```python))
            .
            """

        response = model.generate_content(prompt)
        response_text = response.text.strip()
        print("Response Text:", response_text)
        modified_response_text = strip_code_markers(response_text)
        print("Response Text Modified to:", modified_response_text)
        # ast.literal_eval only accepts Python literals, so model output is
        # parsed without being executed.
        mcqs = ast.literal_eval(modified_response_text)
        print(mcqs)
        if not mcqs:
            raise ValueError("No questions generated")
        return mcqs
    except Exception as e:
        print(f"Error generating MCQs: , error: {e}")
        return None
99
+
100
+ # New function to save quiz to database
101
def save_quiz(course_id, session_id, title, questions, user_id):
    """Insert a new quiz document into ``quizzes_collection``.

    The quiz is stored with status ``"active"``, an empty ``submissions``
    list and the current UTC time as ``created_at``.

    Returns:
        The id of the inserted document, or ``None`` if saving fails (the
        error is printed).
    """
    try:
        outcome = quizzes_collection.insert_one(
            {
                "user_id": user_id,
                "course_id": course_id,
                "session_id": session_id,
                "title": title,
                "questions": questions,
                "created_at": datetime.utcnow(),
                "status": "active",
                "submissions": [],
            }
        )
        return outcome.inserted_id
    except Exception as e:
        print(f"Error saving quiz: {e}")
        return None
119
+
120
+
121
def get_student_quiz_score(quiz_id, student_id):
    """Return the score stored for a student's submission to a quiz.

    Queries ``quizzes_collection`` for the quiz with a submission by
    ``student_id`` and projects only the matching submission.

    Returns:
        The ``score`` of the matched submission, or ``None`` when no such
        quiz/submission is found.
    """
    query = {"_id": quiz_id, "submissions.student_id": student_id}
    projection = {"submissions.$": 1}
    match = quizzes_collection.find_one(query, projection)

    if not match:
        return None
    matched_submissions = match.get('submissions')
    if not matched_submissions:
        return None
    return matched_submissions[0].get('score')
133
+
134
+ # def submit_quiz_answers(quiz_id, student_id, student_answers):
135
+ # """Submit and score student's quiz answers"""
136
+ # quiz = quizzes_collection.find_one({"_id": quiz_id})
137
+ # if not quiz:
138
+ # return None
139
+
140
+ # # Calculate score
141
+ # correct_answers = 0
142
+ # total_questions = len(quiz['questions'])
143
+
144
+ # for q_idx, question in enumerate(quiz['questions']):
145
+ # if student_answers.get(str(q_idx)) == question['correct_option']:
146
+ # correct_answers += 1
147
+
148
+ # score = (correct_answers / total_questions) * 100
149
+
150
+ # # Store submission
151
+ # submission_data = {
152
+ # "student_id": student_id,
153
+ # "answers": student_answers,
154
+ # "score": score,
155
+ # "submitted_at": datetime.utcnow()
156
+ # }
157
+
158
+ # # Update quiz with submission
159
+ # quizzes_collection.update_one(
160
+ # {"_id": quiz_id},
161
+ # {
162
+ # "$push": {"submissions": submission_data}
163
+ # }
164
+ # )
165
+
166
+ # return score
167
def submit_quiz_answers(quiz_id, student_id, student_answers):
    """Score a student's answers and append the submission to the quiz.

    Args:
        quiz_id: ``_id`` of the quiz document.
        student_id: Identifier stored with the submission.
        student_answers: Mapping from the question index as a string
            ("0", "1", ...) to the chosen option text, e.g. "B) option2".

    Returns:
        The percentage score (0-100) when the submission was stored.
        ``None`` when the quiz is not found, has no questions, the update
        modified nothing, or an error occurred (the error is printed).
    """

    def _option_letter(option):
        """Return the part before the first ')' — "B) text" -> "B"."""
        return str(option).split(')')[0].strip()

    try:
        quiz = quizzes_collection.find_one({"_id": quiz_id})
        if not quiz:
            return None

        questions = quiz['questions']
        total_questions = len(questions)
        if total_questions == 0:
            # Nothing to score; previously this surfaced as a caught
            # ZeroDivisionError with the same None result.
            return None

        correct_answers = 0
        for q_idx, question in enumerate(questions):
            student_answer = student_answers.get(str(q_idx))
            if not student_answer:  # unanswered questions earn nothing
                continue
            # Normalise both sides: generate_mcqs may store correct_option
            # either as a bare letter ("A") or as the full option text
            # ("A) option1").
            if _option_letter(student_answer) == _option_letter(question['correct_option']):
                correct_answers += 1

        score = (correct_answers / total_questions) * 100

        submission_data = {
            "student_id": student_id,
            "answers": student_answers,
            "score": score,
            "submitted_at": datetime.utcnow()
        }

        result = quizzes_collection.update_one(
            {"_id": quiz_id},
            {"$push": {"submissions": submission_data}}
        )

        return score if result.modified_count > 0 else None

    except Exception as e:
        print(f"Error submitting quiz: {e}")
        return None
goals2.py CHANGED
@@ -1,658 +1,658 @@
1
- import streamlit as st
2
- from typing import List, Dict
3
- import httpx
4
- from pathlib import Path
5
- import os
6
- from dotenv import load_dotenv
7
- import json
8
- import numpy as np
9
- from pymongo import MongoClient
10
- from openai import OpenAI
11
- from datetime import datetime
12
- import asyncio
13
- import pandas as pd
14
-
15
- # Load environment variables
16
- load_dotenv()
17
- PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
18
- MONGODB_URI = os.getenv("MONGO_URI")
19
- OPENAI_API_KEY = os.getenv("OPENAI_KEY")
20
-
21
- # Initialize MongoDB client
22
- client = MongoClient(MONGODB_URI)
23
- db = client["document_analysis"]
24
- vectors_collection = db["document_vectors"]
25
-
26
- # Initialize OpenAI client
27
- openai_client = OpenAI(api_key=OPENAI_API_KEY)
28
-
29
-
30
class GoalAnalyzer:
    """Analyse document text against user goals via the Perplexity chat API."""

    def __init__(self):
        self.api_key = PERPLEXITY_API_KEY
        self.base_url = "https://api.perplexity.ai/chat/completions"

    def clean_json_string(self, content: str) -> str:
        """Extract the JSON object embedded in a model reply.

        Markdown fences are removed, the text is cut down to the span from
        the first ``{`` to the last ``}``, and newlines are dropped. Single
        quotes are converted to double quotes only when the result does not
        already parse as JSON, so apostrophes inside valid JSON strings are
        preserved.

        Args:
            content: Raw reply text.

        Returns:
            The cleaned string (not guaranteed to be valid JSON).
        """
        # Remove markdown formatting
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1]

        # Find the JSON object boundaries
        start_idx = content.find("{")
        end_idx = content.rfind("}") + 1
        if start_idx != -1 and end_idx > 0:
            content = content[start_idx:end_idx]

        content = content.strip().replace("\n", "")

        try:
            json.loads(content)
        except ValueError:
            # Fallback for Python-style dicts that use single quotes.
            content = content.replace("'", '"')

        return content

    async def get_perplexity_analysis(self, text: str, goal: str) -> Dict:
        """Request a themed analysis of ``text`` in the context of ``goal``.

        Args:
            text: Document text to analyse.
            goal: Goal description included in the prompt.

        Returns:
            A dict with ``themes``, ``subthemes``, ``keywords`` and
            ``relevance_score`` (missing keys are filled with empty
            defaults); a placeholder dict when the reply is not valid JSON;
            ``None`` when the request fails (reported with ``st.error``).
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        prompt = f"""
        Analyze the following text in context of the goal: {goal}

        Text: {text}

        Provide analysis in the following JSON format:
        {{
        "themes": ["theme1", "theme2"],
        "subthemes": {{"theme1": ["subtheme1", "subtheme2"], "theme2": ["subtheme3"]}},
        "keywords": ["keyword1", "keyword2"],
        "relevance_score": 0-100
        }}
        """

        try:
            # Named http_client so it does not shadow the module-level
            # MongoDB ``client``.
            async with httpx.AsyncClient() as http_client:
                payload = {
                    "model": "llama-3.1-sonar-small-128k-chat",  # Updated to supported model
                    "messages": [
                        {
                            "role": "system",
                            "content": "You are an AI assistant that analyzes documents and provides structured analysis.",
                        },
                        {"role": "user", "content": prompt},
                    ],
                    "max_tokens": 1024,
                }

                # Debug info using expander
                with st.expander("Debug Info", expanded=False):
                    st.write("Request payload:", payload)

                response = await http_client.post(
                    self.base_url, headers=headers, json=payload, timeout=30.0
                )

            # Debug response info
            with st.expander("Response Info", expanded=False):
                st.write("Response status:", response.status_code)
                st.write("Response headers:", dict(response.headers))
                st.write("Response content:", response.text)

            if response.status_code != 200:
                error_detail = (
                    response.json() if response.content else "No error details"
                )
                raise Exception(
                    f"API returned status code {response.status_code}. Details: {error_detail}"
                )

            result = response.json()
            content = (
                result.get("choices", [{}])[0].get("message", {}).get("content", "")
            )

            cleaned_content = self.clean_json_string(content)

            try:
                analysis = json.loads(cleaned_content)
            except json.JSONDecodeError as e:
                st.error(f"JSON parsing error: {str(e)}")
                st.error(f"Failed content: {cleaned_content}")
                return {
                    "themes": ["Error parsing themes"],
                    "subthemes": {"Error": ["Failed to parse subthemes"]},
                    "keywords": ["parsing-error"],
                    "relevance_score": 0,
                }

            # Fill in missing fields. "subthemes" must default to a dict
            # because consumers look sub-themes up by theme name.
            field_defaults = {
                "themes": [],
                "subthemes": {},
                "keywords": [],
                "relevance_score": 0,
            }
            for field, default in field_defaults.items():
                if field not in analysis:
                    analysis[field] = default

            return analysis

        except Exception as e:
            st.error(f"API Error: {str(e)}")
            return None

    def extract_text_from_file(self, file) -> str:
        """Extract text content from an uploaded TXT, PDF, or DOCX file.

        Args:
            file: Uploaded file object exposing ``type`` (MIME type) and,
                for plain text, ``getvalue()``.

        Returns:
            The extracted text; an empty string for unsupported types or
            when extraction fails (reported with ``st.error``).
        """
        try:
            text = ""
            file_type = file.type

            if file_type == "text/plain":
                text = file.getvalue().decode("utf-8")
            elif file_type == "application/pdf":
                import PyPDF2

                pdf_reader = PyPDF2.PdfReader(file)
                # A page may yield no text (None); treat it as empty rather
                # than failing the whole document.
                text = "".join(
                    page.extract_text() or "" for page in pdf_reader.pages
                )
            elif (
                file_type
                == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            ):
                import docx

                doc = docx.Document(file)
                text = " ".join([paragraph.text for paragraph in doc.paragraphs])

            return text
        except Exception as e:
            st.error(f"Error extracting text: {str(e)}")
            return ""
179
-
180
-
181
class DocumentVectorizer:
    """Create, store and compare OpenAI embeddings for documents and goals."""

    def __init__(self):
        self.model = "text-embedding-ada-002"
        self.client = MongoClient(MONGODB_URI)
        self.db = self.client["document_analysis"]
        self.vectors_collection = self.db["document_vectors"]
        # NOTE: the earlier create_index call (a "2dsphere" key plus an
        # options dict passed positionally) has been removed. 2dsphere is a
        # geospatial index type, and the dict landed in the ``session``
        # parameter, so the call could only fail behind a misleading
        # warning. Similarity is computed in Python by vector_search, which
        # needs no index.

    def get_embedding(self, text: str) -> list:
        """Return the embedding vector for ``text``.

        Returns ``None`` when the API call fails (reported with
        ``st.error``).
        """
        try:
            response = openai_client.embeddings.create(model=self.model, input=text)
            return response.data[0].embedding
        except Exception as e:
            st.error(f"Error getting embedding: {str(e)}")
            return None

    def vector_exists(self, doc_name: str) -> bool:
        """Return True when a record named ``doc_name`` is already stored."""
        return self.vectors_collection.count_documents({"name": doc_name}) > 0

    def store_vector(self, doc_name: str, vector: list, text: str, goal: str = None):
        """Upsert the vector record for a document or a goal.

        Args:
            doc_name: Name used as the lookup key.
            vector: Embedding to store.
            text: Source text of the embedding.
            goal: Goal text; when given the record type is ``"goal"``,
                otherwise ``"document"``.
        """
        try:
            vector_doc = {
                "name": doc_name,
                "vector": vector,
                "text": text,
                "type": "document" if goal is None else "goal",
                "goal": goal,
                "updated_at": datetime.utcnow(),
            }

            # created_at is only written when the upsert inserts a new record.
            self.vectors_collection.update_one(
                {"name": doc_name},
                {"$set": vector_doc, "$setOnInsert": {"created_at": datetime.utcnow()}},
                upsert=True,
            )

        except Exception as e:
            st.error(f"Error storing vector: {str(e)}")

    def vector_search(self, query_vector: List[float], limit: int = 5) -> List[Dict]:
        """Return the ``limit`` stored documents most similar to a vector.

        Every record of type ``"document"`` is loaded and scored in Python.

        Returns:
            Dicts with ``name``, ``text``, ``similarity`` (float) and
            ``similarity_display`` (percentage string), best match first;
            an empty list on error (reported with ``st.error``).
        """
        try:
            documents = list(self.vectors_collection.find({"type": "document"}))

            similarities = []
            for doc in documents:
                similarity = self.calculate_similarity(query_vector, doc["vector"])
                similarities.append(
                    {
                        "name": doc["name"],
                        "text": doc["text"],
                        "similarity": similarity,  # numeric, used for sorting
                        "similarity_display": f"{similarity*100:.1f}%",
                    }
                )

            sorted_docs = sorted(
                similarities,
                key=lambda x: x["similarity"],
                reverse=True,
            )[:limit]

            return sorted_docs

        except Exception as e:
            st.error(f"Vector search error: {str(e)}")
            return []

    def find_similar_documents(self, text: str, limit: int = 5) -> List[Dict]:
        """Embed ``text`` and return its most similar stored documents."""
        vector = self.get_embedding(text)
        if vector:
            return self.vector_search(vector, limit)
        return []

    def calculate_similarity(self, vector1: list, vector2: list) -> float:
        """Return the cosine similarity of two vectors.

        A zero-length vector has no direction, so the similarity is reported
        as 0.0 instead of dividing by zero.
        """
        denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
        if denominator == 0:
            return 0.0
        return float(np.dot(vector1, vector2) / denominator)
284
-
285
-
286
def display_analysis_results(analysis: Dict):
    """Render an analysis dict (themes, keywords, relevance) in Streamlit.

    Args:
        analysis: Dict that may contain ``themes`` (list), ``subthemes``
            (dict mapping a theme to its sub-themes), ``keywords`` (list)
            and ``relevance_score``. A falsy value renders nothing.
    """
    if not analysis:
        return

    # The analysis step can fill a missing "subthemes" with a list; fall
    # back to an empty mapping so the per-theme lookup below cannot fail.
    subthemes_by_theme = analysis.get("subthemes", {})
    if not isinstance(subthemes_by_theme, dict):
        subthemes_by_theme = {}

    # Display Themes
    st.subheader("Themes")
    for theme in analysis.get("themes", []):
        with st.expander(f"🎯 {theme}"):
            subthemes = subthemes_by_theme.get(theme, [])
            if subthemes:
                st.write("**Subthemes:**")
                for subtheme in subthemes:
                    st.write(f"- {subtheme}")

    # Display Keywords
    st.subheader("Keywords")
    keywords = analysis.get("keywords", [])
    st.write(" | ".join([f"🔑 {keyword}" for keyword in keywords]))

    # Display Relevance Score
    score = analysis.get("relevance_score", 0)
    st.metric("Relevance Score", f"{score}%")
310
-
311
-
312
def display_analyst_dashboard():
    """Render the multi-goal document analysis dashboard.

    The sidebar collects goals and uploaded documents (first tab), a
    free-text similarity query (second tab) and a logout button. Pressing
    "Analyze Documents" embeds goals and documents, shows per-document goal
    relevance and a Perplexity analysis, then similarity matrices when more
    than one document has a vector. Pressing "Search Similar" lists the
    stored documents closest to the query text.
    """
    st.title("Multi-Goal Document Analysis")

    with st.sidebar:
        st.markdown("### Input Section")
        tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])

        with tab1:
            # Multiple goals input
            num_goals = st.number_input("Number of goals:", min_value=1, value=1)
            goals = []
            for i in range(num_goals):
                goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
                if goal:
                    goals.append(goal)

            uploaded_files = st.file_uploader(
                "Upload documents",
                accept_multiple_files=True,
                type=["txt", "pdf", "docx"],
            )
            analyze_button = (
                st.button("Analyze Documents") if goals and uploaded_files else None
            )

        with tab2:
            search_text = st.text_area("Enter text to find similar documents:")
            search_limit = st.slider("Number of results", 1, 10, 5)
            search_button = st.button("Search Similar") if search_text else None

        if st.button("Logout", use_container_width=True):
            # Iterate over a snapshot of the keys while deleting entries.
            for key in list(st.session_state.keys()):
                del st.session_state[key]
            st.rerun()

    if analyze_button:
        analyzer = GoalAnalyzer()
        vectorizer = DocumentVectorizer()

        # Store vectors
        doc_vectors = {}
        goal_vectors = {}

        # Process goals first
        with st.spinner("Processing goals..."):
            for i, goal in enumerate(goals):
                vector = vectorizer.get_embedding(goal)
                if vector:
                    goal_vectors[f"Goal {i+1}"] = vector
                    vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)

        # Process documents
        with st.spinner("Processing documents..."):
            for file in uploaded_files:
                st.markdown(f"### Analysis for {file.name}")

                if vectorizer.vector_exists(file.name):
                    st.info(f"Vector already exists for {file.name}")
                    existing_doc = vectorizer.vectors_collection.find_one(
                        {"name": file.name}
                    )
                    doc_vectors[file.name] = existing_doc["vector"]
                    # Reuse the stored text for the analysis below; before,
                    # ``text`` was unbound (or left over from the previous
                    # file) on this path.
                    text = existing_doc.get("text", "")
                else:
                    text = analyzer.extract_text_from_file(file)
                    if not text:
                        st.warning(f"Could not extract text from {file.name}")
                        continue

                    vector = vectorizer.get_embedding(text)
                    if not vector:
                        # Without a vector the relevance lookup below would
                        # raise KeyError; skip this file instead.
                        st.warning(f"Could not create an embedding for {file.name}")
                        continue
                    doc_vectors[file.name] = vector
                    vectorizer.store_vector(file.name, vector, text)

                # Display goal similarities
                st.subheader("Goal Relevance Scores")
                col1, col2 = st.columns([1, 2])

                with col1:
                    for goal_name, goal_vector in goal_vectors.items():
                        similarity = (
                            vectorizer.calculate_similarity(
                                doc_vectors[file.name], goal_vector
                            )
                            * 100
                        )
                        st.metric(f"{goal_name}", f"{similarity:.1f}%")

                with col2:
                    # Get analysis for all goals combined
                    analysis = asyncio.run(
                        analyzer.get_perplexity_analysis(text, " | ".join(goals))
                    )
                    display_analysis_results(analysis)

                st.divider()

        # Document similarity matrix
        if len(doc_vectors) > 1:
            st.markdown("### Document Similarity Matrix")
            files = list(doc_vectors.keys())
            similarity_matrix = [
                [
                    vectorizer.calculate_similarity(
                        doc_vectors[file1], doc_vectors[file2]
                    )
                    for file2 in files
                ]
                for file1 in files
            ]

            df = pd.DataFrame(similarity_matrix, columns=files, index=files)
            st.dataframe(df.style.background_gradient(cmap="RdYlGn"))

            # Add goal-document similarity matrix
            st.markdown("### Goal-Document Similarity Matrix")
            goal_names = list(goal_vectors.keys())
            goal_doc_matrix = [
                [
                    vectorizer.calculate_similarity(
                        doc_vectors[file], goal_vectors[goal]
                    )
                    for goal in goal_names
                ]
                for file in files
            ]

            df_goals = pd.DataFrame(
                goal_doc_matrix, columns=goal_names, index=files
            )
            st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))

    elif search_button:
        vectorizer = DocumentVectorizer()
        with st.spinner("Searching similar documents..."):
            query_vector = vectorizer.get_embedding(search_text)
            if query_vector:
                similar_docs = vectorizer.vector_search(query_vector, search_limit)

                if similar_docs:
                    st.markdown("### Similar Documents Found")

                    # Create DataFrame with numeric similarities
                    df = pd.DataFrame(similar_docs)

                    # Apply gradient to numeric column
                    styled_df = df[["name", "similarity"]].style.background_gradient(
                        cmap="RdYlGn", subset=["similarity"]
                    )

                    # Format display after styling
                    styled_df = styled_df.format({"similarity": "{:.1%}"})

                    st.dataframe(styled_df)

                    # Show a short preview of each document's text
                    for doc in similar_docs:
                        with st.expander(
                            f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
                        ):
                            st.text(
                                doc["text"][:20] + "..."
                                if len(doc["text"]) > 20
                                else doc["text"]
                            )
                else:
                    st.info("No similar documents found")
            else:
                st.error("Could not process search query")
485
-
486
-
487
- def main():
488
- st.title("Multi-Goal Document Analysis")
489
-
490
- with st.sidebar:
491
- st.markdown("### Input Section")
492
- tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
493
- # tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])
494
-
495
- with tab1:
496
- # Multiple goals input
497
- num_goals = st.number_input("Number of goals:", min_value=1, value=1)
498
- goals = []
499
- for i in range(num_goals):
500
- goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
501
- if goal:
502
- goals.append(goal)
503
-
504
- uploaded_files = st.file_uploader(
505
- "Upload documents",
506
- accept_multiple_files=True,
507
- type=["txt", "pdf", "docx"],
508
- )
509
- analyze_button = (
510
- st.button("Analyze Documents") if goals and uploaded_files else None
511
- )
512
-
513
- with tab2:
514
- # Keep existing similarity search tab
515
- search_text = st.text_area("Enter text to find similar documents:")
516
- search_limit = st.slider("Number of results", 1, 10, 5)
517
- search_button = st.button("Search Similar") if search_text else None
518
-
519
- if analyze_button:
520
- analyzer = GoalAnalyzer()
521
- vectorizer = DocumentVectorizer()
522
-
523
- # Store vectors
524
- doc_vectors = {}
525
- goal_vectors = {}
526
-
527
- # Process goals first
528
- with st.spinner("Processing goals..."):
529
- for i, goal in enumerate(goals):
530
- vector = vectorizer.get_embedding(goal)
531
- if vector:
532
- goal_vectors[f"Goal {i+1}"] = vector
533
- vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)
534
-
535
- # Process documents
536
- with st.spinner("Processing documents..."):
537
- for file in uploaded_files:
538
- st.markdown(f"### Analysis for {file.name}")
539
-
540
- if vectorizer.vector_exists(file.name):
541
- st.info(f"Vector already exists for {file.name}")
542
- existing_doc = vectorizer.vectors_collection.find_one(
543
- {"name": file.name}
544
- )
545
- doc_vectors[file.name] = existing_doc["vector"]
546
- else:
547
- text = analyzer.extract_text_from_file(file)
548
- if not text:
549
- st.warning(f"Could not extract text from {file.name}")
550
- continue
551
-
552
- vector = vectorizer.get_embedding(text)
553
- if vector:
554
- doc_vectors[file.name] = vector
555
- vectorizer.store_vector(file.name, vector, text)
556
-
557
- # Display goal similarities
558
- st.subheader("Goal Relevance Scores")
559
- col1, col2 = st.columns([1, 2])
560
-
561
- with col1:
562
- for goal_name, goal_vector in goal_vectors.items():
563
- similarity = (
564
- vectorizer.calculate_similarity(
565
- doc_vectors[file.name], goal_vector
566
- )
567
- * 100
568
- )
569
- st.metric(f"{goal_name}", f"{similarity:.1f}%")
570
-
571
- with col2:
572
- # Get analysis for all goals combined
573
- analysis = asyncio.run(
574
- analyzer.get_perplexity_analysis(text, " | ".join(goals))
575
- )
576
- display_analysis_results(analysis)
577
-
578
- st.divider()
579
-
580
- # Document similarity matrix
581
- if len(doc_vectors) > 1:
582
- st.markdown("### Document Similarity Matrix")
583
- files = list(doc_vectors.keys())
584
- similarity_matrix = []
585
-
586
- for file1 in files:
587
- row = []
588
- for file2 in files:
589
- similarity = vectorizer.calculate_similarity(
590
- doc_vectors[file1], doc_vectors[file2]
591
- )
592
- row.append(similarity)
593
- similarity_matrix.append(row)
594
-
595
- df = pd.DataFrame(similarity_matrix, columns=files, index=files)
596
- st.dataframe(df.style.background_gradient(cmap="RdYlGn"))
597
-
598
- # Add goal-document similarity matrix
599
- st.markdown("### Goal-Document Similarity Matrix")
600
- goal_doc_matrix = []
601
- goal_names = list(goal_vectors.keys())
602
-
603
- for file in files:
604
- row = []
605
- for goal in goal_names:
606
- similarity = vectorizer.calculate_similarity(
607
- doc_vectors[file], goal_vectors[goal]
608
- )
609
- row.append(similarity)
610
- goal_doc_matrix.append(row)
611
-
612
- df_goals = pd.DataFrame(
613
- goal_doc_matrix, columns=goal_names, index=files
614
- )
615
- st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))
616
-
617
- # Keep existing similarity search functionality
618
- elif search_button:
619
- vectorizer = DocumentVectorizer()
620
- with st.spinner("Searching similar documents..."):
621
- query_vector = vectorizer.get_embedding(search_text)
622
- if query_vector:
623
- similar_docs = vectorizer.vector_search(query_vector, search_limit)
624
-
625
- if similar_docs:
626
- st.markdown("### Similar Documents Found")
627
-
628
- # Create DataFrame with numeric similarities
629
- df = pd.DataFrame(similar_docs)
630
-
631
- # Apply gradient to numeric column
632
- styled_df = df[["name", "similarity"]].style.background_gradient(
633
- cmap="RdYlGn", subset=["similarity"]
634
- )
635
-
636
- # Format display after styling
637
- styled_df = styled_df.format({"similarity": "{:.1%}"})
638
-
639
- st.dataframe(styled_df)
640
-
641
- # Show document contents
642
- for doc in similar_docs:
643
- with st.expander(
644
- f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
645
- ):
646
- st.text(
647
- doc["text"][:20] + "..."
648
- if len(doc["text"]) > 20
649
- else doc["text"]
650
- )
651
- else:
652
- st.info("No similar documents found")
653
- else:
654
- st.error("Could not process search query")
655
-
656
-
657
- if __name__ == "__main__":
658
- main()
 
1
+ import streamlit as st
2
+ from typing import List, Dict
3
+ import httpx
4
+ from pathlib import Path
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import json
8
+ import numpy as np
9
+ from pymongo import MongoClient
10
+ from openai import OpenAI
11
+ from datetime import datetime
12
+ import asyncio
13
+ import pandas as pd
14
+
15
+ # Load environment variables
16
+ load_dotenv()
17
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
18
+ MONGODB_URI = os.getenv("MONGO_URI")
19
+ OPENAI_API_KEY = os.getenv("OPENAI_KEY")
20
+
21
+ # Initialize MongoDB client
22
+ client = MongoClient(MONGODB_URI)
23
+ db = client["document_analysis"]
24
+ vectors_collection = db["document_vectors"]
25
+
26
+ # Initialize OpenAI client
27
+ openai_client = OpenAI(api_key=OPENAI_API_KEY)
28
+
29
+
30
class GoalAnalyzer:
    """Analyze document text against user goals through the Perplexity chat API."""

    def __init__(self):
        self.api_key = PERPLEXITY_API_KEY
        self.base_url = "https://api.perplexity.ai/chat/completions"

    def clean_json_string(self, content: str) -> str:
        """Extract the JSON object embedded in a model reply.

        Strips a surrounding markdown code fence, keeps only the span between
        the first ``{`` and the last ``}``, and removes newlines.  Single
        quotes are converted to double quotes only when the extracted text is
        not already valid JSON, so apostrophes inside valid string values
        (e.g. ``"user's goal"``) are preserved.

        Args:
            content: Raw text returned by the model.

        Returns:
            The cleaned string; it is not guaranteed to be valid JSON.
        """
        # Remove markdown formatting
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1]

        # Find the JSON object boundaries
        start_idx = content.find("{")
        end_idx = content.rfind("}") + 1
        if start_idx != -1 and end_idx > start_idx:
            content = content[start_idx:end_idx]

        content = content.strip().replace("\n", "")

        try:
            json.loads(content)
        except ValueError:
            # Fall back to the quote repair only for replies that do not
            # parse as-is (e.g. Python-style single-quoted dicts).
            content = content.replace("'", '"')

        return content

    async def get_perplexity_analysis(self, text: str, goal: str) -> Dict:
        """Ask Perplexity for themes, subthemes, keywords and a relevance score.

        Args:
            text: Document text to analyze.
            goal: Goal description the text is analyzed against.

        Returns:
            A dict with the keys ``themes``, ``subthemes``, ``keywords`` and
            ``relevance_score``; a placeholder dict when the reply cannot be
            parsed as JSON; ``None`` when the request itself fails.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        prompt = f"""
        Analyze the following text in context of the goal: {goal}

        Text: {text}

        Provide analysis in the following JSON format:
        {{
            "themes": ["theme1", "theme2"],
            "subthemes": {{"theme1": ["subtheme1", "subtheme2"], "theme2": ["subtheme3"]}},
            "keywords": ["keyword1", "keyword2"],
            "relevance_score": 0-100
        }}
        """

        try:
            async with httpx.AsyncClient() as http_client:
                payload = {
                    "model": "llama-3.1-sonar-small-128k-chat",  # Updated to supported model
                    "messages": [
                        {
                            "role": "system",
                            "content": "You are an AI assistant that analyzes documents and provides structured analysis.",
                        },
                        {"role": "user", "content": prompt},
                    ],
                    "max_tokens": 1024,
                }

                # Debug info using expander
                with st.expander("Debug Info", expanded=False):
                    st.write("Request payload:", payload)

                response = await http_client.post(
                    self.base_url, headers=headers, json=payload, timeout=30.0
                )

                # Debug response info
                with st.expander("Response Info", expanded=False):
                    st.write("Response status:", response.status_code)
                    st.write("Response headers:", dict(response.headers))
                    st.write("Response content:", response.text)

                if response.status_code != 200:
                    error_detail = (
                        response.json() if response.content else "No error details"
                    )
                    raise Exception(
                        f"API returned status code {response.status_code}. Details: {error_detail}"
                    )

                result = response.json()
                content = (
                    result.get("choices", [{}])[0].get("message", {}).get("content", "")
                )

                # Clean and parse JSON
                cleaned_content = self.clean_json_string(content)

                try:
                    analysis = json.loads(cleaned_content)

                    # Fill in missing fields with a default of the right type.
                    # "subthemes" must be a mapping because the display code
                    # looks subthemes up by theme name.
                    field_defaults = {
                        "themes": [],
                        "subthemes": {},
                        "keywords": [],
                        "relevance_score": 0,
                    }
                    for field, default in field_defaults.items():
                        if field not in analysis:
                            analysis[field] = default

                    return analysis

                except json.JSONDecodeError as e:
                    st.error(f"JSON parsing error: {str(e)}")
                    st.error(f"Failed content: {cleaned_content}")
                    return {
                        "themes": ["Error parsing themes"],
                        "subthemes": {"Error": ["Failed to parse subthemes"]},
                        "keywords": ["parsing-error"],
                        "relevance_score": 0,
                    }

        except Exception as e:
            # Top-level boundary for the request: report in the UI and let the
            # caller treat None as "no analysis available".
            st.error(f"API Error: {str(e)}")
            return None

    def extract_text_from_file(self, file) -> str:
        """Extract text content from an uploaded file.

        Supports plain text, PDF and DOCX, selected by ``file.type``.

        Args:
            file: Uploaded file object exposing ``type`` and, for plain text,
                ``getvalue()``.

        Returns:
            The extracted text, or ``""`` for an unsupported type or when
            extraction raises.
        """
        try:
            text = ""
            file_type = file.type

            if file_type == "text/plain":
                text = file.getvalue().decode("utf-8")
            elif file_type == "application/pdf":
                import PyPDF2

                pdf_reader = PyPDF2.PdfReader(file)
                for page in pdf_reader.pages:
                    text += page.extract_text()
            elif (
                file_type
                == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            ):
                import docx

                doc = docx.Document(file)
                text = " ".join([paragraph.text for paragraph in doc.paragraphs])

            return text
        except Exception as e:
            st.error(f"Error extracting text: {str(e)}")
            return ""
179
+
180
+
181
class DocumentVectorizer:
    """Create OpenAI embeddings and store/search them in MongoDB."""

    def __init__(self):
        self.model = "text-embedding-ada-002"
        self.client = MongoClient(MONGODB_URI)
        self.db = self.client["document_analysis"]
        self.vectors_collection = self.db["document_vectors"]

        # Create vector search index if it doesn't exist
        # NOTE(review): a "2dsphere" index is a geospatial index and the
        # options dict is passed positionally; this call appears to fail and
        # fall into the warning below every time. vector_search() does not
        # rely on an index, so the call is left untouched - confirm and remove.
        try:
            self.vectors_collection.create_index(
                [("vector", "2dsphere")],  # Changed to 2dsphere for vector indexing
                {
                    "vectorSearchConfig": {
                        "dimensions": 1536,  # OpenAI embedding dimensions
                        "similarity": "cosine",
                    }
                },
            )
        except Exception:
            st.warning("Vector index may already exist")

    def get_embedding(self, text: str) -> list:
        """Return the OpenAI embedding for ``text``, or ``None`` on failure."""
        try:
            response = openai_client.embeddings.create(model=self.model, input=text)
            return response.data[0].embedding
        except Exception as e:
            st.error(f"Error getting embedding: {str(e)}")
            return None

    def vector_exists(self, doc_name: str) -> bool:
        """Return True when a stored entry named ``doc_name`` exists."""
        return self.vectors_collection.count_documents({"name": doc_name}) > 0

    def store_vector(self, doc_name: str, vector: list, text: str, goal: str = None):
        """Upsert a document or goal vector keyed by ``doc_name``.

        Args:
            doc_name: Unique name of the entry.
            vector: Embedding to store.
            text: Source text of the embedding.
            goal: Goal text; when given the entry is stored with type "goal",
                otherwise with type "document".
        """
        try:
            # One timestamp so created_at equals updated_at on first insert.
            now = datetime.utcnow()
            vector_doc = {
                "name": doc_name,
                "vector": vector,
                "text": text,
                "type": "document" if goal is None else "goal",
                "goal": goal,
                "updated_at": now,
            }

            self.vectors_collection.update_one(
                {"name": doc_name},
                {"$set": vector_doc, "$setOnInsert": {"created_at": now}},
                upsert=True,
            )

        except Exception as e:
            st.error(f"Error storing vector: {str(e)}")

    def vector_search(self, query_vector: List[float], limit: int = 5) -> List[Dict]:
        """Return the ``limit`` stored documents most similar to ``query_vector``.

        Similarity is computed in Python over every stored entry of type
        "document".  Each result has the keys ``name``, ``text``,
        ``similarity`` (float) and ``similarity_display`` (percentage string).
        Returns ``[]`` when the search raises.
        """
        try:
            documents = list(self.vectors_collection.find({"type": "document"}))

            similarities = []
            for doc in documents:
                similarity = self.calculate_similarity(query_vector, doc["vector"])
                similarities.append(
                    {
                        "name": doc["name"],
                        "text": doc["text"],
                        "similarity": similarity,  # Keep as float
                        "similarity_display": f"{similarity*100:.1f}%",
                    }
                )

            # Highest similarity first, truncated to the requested count.
            return sorted(
                similarities,
                key=lambda item: item["similarity"],
                reverse=True,
            )[:limit]

        except Exception as e:
            st.error(f"Vector search error: {str(e)}")
            return []

    def find_similar_documents(self, text: str, limit: int = 5) -> List[Dict]:
        """Embed ``text`` and return its most similar stored documents."""
        vector = self.get_embedding(text)
        if vector:
            return self.vector_search(vector, limit)
        return []

    def calculate_similarity(self, vector1: list, vector2: list) -> float:
        """Return the cosine similarity of two vectors.

        Returns 0.0 when either vector has zero norm, instead of dividing by
        zero and producing NaN.
        """
        denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
        if denominator == 0:
            return 0.0
        return np.dot(vector1, vector2) / denominator
284
+
285
+
286
def display_analysis_results(analysis: Dict):
    """Display analysis results in the Streamlit UI.

    Renders one expander per theme (listing its subthemes), the keyword list
    and the relevance score.  Does nothing when ``analysis`` is empty or None.

    Args:
        analysis: Dict with optional keys ``themes``, ``subthemes``,
            ``keywords`` and ``relevance_score``.
    """
    if not analysis:
        return

    # Subthemes are looked up by theme name, so anything other than a mapping
    # (e.g. a list default) is treated as "no subthemes" rather than crashing.
    subthemes_by_theme = analysis.get("subthemes", {})
    if not isinstance(subthemes_by_theme, dict):
        subthemes_by_theme = {}

    # Display Themes
    st.subheader("Themes")
    for theme in analysis.get("themes", []):
        with st.expander(f"🎯 {theme}"):
            subthemes = subthemes_by_theme.get(theme, [])
            if subthemes:
                st.write("**Subthemes:**")
                for subtheme in subthemes:
                    st.write(f"- {subtheme}")

    # Display Keywords
    st.subheader("Keywords")
    keywords = analysis.get("keywords", [])
    st.write(" | ".join([f"🔑 {keyword}" for keyword in keywords]))

    # Display Relevance Score
    score = analysis.get("relevance_score", 0)
    st.metric("Relevance Score", f"{score}%")
310
+
311
+
312
def display_analyst_dashboard():
    """Render the analyst dashboard: goal/document analysis and similarity search.

    The sidebar collects the goals, the uploaded documents, a free-text
    similarity query and a logout button.  "Analyze Documents" embeds goals
    and documents, shows per-document goal relevance and the Perplexity
    analysis, then the similarity matrices.  "Search Similar" lists stored
    documents closest to the query text.
    """
    st.title("Multi-Goal Document Analysis")

    with st.sidebar:
        st.markdown("### Input Section")
        tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])

        with tab1:
            # Multiple goals input
            num_goals = st.number_input("Number of goals:", min_value=1, value=1)
            goals = []
            for i in range(num_goals):
                goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
                if goal:
                    goals.append(goal)

            uploaded_files = st.file_uploader(
                "Upload documents",
                accept_multiple_files=True,
                type=["txt", "pdf", "docx"],
            )
            # The button is only offered once there is something to analyze.
            analyze_button = (
                st.button("Analyze Documents") if goals and uploaded_files else None
            )

        with tab2:
            search_text = st.text_area("Enter text to find similar documents:")
            search_limit = st.slider("Number of results", 1, 10, 5)
            search_button = st.button("Search Similar") if search_text else None

        if st.button("Logout", use_container_width=True):
            for key in st.session_state.keys():
                del st.session_state[key]
            st.rerun()

    if analyze_button:
        analyzer = GoalAnalyzer()
        vectorizer = DocumentVectorizer()

        # Store vectors
        doc_vectors = {}
        goal_vectors = {}

        # Process goals first
        with st.spinner("Processing goals..."):
            for i, goal in enumerate(goals):
                vector = vectorizer.get_embedding(goal)
                if vector:
                    goal_vectors[f"Goal {i+1}"] = vector
                    vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)

        # Process documents
        with st.spinner("Processing documents..."):
            for file in uploaded_files:
                st.markdown(f"### Analysis for {file.name}")

                if vectorizer.vector_exists(file.name):
                    st.info(f"Vector already exists for {file.name}")
                    existing_doc = vectorizer.vectors_collection.find_one(
                        {"name": file.name}
                    )
                    doc_vectors[file.name] = existing_doc["vector"]
                    # Reuse the stored text so the analysis below runs on this
                    # document rather than on an undefined or stale `text`.
                    text = existing_doc.get("text", "")
                else:
                    text = analyzer.extract_text_from_file(file)
                    if not text:
                        st.warning(f"Could not extract text from {file.name}")
                        continue

                    vector = vectorizer.get_embedding(text)
                    if not vector:
                        # get_embedding already reported the error; without a
                        # vector there is nothing to score for this file.
                        continue
                    doc_vectors[file.name] = vector
                    vectorizer.store_vector(file.name, vector, text)

                # Display goal similarities
                st.subheader("Goal Relevance Scores")
                col1, col2 = st.columns([1, 2])

                with col1:
                    for goal_name, goal_vector in goal_vectors.items():
                        similarity = (
                            vectorizer.calculate_similarity(
                                doc_vectors[file.name], goal_vector
                            )
                            * 100
                        )
                        st.metric(f"{goal_name}", f"{similarity:.1f}%")

                with col2:
                    # Get analysis for all goals combined
                    analysis = asyncio.run(
                        analyzer.get_perplexity_analysis(text, " | ".join(goals))
                    )
                    display_analysis_results(analysis)

                st.divider()

        # Document similarity matrix
        if len(doc_vectors) > 1:
            st.markdown("### Document Similarity Matrix")
            files = list(doc_vectors.keys())
            similarity_matrix = [
                [
                    vectorizer.calculate_similarity(
                        doc_vectors[file1], doc_vectors[file2]
                    )
                    for file2 in files
                ]
                for file1 in files
            ]

            df = pd.DataFrame(similarity_matrix, columns=files, index=files)
            st.dataframe(df.style.background_gradient(cmap="RdYlGn"))

            # Add goal-document similarity matrix
            st.markdown("### Goal-Document Similarity Matrix")
            goal_names = list(goal_vectors.keys())
            goal_doc_matrix = [
                [
                    vectorizer.calculate_similarity(
                        doc_vectors[file_name], goal_vectors[goal_name]
                    )
                    for goal_name in goal_names
                ]
                for file_name in files
            ]

            df_goals = pd.DataFrame(
                goal_doc_matrix, columns=goal_names, index=files
            )
            st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))

    elif search_button:
        vectorizer = DocumentVectorizer()
        with st.spinner("Searching similar documents..."):
            query_vector = vectorizer.get_embedding(search_text)
            if query_vector:
                similar_docs = vectorizer.vector_search(query_vector, search_limit)

                if similar_docs:
                    st.markdown("### Similar Documents Found")

                    # Create DataFrame with numeric similarities
                    df = pd.DataFrame(similar_docs)

                    # Apply gradient to numeric column
                    styled_df = df[["name", "similarity"]].style.background_gradient(
                        cmap="RdYlGn", subset=["similarity"]
                    )

                    # Format display after styling
                    styled_df = styled_df.format({"similarity": "{:.1%}"})

                    st.dataframe(styled_df)

                    # Show a short preview of each document's text
                    for doc in similar_docs:
                        with st.expander(
                            f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
                        ):
                            st.text(
                                doc["text"][:20] + "..."
                                if len(doc["text"]) > 20
                                else doc["text"]
                            )
                else:
                    st.info("No similar documents found")
            else:
                st.error("Could not process search query")
485
+
486
+
487
def main():
    """Run the standalone goal/document analysis page.

    The sidebar collects the goals, the uploaded documents and a free-text
    similarity query.  "Analyze Documents" embeds goals and documents, shows
    per-document goal relevance and the Perplexity analysis, then the
    similarity matrices.  "Search Similar" lists stored documents closest to
    the query text.
    """
    st.title("Multi-Goal Document Analysis")

    with st.sidebar:
        st.markdown("### Input Section")
        tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])

        with tab1:
            # Multiple goals input
            num_goals = st.number_input("Number of goals:", min_value=1, value=1)
            goals = []
            for i in range(num_goals):
                goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
                if goal:
                    goals.append(goal)

            uploaded_files = st.file_uploader(
                "Upload documents",
                accept_multiple_files=True,
                type=["txt", "pdf", "docx"],
            )
            # The button is only offered once there is something to analyze.
            analyze_button = (
                st.button("Analyze Documents") if goals and uploaded_files else None
            )

        with tab2:
            search_text = st.text_area("Enter text to find similar documents:")
            search_limit = st.slider("Number of results", 1, 10, 5)
            search_button = st.button("Search Similar") if search_text else None

    if analyze_button:
        analyzer = GoalAnalyzer()
        vectorizer = DocumentVectorizer()

        # Store vectors
        doc_vectors = {}
        goal_vectors = {}

        # Process goals first
        with st.spinner("Processing goals..."):
            for i, goal in enumerate(goals):
                vector = vectorizer.get_embedding(goal)
                if vector:
                    goal_vectors[f"Goal {i+1}"] = vector
                    vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)

        # Process documents
        with st.spinner("Processing documents..."):
            for file in uploaded_files:
                st.markdown(f"### Analysis for {file.name}")

                if vectorizer.vector_exists(file.name):
                    st.info(f"Vector already exists for {file.name}")
                    existing_doc = vectorizer.vectors_collection.find_one(
                        {"name": file.name}
                    )
                    doc_vectors[file.name] = existing_doc["vector"]
                    # Reuse the stored text so the analysis below runs on this
                    # document rather than on an undefined or stale `text`.
                    text = existing_doc.get("text", "")
                else:
                    text = analyzer.extract_text_from_file(file)
                    if not text:
                        st.warning(f"Could not extract text from {file.name}")
                        continue

                    vector = vectorizer.get_embedding(text)
                    if not vector:
                        # get_embedding already reported the error; without a
                        # vector there is nothing to score for this file.
                        continue
                    doc_vectors[file.name] = vector
                    vectorizer.store_vector(file.name, vector, text)

                # Display goal similarities
                st.subheader("Goal Relevance Scores")
                col1, col2 = st.columns([1, 2])

                with col1:
                    for goal_name, goal_vector in goal_vectors.items():
                        similarity = (
                            vectorizer.calculate_similarity(
                                doc_vectors[file.name], goal_vector
                            )
                            * 100
                        )
                        st.metric(f"{goal_name}", f"{similarity:.1f}%")

                with col2:
                    # Get analysis for all goals combined
                    analysis = asyncio.run(
                        analyzer.get_perplexity_analysis(text, " | ".join(goals))
                    )
                    display_analysis_results(analysis)

                st.divider()

        # Document similarity matrix
        if len(doc_vectors) > 1:
            st.markdown("### Document Similarity Matrix")
            files = list(doc_vectors.keys())
            similarity_matrix = [
                [
                    vectorizer.calculate_similarity(
                        doc_vectors[file1], doc_vectors[file2]
                    )
                    for file2 in files
                ]
                for file1 in files
            ]

            df = pd.DataFrame(similarity_matrix, columns=files, index=files)
            st.dataframe(df.style.background_gradient(cmap="RdYlGn"))

            # Add goal-document similarity matrix
            st.markdown("### Goal-Document Similarity Matrix")
            goal_names = list(goal_vectors.keys())
            goal_doc_matrix = [
                [
                    vectorizer.calculate_similarity(
                        doc_vectors[file_name], goal_vectors[goal_name]
                    )
                    for goal_name in goal_names
                ]
                for file_name in files
            ]

            df_goals = pd.DataFrame(
                goal_doc_matrix, columns=goal_names, index=files
            )
            st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))

    elif search_button:
        vectorizer = DocumentVectorizer()
        with st.spinner("Searching similar documents..."):
            query_vector = vectorizer.get_embedding(search_text)
            if query_vector:
                similar_docs = vectorizer.vector_search(query_vector, search_limit)

                if similar_docs:
                    st.markdown("### Similar Documents Found")

                    # Create DataFrame with numeric similarities
                    df = pd.DataFrame(similar_docs)

                    # Apply gradient to numeric column
                    styled_df = df[["name", "similarity"]].style.background_gradient(
                        cmap="RdYlGn", subset=["similarity"]
                    )

                    # Format display after styling
                    styled_df = styled_df.format({"similarity": "{:.1%}"})

                    st.dataframe(styled_df)

                    # Show a short preview of each document's text
                    for doc in similar_docs:
                        with st.expander(
                            f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
                        ):
                            st.text(
                                doc["text"][:20] + "..."
                                if len(doc["text"]) > 20
                                else doc["text"]
                            )
                else:
                    st.info("No similar documents found")
            else:
                st.error("Could not process search query")
655
+
656
+
657
+ if __name__ == "__main__":
658
+ main()
infranew.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import networkx as nx
4
+ from bokeh.models import HoverTool
5
+ from bokeh.plotting import figure, from_networkx
6
+ import requests
7
+ import json
8
+ import google.generativeai as genai
9
+
10
import os

# The API key is a secret and must not live in source control: read it from
# the environment instead. PERPLEXITY_KEY is accepted as a fallback name.
# An empty string means "not configured"; requests will then be rejected by
# the API and reported by search_papers.
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY") or os.getenv("PERPLEXITY_KEY", "")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
+
13
+
14
def extract_edges(keywords):
    """Return every pair of comma-separated keywords as graph edges.

    The input string is split on commas and each term is stripped; the
    result lists each (earlier, later) pair once, in input order.
    """
    terms = [term.strip() for term in keywords.split(",")]
    pairs = []
    for position, left in enumerate(terms):
        for right in terms[position + 1:]:
            pairs.append((left, right))
    return pairs
22
+
23
+
24
def create_knowledge_graph(data):
    """Build a co-occurrence graph from the rows of a DataFrame.

    Every non-null cell is converted to a string and tokenized on whitespace
    and commas.  All tokens of one row are connected pairwise, and every
    token node carries ``title`` (the token) and ``value`` (its length).

    Args:
        data: DataFrame whose cells provide the tokens.

    Returns:
        An undirected ``networkx.Graph``.
    """
    G = nx.Graph()

    for _, row in data.iterrows():
        words = []
        for col in data.columns:
            if pd.notnull(row[col]):
                # Convert to string and handle numeric values
                cell_value = str(row[col]).strip()
                if cell_value:
                    # Treat commas as separators too. extract_edges() splits
                    # on commas, so a token such as "learning," would
                    # otherwise yield an empty-string node plus a
                    # disconnected "learning," node.
                    words.extend(cell_value.replace(",", " ").split())

        if not words:
            continue

        G.add_edges_from(extract_edges(",".join(words)))

        # add_node also updates attributes of nodes that add_edges_from
        # already created, so every token gets title/value.
        for word in words:
            G.add_node(word, title=word, value=len(word))

    return G
46
+
47
+
48
def render_graph_bokeh(G):
    """Return a Bokeh figure that draws ``G`` with a spring layout.

    Nodes are blue circles with a black outline, edges thin gray lines, and
    hovering a node shows its index.
    """
    figure_options = dict(
        title="Interactive Knowledge Graph",
        x_range=(-1.5, 1.5),
        y_range=(-1.5, 1.5),
        tools="pan,wheel_zoom,box_zoom,reset,tap",
        active_scroll="wheel_zoom",
    )
    plot = figure(**figure_options)
    plot.add_tools(HoverTool(tooltips="@index"))

    graph_renderer = from_networkx(G, nx.spring_layout, scale=1, center=(0, 0))

    # Node appearance
    node_glyph = graph_renderer.node_renderer.glyph
    node_glyph.size = 10
    node_glyph.fill_color = "blue"
    node_glyph.line_color = "black"

    # Edge appearance
    edge_glyph = graph_renderer.edge_renderer.glyph
    edge_glyph.line_width = 1
    edge_glyph.line_color = "gray"

    plot.renderers.append(graph_renderer)
    return plot
70
+
71
+
72
+ import re
73
+
74
+
75
def search_papers(topic: str, num_papers: int) -> list:
    """Ask the Perplexity API for recent papers on ``topic``.

    Args:
        topic: Subject to search for.
        num_papers: Number of papers requested in the prompt.

    Returns:
        The parsed JSON array from the model reply (the prompt asks for
        objects with ``Title``, ``Abstract`` and ``Keywords``), or ``[]``
        when the request fails or the reply cannot be parsed.  Errors are
        reported through ``st.error``.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    prompt = f"""Find {num_papers} recent research papers about {topic}.
    Return ONLY a valid JSON array with the following structure for each paper:
    [
        {{
            "Title": "paper title",
            "Abstract": "abstract text",
            "Keywords": "key terms"
        }}
    ]"""

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [
            {
                "role": "system",
                "content": "You are a research paper analyzer that returns valid JSON arrays.",
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.1,
    }

    try:
        # A timeout keeps the Streamlit script from hanging indefinitely on
        # an unresponsive API; a timeout surfaces as a RequestException.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=30
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]

        # Keep only the outermost [...] span of the reply.
        content = content.strip()
        start = content.find("[")
        end = content.rfind("]")
        if start == -1 or end < start:
            raise ValueError("Response does not contain a JSON array")
        content = content[start : end + 1]

        # Remove any trailing commas before closing brackets
        content = re.sub(r",\s*]", "]", content)
        content = re.sub(r",\s*}", "}", content)

        papers = json.loads(content)
        if not isinstance(papers, list):
            raise ValueError("Response is not a JSON array")
        return papers
    except requests.exceptions.RequestException as e:
        st.error(f"API Request Error: {str(e)}")
        return []
    except json.JSONDecodeError as e:
        st.error(f"Invalid JSON response: {str(e)}")
        st.error(f"Response content: {response.text}")
        return []
    except (KeyError, IndexError, TypeError) as e:
        # The reply did not have the choices[0].message.content shape.
        st.error(f"Unexpected API response format: {str(e)}")
        return []
    except ValueError as e:
        st.error(f"Error: {str(e)}")
        return []
133
+
134
+
135
+ import os
136
+
137
# Gemini API key taken from the environment; None when the variable is unset.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# NOTE(review): this URL is an OpenAI completions endpoint, not a Gemini one,
# and call_gemini_api goes through the google.generativeai client rather than
# using it - confirm whether this constant can be removed.
GEMINI_API_URL = "https://api.openai.com/v1/engines/davinci-codex/completions"
139
+
140
+
141
def call_gemini_api(prompt: str) -> str:
    """Generate text for ``prompt`` with the Gemini "gemini-pro" model.

    Args:
        prompt: Prompt text sent to the model.

    Returns:
        The generated text, or ``""`` when the call fails (the error is
        reported through ``st.error``).
    """
    try:
        # Hand the key read from the environment to the client; when it is
        # unset the client library's own configuration is left untouched.
        if GEMINI_API_KEY:
            genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel("gemini-pro")
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        st.error(f"Gemini API Error: {str(e)}")
        return ""
160
+
161
+
162
def generate_gaps_paragraph(gaps):
    """Ask Gemini for a short paragraph describing the given research gaps."""
    joined_gaps = ", ".join(gaps)
    prompt = (
        "Generate a brief paragraph about the gaps in the research "
        f"based on the following gaps: {joined_gaps}"
    )
    return call_gemini_api(prompt)
165
+
166
+
167
def generate_insights(G, topic):
    """Show paper search results and degree-based insights for graph ``G``.

    Args:
        G: Knowledge graph to inspect.
        topic: Topic passed to ``search_papers``.

    Nodes whose degree exceeds 10% of the node count are listed as strong
    points, those below 5% as weak points, and nodes without neighbors as
    gaps.  When gaps exist, a generated paragraph about them is shown.
    """
    papers = search_papers(topic, 5)
    if papers:
        st.write("### Research Insights from Perplexity API")
        for paper in papers:
            # The papers come from model-generated JSON, so entries may be
            # incomplete or not objects at all; skip or fill in rather than
            # abort the whole page with a KeyError.
            if not isinstance(paper, dict):
                continue
            st.write(f"**Title:** {paper.get('Title', 'N/A')}")
            st.write(f"**Abstract:** {paper.get('Abstract', 'N/A')}")
            st.write(f"**Keywords:** {paper.get('Keywords', 'N/A')}")
            st.write("---")

    node_count = len(G.nodes)
    node_names = list(G.nodes)
    insights = {
        "Strong Points": [n for n in node_names if G.degree(n) > node_count * 0.1],
        "Weak Points": [n for n in node_names if G.degree(n) < node_count * 0.05],
        "Gaps": [n for n in node_names if len(list(nx.neighbors(G, n))) == 0],
    }

    st.write("### Graph-Based Insights")
    st.write("**Strong Points:**", insights["Strong Points"])
    st.write("**Weak Points:**", insights["Weak Points"])
    st.write("**Gaps:**", insights["Gaps"])

    if insights["Gaps"]:
        with st.spinner("Generating insights about gaps..."):
            gaps_paragraph = generate_gaps_paragraph(insights["Gaps"])
            if gaps_paragraph:
                st.write("### Gaps in Research")
                st.write(gaps_paragraph)
196
+
197
+
198
def main():
    """Streamlit page: upload a CSV, draw its knowledge graph, show insights."""
    st.title("Advanced Interactive Knowledge Graph")
    st.write(
        "Upload a CSV file to generate a fully interactive and insightful knowledge graph."
    )

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

    # Nothing uploaded yet: prompt the user and stop.
    if uploaded_file is None:
        st.info("Please upload a CSV file to get started.")
        return

    try:
        data = pd.read_csv(uploaded_file)
        st.write("Preview of the uploaded data:")
        st.dataframe(data.head())

        G = create_knowledge_graph(data)

        st.write("Generated Knowledge Graph:")
        st.bokeh_chart(render_graph_bokeh(G), use_container_width=True)

        topic = st.text_input(
            "Enter a topic for additional insights:", "knowledge graphs"
        )
        if topic:
            generate_insights(G, topic)

    except Exception as e:
        st.error(f"An error occurred while processing the file: {e}")
228
+
229
+
230
+ if __name__ == "__main__":
231
+ main()
keywords_database_download.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from pymongo import MongoClient
4
+ from dotenv import load_dotenv
5
+ import os
6
+ import json
7
+ import re
8
+
9
+ # 1. Load environment variables
10
+ load_dotenv()
11
# The connection string contains credentials, so it is read from the
# environment only and never hard-coded. "MONGODB_UR" is still honored
# because that was the variable name previously looked up here.
# NOTE(review): when neither variable is set, MongoClient receives None and
# uses its default host - confirm that is acceptable for deployments.
MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv("MONGODB_UR")
# 2. Create MongoDB connection
client = MongoClient(MONGODB_URI)
db = client["novascholar_db"]
collection = db["research_papers"]
19
+
20
+
21
def convert_mixed_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Flatten list-valued columns into strings so they export cleanly to CSV.

    For each column holding at least one list, lists become comma-separated
    strings, other non-null values become ``str(value)`` and nulls become
    ``""``.  Columns without lists are untouched.  ``df`` is modified in
    place and also returned.
    """

    def _as_text(value):
        if isinstance(value, list):
            return ", ".join(str(item) for item in value)
        if pd.notna(value):
            return str(value)
        return ""

    for column_name in df.columns:
        holds_list = False
        for entry in df[column_name].dropna():
            if isinstance(entry, list):
                holds_list = True
                break
        if holds_list:
            df[column_name] = df[column_name].apply(_as_text)

    return df
36
+
37
+
38
def filter_and_export_collection_to_csv(keyword: str, doc_collection=None):
    """Export documents whose 'Keywords' field contains ``keyword`` to CSV.

    The keyword is matched literally and case-insensitively: it is escaped
    before being used as a ``$regex`` so characters such as ``+`` or ``(``
    in user input are not interpreted as regex syntax.

    Args:
        keyword: Text to look for in the 'Keywords' field.
        doc_collection: Collection to query; defaults to the module-level
            ``collection``.

    Returns:
        ``(DataFrame, csv_filename)`` when documents match; the CSV is
        written to "papers_filtered_export.csv" in the working directory.
        ``(empty DataFrame, None)`` when nothing matches.
    """
    # Use the default 'research_papers' collection if none provided
    if doc_collection is None:
        doc_collection = collection

    literal_pattern = re.escape(keyword)
    docs = list(
        doc_collection.find(
            {"Keywords": {"$regex": literal_pattern, "$options": "i"}}
        )
    )
    if not docs:
        # Return an empty DataFrame if no documents found
        return pd.DataFrame(), None

    df = convert_mixed_columns(pd.DataFrame(docs))
    csv_filename = "papers_filtered_export.csv"
    df.to_csv(csv_filename, index=False)
    return df, csv_filename
58
+
59
+
60
def main():
    """Streamlit page: pick a paper type, enter a keyword, export matches to CSV."""
    # st.set_page_config(page_title="Filter and Export Papers", layout="wide")
    st.title("Filter and Export Papers by Keyword")

    # Let user select the paper type
    paper_type_options = [
        "Review Based Paper",
        "Opinion/Perspective Based Paper",
        "Empirical Research Paper",
        "Research Paper (Other)",
    ]
    paper_type = st.selectbox("Select type of research paper:", paper_type_options)

    # Let user enter the keyword to filter
    keyword_input = st.text_input(
        "Enter the exact keyword to filter papers by 'Keywords' field:"
    )

    # Nothing to do until the export button is pressed.
    if not st.button("Export Filtered Papers to CSV"):
        return

    with st.spinner("Exporting filtered documents..."):
        try:
            # The collection name is derived from the selected paper type:
            # spaces become underscores and the result is lower-cased.
            collection_name = paper_type.replace(" ", "_").lower()
            doc_collection = db[collection_name]

            df, csv_filename = filter_and_export_collection_to_csv(
                keyword_input, doc_collection
            )
            found_results = not df.empty and csv_filename
            if found_results:
                st.success(
                    f"Successfully exported filtered papers to {csv_filename}!"
                )
                st.write("Preview of the filtered DataFrame:")
                st.dataframe(df)
            else:
                st.warning("No matching documents found for that keyword.")
        except Exception as e:
            st.error(f"Error exporting filtered papers: {str(e)}")
101
+
102
+
103
+ if __name__ == "__main__":
104
+ main()
live_polls.py CHANGED
@@ -1,115 +1,115 @@
1
- # live_poll_feature.py
2
-
3
- import streamlit as st
4
- import pandas as pd
5
- from datetime import datetime
6
- from poll_db_operations import PollDatabase
7
-
8
- class LivePollFeature:
9
- def __init__(self):
10
- self.db = PollDatabase()
11
-
12
- def display_faculty_interface(self, session_id):
13
- """Display the faculty interface for managing polls"""
14
- st.subheader("Live Polls Management")
15
-
16
- # Create new poll
17
- with st.expander("Create New Poll", expanded=False):
18
- question = st.text_input("Poll Question")
19
-
20
- num_options = st.number_input("Number of Options",
21
- min_value=2,
22
- max_value=6,
23
- value=4)
24
-
25
- options = []
26
- for i in range(num_options):
27
- option = st.text_input(f"Option {i+1}",
28
- key=f"option_{i}")
29
- if option:
30
- options.append(option)
31
-
32
- if st.button("Create Poll") and question and len(options) >= 2:
33
- self.db.create_poll(
34
- st.session_state.selected_course,
35
- session_id,
36
- question,
37
- options,
38
- st.session_state.user_id
39
- )
40
- st.success("Poll created successfully!")
41
- st.rerun()
42
-
43
- # Display active polls
44
- active_polls = self.db.get_active_polls(session_id)
45
- if active_polls:
46
- st.subheader("Active Polls")
47
- for poll in active_polls:
48
- with st.expander(f"Poll: {poll['question']}", expanded=True):
49
- # Display results
50
- self._display_poll_results(poll)
51
-
52
- if st.button("Close Poll",
53
- key=f"close_{str(poll['_id'])}"):
54
- self.db.close_poll(poll['_id'])
55
- st.success("Poll closed successfully!")
56
- st.rerun()
57
-
58
- def display_student_interface(self, session_id):
59
- """Display the student interface for participating in polls"""
60
- st.subheader("Live Polls")
61
-
62
- active_polls = self.db.get_active_polls(session_id)
63
- if not active_polls:
64
- st.info("No active polls at the moment.")
65
- return
66
-
67
- for poll in active_polls:
68
- with st.expander(f"Poll: {poll['question']}", expanded=True):
69
- selected_option = st.radio(
70
- "Your response:",
71
- options=poll['options'],
72
- key=f"poll_{str(poll['_id'])}"
73
- )
74
-
75
- if st.button("Submit Response",
76
- key=f"submit_{str(poll['_id'])}"):
77
- success, message = self.db.submit_response(
78
- poll['_id'],
79
- st.session_state.user_id,
80
- selected_option
81
- )
82
- if success:
83
- st.success(message)
84
- else:
85
- st.warning(message)
86
- st.rerun()
87
-
88
- # self._display_poll_results(poll)
89
-
90
- def _display_poll_results(self, poll):
91
- """Helper method to display poll results"""
92
- responses_df = pd.DataFrame(
93
- list(poll['responses'].items()),
94
- columns=['Option', 'Votes']
95
- )
96
-
97
- total_votes = responses_df['Votes'].sum()
98
-
99
- # Calculate percentages
100
- if total_votes > 0:
101
- responses_df['Percentage'] = (
102
- responses_df['Votes'] / total_votes * 100
103
- ).round(1)
104
- else:
105
- responses_df['Percentage'] = 0
106
-
107
- # Display metrics
108
- st.metric("Total Responses", total_votes)
109
-
110
- # Display charts
111
- st.bar_chart(responses_df.set_index('Option')['Votes'])
112
-
113
- # Display detailed statistics
114
- if st.session_state.user_type == 'faculty':
115
  st.dataframe(responses_df)
 
1
+ # live_poll_feature.py
2
+
3
+ import streamlit as st
4
+ import pandas as pd
5
+ from datetime import datetime
6
+ from poll_db_operations import PollDatabase
7
+
8
class LivePollFeature:
    """Streamlit UI for creating, answering and reviewing live session polls.

    All persistence goes through a ``PollDatabase`` instance held in
    ``self.db``; the current user and course are read from
    ``st.session_state``.
    """

    def __init__(self):
        self.db = PollDatabase()

    def display_faculty_interface(self, session_id):
        """Display the faculty interface for managing polls.

        Shows a form for creating a poll in ``session_id`` and, below it,
        every active poll with its current results and a close button.
        """
        st.subheader("Live Polls Management")

        # Create new poll
        with st.expander("Create New Poll", expanded=False):
            question = st.text_input("Poll Question")

            num_options = st.number_input("Number of Options",
                                          min_value=2,
                                          max_value=6,
                                          value=4)

            options = []
            for i in range(int(num_options)):
                option = st.text_input(f"Option {i+1}",
                                       key=f"option_{i}")
                # Ignore blank / whitespace-only entries.
                if option and option.strip():
                    options.append(option.strip())

            if st.button("Create Poll"):
                # Tell the user why nothing happened instead of silently
                # ignoring an invalid click.
                if not question or not question.strip():
                    st.warning("Please enter a poll question.")
                elif len(options) < 2:
                    st.warning("Please provide at least two options.")
                elif len(set(options)) != len(options):
                    # Vote counts are keyed by option text, so duplicates
                    # would collapse into a single tally.
                    st.warning("Poll options must be unique.")
                else:
                    self.db.create_poll(
                        st.session_state.selected_course,
                        session_id,
                        question,
                        options,
                        st.session_state.user_id
                    )
                    st.success("Poll created successfully!")
                    st.rerun()

        # Display active polls
        active_polls = self.db.get_active_polls(session_id)
        if active_polls:
            st.subheader("Active Polls")
            for poll in active_polls:
                with st.expander(f"Poll: {poll['question']}", expanded=True):
                    # Display results
                    self._display_poll_results(poll)

                    if st.button("Close Poll",
                                 key=f"close_{str(poll['_id'])}"):
                        self.db.close_poll(poll['_id'])
                        st.success("Poll closed successfully!")
                        st.rerun()

    def display_student_interface(self, session_id):
        """Display the student interface for participating in polls.

        Lists each active poll of ``session_id`` with a radio selector and a
        submit button that records the vote via ``PollDatabase``.
        """
        st.subheader("Live Polls")

        active_polls = self.db.get_active_polls(session_id)
        if not active_polls:
            st.info("No active polls at the moment.")
            return

        for poll in active_polls:
            with st.expander(f"Poll: {poll['question']}", expanded=True):
                selected_option = st.radio(
                    "Your response:",
                    options=poll['options'],
                    key=f"poll_{str(poll['_id'])}"
                )

                if st.button("Submit Response",
                             key=f"submit_{str(poll['_id'])}"):
                    success, message = self.db.submit_response(
                        poll['_id'],
                        st.session_state.user_id,
                        selected_option
                    )
                    if success:
                        st.success(message)
                        st.rerun()
                    else:
                        # No rerun here: rerunning immediately would wipe the
                        # warning before the student can read it.
                        st.warning(message)

                # self._display_poll_results(poll)

    def _display_poll_results(self, poll):
        """Helper method to display poll results.

        Renders the total vote count and a bar chart of votes per option;
        users whose ``st.session_state.user_type`` is ``'faculty'`` also get
        the underlying table with percentages.
        """
        responses_df = pd.DataFrame(
            list(poll['responses'].items()),
            columns=['Option', 'Votes']
        )

        total_votes = responses_df['Votes'].sum()

        # Calculate percentages (guard against division by zero)
        if total_votes > 0:
            responses_df['Percentage'] = (
                responses_df['Votes'] / total_votes * 100
            ).round(1)
        else:
            responses_df['Percentage'] = 0

        # Display metrics
        st.metric("Total Responses", total_votes)

        # Display charts
        st.bar_chart(responses_df.set_index('Option')['Votes'])

        # Display detailed statistics
        if st.session_state.user_type == 'faculty':
            st.dataframe(responses_df)
loldude.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import plotly.express as px
7
+ import plotly.graph_objects as go
8
+ from collections import defaultdict
9
+
10
def load_and_preprocess_data(uploaded_file):
    """Load the papers CSV and add a ``combined_text`` column.

    Args:
        uploaded_file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        The loaded DataFrame with an extra ``combined_text`` column holding
        ``Title``, ``Abstract`` and ``Keywords`` joined by single spaces.

    Raises:
        KeyError: If any of the three text columns is missing.
    """
    df = pd.read_csv(uploaded_file)
    text_columns = ['Title', 'Abstract', 'Keywords']
    missing = [name for name in text_columns if name not in df.columns]
    if missing:
        raise KeyError(f"CSV is missing required column(s): {', '.join(missing)}")

    # Missing cells load as NaN; concatenating NaN with a string yields NaN,
    # which the TF-IDF vectorizer rejects. Treat them as empty text, and cast
    # to str so numeric-looking cells concatenate cleanly.
    title, abstract, keywords = (
        df[name].fillna('').astype(str) for name in text_columns
    )
    # Combine relevant text fields for similarity comparison
    df['combined_text'] = title + ' ' + abstract + ' ' + keywords
    return df
16
+
17
def calculate_similarity_matrix(df):
    """Return pairwise cosine similarities of the papers' ``combined_text``.

    The text is vectorized with TF-IDF (English stop words removed) and the
    resulting matrix is compared against itself.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    document_vectors = vectorizer.fit_transform(df['combined_text'])
    return cosine_similarity(document_vectors)
23
+
24
def find_similar_papers(similarity_matrix, df, threshold=0.7):
    """Find pairs of papers with similarity at or above ``threshold``.

    Args:
        similarity_matrix: Square matrix of pairwise similarities, row/column
            ``i`` corresponding to row ``i`` (by position) of ``df``.
        df: DataFrame with a ``Title`` column.
        threshold: Minimum similarity for a pair to be reported.

    Returns:
        DataFrame with columns ``Paper 1``, ``Paper 2`` and ``Similarity``,
        one row per unordered pair (``i < j``), in row-major order. The
        columns are present even when no pair qualifies.
    """
    columns = ['Paper 1', 'Paper 2', 'Similarity']
    sim = np.atleast_2d(np.asarray(similarity_matrix))
    titles = df['Title'].to_numpy()

    # Strict upper triangle: every unordered pair once, never a paper with
    # itself.
    first, second = np.triu_indices(sim.shape[0], k=1)
    scores = sim[first, second]
    keep = scores >= threshold

    return pd.DataFrame(
        {
            'Paper 1': titles[first[keep]],
            'Paper 2': titles[second[keep]],
            'Similarity': scores[keep],
        },
        columns=columns,
    )
37
+
38
def find_outliers(similarity_matrix, df, threshold=0.3):
    """Find papers with low average similarity to the *other* papers.

    Args:
        similarity_matrix: Square matrix of pairwise similarities, row ``i``
            corresponding to row ``i`` (by position) of ``df``.
        df: DataFrame with a ``Title`` column.
        threshold: Papers whose average similarity is strictly below this
            value are reported.

    Returns:
        DataFrame with columns ``Title`` and ``Average Similarity``. The
        columns are present even when there are no outliers. With fewer than
        two papers there is nothing to compare against, so the result is
        empty.
    """
    columns = ['Title', 'Average Similarity']
    sim = np.atleast_2d(np.asarray(similarity_matrix, dtype=float))
    paper_count = sim.shape[0]
    if paper_count < 2 or sim.shape[1] != paper_count:
        return pd.DataFrame(columns=columns)

    # Exclude the diagonal: a paper's similarity to itself would otherwise
    # inflate its average and hide genuine outliers.
    avg_similarities = (sim.sum(axis=1) - np.diag(sim)) / (paper_count - 1)
    is_outlier = avg_similarities < threshold

    titles = df['Title'].to_numpy()
    return pd.DataFrame(
        {
            'Title': titles[is_outlier],
            'Average Similarity': avg_similarities[is_outlier],
        },
        columns=columns,
    )
49
+
50
def create_similarity_heatmap(similarity_matrix, df):
    """Build a Plotly heatmap of the similarity matrix labelled by paper title."""
    paper_titles = df['Title']
    heatmap_trace = go.Heatmap(
        z=similarity_matrix,
        x=paper_titles,
        y=paper_titles,
        colorscale='Viridis'
    )
    figure = go.Figure(data=heatmap_trace)

    # Tilt the x-axis labels so long titles stay readable.
    layout_options = {
        'title': 'Paper Similarity Heatmap',
        'xaxis_tickangle': -45,
        'height': 800,
    }
    figure.update_layout(**layout_options)
    return figure
64
+
65
def analyze_keywords(df):
    """Analyze keyword frequency across papers.

    Each string cell of ``df['Keywords']`` is split on commas; tokens are
    stripped of surrounding whitespace and counted. Non-string cells (e.g.
    NaN) and empty tokens (e.g. from a trailing comma) are ignored.

    Returns:
        DataFrame with columns ``Keyword`` and ``Frequency`` sorted by
        descending frequency. The columns are present even when no keyword
        was found.
    """
    keyword_freq = defaultdict(int)
    for keywords in df['Keywords']:
        if not isinstance(keywords, str):
            continue
        for keyword in keywords.split(','):
            keyword = keyword.strip()
            if keyword:
                keyword_freq[keyword] += 1

    # Passing explicit columns keeps 'Frequency' available for sort_values
    # even when no keyword was counted (otherwise it raises KeyError).
    keyword_df = pd.DataFrame(
        list(keyword_freq.items()),
        columns=['Keyword', 'Frequency'],
    ).sort_values('Frequency', ascending=False, kind='stable')

    return keyword_df
80
+
81
def main():
    """Render the Streamlit page for research-paper similarity analysis.

    After a CSV upload the page shows a similarity heatmap, the pairs above a
    user-chosen similarity threshold, outlier papers, a keyword frequency
    chart and a few summary metrics.
    """
    st.title('Research Papers Similarity Analysis')

    uploaded_file = st.file_uploader("Upload your research papers CSV file", type=['csv'])

    if uploaded_file is None:
        return

    try:
        df = load_and_preprocess_data(uploaded_file)
        similarity_matrix = calculate_similarity_matrix(df)
    except (KeyError, ValueError) as exc:
        # Missing columns (KeyError) or text the vectorizer cannot use
        # (ValueError) are user-data problems: report them, don't crash.
        st.error(f"Could not analyze the uploaded file: {exc}")
        return

    st.header('Document Similarity Analysis')

    # Similarity Heatmap
    st.subheader('Similarity Heatmap')
    heatmap = create_similarity_heatmap(similarity_matrix, df)
    st.plotly_chart(heatmap, use_container_width=True)

    # Similar Papers
    st.subheader('Similar Papers')
    similarity_threshold = st.slider('Similarity Threshold', 0.0, 1.0, 0.7)
    similar_papers = find_similar_papers(similarity_matrix, df, similarity_threshold)
    if not similar_papers.empty:
        st.dataframe(similar_papers)
    else:
        st.write("No papers found above the similarity threshold.")

    # Outliers
    st.subheader('Outlier Papers')
    outlier_threshold = st.slider('Outlier Threshold', 0.0, 1.0, 0.3)
    outliers = find_outliers(similarity_matrix, df, outlier_threshold)
    if not outliers.empty:
        st.dataframe(outliers)
    else:
        st.write("No outliers found below the threshold.")

    # Keyword Analysis
    st.header('Keyword Analysis')
    keyword_freq = analyze_keywords(df)
    if not keyword_freq.empty:
        fig = px.bar(keyword_freq, x='Keyword', y='Frequency',
                     title='Keyword Frequency Across Papers')
        fig.update_xaxes(tickangle=45)
        st.plotly_chart(fig, use_container_width=True)

    # Basic Statistics
    st.header('Basic Statistics')
    # Off-diagonal entries only; with a single paper this is empty and
    # np.max would raise, so fall back to a placeholder.
    off_diagonal = similarity_matrix[
        ~np.eye(similarity_matrix.shape[0], dtype=bool)
    ]
    max_similarity = f"{np.max(off_diagonal):.2f}" if off_diagonal.size else "N/A"

    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Papers", len(df))
        st.metric("Average Similarity", f"{np.mean(similarity_matrix):.2f}")
    with col2:
        st.metric("Unique Keywords", len(keyword_freq))
        st.metric("Max Similarity", max_similarity)
133
+
134
+ if __name__ == "__main__":
135
+ main()
modify_schema.py CHANGED
@@ -1,222 +1,222 @@
1
- from db import courses_collection2
2
- from dotenv import load_dotenv
3
- import os
4
- from pymongo import MongoClient
5
- from datetime import datetime
6
-
7
-
8
-
9
- load_dotenv()
10
- MONGO_URI = os.getenv("MONGO_URI")
11
-
12
- client = MongoClient(MONGO_URI)
13
- db = client["novascholar_db"]
14
-
15
- # Define the updated course schema
16
- updated_course_schema = {
17
- "bsonType": "object",
18
- "required": [
19
- "course_id",
20
- "title",
21
- "description",
22
- "faculty",
23
- "faculty_id",
24
- "duration",
25
- "created_at",
26
- ],
27
- "properties": {
28
- "course_id": {
29
- "bsonType": "string",
30
- "description": "Unique identifier for the course",
31
- },
32
- "title": {"bsonType": "string", "description": "Title of the course"},
33
- "description": {
34
- "bsonType": "string",
35
- "description": "Description of the course",
36
- },
37
- "faculty": {"bsonType": "string", "description": "Name of the faculty"},
38
- "duration": {"bsonType": "string", "description": "Duration of the course"},
39
- "created_at": {
40
- "bsonType": "date",
41
- "description": "Date when the course was created",
42
- },
43
- "sessions": {
44
- "bsonType": "array",
45
- "description": "List of sessions associated with the course",
46
- "items": {
47
- "bsonType": "object",
48
- "required": ["session_id", "title", "date"],
49
- "properties": {
50
- "session_id": {
51
- "bsonType": "string",
52
- "description": "Unique identifier for the session",
53
- },
54
- "title": {
55
- "bsonType": "string",
56
- "description": "Title of the session",
57
- },
58
- "date": {"bsonType": "date", "description": "Date of the session"},
59
- "status": {
60
- "bsonType": "string",
61
- "description": "Status of the session (e.g., completed, upcoming)",
62
- },
63
- "created_at": {
64
- "bsonType": "date",
65
- "description": "Date when the session was created",
66
- },
67
- "pre_class": {
68
- "bsonType": "object",
69
- "description": "Pre-class segment data",
70
- "properties": {
71
- "resources": {
72
- "bsonType": "array",
73
- "description": "List of pre-class resources",
74
- "items": {
75
- "bsonType": "object",
76
- "required": ["type", "title", "url"],
77
- "properties": {
78
- "type": {
79
- "bsonType": "string",
80
- "description": "Type of resource (e.g., pdf, video)",
81
- },
82
- "title": {
83
- "bsonType": "string",
84
- "description": "Title of the resource",
85
- },
86
- "url": {
87
- "bsonType": "string",
88
- "description": "URL of the resource",
89
- },
90
- "vector": {
91
- "bsonType": "array",
92
- "description": "Vector representation of the resource",
93
- "items": {"bsonType": "double"},
94
- },
95
- },
96
- },
97
- },
98
- "completion_required": {
99
- "bsonType": "bool",
100
- "description": "Indicates if completion of pre-class resources is required",
101
- },
102
- },
103
- },
104
- "in_class": {
105
- "bsonType": "object",
106
- "description": "In-class segment data",
107
- "properties": {
108
- "topics": {
109
- "bsonType": "array",
110
- "description": "List of topics covered in the session",
111
- "items": {"bsonType": "string"},
112
- },
113
- "quiz": {
114
- "bsonType": "object",
115
- "description": "Quiz data",
116
- "properties": {
117
- "title": {
118
- "bsonType": "string",
119
- "description": "Title of the quiz",
120
- },
121
- "questions": {
122
- "bsonType": "int",
123
- "description": "Number of questions in the quiz",
124
- },
125
- "duration": {
126
- "bsonType": "int",
127
- "description": "Duration of the quiz in minutes",
128
- },
129
- },
130
- },
131
- "polls": {
132
- "bsonType": "array",
133
- "description": "List of polls conducted during the session",
134
- "items": {
135
- "bsonType": "object",
136
- "required": ["question", "options"],
137
- "properties": {
138
- "question": {
139
- "bsonType": "string",
140
- "description": "Poll question",
141
- },
142
- "options": {
143
- "bsonType": "array",
144
- "description": "List of poll options",
145
- "items": {"bsonType": "string"},
146
- },
147
- "responses": {
148
- "bsonType": "object",
149
- "description": "Responses to the poll",
150
- "additionalProperties": {"bsonType": "int"},
151
- },
152
- },
153
- },
154
- },
155
- },
156
- },
157
- "post_class": {
158
- "bsonType": "object",
159
- "description": "Post-class segment data",
160
- "properties": {
161
- "assignments": {
162
- "bsonType": "array",
163
- "description": "List of assignments",
164
- "items": {
165
- "bsonType": "object",
166
- "required": ["id", "title", "due_date", "status"],
167
- "properties": {
168
- "id": {
169
- "bsonType": ["objectId", "int"],
170
- "description": "Assignment ID",
171
- },
172
- "title": {
173
- "bsonType": "string",
174
- "description": "Title of the assignment",
175
- },
176
- "due_date": {
177
- "bsonType": "date",
178
- "description": "Due date of the assignment",
179
- },
180
- "status": {
181
- "bsonType": "string",
182
- "description": "Status of the assignment (e.g., pending, completed)",
183
- },
184
- "submissions": {
185
- "bsonType": "array",
186
- "description": "List of submissions",
187
- "items": {
188
- "bsonType": "object",
189
- "properties": {
190
- "student_id": {
191
- "bsonType": "objectId",
192
- "description": "ID of the student who submitted the assignment",
193
- },
194
- "file_url": {
195
- "bsonType": "string",
196
- "description": "URL of the submitted file",
197
- },
198
- "submitted_at": {
199
- "bsonType": "date",
200
- "description": "Date when the assignment was submitted",
201
- },
202
- },
203
- },
204
- },
205
- },
206
- },
207
- }
208
- },
209
- },
210
- },
211
- },
212
- },
213
- },
214
- }
215
-
216
- # Update the schema using the collMod command
217
- db.command({
218
- "collMod": "courses_collection2",
219
- "validator": {"$jsonSchema": updated_course_schema}
220
- })
221
-
222
  print("Schema updated successfully!")
 
1
+ from db import courses_collection2
2
+ from dotenv import load_dotenv
3
+ import os
4
+ from pymongo import MongoClient
5
+ from datetime import datetime
6
+
7
+
8
+
9
# Load settings from .env and connect to the database that holds the courses.
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")
if not MONGO_URI:
    # Without a URI the client would fall back to its default host, and the
    # schema change below could be applied to the wrong server.
    raise RuntimeError(
        "MONGO_URI is not set; refusing to run the schema migration."
    )

client = MongoClient(MONGO_URI)
db = client["novascholar_db"]
14
+
15
# Define the updated course schema
# This dict is used below as the "$jsonSchema" validator for the courses
# collection. It describes a course document and, nested under "sessions",
# each session's pre-class, in-class and post-class segments.
# NOTE(review): "faculty_id" is listed under "required" but has no entry
# under "properties", so only its presence (not its type) is validated.
updated_course_schema = {
    "bsonType": "object",
    "required": [
        "course_id",
        "title",
        "description",
        "faculty",
        "faculty_id",
        "duration",
        "created_at",
    ],
    "properties": {
        "course_id": {
            "bsonType": "string",
            "description": "Unique identifier for the course",
        },
        "title": {"bsonType": "string", "description": "Title of the course"},
        "description": {
            "bsonType": "string",
            "description": "Description of the course",
        },
        "faculty": {"bsonType": "string", "description": "Name of the faculty"},
        "duration": {"bsonType": "string", "description": "Duration of the course"},
        "created_at": {
            "bsonType": "date",
            "description": "Date when the course was created",
        },
        # Optional list of sessions; each item must carry an id, title and date.
        "sessions": {
            "bsonType": "array",
            "description": "List of sessions associated with the course",
            "items": {
                "bsonType": "object",
                "required": ["session_id", "title", "date"],
                "properties": {
                    "session_id": {
                        "bsonType": "string",
                        "description": "Unique identifier for the session",
                    },
                    "title": {
                        "bsonType": "string",
                        "description": "Title of the session",
                    },
                    "date": {"bsonType": "date", "description": "Date of the session"},
                    "status": {
                        "bsonType": "string",
                        "description": "Status of the session (e.g., completed, upcoming)",
                    },
                    "created_at": {
                        "bsonType": "date",
                        "description": "Date when the session was created",
                    },
                    # Material students should go through before the session.
                    "pre_class": {
                        "bsonType": "object",
                        "description": "Pre-class segment data",
                        "properties": {
                            "resources": {
                                "bsonType": "array",
                                "description": "List of pre-class resources",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["type", "title", "url"],
                                    "properties": {
                                        "type": {
                                            "bsonType": "string",
                                            "description": "Type of resource (e.g., pdf, video)",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the resource",
                                        },
                                        "url": {
                                            "bsonType": "string",
                                            "description": "URL of the resource",
                                        },
                                        "vector": {
                                            "bsonType": "array",
                                            "description": "Vector representation of the resource",
                                            "items": {"bsonType": "double"},
                                        },
                                    },
                                },
                            },
                            "completion_required": {
                                "bsonType": "bool",
                                "description": "Indicates if completion of pre-class resources is required",
                            },
                        },
                    },
                    # Topics, quiz metadata and polls used during the session.
                    "in_class": {
                        "bsonType": "object",
                        "description": "In-class segment data",
                        "properties": {
                            "topics": {
                                "bsonType": "array",
                                "description": "List of topics covered in the session",
                                "items": {"bsonType": "string"},
                            },
                            "quiz": {
                                "bsonType": "object",
                                "description": "Quiz data",
                                "properties": {
                                    "title": {
                                        "bsonType": "string",
                                        "description": "Title of the quiz",
                                    },
                                    "questions": {
                                        "bsonType": "int",
                                        "description": "Number of questions in the quiz",
                                    },
                                    "duration": {
                                        "bsonType": "int",
                                        "description": "Duration of the quiz in minutes",
                                    },
                                },
                            },
                            "polls": {
                                "bsonType": "array",
                                "description": "List of polls conducted during the session",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["question", "options"],
                                    "properties": {
                                        "question": {
                                            "bsonType": "string",
                                            "description": "Poll question",
                                        },
                                        "options": {
                                            "bsonType": "array",
                                            "description": "List of poll options",
                                            "items": {"bsonType": "string"},
                                        },
                                        # Free-form keys, each required to map to an int.
                                        "responses": {
                                            "bsonType": "object",
                                            "description": "Responses to the poll",
                                            "additionalProperties": {"bsonType": "int"},
                                        },
                                    },
                                },
                            },
                        },
                    },
                    # Assignments handed out after the session, with submissions.
                    "post_class": {
                        "bsonType": "object",
                        "description": "Post-class segment data",
                        "properties": {
                            "assignments": {
                                "bsonType": "array",
                                "description": "List of assignments",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["id", "title", "due_date", "status"],
                                    "properties": {
                                        "id": {
                                            "bsonType": ["objectId", "int"],
                                            "description": "Assignment ID",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the assignment",
                                        },
                                        "due_date": {
                                            "bsonType": "date",
                                            "description": "Due date of the assignment",
                                        },
                                        "status": {
                                            "bsonType": "string",
                                            "description": "Status of the assignment (e.g., pending, completed)",
                                        },
                                        "submissions": {
                                            "bsonType": "array",
                                            "description": "List of submissions",
                                            "items": {
                                                "bsonType": "object",
                                                "properties": {
                                                    "student_id": {
                                                        "bsonType": "objectId",
                                                        "description": "ID of the student who submitted the assignment",
                                                    },
                                                    "file_url": {
                                                        "bsonType": "string",
                                                        "description": "URL of the submitted file",
                                                    },
                                                    "submitted_at": {
                                                        "bsonType": "date",
                                                        "description": "Date when the assignment was submitted",
                                                    },
                                                },
                                            },
                                        },
                                    },
                                },
                            }
                        },
                    },
                },
            },
        },
    },
}
215
+
216
# Apply the schema as the collection's validator via the collMod command.
# "collMod" must stay the first key: it names the command being run.
coll_mod_command = {
    "collMod": "courses_collection2",
    "validator": {"$jsonSchema": updated_course_schema},
}
db.command(coll_mod_command)

print("Schema updated successfully!")
new_keywords.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from pymongo import MongoClient
4
+ from dotenv import load_dotenv
5
+ import os
6
+ import json
7
+ import re
8
+
9
# 1. Load environment variables
load_dotenv()
# The connection string must come from the environment: a URI with embedded
# credentials does not belong in source control. "MONGODB_UR" is the
# historical (misspelled) variable name and is still honoured as a fallback.
MONGODB_URI = (
    os.getenv("MONGODB_URI")
    or os.getenv("MONGO_URI")
    or os.getenv("MONGODB_UR")
)
if not MONGODB_URI:
    raise RuntimeError(
        "MongoDB connection string is not configured; "
        "set MONGODB_URI in the environment or .env file."
    )

# 2. Create MongoDB connection
client = MongoClient(MONGODB_URI)
db = client["novascholar_db"]
collection = db["research_papers"]
19
+
20
+
21
def convert_mixed_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Flatten list-valued columns so the frame can be written to CSV.

    Any column with at least one list among its non-null cells is rewritten
    in place: lists become ", "-joined strings, other non-null values become
    ``str(value)`` and nulls become "". Other columns are left untouched.
    The same DataFrame object is returned.
    """

    def _as_text(cell):
        if isinstance(cell, list):
            return ", ".join(str(item) for item in cell)
        if pd.notna(cell):
            return str(cell)
        return ""

    for name in df.columns:
        holds_list = False
        for cell in df[name].dropna():
            if isinstance(cell, list):
                holds_list = True
                break
        if holds_list:
            df[name] = df[name].apply(_as_text)
    return df
36
+
37
+
38
def filter_and_export_collection_to_csv(keywords_list, doc_collection):
    """
    Export the documents whose 'Keywords' field contains any given keyword.

    Args:
        keywords_list: Keywords to look for. Each is matched literally, as a
            case-insensitive substring. A single string is treated as a
            one-element list.
        doc_collection: Collection object exposing ``find(query)``.

    Returns:
        ``(df, csv_filename)``: the matching documents as a DataFrame and the
        name of the CSV file written to the working directory, or
        ``(empty DataFrame, None)`` when there are no keywords or no matches.
    """
    if isinstance(keywords_list, str):
        # Iterating a bare string would search for each character.
        keywords_list = [keywords_list]
    keywords = [kw for kw in keywords_list if kw]
    if not keywords:
        # An empty pattern would match every document in the collection.
        return pd.DataFrame(), None

    # 3. Build one alternation pattern. Keywords are escaped so characters
    # such as "+", "." or "(" are matched literally instead of being
    # interpreted (or rejected) as regex syntax. $regex is unanchored, so no
    # ".*" padding is needed for substring matching.
    pattern = "|".join(re.escape(kw) for kw in keywords)
    docs = list(
        doc_collection.find({"Keywords": {"$regex": pattern, "$options": "i"}})
    )

    # Convert documents to DataFrame
    df = pd.DataFrame(docs)
    if df.empty:
        # Return an empty DataFrame and None if no documents found
        return pd.DataFrame(), None

    # 4. Convert mixed columns
    df = convert_mixed_columns(df)
    # 5. Export to CSV
    csv_filename = "filtered_papers_export.csv"
    df.to_csv(csv_filename, index=False)
    return df, csv_filename
66
+
67
+
68
def main():
    """Render the Streamlit page that exports papers matching keywords.

    The chosen paper type selects the MongoDB collection; the text input is
    split on commas into keywords. On button click the matching papers are
    exported to CSV, offered for download and previewed.
    """
    st.title("Filter and Export Papers by Keyword")

    # Let user select the paper type
    paper_type_choices = [
        "Review Based Paper",
        "Opinion/Perspective Based Paper",
        "Empirical Research Paper",
        "Research Paper (Other)",
    ]
    paper_type = st.selectbox("Select type of research paper:", paper_type_choices)

    # Let user enter the keyword to filter
    keyword_input = st.text_input(
        "Enter the exact keyword to filter papers by 'Keywords' field:"
    )

    # Nothing more to do until the export button is clicked.
    if not st.button("Export Filtered Papers to CSV"):
        return

    with st.spinner("Exporting filtered documents..."):
        try:
            # Collection name is derived from the selected paper type.
            doc_collection = db[paper_type.replace(" ", "_").lower()]

            # Comma-separated input -> list of non-empty, trimmed keywords.
            keywords_list = []
            for piece in keyword_input.split(","):
                trimmed = piece.strip()
                if trimmed:
                    keywords_list.append(trimmed)

            if keywords_list:
                df, csv_filename = filter_and_export_collection_to_csv(
                    keywords_list, doc_collection
                )
                if df.empty or not csv_filename:
                    st.warning(
                        "No matching documents found for the provided keyword(s)."
                    )
                else:
                    st.success(
                        f"Successfully exported filtered papers to {csv_filename}!"
                    )
                    st.download_button(
                        label="Download CSV",
                        data=df.to_csv(index=False).encode("utf-8"),
                        file_name=csv_filename,
                        mime="text/csv",
                    )
                    st.write("Preview of the filtered DataFrame:")
                    st.dataframe(df)
            else:
                st.warning("Please enter at least one keyword.")
        except Exception as e:
            st.error(f"Error exporting filtered papers: {str(e)}")
124
+
125
+
126
+ if __name__ == "__main__":
127
+ main()
new_research_paper.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import requests
4
+ import json
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
11
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
+
13
+
14
def call_perplexity_api(prompt: str, timeout: float = 60.0) -> str:
    """Call Perplexity AI with a prompt and return the text response.

    Args:
        prompt: User message sent as the only chat message.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The content of the first choice, or "" on any failure (the error is
        also shown in the Streamlit UI).
    """
    if not PERPLEXITY_API_KEY:
        st.error("API Error: PERPLEXITY_API_KEY is not set.")
        return ""

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }

    try:
        # Without a timeout a stalled connection would hang the app forever.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]
        return content or ""
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        # Network/HTTP errors, or a response body without the expected shape.
        st.error(f"API Error: {str(e)}")
        return ""
34
+
35
+
36
def generate_research_paper(df: pd.DataFrame) -> dict:
    """
    For each column in the DataFrame, ask the API for a research paper section
    (the prompt requests 100-150 words) based on that column's data.
    Return a dict mapping column -> generated text ("" when the call
    returned nothing).
    """
    paper_sections = {}
    for col in df.columns:
        # Convert all non-null rows in the column to strings and join them for context
        col_values = df[col].dropna().astype(str).tolist()
        # We'll truncate if this is huge
        sample_text = " | ".join(col_values[:50])  # limit to first 50 rows for brevity
        prompt = f"""
        Topic: {col}
        Data Sample: {sample_text}

        Generate a professional research paper section for the above column.
        The section should be at least 100 words and at most 150 words,
        focusing on key insights, challenges, and potential research angles.
        Integrate the data samples as context for the content.
        """
        # One API request per column; a failed request yields "".
        section_text = call_perplexity_api(prompt)
        paper_sections[col] = section_text.strip() if section_text else ""
    return paper_sections
59
+
60
+
61
def format_paper(paper_dict: dict) -> str:
    """
    Render the generated sections as one Markdown document.

    The document starts with a fixed top-level title; every dict key becomes
    a second-level heading followed by its text and a blank line.
    """
    sections = [
        f"## {heading}\n{body}\n\n" for heading, body in paper_dict.items()
    ]
    return "# Generated Research Paper\n\n" + "".join(sections)
70
+
71
+
72
def main():
    """Render the Streamlit page that turns a CSV corpus into a paper.

    After upload the first rows are previewed; on button click one section
    per column is generated, shown as Markdown and offered for download.
    """
    st.title("Corpus-based Research Paper Generator")

    uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
    if uploaded_file:
        df = pd.read_csv(uploaded_file)
        st.write("### Preview of Uploaded Data")
        st.dataframe(df.head())

        if st.button("Generate Research Paper"):
            st.info("Generating paper based on the columns of your corpus...")
            with st.spinner("Calling Perplexity AI..."):
                paper = generate_research_paper(df)
                # The dict has one entry per column even when every API call
                # failed (all values ""), so test the contents rather than
                # the dict itself.
                if any(paper.values()):
                    formatted_paper = format_paper(paper)
                    st.success("Research Paper Generated Successfully!")
                    st.write(formatted_paper)

                    st.download_button(
                        label="Download Paper as Markdown",
                        data=formatted_paper,
                        file_name="research_paper.md",
                        mime="text/markdown",
                    )
                else:
                    st.error(
                        "Paper generation failed. Please check Perplexity API key."
                    )
100
+
101
+
102
+ if __name__ == "__main__":
103
+ main()
poll_db_operations.py CHANGED
@@ -1,70 +1,70 @@
1
- from pymongo import MongoClient
2
- from datetime import datetime
3
- from bson import ObjectId
4
- from dotenv import load_dotenv
5
- import os
6
-
7
- load_dotenv()
8
- MONGO_URI = os.getenv('MONGO_URI')
9
class PollDatabase:
    """Data-access helper for classroom polls stored in MongoDB.

    Polls are kept in the ``polls`` collection and individual votes in the
    ``poll_responses`` collection of the ``novascholar_db`` database.
    """

    def __init__(self):
        """Connect using ``MONGO_URI`` and select ``novascholar_db``."""
        self.client = MongoClient(MONGO_URI)
        self.db = self.client["novascholar_db"]

    def create_poll(self, course_id, session_id, question, options, faculty_id):
        """Insert a new active poll with a zeroed vote counter per option.

        Returns:
            The result of ``insert_one`` on the ``polls`` collection.
        """
        poll = {
            "course_id": course_id,
            "session_id": session_id,
            "faculty_id": faculty_id,
            "question": question,
            "options": options,
            "status": "active",
            "created_at": datetime.now(),
            "responses": {option: 0 for option in options},
        }
        return self.db.polls.insert_one(poll)

    def get_active_polls(self, session_id):
        """Return a list of all polls of ``session_id`` whose status is "active"."""
        return list(self.db.polls.find({
            "session_id": session_id,
            "status": "active",
        }))

    def submit_response(self, poll_id, student_id, selected_option):
        """Record one student's vote and increment the poll's counter.

        Returns:
            ``(True, message)`` on success; ``(False, message)`` when the
            student has already voted or any other error occurred.
        """
        # Imported locally so this class does not depend on the module's
        # import block being edited as well.
        from pymongo.errors import DuplicateKeyError

        try:
            # Convert first: an invalid poll id must fail before a vote is
            # written, otherwise a response is stored with no counter update.
            poll_oid = ObjectId(poll_id)

            # Record individual response. ``poll_id`` is stored as given so
            # duplicate detection keeps working for votes already stored.
            self.db.poll_responses.insert_one({
                "poll_id": poll_id,
                "student_id": student_id,
                "selected_option": selected_option,
                "submitted_at": datetime.now(),
            })

            # Update aggregated results
            self.db.polls.update_one(
                {"_id": poll_oid},
                {"$inc": {f"responses.{selected_option}": 1}},
            )
            return True, "Vote recorded successfully"
        except DuplicateKeyError:
            return False, "You have already voted in this poll"
        except Exception as e:
            # Retain the original message-based check as a fallback.
            if "duplicate key error" in str(e):
                return False, "You have already voted in this poll"
            return False, f"Error recording vote: {str(e)}"

    def close_poll(self, poll_id):
        """Set the poll's status to "closed" and return the update result."""
        return self.db.polls.update_one(
            {"_id": ObjectId(poll_id)},
            {"$set": {"status": "closed"}},
        )

    def get_poll_analytics(self, poll_id):
        """Return ``(poll_document, list_of_responses)`` for a poll."""
        poll_oid = ObjectId(poll_id)
        poll = self.db.polls.find_one({"_id": poll_oid})
        # submit_response stores poll_id exactly as the caller passed it, which
        # may be the string form; match both representations so stored votes
        # are found either way.
        responses = self.db.poll_responses.find(
            {"poll_id": {"$in": [poll_oid, str(poll_oid)]}}
        )
        return poll, list(responses)
 
1
+ from pymongo import MongoClient
2
+ from datetime import datetime
3
+ from bson import ObjectId
4
+ from dotenv import load_dotenv
5
+ import os
6
+
7
+ load_dotenv()
8
+ MONGO_URI = os.getenv('MONGO_URI')
9
class PollDatabase:
    """Data-access helper for classroom polls stored in MongoDB.

    Polls are kept in the ``polls`` collection and individual votes in the
    ``poll_responses`` collection of the ``novascholar_db`` database.
    """

    def __init__(self):
        """Connect using ``MONGO_URI`` and select ``novascholar_db``."""
        self.client = MongoClient(MONGO_URI)
        self.db = self.client["novascholar_db"]

    def create_poll(self, course_id, session_id, question, options, faculty_id):
        """Insert a new active poll with a zeroed vote counter per option.

        Returns:
            The result of ``insert_one`` on the ``polls`` collection.
        """
        poll = {
            "course_id": course_id,
            "session_id": session_id,
            "faculty_id": faculty_id,
            "question": question,
            "options": options,
            "status": "active",
            "created_at": datetime.now(),
            "responses": {option: 0 for option in options},
        }
        return self.db.polls.insert_one(poll)

    def get_active_polls(self, session_id):
        """Return a list of all polls of ``session_id`` whose status is "active"."""
        return list(self.db.polls.find({
            "session_id": session_id,
            "status": "active",
        }))

    def submit_response(self, poll_id, student_id, selected_option):
        """Record one student's vote and increment the poll's counter.

        Returns:
            ``(True, message)`` on success; ``(False, message)`` when the
            student has already voted or any other error occurred.
        """
        # Imported locally so this class does not depend on the module's
        # import block being edited as well.
        from pymongo.errors import DuplicateKeyError

        try:
            # Convert first: an invalid poll id must fail before a vote is
            # written, otherwise a response is stored with no counter update.
            poll_oid = ObjectId(poll_id)

            # Record individual response. ``poll_id`` is stored as given so
            # duplicate detection keeps working for votes already stored.
            self.db.poll_responses.insert_one({
                "poll_id": poll_id,
                "student_id": student_id,
                "selected_option": selected_option,
                "submitted_at": datetime.now(),
            })

            # Update aggregated results
            self.db.polls.update_one(
                {"_id": poll_oid},
                {"$inc": {f"responses.{selected_option}": 1}},
            )
            return True, "Vote recorded successfully"
        except DuplicateKeyError:
            return False, "You have already voted in this poll"
        except Exception as e:
            # Retain the original message-based check as a fallback.
            if "duplicate key error" in str(e):
                return False, "You have already voted in this poll"
            return False, f"Error recording vote: {str(e)}"

    def close_poll(self, poll_id):
        """Set the poll's status to "closed" and return the update result."""
        return self.db.polls.update_one(
            {"_id": ObjectId(poll_id)},
            {"$set": {"status": "closed"}},
        )

    def get_poll_analytics(self, poll_id):
        """Return ``(poll_document, list_of_responses)`` for a poll."""
        poll_oid = ObjectId(poll_id)
        poll = self.db.polls.find_one({"_id": poll_oid})
        # submit_response stores poll_id exactly as the caller passed it, which
        # may be the string form; match both representations so stored votes
        # are found either way.
        responses = self.db.poll_responses.find(
            {"poll_id": {"$in": [poll_oid, str(poll_oid)]}}
        )
        return poll, list(responses)
poll_db_setup.py CHANGED
@@ -1,35 +1,35 @@
1
- from pymongo import MongoClient
2
- from datetime import datetime
3
- from dotenv import load_dotenv
4
- import os
5
-
6
- load_dotenv()
7
- MONGO_URI = os.getenv('MONGO_URI')
8
def setup_mongodb():
    """Create the indexes used by the poll collections.

    Creates a compound index on ``polls`` (session_id, status), an index on
    ``polls.course_id``, and a unique compound index on ``poll_responses``
    (poll_id, student_id) so one student cannot vote twice in a poll.

    Returns:
        A confirmation message string.
    """
    # The context manager closes the client once the indexes exist instead
    # of leaving the connection open.
    with MongoClient(MONGO_URI) as client:
        db = client["novascholar_db"]

        # Create indexes for polls collection
        db.polls.create_index([("session_id", 1), ("status", 1)])
        db.polls.create_index([("course_id", 1)])

        # Create unique index for poll_responses to prevent duplicate votes
        db.poll_responses.create_index(
            [("poll_id", 1), ("student_id", 1)],
            unique=True,
        )

    return "Database setup completed successfully"
24
-
25
def print_all_polls():
    """Print every document in the ``polls`` collection to stdout."""
    # Close the client when done rather than leaking the connection.
    with MongoClient(MONGO_URI) as client:
        for poll in client["novascholar_db"].polls.find():
            print(poll)


if __name__ == "__main__":
    # print_all_polls() prints as it goes and returns None; wrapping it in
    # print() only appended a stray "None" line to the output.
    print_all_polls()
 
1
+ from pymongo import MongoClient
2
+ from datetime import datetime
3
+ from dotenv import load_dotenv
4
+ import os
5
+
6
+ load_dotenv()
7
+ MONGO_URI = os.getenv('MONGO_URI')
8
def setup_mongodb():
    """Create the indexes used by the poll collections.

    Creates a compound index on ``polls`` (session_id, status), an index on
    ``polls.course_id``, and a unique compound index on ``poll_responses``
    (poll_id, student_id) so one student cannot vote twice in a poll.

    Returns:
        A confirmation message string.
    """
    # The context manager closes the client once the indexes exist instead
    # of leaving the connection open.
    with MongoClient(MONGO_URI) as client:
        db = client["novascholar_db"]

        # Create indexes for polls collection
        db.polls.create_index([("session_id", 1), ("status", 1)])
        db.polls.create_index([("course_id", 1)])

        # Create unique index for poll_responses to prevent duplicate votes
        db.poll_responses.create_index(
            [("poll_id", 1), ("student_id", 1)],
            unique=True,
        )

    return "Database setup completed successfully"
24
+
25
def print_all_polls():
    """Print every document in the ``polls`` collection to stdout."""
    # Close the client when done rather than leaking the connection.
    with MongoClient(MONGO_URI) as client:
        for poll in client["novascholar_db"].polls.find():
            print(poll)


if __name__ == "__main__":
    # print_all_polls() prints as it goes and returns None; wrapping it in
    # print() only appended a stray "None" line to the output.
    print_all_polls()
pre_class_analytics2.py CHANGED
@@ -1,759 +1,759 @@
1
- import json
2
- import typing_extensions as typing
3
- import google.generativeai as genai
4
- from typing import List, Dict, Any
5
- import numpy as np
6
- from collections import defaultdict
7
-
8
- from dotenv import load_dotenv
9
- import os
10
- import pymongo
11
- from pymongo import MongoClient
12
-
13
- load_dotenv()
14
- GEMINI_API_KEY = os.getenv('GEMINI_KEY')
15
-
16
class EngagementMetrics(typing.TypedDict):
    """Qualitative engagement ratings recorded for one student."""

    participation_level: str  # "high" | "medium" | "low"
    question_quality: str  # "advanced" | "intermediate" | "basic"
    concept_understanding: str  # "strong" | "moderate" | "needs_improvement"
20
-
21
class StudentInsight(typing.TypedDict):
    """Per-student entry of the analytics response."""

    student_id: str
    performance_level: str  # "high_performer" | "average" | "at_risk"
    struggling_topics: list[str]  # topic names the student has difficulty with
    engagement_metrics: EngagementMetrics
26
-
27
class TopicInsight(typing.TypedDict):
    """Per-topic entry of the analytics response."""

    topic: str
    difficulty_level: float  # 0 to 1
    student_count: int  # presumably the number of students struggling with the topic; confirm
    common_issues: list[str]
    key_misconceptions: list[str]
33
-
34
class RecommendedAction(typing.TypedDict):
    """One recommended follow-up action in the analytics response."""

    action: str
    priority: str  # "high" | "medium" | "low"
    target_group: str  # "all_students" | "specific_students" | "faculty"
    reasoning: str
    expected_impact: str
40
-
41
class ClassDistribution(typing.TypedDict):
    """Split of the class across the three performance categories."""

    high_performers: float
    average_performers: float
    at_risk: float
45
-
46
class CourseHealth(typing.TypedDict):
    """Course-level summary section of the analytics response."""

    overall_engagement: float  # 0 to 1
    critical_topics: list[str]
    class_distribution: ClassDistribution
50
-
51
class InterventionMetrics(typing.TypedDict):
    """Students grouped by how urgently they need follow-up."""

    immediate_attention_needed: list[str]  # student_ids
    monitoring_required: list[str]  # student_ids
54
-
55
class AnalyticsResponse(typing.TypedDict):
    """Top-level shape of the analytics payload."""

    topic_insights: list[TopicInsight]
    student_insights: list[StudentInsight]
    recommended_actions: list[RecommendedAction]
    course_health: CourseHealth
    intervention_metrics: InterventionMetrics
61
-
62
-
63
-
64
- class NovaScholarAnalytics:
65
def __init__(self, model_name: str = "gemini-1.5-flash"):
    """Configure the Gemini client and create the generative model.

    Args:
        model_name: Model name passed to ``genai.GenerativeModel``.
    """
    # GEMINI_API_KEY is read from the GEMINI_KEY environment variable at import time.
    genai.configure(api_key=GEMINI_API_KEY)
    self.model = genai.GenerativeModel(model_name)
68
-
69
- def _create_analytics_prompt(self, chat_histories: List[Dict], all_topics: List[str]) -> str:
70
- """Creates a structured prompt for Gemini to analyze chat histories."""
71
- # Prompt 1:
72
- # return f"""Analyze these student chat histories for a university course and provide detailed analytics.
73
-
74
- # Context:
75
- # - These are pre-class chat interactions between students and an AI tutor
76
- # - Topics covered: {', '.join(all_topics)}
77
-
78
- # Chat histories: {json.dumps(chat_histories, indent=2)}
79
-
80
- # Return the analysis in JSON format matching this exact schema:
81
- # {AnalyticsResponse.__annotations__}
82
-
83
- # Ensure all numeric values are between 0 and 1 (accuracy upto 3 decimal places) where applicable.
84
-
85
- # Important analysis guidelines:
86
- # 1. Identify topics where students show confusion or ask multiple follow-up questions
87
- # 2. Look for patterns in question types and complexity
88
- # 3. Analyze response understanding based on follow-up questions
89
- # 4. Consider both explicit and implicit signs of difficulty
90
- # 5. Focus on concept relationships and prerequisite understanding"""
91
-
92
- # Prompt 2:
93
- # return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
94
-
95
- # Context:
96
- # - Chat histories: {json.dumps(chat_histories, indent=2)}
97
- # - These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
98
- # - Topics covered: {', '.join(all_topics)}.
99
-
100
- # Your task is to extract key insights that will help faculty address challenges effectively and enhance learning outcomes.
101
-
102
- # Output Format:
103
- # 1. Topics where students face significant difficulties:
104
- # - Provide a ranked list of topics where the majority of students are struggling, based on the frequency and nature of their questions or misconceptions.
105
- # - Include the percentage of students who found each topic challenging.
106
-
107
- # 2. AI-recommended actions for faculty:
108
- # - Suggest actionable steps to address the difficulties identified in each critical topic.
109
- # - Specify the priority of each action (high, medium, low) based on the urgency and impact.
110
- # - Explain the reasoning behind each recommendation and its expected impact on student outcomes.
111
-
112
- # 3. Student-specific analytics (focusing on at-risk students):
113
- # - Identify students categorized as "at-risk" based on their engagement levels, question complexity, and recurring struggles.
114
- # - For each at-risk student, list their top 3 struggling topics and their engagement metrics (participation level, concept understanding).
115
- # - Provide personalized recommendations for improving their understanding.
116
-
117
- # Guidelines for Analysis:
118
- # - Focus on actionable and concise insights rather than exhaustive details.
119
- # - Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
120
- # - Prioritize topics with higher difficulty scores or more students struggling.
121
- # - Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
122
-
123
- # The response must be well-structured, concise, and highly actionable for faculty to implement improvements effectively."""
124
-
125
- # Prompt 3:
126
- return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
127
- Context:
128
- - Chat histories: {json.dumps(chat_histories, indent=2)}
129
- - These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
130
- - Topics covered: {', '.join(all_topics)}.
131
-
132
- Your task is to provide detailed analytics that will help faculty address challenges effectively and enhance learning outcomes.
133
-
134
- Output Format (strictly follow this JSON structure):
135
- {{
136
- "topic_wise_insights": [
137
- {{
138
- "topic": "<string>",
139
- "struggling_percentage": <number between 0 and 1>,
140
- "key_issues": ["<string>", "<string>", ...],
141
- "key_misconceptions": ["<string>", "<string>", ...],
142
- "recommended_actions": {{
143
- "description": "<string>",
144
- "priority": "high|medium|low",
145
- "expected_outcome": "<string>"
146
- }}
147
- }}
148
- ],
149
- "ai_recommended_actions": [
150
- {{
151
- "action": "<string>",
152
- "priority": "high|medium|low",
153
- "reasoning": "<string>",
154
- "expected_outcome": "<string>",
155
- "pedagogy_recommendations": {{
156
- "methods": ["<string>", "<string>", ...],
157
- "resources": ["<string>", "<string>", ...],
158
- "expected_impact": "<string>"
159
- }}
160
- }}
161
- ],
162
- "student_analytics": [
163
- {{
164
- "student_id": "<string>",
165
- "engagement_metrics": {{
166
- "participation_level": <number between 0 and 1>,
167
- "concept_understanding": "strong|moderate|needs_improvement",
168
- "question_quality": "advanced|intermediate|basic"
169
- }},
170
- "struggling_topics": ["<string>", "<string>", ...],
171
- "personalized_recommendation": "<string>"
172
- }}
173
- ]
174
- }}
175
-
176
- Guidelines for Analysis:
177
- - Focus on actionable and concise insights rather than exhaustive details.
178
- - Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
179
- - Prioritize topics with higher difficulty scores or more students struggling.
180
- - Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
181
- - Make sure to include All** students in the analysis, not just a subset.
182
- - for the ai_recommended_actions:
183
- - Prioritize pedagogy recommendations for critical topics with the high difficulty scores or struggling percentages.
184
- - For each action:
185
- - Include specific teaching methods (e.g., interactive discussions or quizzes, problem-based learning, practical examples etc).
186
- - Recommend supporting resources (e.g., videos, handouts, simulations).
187
- - Provide reasoning for the recommendation and the expected outcomes for student learning.
188
- - Example:
189
- - **Action:** Conduct an interactive problem-solving session on "<Topic Name>".
190
- - **Reasoning:** Students showed difficulty in applying concepts to practical problems.
191
- - **Expected Outcome:** Improved practical understanding and application of the topic.
192
- - **Pedagogy Recommendations:**
193
- - **Methods:** Group discussions, real-world case studies.
194
- - **Resources:** Online interactive tools, relevant case studies, video walkthroughs.
195
- - **Expected Impact:** Enhance conceptual clarity by 40% and practical application by 30%.
196
-
197
- The response must adhere strictly to the above JSON structure, with all fields populated appropriately."""
198
-
199
-
200
- def _calculate_class_distribution(self, analytics: Dict) -> Dict:
201
- """Calculate the distribution of students across performance levels."""
202
- try:
203
- total_students = len(analytics.get("student_insights", []))
204
- if total_students == 0:
205
- return {
206
- "high_performers": 0,
207
- "average_performers": 0,
208
- "at_risk": 0
209
- }
210
-
211
- distribution = defaultdict(int)
212
-
213
- for student in analytics.get("student_insights", []):
214
- performance_level = student.get("performance_level", "average")
215
- # Map performance levels to our three categories
216
- if performance_level in ["excellent", "high", "high_performer"]:
217
- distribution["high_performers"] += 1
218
- elif performance_level in ["struggling", "low", "at_risk"]:
219
- distribution["at_risk"] += 1
220
- else:
221
- distribution["average_performers"] += 1
222
-
223
- # Convert to percentages
224
- return {
225
- level: count/total_students
226
- for level, count in distribution.items()
227
- }
228
- except Exception as e:
229
- print(f"Error calculating class distribution: {str(e)}")
230
- return {
231
- "high_performers": 0,
232
- "average_performers": 0,
233
- "at_risk": 0
234
- }
235
-
236
- def _identify_urgent_cases(self, analytics: Dict) -> List[str]:
237
- """Identify students needing immediate attention."""
238
- try:
239
- urgent_cases = []
240
- for student in analytics.get("student_insights", []):
241
- student_id = student.get("student_id")
242
- if not student_id:
243
- continue
244
-
245
- # Check multiple risk factors
246
- risk_factors = 0
247
-
248
- # Factor 1: Performance level
249
- if student.get("performance_level") in ["struggling", "at_risk", "low"]:
250
- risk_factors += 1
251
-
252
- # Factor 2: Number of struggling topics
253
- if len(student.get("struggling_topics", [])) >= 2:
254
- risk_factors += 1
255
-
256
- # Factor 3: Engagement metrics
257
- engagement = student.get("engagement_metrics", {})
258
- if (engagement.get("participation_level") == "low" or
259
- engagement.get("concept_understanding") == "needs_improvement"):
260
- risk_factors += 1
261
-
262
- # If student has multiple risk factors, add to urgent cases
263
- if risk_factors >= 2:
264
- urgent_cases.append(student_id)
265
-
266
- return urgent_cases
267
- except Exception as e:
268
- print(f"Error identifying urgent cases: {str(e)}")
269
- return []
270
-
271
- def _identify_monitoring_cases(self, analytics: Dict) -> List[str]:
272
- """Identify students who need monitoring but aren't urgent cases."""
273
- try:
274
- monitoring_cases = []
275
- urgent_cases = set(self._identify_urgent_cases(analytics))
276
-
277
- for student in analytics.get("student_insights", []):
278
- student_id = student.get("student_id")
279
- if not student_id or student_id in urgent_cases:
280
- continue
281
-
282
- # Check monitoring criteria
283
- monitoring_needed = False
284
-
285
- # Criterion 1: Has some struggling topics but not enough for urgent
286
- if len(student.get("struggling_topics", [])) == 1:
287
- monitoring_needed = True
288
-
289
- # Criterion 2: Medium-low engagement
290
- engagement = student.get("engagement_metrics", {})
291
- if engagement.get("participation_level") == "medium":
292
- monitoring_needed = True
293
-
294
- # Criterion 3: Recent performance decline
295
- if student.get("performance_level") == "average":
296
- monitoring_needed = True
297
-
298
- if monitoring_needed:
299
- monitoring_cases.append(student_id)
300
-
301
- return monitoring_cases
302
- except Exception as e:
303
- print(f"Error identifying monitoring cases: {str(e)}")
304
- return []
305
-
306
- def _identify_critical_topics(self, analytics: Dict) -> List[str]:
307
- """
308
- Identify critical topics that need attention based on multiple factors.
309
- Returns a list of topic names that are considered critical.
310
- """
311
- try:
312
- critical_topics = []
313
- topics = analytics.get("topic_insights", [])
314
-
315
- for topic in topics:
316
- if not isinstance(topic, dict):
317
- continue
318
-
319
- # Initialize score for topic criticality
320
- critical_score = 0
321
-
322
- # Factor 1: High difficulty level
323
- difficulty_level = topic.get("difficulty_level", 0)
324
- if difficulty_level > 0.7:
325
- critical_score += 2
326
- elif difficulty_level > 0.5:
327
- critical_score += 1
328
-
329
- # Factor 2: Number of students struggling
330
- student_count = topic.get("student_count", 0)
331
- total_students = len(analytics.get("student_insights", []))
332
- if total_students > 0:
333
- struggle_ratio = student_count / total_students
334
- if struggle_ratio > 0.5:
335
- critical_score += 2
336
- elif struggle_ratio > 0.3:
337
- critical_score += 1
338
-
339
- # Factor 3: Number of common issues
340
- if len(topic.get("common_issues", [])) > 2:
341
- critical_score += 1
342
-
343
- # Factor 4: Number of key misconceptions
344
- if len(topic.get("key_misconceptions", [])) > 1:
345
- critical_score += 1
346
-
347
- # If topic exceeds threshold, mark as critical
348
- if critical_score >= 3:
349
- critical_topics.append(topic.get("topic", "Unknown Topic"))
350
-
351
- return critical_topics
352
-
353
- except Exception as e:
354
- print(f"Error identifying critical topics: {str(e)}")
355
- return []
356
-
357
- def _calculate_engagement(self, analytics: Dict) -> Dict:
358
- """
359
- Calculate detailed engagement metrics across all students.
360
- Returns a dictionary with engagement statistics.
361
- """
362
- try:
363
- total_students = len(analytics.get("student_insights", []))
364
- if total_students == 0:
365
- return {
366
- "total_students": 0,
367
- "overall_score": 0,
368
- "engagement_distribution": {
369
- "high": 0,
370
- "medium": 0,
371
- "low": 0
372
- },
373
- "participation_metrics": {
374
- "average_topics_per_student": 0,
375
- "active_participants": 0
376
- }
377
- }
378
-
379
- engagement_levels = defaultdict(int)
380
- total_topics_engaged = 0
381
- active_participants = 0
382
-
383
- for student in analytics.get("student_insights", []):
384
- # Get engagement metrics
385
- metrics = student.get("engagement_metrics", {})
386
-
387
- # Calculate participation level
388
- participation = metrics.get("participation_level", "low").lower()
389
- engagement_levels[participation] += 1
390
-
391
- # Count topics student is engaged with
392
- topics_count = len(student.get("struggling_topics", []))
393
- total_topics_engaged += topics_count
394
-
395
- # Count active participants (students engaging with any topics)
396
- if topics_count > 0:
397
- active_participants += 1
398
-
399
- # Calculate overall engagement score (0-1)
400
- weighted_score = (
401
- (engagement_levels["high"] * 1.0 +
402
- engagement_levels["medium"] * 0.6 +
403
- engagement_levels["low"] * 0.2) / total_students
404
- )
405
-
406
- return {
407
- "total_students": total_students,
408
- "overall_score": round(weighted_score, 2),
409
- "engagement_distribution": {
410
- level: count/total_students
411
- for level, count in engagement_levels.items()
412
- },
413
- "participation_metrics": {
414
- "average_topics_per_student": round(total_topics_engaged / total_students, 2),
415
- "active_participants_ratio": round(active_participants / total_students, 2)
416
- }
417
- }
418
-
419
- except Exception as e:
420
- print(f"Error calculating engagement: {str(e)}")
421
- return {
422
- "total_students": 0,
423
- "overall_score": 0,
424
- "engagement_distribution": {
425
- "high": 0,
426
- "medium": 0,
427
- "low": 0
428
- },
429
- "participation_metrics": {
430
- "average_topics_per_student": 0,
431
- "active_participants_ratio": 0
432
- }
433
- }
434
-
435
- def _process_gemini_response(self, response: str) -> Dict:
436
- """Process and validate Gemini's response."""
437
- # try:
438
- # analytics = json.loads(response)
439
- # return self._enrich_analytics(analytics)
440
- # except json.JSONDecodeError as e:
441
- # print(f"Error decoding Gemini response: {e}")
442
- # return self._fallback_analytics()
443
- try:
444
- # Parse JSON response
445
- analytics = json.loads(response)
446
-
447
- # Validate required fields exist
448
- required_fields = {
449
- "topic_insights": [],
450
- "student_insights": [],
451
- "recommended_actions": []
452
- }
453
-
454
- # Ensure all required fields exist with default values
455
- for field, default_value in required_fields.items():
456
- if field not in analytics or not analytics[field]:
457
- analytics[field] = default_value
458
-
459
- # Now enrich the validated analytics
460
- return self._enrich_analytics(analytics)
461
-
462
- except (json.JSONDecodeError, KeyError, TypeError) as e:
463
- print(f"Error processing Gemini response: {str(e)}")
464
- print(f"Raw response: {response}")
465
- return self._fallback_analytics()
466
-
467
- def _enrich_analytics(self, analytics: Dict) -> Dict:
468
- """Add derived insights and metrics to the analytics."""
469
- # Add overall course health metrics
470
- analytics["course_health"] = {
471
- "overall_engagement": self._calculate_engagement(analytics),
472
- "critical_topics": self._identify_critical_topics(analytics),
473
- "class_distribution": self._calculate_class_distribution(analytics)
474
- }
475
-
476
- # Add intervention urgency scores
477
- analytics["intervention_metrics"] = {
478
- "immediate_attention_needed": self._identify_urgent_cases(analytics),
479
- "monitoring_required": self._identify_monitoring_cases(analytics)
480
- }
481
-
482
- return analytics
483
-
484
- def _calculate_engagement(self, analytics: Dict) -> Dict:
485
- # """Calculate overall engagement metrics."""
486
- # total_students = len(analytics["student_insights"])
487
- # engagement_levels = defaultdict(int)
488
-
489
- # for student in analytics["student_insights"]:
490
- # engagement_levels[student["engagement_metrics"]["participation_level"]] += 1
491
-
492
- # return {
493
- # "total_students": total_students,
494
- # "engagement_distribution": {
495
- # level: count/total_students
496
- # for level, count in engagement_levels.items()
497
- # }
498
- # }
499
- """Calculate overall engagement metrics with defensive programming."""
500
- try:
501
- total_students = len(analytics.get("student_insights", []))
502
- if total_students == 0:
503
- return {
504
- "total_students": 0,
505
- "engagement_distribution": {
506
- "high": 0,
507
- "medium": 0,
508
- "low": 0
509
- }
510
- }
511
-
512
- engagement_levels = defaultdict(int)
513
-
514
- for student in analytics.get("student_insights", []):
515
- metrics = student.get("engagement_metrics", {})
516
- level = metrics.get("participation_level", "low")
517
- engagement_levels[level] += 1
518
-
519
- return {
520
- "total_students": total_students,
521
- "engagement_distribution": {
522
- level: count/total_students
523
- for level, count in engagement_levels.items()
524
- }
525
- }
526
- except Exception as e:
527
- print(f"Error calculating engagement: {str(e)}")
528
- return {
529
- "total_students": 0,
530
- "engagement_distribution": {
531
- "high": 0,
532
- "medium": 0,
533
- "low": 0
534
- }
535
- }
536
-
537
- def _identify_critical_topics(self, analytics: Dict) -> List[Dict]:
538
- # """Identify topics needing immediate attention."""
539
- # return [
540
- # topic for topic in analytics["topic_insights"]
541
- # if topic["difficulty_level"] > 0.7 or
542
- # len(topic["common_issues"]) > 2
543
- # ]
544
- """Identify topics needing immediate attention with defensive programming."""
545
- try:
546
- return [
547
- topic for topic in analytics.get("topic_insights", [])
548
- if topic.get("difficulty_level", 0) > 0.7 or
549
- len(topic.get("common_issues", [])) > 2
550
- ]
551
- except Exception as e:
552
- print(f"Error identifying critical topics: {str(e)}")
553
- return []
554
-
555
def generate_analytics(self, chat_histories: List[Dict], all_topics: List[str]) -> Dict:
    """Ask Gemini for analytics over the given chat histories.

    The histories are preprocessed, turned into a prompt, and sent to the
    model with a JSON response type and temperature 0.15. Progress and
    errors are printed for debugging.

    Returns:
        The model's JSON response parsed into Python objects (returned as
        is, without enrichment). If the inputs are empty, the response has
        no text, or any step raises, the result of ``_fallback_analytics``
        is returned instead.
    """
    try:
        # Report what was received before doing any work.
        print("Input validation:")
        print(f"Chat histories: {len(chat_histories)} entries")
        print(f"Topics: {all_topics}")

        if not (chat_histories and all_topics):
            print("Missing required input data")
            return self._fallback_analytics()

        # Step 1: preprocessing, reported separately to ease debugging.
        try:
            processed_histories = self._preprocess_chat_histories(chat_histories)
            print("Successfully preprocessed chat histories")
        except Exception as preprocess_error:
            print(f"Error in preprocessing: {str(preprocess_error)}")
            return self._fallback_analytics()

        # Step 2: prompt construction, also reported separately.
        try:
            prompt = self._create_analytics_prompt(processed_histories, all_topics)
            print("Successfully created prompt")
            print("Prompt preview:", prompt[:200] + "...")  # first 200 characters only
        except Exception as prompt_error:
            print(f"Error in prompt creation: {str(prompt_error)}")
            return self._fallback_analytics()

        # Step 3: call the model and parse its JSON text.
        generation_config = genai.GenerationConfig(
            response_mime_type="application/json",
            temperature=0.15,
        )
        response = self.model.generate_content(
            prompt,
            generation_config=generation_config,
        )

        if not response.text:
            print("Empty response from Gemini")
            return self._fallback_analytics()

        return json.loads(response.text)

    except Exception as e:
        import traceback

        print(f"Error generating analytics: {str(e)}")
        print(f"Error type: {type(e)}")
        print("Full traceback:", traceback.format_exc())
        return self._fallback_analytics()
661
-
662
- def _preprocess_chat_histories(self, chat_histories: List[Dict]) -> List[Dict]:
663
- # """Preprocess chat histories to focus on relevant information."""
664
- # processed = []
665
-
666
- # for chat in chat_histories:
667
- # print(str(chat["user_id"]))
668
- # processed_chat = {
669
- # "user_id": str(chat["user_id"]),
670
- # "messages": [
671
- # {
672
- # "prompt": msg["prompt"],
673
- # "response": msg["response"]
674
- # }
675
- # for msg in chat["messages"]
676
- # ]
677
- # }
678
- # processed.append(processed_chat)
679
-
680
- # return processed
681
-
682
- # Code 2:
683
- """Preprocess chat histories to focus on relevant information."""
684
- processed = []
685
-
686
- for chat in chat_histories:
687
- # Convert ObjectId to string if it's an ObjectId
688
- user_id = str(chat["user_id"]["$oid"]) if isinstance(chat["user_id"], dict) and "$oid" in chat["user_id"] else str(chat["user_id"])
689
-
690
- try:
691
- processed_chat = {
692
- "user_id": user_id,
693
- "messages": [
694
- {
695
- "prompt": msg["prompt"],
696
- "response": msg["response"]
697
- }
698
- for msg in chat["messages"]
699
- ]
700
- }
701
- processed.append(processed_chat)
702
- print(f"Successfully processed chat for user: {user_id}")
703
- except Exception as e:
704
- print(f"Error processing chat for user: {user_id}")
705
- print(f"Error details: {str(e)}")
706
- continue
707
-
708
- return processed
709
-
710
- def _fallback_analytics(self) -> Dict:
711
- # """Provide basic analytics in case of LLM processing failure."""
712
- # return {
713
- # "topic_insights": [],
714
- # "student_insights": [],
715
- # "recommended_actions": [
716
- # {
717
- # "action": "Review analytics generation process",
718
- # "priority": "high",
719
- # "target_group": "system_administrators",
720
- # "reasoning": "Analytics generation failed",
721
- # "expected_impact": "Restore analytics functionality"
722
- # }
723
- # ]
724
- # }
725
- """Provide comprehensive fallback analytics that match our schema."""
726
- return {
727
- "topic_insights": [],
728
- "student_insights": [],
729
- "recommended_actions": [
730
- {
731
- "action": "Review analytics generation process",
732
- "priority": "high",
733
- "target_group": "system_administrators",
734
- "reasoning": "Analytics generation failed",
735
- "expected_impact": "Restore analytics functionality"
736
- }
737
- ],
738
- "course_health": {
739
- "overall_engagement": 0,
740
- "critical_topics": [],
741
- "class_distribution": {
742
- "high_performers": 0,
743
- "average_performers": 0,
744
- "at_risk": 0
745
- }
746
- },
747
- "intervention_metrics": {
748
- "immediate_attention_needed": [],
749
- "monitoring_required": []
750
- }
751
- }
752
-
753
- # if __name__ == "__main__":
754
- # # Example usage
755
-
756
-
757
- # analytics_generator = NovaScholarAnalytics()
758
- # analytics = analytics_generator.generate_analytics(chat_histories, all_topics)
759
  # print(json.dumps(analytics, indent=2))
 
1
+ import json
2
+ import typing_extensions as typing
3
+ import google.generativeai as genai
4
+ from typing import List, Dict, Any
5
+ import numpy as np
6
+ from collections import defaultdict
7
+
8
+ from dotenv import load_dotenv
9
+ import os
10
+ import pymongo
11
+ from pymongo import MongoClient
12
+
13
+ load_dotenv()
14
+ GEMINI_API_KEY = os.getenv('GEMINI_KEY')
15
+
16
class EngagementMetrics(typing.TypedDict):
    """Qualitative engagement ratings recorded for one student."""

    # One of "high", "medium" or "low".
    participation_level: str
    # One of "advanced", "intermediate" or "basic".
    question_quality: str
    # One of "strong", "moderate" or "needs_improvement".
    concept_understanding: str
20
+
21
class StudentInsight(typing.TypedDict):
    """Per-student summary: performance band, weak topics and engagement."""

    student_id: str
    # One of "high_performer", "average" or "at_risk".
    performance_level: str
    struggling_topics: list[str]
    engagement_metrics: EngagementMetrics
26
+
27
class TopicInsight(typing.TypedDict):
    """Per-topic summary of difficulty and the problems students report."""

    topic: str
    # Expected to lie between 0 and 1.
    difficulty_level: float
    student_count: int
    common_issues: list[str]
    key_misconceptions: list[str]
33
+
34
class RecommendedAction(typing.TypedDict):
    """One suggested follow-up action together with its justification."""

    action: str
    # One of "high", "medium" or "low".
    priority: str
    # One of "all_students", "specific_students" or "faculty".
    target_group: str
    reasoning: str
    expected_impact: str
40
+
41
class ClassDistribution(typing.TypedDict):
    """Share of the class that falls into each performance band."""

    high_performers: float
    average_performers: float
    at_risk: float
45
+
46
class CourseHealth(typing.TypedDict):
    """Course-wide indicators: engagement, critical topics and class split."""

    # Expected to lie between 0 and 1.
    overall_engagement: float
    critical_topics: list[str]
    class_distribution: ClassDistribution
50
+
51
class InterventionMetrics(typing.TypedDict):
    """Student ids grouped by how urgently they need follow-up."""

    # Both lists hold student ids.
    immediate_attention_needed: list[str]
    monitoring_required: list[str]
54
+
55
class AnalyticsResponse(typing.TypedDict):
    """Top-level shape of a complete analytics result."""

    topic_insights: list[TopicInsight]
    student_insights: list[StudentInsight]
    recommended_actions: list[RecommendedAction]
    course_health: CourseHealth
    intervention_metrics: InterventionMetrics
61
+
62
+
63
+
64
+ class NovaScholarAnalytics:
65
def __init__(self, model_name: str = "gemini-1.5-flash"):
    """Configure the Gemini SDK and create the model used for analytics.

    Args:
        model_name: Model identifier handed to ``genai.GenerativeModel``.
    """
    # GEMINI_API_KEY is the module-level value read from the environment.
    genai.configure(api_key=GEMINI_API_KEY)
    gemini_model = genai.GenerativeModel(model_name)
    self.model = gemini_model
68
+
69
+ def _create_analytics_prompt(self, chat_histories: List[Dict], all_topics: List[str]) -> str:
70
+ """Creates a structured prompt for Gemini to analyze chat histories."""
71
+ # Prompt 1:
72
+ # return f"""Analyze these student chat histories for a university course and provide detailed analytics.
73
+
74
+ # Context:
75
+ # - These are pre-class chat interactions between students and an AI tutor
76
+ # - Topics covered: {', '.join(all_topics)}
77
+
78
+ # Chat histories: {json.dumps(chat_histories, indent=2)}
79
+
80
+ # Return the analysis in JSON format matching this exact schema:
81
+ # {AnalyticsResponse.__annotations__}
82
+
83
+ # Ensure all numeric values are between 0 and 1 (accuracy upto 3 decimal places) where applicable.
84
+
85
+ # Important analysis guidelines:
86
+ # 1. Identify topics where students show confusion or ask multiple follow-up questions
87
+ # 2. Look for patterns in question types and complexity
88
+ # 3. Analyze response understanding based on follow-up questions
89
+ # 4. Consider both explicit and implicit signs of difficulty
90
+ # 5. Focus on concept relationships and prerequisite understanding"""
91
+
92
+ # Prompt 2:
93
+ # return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
94
+
95
+ # Context:
96
+ # - Chat histories: {json.dumps(chat_histories, indent=2)}
97
+ # - These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
98
+ # - Topics covered: {', '.join(all_topics)}.
99
+
100
+ # Your task is to extract key insights that will help faculty address challenges effectively and enhance learning outcomes.
101
+
102
+ # Output Format:
103
+ # 1. Topics where students face significant difficulties:
104
+ # - Provide a ranked list of topics where the majority of students are struggling, based on the frequency and nature of their questions or misconceptions.
105
+ # - Include the percentage of students who found each topic challenging.
106
+
107
+ # 2. AI-recommended actions for faculty:
108
+ # - Suggest actionable steps to address the difficulties identified in each critical topic.
109
+ # - Specify the priority of each action (high, medium, low) based on the urgency and impact.
110
+ # - Explain the reasoning behind each recommendation and its expected impact on student outcomes.
111
+
112
+ # 3. Student-specific analytics (focusing on at-risk students):
113
+ # - Identify students categorized as "at-risk" based on their engagement levels, question complexity, and recurring struggles.
114
+ # - For each at-risk student, list their top 3 struggling topics and their engagement metrics (participation level, concept understanding).
115
+ # - Provide personalized recommendations for improving their understanding.
116
+
117
+ # Guidelines for Analysis:
118
+ # - Focus on actionable and concise insights rather than exhaustive details.
119
+ # - Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
120
+ # - Prioritize topics with higher difficulty scores or more students struggling.
121
+ # - Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
122
+
123
+ # The response must be well-structured, concise, and highly actionable for faculty to implement improvements effectively."""
124
+
125
+ # Prompt 3:
126
+ return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
127
+ Context:
128
+ - Chat histories: {json.dumps(chat_histories, indent=2)}
129
+ - These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
130
+ - Topics covered: {', '.join(all_topics)}.
131
+
132
+ Your task is to provide detailed analytics that will help faculty address challenges effectively and enhance learning outcomes.
133
+
134
+ Output Format (strictly follow this JSON structure):
135
+ {{
136
+ "topic_wise_insights": [
137
+ {{
138
+ "topic": "<string>",
139
+ "struggling_percentage": <number between 0 and 1>,
140
+ "key_issues": ["<string>", "<string>", ...],
141
+ "key_misconceptions": ["<string>", "<string>", ...],
142
+ "recommended_actions": {{
143
+ "description": "<string>",
144
+ "priority": "high|medium|low",
145
+ "expected_outcome": "<string>"
146
+ }}
147
+ }}
148
+ ],
149
+ "ai_recommended_actions": [
150
+ {{
151
+ "action": "<string>",
152
+ "priority": "high|medium|low",
153
+ "reasoning": "<string>",
154
+ "expected_outcome": "<string>",
155
+ "pedagogy_recommendations": {{
156
+ "methods": ["<string>", "<string>", ...],
157
+ "resources": ["<string>", "<string>", ...],
158
+ "expected_impact": "<string>"
159
+ }}
160
+ }}
161
+ ],
162
+ "student_analytics": [
163
+ {{
164
+ "student_id": "<string>",
165
+ "engagement_metrics": {{
166
+ "participation_level": <number between 0 and 1>,
167
+ "concept_understanding": "strong|moderate|needs_improvement",
168
+ "question_quality": "advanced|intermediate|basic"
169
+ }},
170
+ "struggling_topics": ["<string>", "<string>", ...],
171
+ "personalized_recommendation": "<string>"
172
+ }}
173
+ ]
174
+ }}
175
+
176
+ Guidelines for Analysis:
177
+ - Focus on actionable and concise insights rather than exhaustive details.
178
+ - Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
179
+ - Prioritize topics with higher difficulty scores or more students struggling.
180
+ - Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
181
+ - Make sure to include All** students in the analysis, not just a subset.
182
+ - for the ai_recommended_actions:
183
+ - Prioritize pedagogy recommendations for critical topics with the high difficulty scores or struggling percentages.
184
+ - For each action:
185
+ - Include specific teaching methods (e.g., interactive discussions or quizzes, problem-based learning, practical examples etc).
186
+ - Recommend supporting resources (e.g., videos, handouts, simulations).
187
+ - Provide reasoning for the recommendation and the expected outcomes for student learning.
188
+ - Example:
189
+ - **Action:** Conduct an interactive problem-solving session on "<Topic Name>".
190
+ - **Reasoning:** Students showed difficulty in applying concepts to practical problems.
191
+ - **Expected Outcome:** Improved practical understanding and application of the topic.
192
+ - **Pedagogy Recommendations:**
193
+ - **Methods:** Group discussions, real-world case studies.
194
+ - **Resources:** Online interactive tools, relevant case studies, video walkthroughs.
195
+ - **Expected Impact:** Enhance conceptual clarity by 40% and practical application by 30%.
196
+
197
+ The response must adhere strictly to the above JSON structure, with all fields populated appropriately."""
198
+
199
+
200
+ def _calculate_class_distribution(self, analytics: Dict) -> Dict:
201
+ """Calculate the distribution of students across performance levels."""
202
+ try:
203
+ total_students = len(analytics.get("student_insights", []))
204
+ if total_students == 0:
205
+ return {
206
+ "high_performers": 0,
207
+ "average_performers": 0,
208
+ "at_risk": 0
209
+ }
210
+
211
+ distribution = defaultdict(int)
212
+
213
+ for student in analytics.get("student_insights", []):
214
+ performance_level = student.get("performance_level", "average")
215
+ # Map performance levels to our three categories
216
+ if performance_level in ["excellent", "high", "high_performer"]:
217
+ distribution["high_performers"] += 1
218
+ elif performance_level in ["struggling", "low", "at_risk"]:
219
+ distribution["at_risk"] += 1
220
+ else:
221
+ distribution["average_performers"] += 1
222
+
223
+ # Convert to percentages
224
+ return {
225
+ level: count/total_students
226
+ for level, count in distribution.items()
227
+ }
228
+ except Exception as e:
229
+ print(f"Error calculating class distribution: {str(e)}")
230
+ return {
231
+ "high_performers": 0,
232
+ "average_performers": 0,
233
+ "at_risk": 0
234
+ }
235
+
236
+ def _identify_urgent_cases(self, analytics: Dict) -> List[str]:
237
+ """Identify students needing immediate attention."""
238
+ try:
239
+ urgent_cases = []
240
+ for student in analytics.get("student_insights", []):
241
+ student_id = student.get("student_id")
242
+ if not student_id:
243
+ continue
244
+
245
+ # Check multiple risk factors
246
+ risk_factors = 0
247
+
248
+ # Factor 1: Performance level
249
+ if student.get("performance_level") in ["struggling", "at_risk", "low"]:
250
+ risk_factors += 1
251
+
252
+ # Factor 2: Number of struggling topics
253
+ if len(student.get("struggling_topics", [])) >= 2:
254
+ risk_factors += 1
255
+
256
+ # Factor 3: Engagement metrics
257
+ engagement = student.get("engagement_metrics", {})
258
+ if (engagement.get("participation_level") == "low" or
259
+ engagement.get("concept_understanding") == "needs_improvement"):
260
+ risk_factors += 1
261
+
262
+ # If student has multiple risk factors, add to urgent cases
263
+ if risk_factors >= 2:
264
+ urgent_cases.append(student_id)
265
+
266
+ return urgent_cases
267
+ except Exception as e:
268
+ print(f"Error identifying urgent cases: {str(e)}")
269
+ return []
270
+
271
+ def _identify_monitoring_cases(self, analytics: Dict) -> List[str]:
272
+ """Identify students who need monitoring but aren't urgent cases."""
273
+ try:
274
+ monitoring_cases = []
275
+ urgent_cases = set(self._identify_urgent_cases(analytics))
276
+
277
+ for student in analytics.get("student_insights", []):
278
+ student_id = student.get("student_id")
279
+ if not student_id or student_id in urgent_cases:
280
+ continue
281
+
282
+ # Check monitoring criteria
283
+ monitoring_needed = False
284
+
285
+ # Criterion 1: Has some struggling topics but not enough for urgent
286
+ if len(student.get("struggling_topics", [])) == 1:
287
+ monitoring_needed = True
288
+
289
+ # Criterion 2: Medium-low engagement
290
+ engagement = student.get("engagement_metrics", {})
291
+ if engagement.get("participation_level") == "medium":
292
+ monitoring_needed = True
293
+
294
+ # Criterion 3: Recent performance decline
295
+ if student.get("performance_level") == "average":
296
+ monitoring_needed = True
297
+
298
+ if monitoring_needed:
299
+ monitoring_cases.append(student_id)
300
+
301
+ return monitoring_cases
302
+ except Exception as e:
303
+ print(f"Error identifying monitoring cases: {str(e)}")
304
+ return []
305
+
306
+ def _identify_critical_topics(self, analytics: Dict) -> List[str]:
307
+ """
308
+ Identify critical topics that need attention based on multiple factors.
309
+ Returns a list of topic names that are considered critical.
310
+ """
311
+ try:
312
+ critical_topics = []
313
+ topics = analytics.get("topic_insights", [])
314
+
315
+ for topic in topics:
316
+ if not isinstance(topic, dict):
317
+ continue
318
+
319
+ # Initialize score for topic criticality
320
+ critical_score = 0
321
+
322
+ # Factor 1: High difficulty level
323
+ difficulty_level = topic.get("difficulty_level", 0)
324
+ if difficulty_level > 0.7:
325
+ critical_score += 2
326
+ elif difficulty_level > 0.5:
327
+ critical_score += 1
328
+
329
+ # Factor 2: Number of students struggling
330
+ student_count = topic.get("student_count", 0)
331
+ total_students = len(analytics.get("student_insights", []))
332
+ if total_students > 0:
333
+ struggle_ratio = student_count / total_students
334
+ if struggle_ratio > 0.5:
335
+ critical_score += 2
336
+ elif struggle_ratio > 0.3:
337
+ critical_score += 1
338
+
339
+ # Factor 3: Number of common issues
340
+ if len(topic.get("common_issues", [])) > 2:
341
+ critical_score += 1
342
+
343
+ # Factor 4: Number of key misconceptions
344
+ if len(topic.get("key_misconceptions", [])) > 1:
345
+ critical_score += 1
346
+
347
+ # If topic exceeds threshold, mark as critical
348
+ if critical_score >= 3:
349
+ critical_topics.append(topic.get("topic", "Unknown Topic"))
350
+
351
+ return critical_topics
352
+
353
+ except Exception as e:
354
+ print(f"Error identifying critical topics: {str(e)}")
355
+ return []
356
+
357
+ def _calculate_engagement(self, analytics: Dict) -> Dict:
358
+ """
359
+ Calculate detailed engagement metrics across all students.
360
+ Returns a dictionary with engagement statistics.
361
+ """
362
+ try:
363
+ total_students = len(analytics.get("student_insights", []))
364
+ if total_students == 0:
365
+ return {
366
+ "total_students": 0,
367
+ "overall_score": 0,
368
+ "engagement_distribution": {
369
+ "high": 0,
370
+ "medium": 0,
371
+ "low": 0
372
+ },
373
+ "participation_metrics": {
374
+ "average_topics_per_student": 0,
375
+ "active_participants": 0
376
+ }
377
+ }
378
+
379
+ engagement_levels = defaultdict(int)
380
+ total_topics_engaged = 0
381
+ active_participants = 0
382
+
383
+ for student in analytics.get("student_insights", []):
384
+ # Get engagement metrics
385
+ metrics = student.get("engagement_metrics", {})
386
+
387
+ # Calculate participation level
388
+ participation = metrics.get("participation_level", "low").lower()
389
+ engagement_levels[participation] += 1
390
+
391
+ # Count topics student is engaged with
392
+ topics_count = len(student.get("struggling_topics", []))
393
+ total_topics_engaged += topics_count
394
+
395
+ # Count active participants (students engaging with any topics)
396
+ if topics_count > 0:
397
+ active_participants += 1
398
+
399
+ # Calculate overall engagement score (0-1)
400
+ weighted_score = (
401
+ (engagement_levels["high"] * 1.0 +
402
+ engagement_levels["medium"] * 0.6 +
403
+ engagement_levels["low"] * 0.2) / total_students
404
+ )
405
+
406
+ return {
407
+ "total_students": total_students,
408
+ "overall_score": round(weighted_score, 2),
409
+ "engagement_distribution": {
410
+ level: count/total_students
411
+ for level, count in engagement_levels.items()
412
+ },
413
+ "participation_metrics": {
414
+ "average_topics_per_student": round(total_topics_engaged / total_students, 2),
415
+ "active_participants_ratio": round(active_participants / total_students, 2)
416
+ }
417
+ }
418
+
419
+ except Exception as e:
420
+ print(f"Error calculating engagement: {str(e)}")
421
+ return {
422
+ "total_students": 0,
423
+ "overall_score": 0,
424
+ "engagement_distribution": {
425
+ "high": 0,
426
+ "medium": 0,
427
+ "low": 0
428
+ },
429
+ "participation_metrics": {
430
+ "average_topics_per_student": 0,
431
+ "active_participants_ratio": 0
432
+ }
433
+ }
434
+
435
+ def _process_gemini_response(self, response: str) -> Dict:
436
+ """Process and validate Gemini's response."""
437
+ # try:
438
+ # analytics = json.loads(response)
439
+ # return self._enrich_analytics(analytics)
440
+ # except json.JSONDecodeError as e:
441
+ # print(f"Error decoding Gemini response: {e}")
442
+ # return self._fallback_analytics()
443
+ try:
444
+ # Parse JSON response
445
+ analytics = json.loads(response)
446
+
447
+ # Validate required fields exist
448
+ required_fields = {
449
+ "topic_insights": [],
450
+ "student_insights": [],
451
+ "recommended_actions": []
452
+ }
453
+
454
+ # Ensure all required fields exist with default values
455
+ for field, default_value in required_fields.items():
456
+ if field not in analytics or not analytics[field]:
457
+ analytics[field] = default_value
458
+
459
+ # Now enrich the validated analytics
460
+ return self._enrich_analytics(analytics)
461
+
462
+ except (json.JSONDecodeError, KeyError, TypeError) as e:
463
+ print(f"Error processing Gemini response: {str(e)}")
464
+ print(f"Raw response: {response}")
465
+ return self._fallback_analytics()
466
+
467
+ def _enrich_analytics(self, analytics: Dict) -> Dict:
468
+ """Add derived insights and metrics to the analytics."""
469
+ # Add overall course health metrics
470
+ analytics["course_health"] = {
471
+ "overall_engagement": self._calculate_engagement(analytics),
472
+ "critical_topics": self._identify_critical_topics(analytics),
473
+ "class_distribution": self._calculate_class_distribution(analytics)
474
+ }
475
+
476
+ # Add intervention urgency scores
477
+ analytics["intervention_metrics"] = {
478
+ "immediate_attention_needed": self._identify_urgent_cases(analytics),
479
+ "monitoring_required": self._identify_monitoring_cases(analytics)
480
+ }
481
+
482
+ return analytics
483
+
484
+ def _calculate_engagement(self, analytics: Dict) -> Dict:
485
+ # """Calculate overall engagement metrics."""
486
+ # total_students = len(analytics["student_insights"])
487
+ # engagement_levels = defaultdict(int)
488
+
489
+ # for student in analytics["student_insights"]:
490
+ # engagement_levels[student["engagement_metrics"]["participation_level"]] += 1
491
+
492
+ # return {
493
+ # "total_students": total_students,
494
+ # "engagement_distribution": {
495
+ # level: count/total_students
496
+ # for level, count in engagement_levels.items()
497
+ # }
498
+ # }
499
+ """Calculate overall engagement metrics with defensive programming."""
500
+ try:
501
+ total_students = len(analytics.get("student_insights", []))
502
+ if total_students == 0:
503
+ return {
504
+ "total_students": 0,
505
+ "engagement_distribution": {
506
+ "high": 0,
507
+ "medium": 0,
508
+ "low": 0
509
+ }
510
+ }
511
+
512
+ engagement_levels = defaultdict(int)
513
+
514
+ for student in analytics.get("student_insights", []):
515
+ metrics = student.get("engagement_metrics", {})
516
+ level = metrics.get("participation_level", "low")
517
+ engagement_levels[level] += 1
518
+
519
+ return {
520
+ "total_students": total_students,
521
+ "engagement_distribution": {
522
+ level: count/total_students
523
+ for level, count in engagement_levels.items()
524
+ }
525
+ }
526
+ except Exception as e:
527
+ print(f"Error calculating engagement: {str(e)}")
528
+ return {
529
+ "total_students": 0,
530
+ "engagement_distribution": {
531
+ "high": 0,
532
+ "medium": 0,
533
+ "low": 0
534
+ }
535
+ }
536
+
537
+ def _identify_critical_topics(self, analytics: Dict) -> List[Dict]:
538
+ # """Identify topics needing immediate attention."""
539
+ # return [
540
+ # topic for topic in analytics["topic_insights"]
541
+ # if topic["difficulty_level"] > 0.7 or
542
+ # len(topic["common_issues"]) > 2
543
+ # ]
544
+ """Identify topics needing immediate attention with defensive programming."""
545
+ try:
546
+ return [
547
+ topic for topic in analytics.get("topic_insights", [])
548
+ if topic.get("difficulty_level", 0) > 0.7 or
549
+ len(topic.get("common_issues", [])) > 2
550
+ ]
551
+ except Exception as e:
552
+ print(f"Error identifying critical topics: {str(e)}")
553
+ return []
554
+
555
def generate_analytics(self, chat_histories: List[Dict], all_topics: List[str]) -> Dict:
    """Generate analytics from chat histories with a single Gemini call.

    The histories are preprocessed, turned into a prompt and sent to the
    model with a JSON response MIME type. The parsed JSON is returned as-is;
    it is not passed through ``_process_gemini_response``.

    An earlier variant passed ``response_schema=AnalyticsResponse`` to the
    model; per the original author's note it caused a 'student_insights' key
    error, so no schema is sent here.

    Args:
        chat_histories: Raw chat documents (see
            ``_preprocess_chat_histories`` for the keys that are read).
        all_topics: Names of the topics covered.

    Returns:
        The JSON object produced by the model, or ``_fallback_analytics()``
        when an input is missing/empty, a step fails, the response is empty,
        or the response is not a JSON object.
    """
    try:
        print("Input validation:")
        # Guard len() so a None history list is reported as missing input
        # instead of raising inside the debug print.
        history_count = len(chat_histories) if chat_histories else 0
        print(f"Chat histories: {history_count} entries")
        print(f"Topics: {all_topics}")

        if not chat_histories or not all_topics:
            print("Missing required input data")
            return self._fallback_analytics()

        # Each stage has its own handler so the log names the failing step.
        try:
            processed_histories = self._preprocess_chat_histories(chat_histories)
            print("Successfully preprocessed chat histories")
        except Exception as preprocess_error:
            print(f"Error in preprocessing: {str(preprocess_error)}")
            return self._fallback_analytics()

        try:
            prompt = self._create_analytics_prompt(processed_histories, all_topics)
            print("Successfully created prompt")
            print("Prompt preview:", prompt[:200] + "...")  # first 200 chars
        except Exception as prompt_error:
            print(f"Error in prompt creation: {str(prompt_error)}")
            return self._fallback_analytics()

        response = self.model.generate_content(
            prompt,
            generation_config=genai.GenerationConfig(
                response_mime_type="application/json",
                temperature=0.15
            )
        )

        if not response.text:
            print("Empty response from Gemini")
            return self._fallback_analytics()

        analytics = json.loads(response.text)
        # Valid JSON can still be a list or scalar; callers are promised a dict.
        if not isinstance(analytics, dict):
            print(f"Unexpected response type from Gemini: {type(analytics)}")
            return self._fallback_analytics()
        return analytics

    except Exception as e:
        # Top-level boundary: log everything and degrade to the fallback.
        print(f"Error generating analytics: {str(e)}")
        print(f"Error type: {type(e)}")
        import traceback
        print("Full traceback:", traceback.format_exc())
        return self._fallback_analytics()
661
+
662
+ def _preprocess_chat_histories(self, chat_histories: List[Dict]) -> List[Dict]:
663
+ # """Preprocess chat histories to focus on relevant information."""
664
+ # processed = []
665
+
666
+ # for chat in chat_histories:
667
+ # print(str(chat["user_id"]))
668
+ # processed_chat = {
669
+ # "user_id": str(chat["user_id"]),
670
+ # "messages": [
671
+ # {
672
+ # "prompt": msg["prompt"],
673
+ # "response": msg["response"]
674
+ # }
675
+ # for msg in chat["messages"]
676
+ # ]
677
+ # }
678
+ # processed.append(processed_chat)
679
+
680
+ # return processed
681
+
682
+ # Code 2:
683
+ """Preprocess chat histories to focus on relevant information."""
684
+ processed = []
685
+
686
+ for chat in chat_histories:
687
+ # Convert ObjectId to string if it's an ObjectId
688
+ user_id = str(chat["user_id"]["$oid"]) if isinstance(chat["user_id"], dict) and "$oid" in chat["user_id"] else str(chat["user_id"])
689
+
690
+ try:
691
+ processed_chat = {
692
+ "user_id": user_id,
693
+ "messages": [
694
+ {
695
+ "prompt": msg["prompt"],
696
+ "response": msg["response"]
697
+ }
698
+ for msg in chat["messages"]
699
+ ]
700
+ }
701
+ processed.append(processed_chat)
702
+ print(f"Successfully processed chat for user: {user_id}")
703
+ except Exception as e:
704
+ print(f"Error processing chat for user: {user_id}")
705
+ print(f"Error details: {str(e)}")
706
+ continue
707
+
708
+ return processed
709
+
710
+ def _fallback_analytics(self) -> Dict:
711
+ # """Provide basic analytics in case of LLM processing failure."""
712
+ # return {
713
+ # "topic_insights": [],
714
+ # "student_insights": [],
715
+ # "recommended_actions": [
716
+ # {
717
+ # "action": "Review analytics generation process",
718
+ # "priority": "high",
719
+ # "target_group": "system_administrators",
720
+ # "reasoning": "Analytics generation failed",
721
+ # "expected_impact": "Restore analytics functionality"
722
+ # }
723
+ # ]
724
+ # }
725
+ """Provide comprehensive fallback analytics that match our schema."""
726
+ return {
727
+ "topic_insights": [],
728
+ "student_insights": [],
729
+ "recommended_actions": [
730
+ {
731
+ "action": "Review analytics generation process",
732
+ "priority": "high",
733
+ "target_group": "system_administrators",
734
+ "reasoning": "Analytics generation failed",
735
+ "expected_impact": "Restore analytics functionality"
736
+ }
737
+ ],
738
+ "course_health": {
739
+ "overall_engagement": 0,
740
+ "critical_topics": [],
741
+ "class_distribution": {
742
+ "high_performers": 0,
743
+ "average_performers": 0,
744
+ "at_risk": 0
745
+ }
746
+ },
747
+ "intervention_metrics": {
748
+ "immediate_attention_needed": [],
749
+ "monitoring_required": []
750
+ }
751
+ }
752
+
753
+ # if __name__ == "__main__":
754
+ # # Example usage
755
+
756
+
757
+ # analytics_generator = NovaScholarAnalytics()
758
+ # analytics = analytics_generator.generate_analytics(chat_histories, all_topics)
759
  # print(json.dumps(analytics, indent=2))
pre_class_analytics4.py CHANGED
@@ -1,592 +1,592 @@
1
- import pandas as pd
2
- import numpy as np
3
- from datetime import datetime
4
- from typing import List, Dict, Any, Tuple
5
- import spacy
6
- from collections import Counter, defaultdict
7
- from sklearn.feature_extraction.text import TfidfVectorizer
8
- from sklearn.metrics.pairwise import cosine_similarity
9
- from textblob import TextBlob
10
- import networkx as nx
11
- from scipy import stats
12
- import logging
13
- import json
14
- from dataclasses import dataclass
15
- from enum import Enum
16
-
17
- # Configure logging
18
- logging.basicConfig(level=logging.INFO)
19
- logger = logging.getLogger(__name__)
20
-
21
- class TopicDifficulty(Enum):
22
- EASY = "easy"
23
- MODERATE = "moderate"
24
- DIFFICULT = "difficult"
25
- VERY_DIFFICULT = "very_difficult"
26
-
27
-
28
- @dataclass
29
- class QuestionMetrics:
30
- complexity_score: float
31
- follow_up_count: int
32
- clarification_count: int
33
- time_spent: float
34
- sentiment_score: float
35
-
36
- @dataclass
37
- class TopicInsights:
38
- difficulty_level: TopicDifficulty
39
- common_confusion_points: List[str]
40
- question_patterns: List[str]
41
- time_distribution: Dict[str, float]
42
- engagement_metrics: Dict[str, float]
43
- recommended_focus_areas: List[str]
44
-
45
- def to_dict(self):
46
- return {
47
- "difficulty_level": self.difficulty_level.value, # Convert enum to its value
48
- "common_confusion_points": self.common_confusion_points,
49
- "question_patterns": self.question_patterns,
50
- "time_distribution": {str(k): v for k, v in self.time_distribution.items()},
51
- "engagement_metrics": self.engagement_metrics,
52
- "recommended_focus_areas": self.recommended_focus_areas,
53
- }
54
-
55
- class PreClassAnalytics:
56
- def __init__(self, nlp_model: str = "en_core_web_lg"):
57
- """Initialize the analytics system with necessary components."""
58
- self.nlp = spacy.load(nlp_model)
59
- self.question_indicators = {
60
- "what", "why", "how", "when", "where", "which", "who",
61
- "whose", "whom", "can", "could", "would", "will", "explain"
62
- }
63
- self.confusion_indicators = {
64
- "confused", "don't understand", "unclear", "not clear",
65
- "stuck", "difficult", "hard", "help", "explain again"
66
- }
67
- self.follow_up_indicators = {
68
- "also", "another", "additionally", "furthermore", "moreover",
69
- "besides", "related", "similarly", "again"
70
- }
71
-
72
- def preprocess_chat_history(self, chat_history: List[Dict]) -> pd.DataFrame:
73
- """Convert chat history to DataFrame with enhanced features."""
74
- messages = []
75
- for chat in chat_history:
76
- user_id = chat['user_id']['$oid']
77
- for msg in chat['messages']:
78
- try:
79
- # Ensure the timestamp is in the correct format
80
- if isinstance(msg['timestamp'], dict) and '$date' in msg['timestamp']:
81
- timestamp = pd.to_datetime(msg['timestamp']['$date'])
82
- elif isinstance(msg['timestamp'], str):
83
- timestamp = pd.to_datetime(msg['timestamp'])
84
- else:
85
- raise ValueError("Invalid timestamp format")
86
- except Exception as e:
87
- print(f"Error parsing timestamp: {msg['timestamp']}, error: {e}")
88
- timestamp = pd.NaT # Use NaT (Not a Time) for invalid timestamps
89
-
90
- messages.append({
91
- 'user_id': user_id,
92
- 'timestamp': timestamp,
93
- 'prompt': msg['prompt'],
94
- 'response': msg['response'],
95
- 'is_question': any(q in msg['prompt'].lower() for q in self.question_indicators),
96
- 'shows_confusion': any(c in msg['prompt'].lower() for c in self.confusion_indicators),
97
- 'is_followup': any(f in msg['prompt'].lower() for f in self.follow_up_indicators)
98
- })
99
-
100
- df = pd.DataFrame(messages)
101
- df['sentiment'] = df['prompt'].apply(lambda x: TextBlob(x).sentiment.polarity)
102
- return df
103
-
104
- def extract_topic_hierarchies(self, df: pd.DataFrame) -> Dict[str, List[str]]:
105
- """Extract hierarchical topic relationships from conversations."""
106
- topic_hierarchy = defaultdict(list)
107
-
108
- for _, row in df.iterrows():
109
- doc = self.nlp(row['prompt'])
110
-
111
- # Extract main topics and subtopics using noun chunks and dependencies
112
- main_topics = []
113
- subtopics = []
114
-
115
- for chunk in doc.noun_chunks:
116
- if chunk.root.dep_ in ('nsubj', 'dobj'):
117
- main_topics.append(chunk.text.lower())
118
- else:
119
- subtopics.append(chunk.text.lower())
120
-
121
- # Build hierarchy
122
- for main_topic in main_topics:
123
- topic_hierarchy[main_topic].extend(subtopics)
124
-
125
- # Clean and deduplicate
126
- return {k: list(set(v)) for k, v in topic_hierarchy.items()}
127
-
128
- def analyze_topic_difficulty(self, df: pd.DataFrame, topic: str) -> TopicDifficulty:
129
- """Determine topic difficulty based on various metrics."""
130
- topic_msgs = df[df['prompt'].str.contains(topic, case=False)]
131
-
132
- # Calculate difficulty indicators
133
- confusion_rate = topic_msgs['shows_confusion'].mean()
134
- question_rate = topic_msgs['is_question'].mean()
135
- follow_up_rate = topic_msgs['is_followup'].mean()
136
- avg_sentiment = topic_msgs['sentiment'].mean()
137
-
138
- # Calculate composite difficulty score
139
- difficulty_score = (
140
- confusion_rate * 0.4 +
141
- question_rate * 0.3 +
142
- follow_up_rate * 0.2 +
143
- (1 - (avg_sentiment + 1) / 2) * 0.1
144
- )
145
-
146
- # Map score to difficulty level
147
- if difficulty_score < 0.3:
148
- return TopicDifficulty.EASY
149
- elif difficulty_score < 0.5:
150
- return TopicDifficulty.MODERATE
151
- elif difficulty_score < 0.7:
152
- return TopicDifficulty.DIFFICULT
153
- else:
154
- return TopicDifficulty.VERY_DIFFICULT
155
-
156
- def identify_confusion_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
157
- """Identify common patterns in student confusion."""
158
- confused_msgs = df[
159
- (df['prompt'].str.contains(topic, case=False)) &
160
- (df['shows_confusion'])
161
- ]['prompt']
162
-
163
- patterns = []
164
- for msg in confused_msgs:
165
- doc = self.nlp(msg)
166
-
167
- # Extract key phrases around confusion indicators
168
- for sent in doc.sents:
169
- for token in sent:
170
- if token.text.lower() in self.confusion_indicators:
171
- # Get context window around confusion indicator
172
- context = sent.text
173
- patterns.append(context)
174
-
175
- # Group similar patterns
176
- if patterns:
177
- vectorizer = TfidfVectorizer(ngram_range=(1, 3))
178
- tfidf_matrix = vectorizer.fit_transform(patterns)
179
- similarity_matrix = cosine_similarity(tfidf_matrix)
180
-
181
- # Cluster similar patterns
182
- G = nx.Graph()
183
- for i in range(len(patterns)):
184
- for j in range(i + 1, len(patterns)):
185
- if similarity_matrix[i][j] > 0.5: # Similarity threshold
186
- G.add_edge(i, j)
187
-
188
- # Extract representative patterns from each cluster
189
- clusters = list(nx.connected_components(G))
190
- return [patterns[min(cluster)] for cluster in clusters]
191
-
192
- return []
193
-
194
- def analyze_question_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
195
- """Analyze patterns in student questions about the topic."""
196
- topic_questions = df[
197
- (df['prompt'].str.contains(topic, case=False)) &
198
- (df['is_question'])
199
- ]['prompt']
200
-
201
- question_types = defaultdict(list)
202
- for question in topic_questions:
203
- doc = self.nlp(question)
204
-
205
- # Categorize questions
206
- if any(token.text.lower() in {"what", "define", "explain"} for token in doc):
207
- question_types["conceptual"].append(question)
208
- elif any(token.text.lower() in {"how", "steps", "process"} for token in doc):
209
- question_types["procedural"].append(question)
210
- elif any(token.text.lower() in {"why", "reason", "because"} for token in doc):
211
- question_types["reasoning"].append(question)
212
- else:
213
- question_types["other"].append(question)
214
-
215
- # Extract patterns from each category
216
- patterns = []
217
- for category, questions in question_types.items():
218
- if questions:
219
- vectorizer = TfidfVectorizer(ngram_range=(1, 3))
220
- tfidf_matrix = vectorizer.fit_transform(questions)
221
-
222
- # Get most representative questions
223
- feature_array = np.mean(tfidf_matrix.toarray(), axis=0)
224
- tfidf_sorting = np.argsort(feature_array)[::-1]
225
- features = vectorizer.get_feature_names_out()
226
-
227
- patterns.append(f"{category}: {' '.join(features[tfidf_sorting[:3]])}")
228
-
229
- return patterns
230
-
231
- def analyze_time_distribution(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
232
- """Analyze time spent on different aspects of the topic."""
233
- topic_msgs = df[df['prompt'].str.contains(topic, case=False)].copy()
234
- if len(topic_msgs) < 2:
235
- return {}
236
-
237
- topic_msgs['time_diff'] = topic_msgs['timestamp'].diff()
238
-
239
- # Calculate time distribution
240
- distribution = {
241
- 'total_time': topic_msgs['time_diff'].sum().total_seconds() / 60,
242
- 'avg_time_per_message': topic_msgs['time_diff'].mean().total_seconds() / 60,
243
- 'max_time_gap': topic_msgs['time_diff'].max().total_seconds() / 60,
244
- 'time_spent_on_questions': topic_msgs[topic_msgs['is_question']]['time_diff'].sum().total_seconds() / 60,
245
- 'time_spent_on_confusion': topic_msgs[topic_msgs['shows_confusion']]['time_diff'].sum().total_seconds() / 60
246
- }
247
-
248
- return distribution
249
-
250
- def calculate_engagement_metrics(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
251
- """Calculate student engagement metrics for the topic."""
252
- topic_msgs = df[df['prompt'].str.contains(topic, case=False)]
253
-
254
- metrics = {
255
- 'message_count': len(topic_msgs),
256
- 'question_ratio': topic_msgs['is_question'].mean(),
257
- 'confusion_ratio': topic_msgs['shows_confusion'].mean(),
258
- 'follow_up_ratio': topic_msgs['is_followup'].mean(),
259
- 'avg_sentiment': topic_msgs['sentiment'].mean(),
260
- 'engagement_score': 0.0 # Will be calculated below
261
- }
262
-
263
- # Calculate engagement score
264
- metrics['engagement_score'] = (
265
- metrics['message_count'] * 0.3 +
266
- metrics['question_ratio'] * 0.25 +
267
- metrics['follow_up_ratio'] * 0.25 +
268
- (metrics['avg_sentiment'] + 1) / 2 * 0.2 # Normalize sentiment to 0-1
269
- )
270
-
271
- return metrics
272
-
273
- def generate_topic_insights(self, df: pd.DataFrame, topic: str) -> TopicInsights:
274
- """Generate comprehensive insights for a topic."""
275
- difficulty = self.analyze_topic_difficulty(df, topic)
276
- confusion_points = self.identify_confusion_patterns(df, topic)
277
- question_patterns = self.analyze_question_patterns(df, topic)
278
- time_distribution = self.analyze_time_distribution(df, topic)
279
- engagement_metrics = self.calculate_engagement_metrics(df, topic)
280
-
281
- # Generate recommended focus areas based on insights
282
- focus_areas = []
283
-
284
- if difficulty in (TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT):
285
- focus_areas.append("Fundamental concept reinforcement needed")
286
-
287
- if confusion_points:
288
- focus_areas.append(f"Address common confusion around: {', '.join(confusion_points[:3])}")
289
-
290
- if engagement_metrics['confusion_ratio'] > 0.3:
291
- focus_areas.append("Consider alternative teaching approaches")
292
-
293
- if time_distribution.get('time_spent_on_questions', 0) > time_distribution.get('total_time', 0) * 0.5:
294
- focus_areas.append("More practical examples or demonstrations needed")
295
-
296
- return TopicInsights(
297
- difficulty_level=difficulty,
298
- common_confusion_points=confusion_points,
299
- question_patterns=question_patterns,
300
- time_distribution=time_distribution,
301
- engagement_metrics=engagement_metrics,
302
- recommended_focus_areas=focus_areas
303
- )
304
-
305
- def analyze_student_progress(self, df: pd.DataFrame) -> Dict[str, Any]:
306
- """Analyze individual student progress and learning patterns."""
307
- student_progress = {}
308
-
309
- for student_id in df['user_id'].unique():
310
- student_msgs = df[df['user_id'] == student_id]
311
-
312
- # Calculate student-specific metrics
313
- progress = {
314
- 'total_messages': len(student_msgs),
315
- 'questions_asked': student_msgs['is_question'].sum(),
316
- 'confusion_instances': student_msgs['shows_confusion'].sum(),
317
- 'avg_sentiment': student_msgs['sentiment'].mean(),
318
- 'topic_engagement': {},
319
- 'learning_pattern': self._identify_learning_pattern(student_msgs)
320
- }
321
-
322
- # Analyze topic-specific engagement
323
- topics = self.extract_topic_hierarchies(student_msgs)
324
- for topic in topics:
325
- topic_msgs = student_msgs[student_msgs['prompt'].str.contains(topic, case=False)]
326
- progress['topic_engagement'][topic] = {
327
- 'message_count': len(topic_msgs),
328
- 'confusion_rate': topic_msgs['shows_confusion'].mean(),
329
- 'sentiment_trend': stats.linregress(
330
- range(len(topic_msgs)),
331
- topic_msgs['sentiment']
332
- ).slope
333
- }
334
-
335
- student_progress[student_id] = progress
336
-
337
- return student_progress
338
-
339
- def _identify_learning_pattern(self, student_msgs: pd.DataFrame) -> str:
340
- """Identify student's learning pattern based on their interaction style."""
341
- # Calculate key metrics
342
- question_ratio = student_msgs['is_question'].mean()
343
- confusion_ratio = student_msgs['shows_confusion'].mean()
344
- follow_up_ratio = student_msgs['is_followup'].mean()
345
- sentiment_trend = stats.linregress(
346
- range(len(student_msgs)),
347
- student_msgs['sentiment']
348
- ).slope
349
-
350
- # Identify pattern
351
- if question_ratio > 0.6:
352
- return "Inquisitive Learner"
353
- elif confusion_ratio > 0.4:
354
- return "Needs Additional Support"
355
- elif follow_up_ratio > 0.5:
356
- return "Deep Dive Learner"
357
- elif sentiment_trend > 0:
358
- return "Progressive Learner"
359
- else:
360
- return "Steady Learner"
361
-
362
- def generate_comprehensive_report(self, chat_history: List[Dict]) -> Dict[str, Any]:
363
- """Generate a comprehensive analytics report."""
364
- # Preprocess chat history
365
- df = self.preprocess_chat_history(chat_history)
366
-
367
- # Extract topics
368
- topics = self.extract_topic_hierarchies(df)
369
-
370
- report = {
371
- 'topics': {},
372
- 'student_progress': self.analyze_student_progress(df),
373
- 'overall_metrics': {
374
- 'total_conversations': len(df),
375
- 'unique_students': df['user_id'].nunique(),
376
- 'avg_sentiment': df['sentiment'].mean(),
377
- 'most_discussed_topics': Counter(
378
- topic for topics_list in topics.values()
379
- for topic in topics_list
380
- ).most_common(5)
381
- }
382
- }
383
-
384
- # Generate topic-specific insights
385
- for main_topic, subtopics in topics.items():
386
- subtopic_insights = {}
387
- for subtopic in subtopics:
388
- subtopic_insights[subtopic] = {
389
- 'insights': self.generate_topic_insights(df, subtopic),
390
- 'related_topics': [t for t in subtopics if t != subtopic],
391
- 'student_engagement': {
392
- student_id: self.calculate_engagement_metrics(
393
- df[df['user_id'] == student_id],
394
- subtopic
395
- )
396
- for student_id in df['user_id'].unique()
397
- }
398
- }
399
-
400
- report['topics'][main_topic] = {
401
- 'insights': self.generate_topic_insights(df, main_topic),
402
- 'subtopics': subtopic_insights,
403
- 'topic_relationships': {
404
- 'hierarchy_depth': len(subtopics),
405
- 'connection_strength': self._calculate_topic_connections(df, main_topic, subtopics),
406
- 'progression_path': self._identify_topic_progression(df, main_topic, subtopics)
407
- }
408
- }
409
-
410
- # Add temporal analysis
411
- report['temporal_analysis'] = {
412
- 'daily_engagement': df.groupby(df['timestamp'].dt.date).agg({
413
- 'user_id': 'count',
414
- 'is_question': 'sum',
415
- 'shows_confusion': 'sum',
416
- 'sentiment': 'mean'
417
- }).to_dict(),
418
- 'peak_activity_hours': df.groupby(df['timestamp'].dt.hour)['user_id'].count().nlargest(3).to_dict(),
419
- 'learning_trends': self._analyze_learning_trends(df)
420
- }
421
-
422
- # Add recommendations
423
- report['recommendations'] = self._generate_recommendations(report)
424
-
425
- return report
426
-
427
- def _calculate_topic_connections(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> Dict[str, float]:
428
- """Calculate connection strength between topics based on co-occurrence."""
429
- connections = {}
430
- main_topic_msgs = df[df['prompt'].str.contains(main_topic, case=False)]
431
-
432
- for subtopic in subtopics:
433
- cooccurrence = df[
434
- df['prompt'].str.contains(main_topic, case=False) &
435
- df['prompt'].str.contains(subtopic, case=False)
436
- ].shape[0]
437
-
438
- connection_strength = cooccurrence / len(main_topic_msgs) if len(main_topic_msgs) > 0 else 0
439
- connections[subtopic] = connection_strength
440
-
441
- return connections
442
-
443
- def _identify_topic_progression(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> List[str]:
444
- """Identify optimal topic progression path based on student interactions."""
445
- topic_difficulties = {}
446
-
447
- for subtopic in subtopics:
448
- difficulty = self.analyze_topic_difficulty(df, subtopic)
449
- topic_difficulties[subtopic] = difficulty.value
450
-
451
- # Sort subtopics by difficulty
452
- return sorted(subtopics, key=lambda x: topic_difficulties[x])
453
-
454
- def _analyze_learning_trends(self, df: pd.DataFrame) -> Dict[str, Any]:
455
- """Analyze overall learning trends across the dataset."""
456
- return {
457
- 'sentiment_trend': stats.linregress(
458
- range(len(df)),
459
- df['sentiment']
460
- )._asdict(),
461
- 'confusion_trend': stats.linregress(
462
- range(len(df)),
463
- df['shows_confusion']
464
- )._asdict(),
465
- 'engagement_progression': self._calculate_engagement_progression(df)
466
- }
467
-
468
- def _calculate_engagement_progression(self, df: pd.DataFrame) -> Dict[str, float]:
469
- """Calculate how student engagement changes over time."""
470
- df['week'] = df['timestamp'].dt.isocalendar().week
471
- weekly_engagement = df.groupby('week').agg({
472
- 'is_question': 'mean',
473
- 'shows_confusion': 'mean',
474
- 'is_followup': 'mean',
475
- 'sentiment': 'mean'
476
- })
477
-
478
- return {
479
- 'question_trend': stats.linregress(
480
- range(len(weekly_engagement)),
481
- weekly_engagement['is_question']
482
- ).slope,
483
- 'confusion_trend': stats.linregress(
484
- range(len(weekly_engagement)),
485
- weekly_engagement['shows_confusion']
486
- ).slope,
487
- 'follow_up_trend': stats.linregress(
488
- range(len(weekly_engagement)),
489
- weekly_engagement['is_followup']
490
- ).slope,
491
- 'sentiment_trend': stats.linregress(
492
- range(len(weekly_engagement)),
493
- weekly_engagement['sentiment']
494
- ).slope
495
- }
496
-
497
- def _generate_recommendations(self, report: Dict[str, Any]) -> List[str]:
498
- """Generate actionable recommendations based on the analysis."""
499
- recommendations = []
500
-
501
- # Analyze difficulty distribution
502
- difficult_topics = [
503
- topic for topic, data in report['topics'].items()
504
- if data['insights'].difficulty_level in
505
- (TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT)
506
- ]
507
-
508
- if difficult_topics:
509
- recommendations.append(
510
- f"Consider providing additional resources for challenging topics: {', '.join(difficult_topics)}"
511
- )
512
-
513
- # Analyze student engagement
514
- avg_engagement = np.mean([
515
- progress['questions_asked'] / progress['total_messages']
516
- for progress in report['student_progress'].values()
517
- ])
518
-
519
- if avg_engagement < 0.3:
520
- recommendations.append(
521
- "Implement more interactive elements to increase student engagement"
522
- )
523
-
524
- # Analyze temporal patterns
525
- peak_hours = list(report['temporal_analysis']['peak_activity_hours'].keys())
526
- recommendations.append(
527
- f"Consider scheduling additional support during peak activity hours: {peak_hours}"
528
- )
529
-
530
- # Analyze learning trends
531
- # sentiment_trend = report['temporal_analysis']['learning_trends']['sentiment_trend']
532
- # if sentiment_trend < 0:
533
- # recommendations.append(
534
- # "Review teaching approach to address declining student satisfaction"
535
- # )
536
- # Analyze learning trends
537
- # Analyze learning trends
538
- sentiment_trend = report.get('temporal_analysis', {}).get('learning_trends', {}).get('sentiment_trend', None)
539
- if isinstance(sentiment_trend, (int, float)):
540
- if sentiment_trend < 0:
541
- recommendations.append(
542
- "Review teaching approach to address declining student satisfaction"
543
- )
544
- elif isinstance(sentiment_trend, dict):
545
- # Handle the case where sentiment_trend is a dictionary
546
- print(f"Unexpected dict format for sentiment_trend: {sentiment_trend}")
547
- else:
548
- print(f"Unexpected type for sentiment_trend: {type(sentiment_trend)}")
549
-
550
- return recommendations
551
-
552
- class CustomJSONEncoder(json.JSONEncoder):
553
- def default(self, obj):
554
- if isinstance(obj, TopicDifficulty):
555
- return obj.value
556
- if isinstance(obj, TopicInsights):
557
- return obj.to_dict()
558
- if isinstance(obj, np.integer):
559
- return int(obj)
560
- if isinstance(obj, np.floating):
561
- return float(obj)
562
- if isinstance(obj, np.ndarray):
563
- return obj.tolist()
564
- if isinstance(obj, datetime):
565
- return obj.isoformat()
566
- return super().default(obj)
567
-
568
- def convert_insights_to_dict(report):
569
- for main_topic, data in report['topics'].items():
570
- if isinstance(data['insights'], TopicInsights):
571
- data['insights'] = data['insights'].to_dict()
572
- for subtopic, subdata in data['subtopics'].items():
573
- if isinstance(subdata['insights'], TopicInsights):
574
- subdata['insights'] = subdata['insights'].to_dict()
575
-
576
- if __name__ == "__main__":
577
- # Load chat history data
578
- chat_history = None
579
- with open('sample_files/chat_history_corpus.json', 'r', encoding="utf-8") as file:
580
- chat_history = json.load(file)
581
-
582
- # Initialize analytics system
583
- analytics = PreClassAnalytics()
584
-
585
- # Generate comprehensive report
586
- report = analytics.generate_comprehensive_report(chat_history)
587
-
588
- # Convert insights to dictionary
589
- # convert_insights_to_dict(report)
590
-
591
- print(json.dumps(report, indent=4, cls=CustomJSONEncoder))
592
  # print(report)
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from datetime import datetime
4
+ from typing import List, Dict, Any, Tuple
5
+ import spacy
6
+ from collections import Counter, defaultdict
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+ from textblob import TextBlob
10
+ import networkx as nx
11
+ from scipy import stats
12
+ import logging
13
+ import json
14
+ from dataclasses import dataclass
15
+ from enum import Enum
16
+
17
+ # Configure logging
18
+ logging.basicConfig(level=logging.INFO)
19
+ logger = logging.getLogger(__name__)
20
+
21
+ class TopicDifficulty(Enum):
22
+ EASY = "easy"
23
+ MODERATE = "moderate"
24
+ DIFFICULT = "difficult"
25
+ VERY_DIFFICULT = "very_difficult"
26
+
27
+
28
+ @dataclass
29
+ class QuestionMetrics:
30
+ complexity_score: float
31
+ follow_up_count: int
32
+ clarification_count: int
33
+ time_spent: float
34
+ sentiment_score: float
35
+
36
+ @dataclass
37
+ class TopicInsights:
38
+ difficulty_level: TopicDifficulty
39
+ common_confusion_points: List[str]
40
+ question_patterns: List[str]
41
+ time_distribution: Dict[str, float]
42
+ engagement_metrics: Dict[str, float]
43
+ recommended_focus_areas: List[str]
44
+
45
+ def to_dict(self):
46
+ return {
47
+ "difficulty_level": self.difficulty_level.value, # Convert enum to its value
48
+ "common_confusion_points": self.common_confusion_points,
49
+ "question_patterns": self.question_patterns,
50
+ "time_distribution": {str(k): v for k, v in self.time_distribution.items()},
51
+ "engagement_metrics": self.engagement_metrics,
52
+ "recommended_focus_areas": self.recommended_focus_areas,
53
+ }
54
+
55
+ class PreClassAnalytics:
56
+ def __init__(self, nlp_model: str = "en_core_web_lg"):
57
+ """Initialize the analytics system with necessary components."""
58
+ self.nlp = spacy.load(nlp_model)
59
+ self.question_indicators = {
60
+ "what", "why", "how", "when", "where", "which", "who",
61
+ "whose", "whom", "can", "could", "would", "will", "explain"
62
+ }
63
+ self.confusion_indicators = {
64
+ "confused", "don't understand", "unclear", "not clear",
65
+ "stuck", "difficult", "hard", "help", "explain again"
66
+ }
67
+ self.follow_up_indicators = {
68
+ "also", "another", "additionally", "furthermore", "moreover",
69
+ "besides", "related", "similarly", "again"
70
+ }
71
+
72
+ def preprocess_chat_history(self, chat_history: List[Dict]) -> pd.DataFrame:
73
+ """Convert chat history to DataFrame with enhanced features."""
74
+ messages = []
75
+ for chat in chat_history:
76
+ user_id = chat['user_id']['$oid']
77
+ for msg in chat['messages']:
78
+ try:
79
+ # Ensure the timestamp is in the correct format
80
+ if isinstance(msg['timestamp'], dict) and '$date' in msg['timestamp']:
81
+ timestamp = pd.to_datetime(msg['timestamp']['$date'])
82
+ elif isinstance(msg['timestamp'], str):
83
+ timestamp = pd.to_datetime(msg['timestamp'])
84
+ else:
85
+ raise ValueError("Invalid timestamp format")
86
+ except Exception as e:
87
+ print(f"Error parsing timestamp: {msg['timestamp']}, error: {e}")
88
+ timestamp = pd.NaT # Use NaT (Not a Time) for invalid timestamps
89
+
90
+ messages.append({
91
+ 'user_id': user_id,
92
+ 'timestamp': timestamp,
93
+ 'prompt': msg['prompt'],
94
+ 'response': msg['response'],
95
+ 'is_question': any(q in msg['prompt'].lower() for q in self.question_indicators),
96
+ 'shows_confusion': any(c in msg['prompt'].lower() for c in self.confusion_indicators),
97
+ 'is_followup': any(f in msg['prompt'].lower() for f in self.follow_up_indicators)
98
+ })
99
+
100
+ df = pd.DataFrame(messages)
101
+ df['sentiment'] = df['prompt'].apply(lambda x: TextBlob(x).sentiment.polarity)
102
+ return df
103
+
104
+ def extract_topic_hierarchies(self, df: pd.DataFrame) -> Dict[str, List[str]]:
105
+ """Extract hierarchical topic relationships from conversations."""
106
+ topic_hierarchy = defaultdict(list)
107
+
108
+ for _, row in df.iterrows():
109
+ doc = self.nlp(row['prompt'])
110
+
111
+ # Extract main topics and subtopics using noun chunks and dependencies
112
+ main_topics = []
113
+ subtopics = []
114
+
115
+ for chunk in doc.noun_chunks:
116
+ if chunk.root.dep_ in ('nsubj', 'dobj'):
117
+ main_topics.append(chunk.text.lower())
118
+ else:
119
+ subtopics.append(chunk.text.lower())
120
+
121
+ # Build hierarchy
122
+ for main_topic in main_topics:
123
+ topic_hierarchy[main_topic].extend(subtopics)
124
+
125
+ # Clean and deduplicate
126
+ return {k: list(set(v)) for k, v in topic_hierarchy.items()}
127
+
128
+ def analyze_topic_difficulty(self, df: pd.DataFrame, topic: str) -> TopicDifficulty:
129
+ """Determine topic difficulty based on various metrics."""
130
+ topic_msgs = df[df['prompt'].str.contains(topic, case=False)]
131
+
132
+ # Calculate difficulty indicators
133
+ confusion_rate = topic_msgs['shows_confusion'].mean()
134
+ question_rate = topic_msgs['is_question'].mean()
135
+ follow_up_rate = topic_msgs['is_followup'].mean()
136
+ avg_sentiment = topic_msgs['sentiment'].mean()
137
+
138
+ # Calculate composite difficulty score
139
+ difficulty_score = (
140
+ confusion_rate * 0.4 +
141
+ question_rate * 0.3 +
142
+ follow_up_rate * 0.2 +
143
+ (1 - (avg_sentiment + 1) / 2) * 0.1
144
+ )
145
+
146
+ # Map score to difficulty level
147
+ if difficulty_score < 0.3:
148
+ return TopicDifficulty.EASY
149
+ elif difficulty_score < 0.5:
150
+ return TopicDifficulty.MODERATE
151
+ elif difficulty_score < 0.7:
152
+ return TopicDifficulty.DIFFICULT
153
+ else:
154
+ return TopicDifficulty.VERY_DIFFICULT
155
+
156
+ def identify_confusion_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
157
+ """Identify common patterns in student confusion."""
158
+ confused_msgs = df[
159
+ (df['prompt'].str.contains(topic, case=False)) &
160
+ (df['shows_confusion'])
161
+ ]['prompt']
162
+
163
+ patterns = []
164
+ for msg in confused_msgs:
165
+ doc = self.nlp(msg)
166
+
167
+ # Extract key phrases around confusion indicators
168
+ for sent in doc.sents:
169
+ for token in sent:
170
+ if token.text.lower() in self.confusion_indicators:
171
+ # Get context window around confusion indicator
172
+ context = sent.text
173
+ patterns.append(context)
174
+
175
+ # Group similar patterns
176
+ if patterns:
177
+ vectorizer = TfidfVectorizer(ngram_range=(1, 3))
178
+ tfidf_matrix = vectorizer.fit_transform(patterns)
179
+ similarity_matrix = cosine_similarity(tfidf_matrix)
180
+
181
+ # Cluster similar patterns
182
+ G = nx.Graph()
183
+ for i in range(len(patterns)):
184
+ for j in range(i + 1, len(patterns)):
185
+ if similarity_matrix[i][j] > 0.5: # Similarity threshold
186
+ G.add_edge(i, j)
187
+
188
+ # Extract representative patterns from each cluster
189
+ clusters = list(nx.connected_components(G))
190
+ return [patterns[min(cluster)] for cluster in clusters]
191
+
192
+ return []
193
+
194
+ def analyze_question_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
195
+ """Analyze patterns in student questions about the topic."""
196
+ topic_questions = df[
197
+ (df['prompt'].str.contains(topic, case=False)) &
198
+ (df['is_question'])
199
+ ]['prompt']
200
+
201
+ question_types = defaultdict(list)
202
+ for question in topic_questions:
203
+ doc = self.nlp(question)
204
+
205
+ # Categorize questions
206
+ if any(token.text.lower() in {"what", "define", "explain"} for token in doc):
207
+ question_types["conceptual"].append(question)
208
+ elif any(token.text.lower() in {"how", "steps", "process"} for token in doc):
209
+ question_types["procedural"].append(question)
210
+ elif any(token.text.lower() in {"why", "reason", "because"} for token in doc):
211
+ question_types["reasoning"].append(question)
212
+ else:
213
+ question_types["other"].append(question)
214
+
215
+ # Extract patterns from each category
216
+ patterns = []
217
+ for category, questions in question_types.items():
218
+ if questions:
219
+ vectorizer = TfidfVectorizer(ngram_range=(1, 3))
220
+ tfidf_matrix = vectorizer.fit_transform(questions)
221
+
222
+ # Get most representative questions
223
+ feature_array = np.mean(tfidf_matrix.toarray(), axis=0)
224
+ tfidf_sorting = np.argsort(feature_array)[::-1]
225
+ features = vectorizer.get_feature_names_out()
226
+
227
+ patterns.append(f"{category}: {' '.join(features[tfidf_sorting[:3]])}")
228
+
229
+ return patterns
230
+
231
+ def analyze_time_distribution(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
232
+ """Analyze time spent on different aspects of the topic."""
233
+ topic_msgs = df[df['prompt'].str.contains(topic, case=False)].copy()
234
+ if len(topic_msgs) < 2:
235
+ return {}
236
+
237
+ topic_msgs['time_diff'] = topic_msgs['timestamp'].diff()
238
+
239
+ # Calculate time distribution
240
+ distribution = {
241
+ 'total_time': topic_msgs['time_diff'].sum().total_seconds() / 60,
242
+ 'avg_time_per_message': topic_msgs['time_diff'].mean().total_seconds() / 60,
243
+ 'max_time_gap': topic_msgs['time_diff'].max().total_seconds() / 60,
244
+ 'time_spent_on_questions': topic_msgs[topic_msgs['is_question']]['time_diff'].sum().total_seconds() / 60,
245
+ 'time_spent_on_confusion': topic_msgs[topic_msgs['shows_confusion']]['time_diff'].sum().total_seconds() / 60
246
+ }
247
+
248
+ return distribution
249
+
250
def calculate_engagement_metrics(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
    """Calculate student engagement metrics for the messages mentioning *topic*.

    Args:
        df: Chat frame; reads 'prompt', 'is_question', 'shows_confusion',
            'is_followup' and 'sentiment'.
        topic: Text looked up case-insensitively and literally inside 'prompt'.

    Returns:
        Dict with 'message_count', 'question_ratio', 'confusion_ratio',
        'follow_up_ratio', 'avg_sentiment' and 'engagement_score'. The ratio and
        sentiment entries are column means, so they are NaN when no message
        matches.
    """
    # Literal, NaN-safe match: topic text such as "c++" must not be parsed as a regex.
    mentions_topic = df['prompt'].str.contains(topic, case=False, regex=False, na=False)
    topic_msgs = df[mentions_topic]

    metrics = {
        'message_count': len(topic_msgs),
        'question_ratio': topic_msgs['is_question'].mean(),
        'confusion_ratio': topic_msgs['shows_confusion'].mean(),
        'follow_up_ratio': topic_msgs['is_followup'].mean(),
        'avg_sentiment': topic_msgs['sentiment'].mean(),
        'engagement_score': 0.0,  # filled in below
    }

    # Weighted blend. Note the message count enters unnormalised and the
    # confusion ratio is not part of the score.
    metrics['engagement_score'] = (
        metrics['message_count'] * 0.3
        + metrics['question_ratio'] * 0.25
        + metrics['follow_up_ratio'] * 0.25
        + (metrics['avg_sentiment'] + 1) / 2 * 0.2  # maps sentiment from [-1, 1] to [0, 1]
    )

    return metrics
272
+
273
def generate_topic_insights(self, df: pd.DataFrame, topic: str) -> TopicInsights:
    """Run every per-topic analysis and bundle the results into a TopicInsights.

    The recommended focus areas are derived from four checks, applied in
    order: topic difficulty, presence of confusion points, confusion ratio
    above 0.3, and question time exceeding half of the total time.
    """
    level = self.analyze_topic_difficulty(df, topic)
    confusions = self.identify_confusion_patterns(df, topic)
    patterns = self.analyze_question_patterns(df, topic)
    timing = self.analyze_time_distribution(df, topic)
    engagement = self.calculate_engagement_metrics(df, topic)

    recommended = []

    if level in (TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT):
        recommended.append("Fundamental concept reinforcement needed")

    if confusions:
        top_confusions = ', '.join(confusions[:3])
        recommended.append(f"Address common confusion around: {top_confusions}")

    if engagement['confusion_ratio'] > 0.3:
        recommended.append("Consider alternative teaching approaches")

    # Missing timing entries (empty distribution) count as zero minutes.
    question_minutes = timing.get('time_spent_on_questions', 0)
    half_of_total = timing.get('total_time', 0) * 0.5
    if question_minutes > half_of_total:
        recommended.append("More practical examples or demonstrations needed")

    return TopicInsights(
        difficulty_level=level,
        common_confusion_points=confusions,
        question_patterns=patterns,
        time_distribution=timing,
        engagement_metrics=engagement,
        recommended_focus_areas=recommended,
    )
304
+
305
def analyze_student_progress(self, df: pd.DataFrame) -> Dict[str, Any]:
    """Analyze individual student progress and learning patterns.

    Args:
        df: Chat frame; reads 'user_id', 'prompt', 'is_question',
            'shows_confusion' and 'sentiment'.

    Returns:
        Dict keyed by user id. Each value holds 'total_messages',
        'questions_asked', 'confusion_instances', 'avg_sentiment',
        'learning_pattern' and 'topic_engagement' (per topic: 'message_count',
        'confusion_rate', 'sentiment_trend').
    """
    student_progress = {}

    for student_id in df['user_id'].unique():
        student_msgs = df[df['user_id'] == student_id]

        progress = {
            'total_messages': len(student_msgs),
            'questions_asked': student_msgs['is_question'].sum(),
            'confusion_instances': student_msgs['shows_confusion'].sum(),
            'avg_sentiment': student_msgs['sentiment'].mean(),
            'topic_engagement': {},
            'learning_pattern': self._identify_learning_pattern(student_msgs),
        }

        for topic in self.extract_topic_hierarchies(student_msgs):
            # Literal, NaN-safe match so topic text is never parsed as a regex.
            mentions_topic = student_msgs['prompt'].str.contains(
                topic, case=False, regex=False, na=False
            )
            topic_msgs = student_msgs[mentions_topic]

            # A slope needs at least two points: linregress raises on empty
            # input and yields NaN for a single point, so report "no trend".
            if len(topic_msgs) >= 2:
                sentiment_trend = stats.linregress(
                    range(len(topic_msgs)),
                    topic_msgs['sentiment'],
                ).slope
            else:
                sentiment_trend = 0.0

            progress['topic_engagement'][topic] = {
                'message_count': len(topic_msgs),
                'confusion_rate': topic_msgs['shows_confusion'].mean(),
                'sentiment_trend': sentiment_trend,
            }

        student_progress[student_id] = progress

    return student_progress
338
+
339
+ def _identify_learning_pattern(self, student_msgs: pd.DataFrame) -> str:
340
+ """Identify student's learning pattern based on their interaction style."""
341
+ # Calculate key metrics
342
+ question_ratio = student_msgs['is_question'].mean()
343
+ confusion_ratio = student_msgs['shows_confusion'].mean()
344
+ follow_up_ratio = student_msgs['is_followup'].mean()
345
+ sentiment_trend = stats.linregress(
346
+ range(len(student_msgs)),
347
+ student_msgs['sentiment']
348
+ ).slope
349
+
350
+ # Identify pattern
351
+ if question_ratio > 0.6:
352
+ return "Inquisitive Learner"
353
+ elif confusion_ratio > 0.4:
354
+ return "Needs Additional Support"
355
+ elif follow_up_ratio > 0.5:
356
+ return "Deep Dive Learner"
357
+ elif sentiment_trend > 0:
358
+ return "Progressive Learner"
359
+ else:
360
+ return "Steady Learner"
361
+
362
def generate_comprehensive_report(self, chat_history: List[Dict]) -> Dict[str, Any]:
    """Build the full analytics report for a chat history.

    The report has five top-level entries, added in this order: 'topics',
    'student_progress', 'overall_metrics', 'temporal_analysis' and
    'recommendations' (the last is computed from the otherwise finished report).
    """
    df = self.preprocess_chat_history(chat_history)
    topics = self.extract_topic_hierarchies(df)

    report = {
        'topics': {},
        'student_progress': self.analyze_student_progress(df),
        'overall_metrics': {
            'total_conversations': len(df),
            'unique_students': df['user_id'].nunique(),
            'avg_sentiment': df['sentiment'].mean(),
            # Counts subtopic names across all main topics.
            'most_discussed_topics': Counter(
                name for names in topics.values() for name in names
            ).most_common(5),
        },
    }

    # Per-topic section: insights for every subtopic first, then the main topic.
    for main_topic, subtopics in topics.items():
        subtopic_section = {}
        for subtopic in subtopics:
            insights = self.generate_topic_insights(df, subtopic)
            siblings = [other for other in subtopics if other != subtopic]
            per_student = {
                student_id: self.calculate_engagement_metrics(
                    df[df['user_id'] == student_id],
                    subtopic,
                )
                for student_id in df['user_id'].unique()
            }
            subtopic_section[subtopic] = {
                'insights': insights,
                'related_topics': siblings,
                'student_engagement': per_student,
            }

        main_insights = self.generate_topic_insights(df, main_topic)
        connections = self._calculate_topic_connections(df, main_topic, subtopics)
        progression = self._identify_topic_progression(df, main_topic, subtopics)
        report['topics'][main_topic] = {
            'insights': main_insights,
            'subtopics': subtopic_section,
            'topic_relationships': {
                'hierarchy_depth': len(subtopics),
                'connection_strength': connections,
                'progression_path': progression,
            },
        }

    # Temporal section.
    # NOTE(review): 'daily_engagement' is keyed by datetime.date objects, which
    # json.dumps does not accept as dict keys - confirm how this is serialised.
    per_day = df.groupby(df['timestamp'].dt.date).agg({
        'user_id': 'count',
        'is_question': 'sum',
        'shows_confusion': 'sum',
        'sentiment': 'mean',
    }).to_dict()
    busiest_hours = (
        df.groupby(df['timestamp'].dt.hour)['user_id'].count().nlargest(3).to_dict()
    )
    report['temporal_analysis'] = {
        'daily_engagement': per_day,
        'peak_activity_hours': busiest_hours,
        'learning_trends': self._analyze_learning_trends(df),
    }

    report['recommendations'] = self._generate_recommendations(report)

    return report
426
+
427
+ def _calculate_topic_connections(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> Dict[str, float]:
428
+ """Calculate connection strength between topics based on co-occurrence."""
429
+ connections = {}
430
+ main_topic_msgs = df[df['prompt'].str.contains(main_topic, case=False)]
431
+
432
+ for subtopic in subtopics:
433
+ cooccurrence = df[
434
+ df['prompt'].str.contains(main_topic, case=False) &
435
+ df['prompt'].str.contains(subtopic, case=False)
436
+ ].shape[0]
437
+
438
+ connection_strength = cooccurrence / len(main_topic_msgs) if len(main_topic_msgs) > 0 else 0
439
+ connections[subtopic] = connection_strength
440
+
441
+ return connections
442
+
443
+ def _identify_topic_progression(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> List[str]:
444
+ """Identify optimal topic progression path based on student interactions."""
445
+ topic_difficulties = {}
446
+
447
+ for subtopic in subtopics:
448
+ difficulty = self.analyze_topic_difficulty(df, subtopic)
449
+ topic_difficulties[subtopic] = difficulty.value
450
+
451
+ # Sort subtopics by difficulty
452
+ return sorted(subtopics, key=lambda x: topic_difficulties[x])
453
+
454
+ def _analyze_learning_trends(self, df: pd.DataFrame) -> Dict[str, Any]:
455
+ """Analyze overall learning trends across the dataset."""
456
+ return {
457
+ 'sentiment_trend': stats.linregress(
458
+ range(len(df)),
459
+ df['sentiment']
460
+ )._asdict(),
461
+ 'confusion_trend': stats.linregress(
462
+ range(len(df)),
463
+ df['shows_confusion']
464
+ )._asdict(),
465
+ 'engagement_progression': self._calculate_engagement_progression(df)
466
+ }
467
+
468
+ def _calculate_engagement_progression(self, df: pd.DataFrame) -> Dict[str, float]:
469
+ """Calculate how student engagement changes over time."""
470
+ df['week'] = df['timestamp'].dt.isocalendar().week
471
+ weekly_engagement = df.groupby('week').agg({
472
+ 'is_question': 'mean',
473
+ 'shows_confusion': 'mean',
474
+ 'is_followup': 'mean',
475
+ 'sentiment': 'mean'
476
+ })
477
+
478
+ return {
479
+ 'question_trend': stats.linregress(
480
+ range(len(weekly_engagement)),
481
+ weekly_engagement['is_question']
482
+ ).slope,
483
+ 'confusion_trend': stats.linregress(
484
+ range(len(weekly_engagement)),
485
+ weekly_engagement['shows_confusion']
486
+ ).slope,
487
+ 'follow_up_trend': stats.linregress(
488
+ range(len(weekly_engagement)),
489
+ weekly_engagement['is_followup']
490
+ ).slope,
491
+ 'sentiment_trend': stats.linregress(
492
+ range(len(weekly_engagement)),
493
+ weekly_engagement['sentiment']
494
+ ).slope
495
+ }
496
+
497
+ def _generate_recommendations(self, report: Dict[str, Any]) -> List[str]:
498
+ """Generate actionable recommendations based on the analysis."""
499
+ recommendations = []
500
+
501
+ # Analyze difficulty distribution
502
+ difficult_topics = [
503
+ topic for topic, data in report['topics'].items()
504
+ if data['insights'].difficulty_level in
505
+ (TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT)
506
+ ]
507
+
508
+ if difficult_topics:
509
+ recommendations.append(
510
+ f"Consider providing additional resources for challenging topics: {', '.join(difficult_topics)}"
511
+ )
512
+
513
+ # Analyze student engagement
514
+ avg_engagement = np.mean([
515
+ progress['questions_asked'] / progress['total_messages']
516
+ for progress in report['student_progress'].values()
517
+ ])
518
+
519
+ if avg_engagement < 0.3:
520
+ recommendations.append(
521
+ "Implement more interactive elements to increase student engagement"
522
+ )
523
+
524
+ # Analyze temporal patterns
525
+ peak_hours = list(report['temporal_analysis']['peak_activity_hours'].keys())
526
+ recommendations.append(
527
+ f"Consider scheduling additional support during peak activity hours: {peak_hours}"
528
+ )
529
+
530
+ # Analyze learning trends
531
+ # sentiment_trend = report['temporal_analysis']['learning_trends']['sentiment_trend']
532
+ # if sentiment_trend < 0:
533
+ # recommendations.append(
534
+ # "Review teaching approach to address declining student satisfaction"
535
+ # )
536
+ # Analyze learning trends
537
+ # Analyze learning trends
538
+ sentiment_trend = report.get('temporal_analysis', {}).get('learning_trends', {}).get('sentiment_trend', None)
539
+ if isinstance(sentiment_trend, (int, float)):
540
+ if sentiment_trend < 0:
541
+ recommendations.append(
542
+ "Review teaching approach to address declining student satisfaction"
543
+ )
544
+ elif isinstance(sentiment_trend, dict):
545
+ # Handle the case where sentiment_trend is a dictionary
546
+ print(f"Unexpected dict format for sentiment_trend: {sentiment_trend}")
547
+ else:
548
+ print(f"Unexpected type for sentiment_trend: {type(sentiment_trend)}")
549
+
550
+ return recommendations
551
+
552
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder for analytics reports.

    Converts values the stock encoder rejects: NumPy booleans, integers,
    floats and arrays, datetime objects (ISO 8601 text), TopicDifficulty
    members (their value) and TopicInsights objects (their to_dict()).
    Only values are converted; `default` is never consulted for dict keys.
    """

    def default(self, obj):
        # NumPy scalars and arrays -> native Python equivalents.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, datetime):
            return obj.isoformat()
        # Project types.
        if isinstance(obj, TopicDifficulty):
            return obj.value
        if isinstance(obj, TopicInsights):
            return obj.to_dict()
        return super().default(obj)
567
+
568
def convert_insights_to_dict(report):
    """Replace TopicInsights objects in report['topics'] with plain dicts, in place.

    Both the main-topic entries and their 'subtopics' entries are visited;
    'insights' values that are not TopicInsights are left as they are.
    """
    def flatten(entry):
        current = entry['insights']
        if isinstance(current, TopicInsights):
            entry['insights'] = current.to_dict()

    for topic_entry in report['topics'].values():
        flatten(topic_entry)
        for subtopic_entry in topic_entry['subtopics'].values():
            flatten(subtopic_entry)
575
+
576
if __name__ == "__main__":
    # Script entry point: read the sample chat corpus, build the report and
    # print it as indented JSON.
    with open('sample_files/chat_history_corpus.json', 'r', encoding="utf-8") as corpus_file:
        chat_history = json.load(corpus_file)

    analytics = PreClassAnalytics()
    report = analytics.generate_comprehensive_report(chat_history)

    # CustomJSONEncoder converts TopicInsights values during encoding, so
    # convert_insights_to_dict(report) is not called here.
    print(json.dumps(report, indent=4, cls=CustomJSONEncoder))
requirements.txt CHANGED
@@ -1,31 +1,36 @@
1
- streamlit
2
- pymongo
3
- PyPDF2
4
- python-docx
5
- openai
6
- google-generativeai
7
- llama-index
8
- werkzeug
9
- numpy
10
- pandas
11
- plotly
12
- scikit-learn
13
- networkx
14
- community
15
- umap-learn
16
- seaborn
17
- matplotlib
18
- scipy
19
- Pillow
20
- python-dotenv
21
- zoomus
22
- asyncio
23
- google-auth-oauthlib
24
- google-auth
25
- transformers
26
- textstat
27
- spacy
28
- streamlit_option_menu
29
- beautifulsoup4
30
- youtube-transcript-api
31
- requests
 
 
 
 
 
 
1
+ streamlit
2
+ pymongo
3
+ PyPDF2
4
+ python-docx
5
+ openai
6
+ google-generativeai
7
+ llama-index
8
+ werkzeug
9
+ numpy
10
+ pandas
11
+ plotly
12
+ scikit-learn
13
+ networkx
14
+ community
15
+ umap-learn
16
+ seaborn
17
+ matplotlib
18
+ scipy
19
+ Pillow
20
+ python-dotenv
21
+ zoomus
22
+ asyncio
23
+ google-auth-oauthlib
24
+ google-auth
25
+ transformers
26
+ textstat
27
+ spacy
28
+ streamlit_option_menu
29
+ beautifulsoup4
30
+ youtube-transcript-api
31
+ requests
32
+ xml==0.0.1
33
+ networkx==3.1
34
+ bokeh==3.2.1
35
+ scikit-learn==1.2.2
36
+ langchain==0.0.208
research22.py ADDED
@@ -0,0 +1,517 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # if __name__ == "__main__":
2
+ # main()
3
+ import streamlit as st
4
+ import google.generativeai as genai
5
+ from typing import Dict, Any
6
+ import PyPDF2
7
+ import io
8
+ from pymongo import MongoClient
9
+ from dotenv import load_dotenv
10
+ import os
11
+ import json
12
+ import re
13
+
14
+ # --------------------------------------------------------------------------------
15
+ # 1. Environment Setup
16
+ # --------------------------------------------------------------------------------
17
+ load_dotenv()
18
+ # MongoDB
19
# MongoDB connection string. It is read from the environment only: no
# credentials are embedded in source. "MONGODB_UR" is still honoured as a
# fallback because that (misspelled) name was the one read previously.
MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv("MONGODB_UR")
# Gemini API key, also taken from the environment only (None when unset).
GEMINI_KEY = os.getenv("GEMINI_KEY")

# Configure Gemini
genai.configure(api_key=GEMINI_KEY)
28
+
29
+
30
+ # --------------------------------------------------------------------------------
31
+ # 2. Database Connection
32
+ # --------------------------------------------------------------------------------
33
def create_db_connection():
    """
    Connect to MongoDB and return the "novascholar_db" database handle.

    The server is pinged before returning. On any failure the error is shown
    through Streamlit and None is returned.
    """
    try:
        mongo_client = MongoClient(MONGODB_URI)
        # Ping to confirm the server is reachable.
        mongo_client.admin.command("ping")
        return mongo_client["novascholar_db"]
    except Exception as e:
        st.error(f"Database connection error: {str(e)}")
        return None
47
+
48
+
49
+ # --------------------------------------------------------------------------------
50
+ # 3. PDF Text Extraction
51
+ # --------------------------------------------------------------------------------
52
def extract_text_from_pdf(pdf_file) -> str:
    """
    Return the text of every page of a PDF, each page followed by a newline.

    Any error is reported through Streamlit and an empty string is returned.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        return "".join(page.extract_text() + "\n" for page in reader.pages)
    except Exception as e:
        st.error(f"Error processing PDF: {str(e)}")
        return ""
65
+
66
+
67
+ # --------------------------------------------------------------------------------
68
+ # 4. Gemini Response Helper
69
+ # --------------------------------------------------------------------------------
70
def get_gemini_response(prompt: str) -> str:
    """
    Send `prompt` to the "gemini-pro" model and return the reply text.

    Any error is reported through Streamlit and an empty string is returned.
    """
    try:
        gemini = genai.GenerativeModel("gemini-pro")
        reply = gemini.generate_content(prompt)
        return reply.text
    except Exception as e:
        st.error(f"Gemini API Error: {str(e)}")
        return ""
82
+
83
+
84
+ # --------------------------------------------------------------------------------
85
+ # 5. Basic Info Extraction
86
+ # --------------------------------------------------------------------------------
87
def extract_basic_info(text: str) -> Dict[str, str]:
    """
    Ask Gemini for the paper's bibliographic fields and parse the reply.

    Every reply line containing ":" becomes one entry: the text before the
    first colon is the key, the rest is the value (both stripped). Returns an
    empty dict when Gemini gives no reply.
    """
    prompt = f"""
    Extract the following fields from the research paper text below:

    Title
    Publication
    Journal_Conference
    Abstract
    Keywords
    Author
    Date_of_Publication

    Paper text:
    {text}

    Return them in this format:
    Title: ...
    Publication: ...
    Journal_Conference: ...
    Abstract: ...
    Keywords: ...
    Author: ...
    Date_of_Publication: ...
    """
    response = get_gemini_response(prompt)
    if not response:
        return {}
    pairs = (line.split(":", 1) for line in response.split("\n") if ":" in line)
    return {field.strip(): content.strip() for field, content in pairs}
125
+
126
+
127
+ # --------------------------------------------------------------------------------
128
+ # 6. Content Sections Extraction
129
+ # --------------------------------------------------------------------------------
130
def extract_content_sections(text: str) -> Dict[str, str]:
    """
    Ask Gemini for the paper's main sections and parse the reply.

    Every reply line containing ":" becomes one entry: the text before the
    first colon is the key, the rest is the value (both stripped). Returns an
    empty dict when Gemini gives no reply.
    """
    prompt = f"""Please extract these sections from the research paper:
    1. Introduction
    2. Literature Review
    3. Research Models Used
    4. Methodology
    5. Discussion
    6. Future Scope
    7. Theory

    Paper text: {text}

    Return in this exact format without any additional text or explanations also make sure
    no data should be empty (at least 10-15 words) and it should be meaningful:
    Intro: <text>
    Literature_Review: <text>
    Research_Models_Used: <text>
    Methodology: <text>
    Discussion: <text>
    Future_Scope: <text>
    Theory: <text>
    """
    response = get_gemini_response(prompt)
    if not response:
        return {}
    pairs = (line.split(":", 1) for line in response.split("\n") if ":" in line)
    return {section.strip(): content.strip() for section, content in pairs}
166
+
167
+
168
+ # --------------------------------------------------------------------------------
169
+ # 7. Variables Extraction
170
+ # --------------------------------------------------------------------------------
171
def extract_variables(text: str) -> Dict[str, Any]:
    """
    Ask Gemini for the paper's research variables and parse the reply.

    Every reply line containing ":" becomes one entry. Keys starting with
    "nof_" are converted to int, falling back to 0 when the value is not an
    integer; all other values stay as stripped text. Returns an empty dict
    when Gemini gives no reply.
    """
    prompt = f"""From the paper text, extract the following fields:
    1. Independent_Variables
    2. nof_Independent_Variables
    3. Dependent_Variables
    4. nof_Dependent_Variables
    5. Control_Variables
    6. Extraneous_Variables
    7. nof_Control_Variables
    8. nof_Extraneous_Variables

    Return them in this format:
    Independent_Variables: <list>
    nof_Independent_Variables: <integer>
    Dependent_Variables: <list>
    nof_Dependent_Variables: <integer>
    Control_Variables: <list>
    Extraneous_Variables: <list>
    nof_Control_Variables: <integer>
    nof_Extraneous_Variables: <integer>

    Paper text: {text}
    """
    response = get_gemini_response(prompt)
    if not response:
        return {}

    parsed = {}
    for raw_line in response.split("\n"):
        if ":" not in raw_line:
            continue
        name, _, content = raw_line.partition(":")
        name = name.strip()
        content = content.strip()
        if not name.startswith("nof_"):
            parsed[name] = content
            continue
        # Count fields: keep an int, defaulting to 0 for non-numeric text.
        try:
            parsed[name] = int(content)
        except ValueError:
            parsed[name] = 0
    return parsed
219
+
220
+
221
+ # --------------------------------------------------------------------------------
222
+ # 8. Utility to ensure no empty fields (example logic)
223
+ # --------------------------------------------------------------------------------
224
def ensure_non_empty_values(data: Dict[str, Any], fallback_text: str) -> Dict[str, Any]:
    """
    Replace empty or too-short text fields with a placeholder message, in place.

    A text value counts as insufficient when it is empty or has fewer than
    three whitespace-separated words. Numeric values (for example the integer
    "nof_*" counts, including 0) are real data and are left untouched.

    Args:
        data: Extracted fields; modified in place.
        fallback_text: Accepted for interface compatibility; not used.

    Returns:
        The same `data` dict.
    """
    for field, value in data.items():
        # bool is excluded so that True/False are still treated like other values.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            continue
        if not value or len(str(value).split()) < 3:
            data[field] = f"No sufficient data found for {field}. Could not parse."
    return data
232
+
233
+
234
+ # --------------------------------------------------------------------------------
235
+ # 9. Processing the Paper
236
+ # --------------------------------------------------------------------------------
237
+ # def process_paper(text: str) -> Dict[str, Any]:
238
+ # """
239
+ # Orchestrate calls to extract basic info, content sections, and variables.
240
+ # Return a dictionary containing all the fields with consistent naming.
241
+ # """
242
+ # with st.spinner("Extracting basic information..."):
243
+ # basic_info = extract_basic_info(text)
244
+ # basic_info = ensure_non_empty_values(basic_info, text)
245
+
246
+ # with st.spinner("Extracting content sections..."):
247
+ # content_sections = extract_content_sections(text)
248
+ # content_sections = ensure_non_empty_values(content_sections, text)
249
+
250
+ # with st.spinner("Extracting variables..."):
251
+ # variables_info = extract_variables(text)
252
+ # variables_info = ensure_non_empty_values(variables_info, text)
253
+
254
+ # # Create a single dictionary with all fields
255
+ # paper_doc = {
256
+ # "Title": basic_info.get("Title", ""),
257
+ # "Publication": basic_info.get("Publication", ""),
258
+ # "Journal_Conference": basic_info.get("Journal_Conference", ""),
259
+ # "Abstract": basic_info.get("Abstract", ""),
260
+ # "Keywords": basic_info.get("Keywords", ""),
261
+ # "Author": basic_info.get("Author", ""),
262
+ # "Date_of_Publication": basic_info.get("Date_of_Publication", ""),
263
+ # "Intro": content_sections.get("Intro", ""),
264
+ # "Literature_Review": content_sections.get("Literature_Review", ""),
265
+ # "Research_Models_Used": content_sections.get("Research_Models_Used", ""),
266
+ # "Methodology": content_sections.get("Methodology", ""),
267
+ # "Discussion": content_sections.get("Discussion", ""),
268
+ # "Future_Scope": content_sections.get("Future_Scope", ""),
269
+ # "Theory": content_sections.get("Theory", ""),
270
+ # "Independent_Variables": variables_info.get("Independent_Variables", ""),
271
+ # "nof_Independent_Variables": variables_info.get("nof_Independent_Variables", 0),
272
+ # "Dependent_Variables": variables_info.get("Dependent_Variables", ""),
273
+ # "nof_Dependent_Variables": variables_info.get("nof_Dependent_Variables", 0),
274
+ # "Control_Variables": variables_info.get("Control_Variables", ""),
275
+ # "Extraneous_Variables": variables_info.get("Extraneous_Variables", ""),
276
+ # "nof_Control_Variables": variables_info.get("nof_Control_Variables", 0),
277
+ # "nof_Extraneous_Variables": variables_info.get("nof_Extraneous_Variables", 0),
278
+ # }
279
+
280
+ # return paper_doc
281
+
282
+ # filepath: /c:/Users/acer/OneDrive/Documents/GitHub/res-cor/research22.py
283
+ # ...existing code continues...
284
+
285
+ # --------------------------------------------------------------------------------
286
+ # 3. Paper Type Attributes
287
+ # --------------------------------------------------------------------------------
288
# Fields requested for every paper type, in prompt order.
_SHARED_PAPER_FIELDS = [
    "Title",
    "Publication",
    "Journal_Conference",
    "Abstract",
    "Keywords",
    "Author",
    "Date_of_Publication",
    "Intro",
    "Literature_Review",
]

# Maps each selectable paper type to the ordered list of fields that
# extract_paper_fields asks Gemini to return for it.
PAPER_TYPE_ATTRIBUTES = {
    "Review Based Paper": [
        *_SHARED_PAPER_FIELDS,
        "Body",
        "Protocol",
        "Search String",
        "Included Studies",
        "Data Collection and Analysis Methods",
        "Data Extraction Table",
        "Synthesis and Analysis",
        "Conclusion",
        "Limitations",
        "Results",
        "References",
        "Risk of Bias Assessment",
    ],
    "Opinion/Perspective Based Paper": [
        *_SHARED_PAPER_FIELDS,
        "Introduction",
        "Body",
        "Results and Discussion",
        "Conclusion",
        "References",
    ],
    "Empirical Research Paper": [
        *_SHARED_PAPER_FIELDS,
        "Introduction",
        "Body",
        "Methodology",
        "Participants",
        "Survey Instrument",
        "Data Collection",
        "Data Analysis",
        "Results and Discussion",
        "Conclusion",
        "References",
    ],
    "Research Paper (Other)": [
        *_SHARED_PAPER_FIELDS,
        "Research_Models_Used",
        "Methodology",
        "Discussion",
        "Future_Scope",
        "Theory",
        "Independent_Variables",
        "nof_Independent_Variables",
        "Dependent_Variables",
        "nof_Dependent_Variables",
        "Control_Variables",
        "Extraneous_Variables",
        "nof_Control_Variables",
        "nof_Extraneous_Variables",
    ],
}
374
+
375
+
376
+ # --------------------------------------------------------------------------------
377
+ # 4. Extract Paper Fields
378
+ # --------------------------------------------------------------------------------
379
def extract_paper_fields(text: str, paper_type: str) -> Dict[str, Any]:
    """
    Use Gemini to extract the fields configured for `paper_type`.

    Args:
        text: Full text of the paper.
        paper_type: A key of PAPER_TYPE_ATTRIBUTES.

    Returns:
        Dict of extracted fields, or an empty dict when the paper type is
        unknown, Gemini gives no reply, or no JSON object can be recovered
        from the reply (each case is reported through Streamlit).
    """
    if paper_type not in PAPER_TYPE_ATTRIBUTES:
        st.error("Invalid paper type selected.")
        return {}

    selected_attrs = PAPER_TYPE_ATTRIBUTES[paper_type]
    prompt = f"""
    Extract the following fields from the research paper text below:

    {", ".join(selected_attrs)}

    Paper text:
    {text}

    Return them in this JSON format strictly, with no extra text:
    [
    {{
    {", ".join([f'"{attr}": "value"' for attr in selected_attrs])}
    }}
    ]
    """

    try:
        response = get_gemini_response(prompt)
        if not response:
            st.error("No response from Gemini.")
            return {}

        record = _parse_json_record(response)
        if not record:
            st.error("Gemini did not return a valid JSON array.")
        return record
    except Exception as e:
        st.error(f"Error in Gemini extraction: {str(e)}")
        return {}


def _parse_json_record(raw_text: str) -> Dict[str, Any]:
    """Recover the first JSON object from a model reply; return {} if none parses.

    Two spans are tried in order: the outermost "[...]" (the requested array
    form) and the outermost "{...}" (a bare object, or an array whose closing
    bracket is missing). Trailing commas before "}" or "]" are removed before
    parsing.
    """
    cleaned = raw_text.strip()
    for opener, closer in (("[", "]"), ("{", "}")):
        start = cleaned.find(opener)
        end = cleaned.rfind(closer)
        if start == -1 or end <= start:
            continue

        candidate = cleaned[start:end + 1]
        candidate = re.sub(r",\s*}", "}", candidate)
        candidate = re.sub(r",\s*\]", "]", candidate)
        try:
            data = json.loads(candidate)
        except json.JSONDecodeError:
            continue

        # The prompt asks for a one-element array; unwrap it.
        if isinstance(data, list):
            data = data[0] if data else None
        if isinstance(data, dict):
            return data
    return {}
443
+
444
+
445
+ # --------------------------------------------------------------------------------
446
+ # 5. Process Paper and Save
447
+ # --------------------------------------------------------------------------------
448
def process_paper(text: str, paper_type: str):
    """
    Extract the fields for `paper_type` and store them in MongoDB.

    The target collection is named after the paper type, lower-cased with
    spaces replaced by underscores.

    Returns:
        The extracted fields (as passed to insert_one), or an empty dict when
        the database is unavailable or nothing was extracted.
    """
    db = create_db_connection()
    # PyMongo Database objects raise NotImplementedError on truth-value
    # testing, so the failure case must be detected with an identity check.
    if db is None:
        return {}

    collection_name = paper_type.replace(" ", "_").lower()
    collection = db[collection_name]

    extracted_data = extract_paper_fields(text, paper_type)
    if extracted_data:
        collection.insert_one(extracted_data)
        return extracted_data
    return {}
468
+
469
+
470
+ # --------------------------------------------------------------------------------
471
+ # 6. Streamlit UI for Paper Extraction
472
+ # --------------------------------------------------------------------------------
473
def main():
    """Streamlit page: pick a paper type, upload a PDF or text file, extract and save it."""
    # st.set_page_config(page_title="Extract Research Paper", layout="wide")
    st.title("Extract Research Paper")

    paper_type = st.selectbox(
        "Select type of research paper:",
        [
            "Review Based Paper",
            "Opinion/Perspective Based Paper",
            "Empirical Research Paper",
            "Research Paper (Other)",
        ],
    )

    uploaded_file = st.file_uploader("Upload a PDF or text file", type=["pdf", "txt"])

    # Nothing to do until the button is pressed with a file selected.
    if not (st.button("Extract & Save") and uploaded_file):
        return

    try:
        if uploaded_file.type == "application/pdf":
            reader = PyPDF2.PdfReader(uploaded_file)
            text_content = "".join(page.extract_text() for page in reader.pages)
        else:
            text_content = uploaded_file.read().decode("utf-8", errors="replace")

        with st.spinner("Extracting fields..."):
            data = process_paper(text_content, paper_type)

        if data:
            st.success(
                f"Paper extracted and saved to MongoDB in '{paper_type}' collection!"
            )
            st.write("Extracted fields:")
            st.json(data)

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()
research3.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import requests
4
+ import json
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
+ # Load environment variables
9
+ load_dotenv()
10
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
11
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
12
+
13
+
14
def call_perplexity_api(prompt: str) -> str:
    """Send ``prompt`` to the Perplexity chat-completions endpoint.

    Args:
        prompt: Text sent as the single user message.

    Returns:
        The content of the first choice's message. An empty string is
        returned when the API key is not configured, the HTTP request fails
        or times out, or the response does not have the expected shape; in
        those cases the problem is shown with ``st.error``.
    """
    if not PERPLEXITY_API_KEY:
        # Without this guard the header would literally be "Bearer None".
        st.error("API Error: PERPLEXITY_API_KEY is not set")
        return ""

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }

    try:
        # requests has no default timeout; set one so a stalled connection
        # cannot hang the Streamlit script run indefinitely.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=60
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # UI boundary: report the failure and let the caller continue.
        st.error(f"API Error: {str(e)}")
        return ""
33
+
34
+
35
def generate_research_paper(
    df: pd.DataFrame, topic: str, max_samples: int = 50
) -> dict:
    """Generate one research-paper section per DataFrame column.

    For every column, the non-null values are converted to strings, the
    first ``max_samples`` of them are joined with `` | `` and embedded in a
    prompt that asks for a 100-150 word section on ``topic``. The prompt is
    sent through ``call_perplexity_api``.

    Args:
        df: Corpus whose columns each become a section.
        topic: Topic the generated sections should address.
        max_samples: Maximum number of values per column included in the
            prompt (defaults to 50, the previously hard-coded limit).

    Returns:
        A dict mapping each column name to the stripped section text. A
        column whose API call produced nothing maps to ``""``, so the dict
        has one entry per column even when generation failed.
    """
    paper_sections = {}
    for col in df.columns:
        # Convert all non-null rows in the column to strings for context.
        col_values = df[col].dropna().astype(str).tolist()
        # Cap the sample so the prompt stays reasonably short.
        sample_text = " | ".join(col_values[:max_samples])

        prompt = f"""
        Topic: {topic}
        Column: {col}
        Data Samples: {sample_text}

        Generate a well-structured research paper section that addresses the topic above,
        referencing relevant information from the column data.
        The section should be at least 100 words and at most 150 words.
        Provide insights, examples, and possible research directions integrating the corpus data.
        """
        section_text = call_perplexity_api(prompt)
        paper_sections[col] = section_text.strip() if section_text else ""
    return paper_sections
60
+
61
+
62
def format_paper(paper_dict: dict, topic: str) -> str:
    """Format generated sections as a Markdown document.

    Args:
        paper_dict: Mapping of section heading (column name) to section text.
        topic: Topic used in the level-1 title.

    Returns:
        A Markdown string: a ``# Research Paper on: <topic>`` title followed
        by one ``## <heading>`` block per entry, in the dict's iteration
        order, each terminated by a blank line.
    """
    # Collect the pieces and join once instead of growing a string with +=.
    parts = [f"# Research Paper on: {topic}\n\n"]
    parts.extend(
        f"## {col}\n{content}\n\n" for col, content in paper_dict.items()
    )
    return "".join(parts)
72
+
73
+
74
def main():
    """Render the "Topic + Corpus-Based Research Paper Generator" page.

    The user enters a topic and uploads a CSV corpus. A preview of the data
    is shown; pressing "Generate Research Paper" produces one section per
    column via ``generate_research_paper``, displays the Markdown produced by
    ``format_paper`` and offers it as a download.
    """
    st.title("Topic + Corpus-Based Research Paper Generator")

    topic_input = st.text_input("Enter the topic for the research paper:")
    uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")

    if not uploaded_file:
        return

    df = pd.read_csv(uploaded_file)
    st.write("### Preview of Uploaded Data")
    st.dataframe(df.head())

    if not st.button("Generate Research Paper"):
        return
    if not topic_input.strip():
        st.warning("Please enter a valid topic.")
        return

    st.info("Generating paper based on the topic and the corpus columns...")
    with st.spinner("Calling Perplexity AI..."):
        paper = generate_research_paper(df, topic_input)

    # generate_research_paper returns an entry for every column even when
    # the API produced nothing (empty strings), so a plain truthiness test on
    # the dict never detects failure; check the section contents instead.
    if not any(paper.values()):
        st.error("Paper generation failed. Please check Perplexity API key.")
        return

    formatted_paper = format_paper(paper, topic_input)
    st.success("Research Paper Generated Successfully!")
    st.write(formatted_paper)

    st.download_button(
        label="Download Paper as Markdown",
        data=formatted_paper,
        file_name="research_paper.md",
        mime="text/markdown",
    )
107
+
108
+
109
+ if __name__ == "__main__":
110
+ main()
research_assistant_dashboard.py CHANGED
@@ -1,342 +1,349 @@
1
- import streamlit as st
2
- from openai import OpenAI
3
- import os
4
- from dotenv import load_dotenv
5
- from llama_index.core import (
6
- VectorStoreIndex,
7
- SimpleDirectoryReader,
8
- Document,
9
- GPTVectorStoreIndex,
10
- )
11
- from bson import ObjectId
12
- import requests
13
- import openai
14
- import numpy as np
15
- from pymongo import MongoClient
16
- from bson import ObjectId
17
- from datetime import datetime
18
- from llama_index.embeddings.openai import OpenAIEmbedding
19
- from typing import List, Dict
20
-
21
- # Initialize Perplexity API and OpenAI API
22
- load_dotenv()
23
- perplexity_api_key = os.getenv("PERPLEXITY_KEY")
24
- openai.api_key = os.getenv("OPENAI_KEY")
25
-
26
- # MongoDB setup
27
- MONGO_URI = os.getenv("MONGO_URI")
28
- client = MongoClient(MONGO_URI)
29
- db = client["novascholar_db"]
30
- research_papers_collection = db["research_papers"]
31
-
32
-
33
- def fetch_perplexity_data(api_key, topic):
34
- """
35
- Fetch research papers data from Perplexity API with proper formatting
36
- """
37
- headers = {
38
- "accept": "application/json",
39
- "content-type": "application/json",
40
- "authorization": f"Bearer {api_key}",
41
- }
42
-
43
- # Structured prompt to get properly formatted response
44
- messages = [
45
- {
46
- "role": "system",
47
- "content": """You are a research paper retrieval expert. For the given topic, return exactly 10 research papers in the following format:
48
- Title: Paper Title
49
- Authors: Author 1, Author 2
50
- Year: YYYY
51
- Content: Detailed paper content with abstract and key findings
52
- URL: DOI or paper URL
53
- """,
54
- },
55
- {"role": "user", "content": f"Find 10 research papers about: {topic}"},
56
- ]
57
-
58
- try:
59
- client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
60
- response = client.chat.completions.create(
61
- model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
62
- messages=messages,
63
- )
64
-
65
- # Extract and validate response
66
- content = response.choices[0].message.content
67
- st.write("Fetched Data:", content) # Debugging line to check the fetched data
68
-
69
- return content
70
-
71
- except Exception as e:
72
- st.error(f"Failed to fetch data from Perplexity API: {str(e)}")
73
- return ""
74
-
75
-
76
- def split_and_vectorize_papers(content: str) -> List[Dict]:
77
- """Split and vectorize papers using OpenAI embeddings"""
78
- papers = content.split("\n\n")
79
-
80
- # Initialize OpenAI client
81
- # client = OpenAI() # Uses api_key from environment variable
82
- vectors = []
83
-
84
- for paper in papers:
85
- try:
86
- # Get embedding using OpenAI's API directly
87
- response = openai.embeddings.create(
88
- model="text-embedding-ada-002", input=paper, encoding_format="float"
89
- )
90
-
91
- # Extract embedding from response
92
- embedding = response.data[0].embedding
93
-
94
- vectors.append(
95
- {"content": paper, "vector": embedding, "timestamp": datetime.utcnow()}
96
- )
97
-
98
- except Exception as e:
99
- st.error(f"Error vectorizing paper: {str(e)}")
100
- continue
101
-
102
- return vectors
103
-
104
-
105
- def store_papers_in_mongodb(papers):
106
- """Store papers with vectors in MongoDB"""
107
- try:
108
- for paper in papers:
109
- # Prepare MongoDB document
110
- mongo_doc = {
111
- "content": paper["content"],
112
- "vector": paper["vector"],
113
- "created_at": datetime.utcnow(),
114
- }
115
-
116
- # Insert into MongoDB
117
- db.papers.update_one(
118
- {"content": paper["content"]}, {"$set": mongo_doc}, upsert=True
119
- )
120
-
121
- st.success(f"Stored {len(papers)} papers in database")
122
- return True
123
- except Exception as e:
124
- st.error(f"Error storing papers: {str(e)}")
125
-
126
-
127
- def get_research_papers(query):
128
- """
129
- Get and store research papers with improved error handling
130
- """
131
- # Fetch papers from Perplexity
132
- content = fetch_perplexity_data(perplexity_api_key, query)
133
-
134
- if not content:
135
- return []
136
-
137
- # Split and vectorize papers
138
- papers = split_and_vectorize_papers(content)
139
-
140
- # Store papers in MongoDB
141
- if store_papers_in_mongodb(papers):
142
- return papers
143
- else:
144
- st.warning("Failed to store papers in database, but returning fetched results")
145
- return papers
146
-
147
-
148
- def analyze_research_gaps(papers):
149
- """
150
- Analyze research gaps with improved prompt and error handling
151
- """
152
- if not papers:
153
- return "No papers provided for analysis"
154
-
155
- # Prepare paper summaries for analysis
156
- paper_summaries = "\n\n".join(
157
- [
158
- f"Key Findings: {paper['content'][:500]}..."
159
- # f"Title: {paper['title']}\nYear: {paper['year']}\nKey Findings: {paper['content'][:500]}..."
160
- for paper in papers
161
- ]
162
- )
163
-
164
- headers = {
165
- "Authorization": f"Bearer {perplexity_api_key}",
166
- "Content-Type": "application/json",
167
- }
168
-
169
- data = {
170
- "messages": [
171
- {
172
- "role": "system",
173
- "content": "You are a research analysis expert. Identify specific research gaps and future research directions based on the provided papers. Format your response with clear sections: Current State, Identified Gaps, and Future Directions.",
174
- },
175
- {
176
- "role": "user",
177
- "content": f"Analyze these papers and identify research gaps:\n\n{paper_summaries}",
178
- },
179
- ]
180
- }
181
-
182
- try:
183
- client = OpenAI(
184
- api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
185
- )
186
- response = client.chat.completions.create(
187
- model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
188
- messages=data["messages"],
189
- )
190
- return response.choices[0].message.content
191
-
192
- except Exception as e:
193
- st.error(f"Failed to analyze research gaps: {str(e)}")
194
- return "Error analyzing research gaps"
195
-
196
-
197
- def create_research_paper(gaps, topic, papers):
198
- """
199
- Create a research paper that addresses the identified gaps using Perplexity API
200
- """
201
- full_texts = "\n\n".join([paper["content"] for paper in papers])
202
- headers = {
203
- "Authorization": f"Bearer {perplexity_api_key}",
204
- "Content-Type": "application/json",
205
- }
206
- data = {
207
- "messages": [
208
- {
209
- "role": "system",
210
- "content": "You are a research paper generation expert. Create a comprehensive research paper that addresses the identified gaps based on the provided papers. Format your response with clear sections: Introduction, Literature Review, Methodology, Results, Discussion, Conclusion, and References.",
211
- },
212
- {
213
- "role": "user",
214
- "content": f"Create a research paper on the topic '{topic}' that addresses the following research gaps:\n\n{gaps}\n\nBased on the following papers:\n\n{full_texts}",
215
- },
216
- ]
217
- }
218
- try:
219
- client = OpenAI(
220
- api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
221
- )
222
- response = client.chat.completions.create(
223
- model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
224
- messages=data["messages"],
225
- )
226
- return response.choices[0].message.content
227
-
228
- except Exception as e:
229
- st.error(f"Failed to create research paper: {str(e)}")
230
- return "Error creating research paper"
231
-
232
-
233
- def cosine_similarity(vec1, vec2):
234
- """Calculate the cosine similarity between two vectors"""
235
- vec1 = np.array(vec1)
236
- vec2 = np.array(vec2)
237
- return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
238
-
239
-
240
- def calculate_cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
241
- """Calculate cosine similarity between two vectors"""
242
- return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
243
-
244
-
245
- def display_research_assistant_dashboard():
246
- """Display research assistant dashboard"""
247
- # Initialize session state for recommendations
248
- if "recommendations" not in st.session_state:
249
- st.session_state.recommendations = None
250
- if "vectors" not in st.session_state:
251
- st.session_state.vectors = None
252
- if "generated_paper" not in st.session_state:
253
- st.session_state.generated_paper = None
254
-
255
- # Sidebar
256
- with st.sidebar:
257
- st.title(f"Welcome, {st.session_state.username}")
258
- if st.button("Logout", use_container_width=True):
259
- for key in st.session_state.keys():
260
- del st.session_state[key]
261
- st.rerun()
262
-
263
- # Main content
264
- st.title("Research Paper Recommendations")
265
- search_query = st.text_input("Enter research topic:")
266
- col1, col2 = st.columns(2)
267
- with col1:
268
- if st.button("Get Research Papers"):
269
- if search_query:
270
- with st.spinner("Fetching recommendations..."):
271
- st.session_state.recommendations = get_research_papers(search_query)
272
- st.session_state.vectors = [
273
- paper["vector"] for paper in st.session_state.recommendations
274
- ]
275
- st.markdown(
276
- "\n\n".join(
277
- [
278
- f"**{i+1}.**\n{paper['content']}"
279
- # f"**{i+1}. {paper['title']}**\n{paper['content']}"
280
- for i, paper in enumerate(
281
- st.session_state.recommendations
282
- )
283
- ]
284
- )
285
- )
286
- else:
287
- st.warning("Please enter a search query")
288
- with col2:
289
- if st.button("Analyze Research Gaps"):
290
- if st.session_state.recommendations:
291
- with st.spinner("Analyzing research gaps..."):
292
- gaps = analyze_research_gaps(st.session_state.recommendations)
293
- st.session_state.generated_paper = create_research_paper(
294
- gaps, search_query, st.session_state.recommendations
295
- )
296
- st.markdown("### Potential Research Gaps")
297
- st.markdown(gaps)
298
- else:
299
- st.warning("Please get research papers first")
300
-
301
- if st.button("Save and Vectorize"):
302
- if st.session_state.generated_paper:
303
- try:
304
- # Initialize OpenAI client
305
-
306
- # Get embedding for generated paper
307
- response = openai.embeddings.create(
308
- model="text-embedding-ada-002",
309
- input=st.session_state.generated_paper,
310
- encoding_format="float",
311
- )
312
- generated_vector = response.data[0].embedding
313
-
314
- # Calculate similarities with stored vectors
315
- similarities = [
316
- calculate_cosine_similarity(generated_vector, paper_vector)
317
- for paper_vector in st.session_state.vectors
318
- ]
319
-
320
- # Display results
321
- st.markdown("### Generated Research Paper")
322
- st.markdown(st.session_state.generated_paper)
323
-
324
- st.markdown("### Cosine Similarities with Original Papers")
325
- for i, similarity in enumerate(similarities):
326
- st.metric(
327
- f"Paper {i+1}",
328
- value=f"{similarity:.3f}",
329
- help="Cosine similarity (1.0 = identical, 0.0 = completely different)",
330
- )
331
-
332
- except Exception as e:
333
- st.error(f"Error during vectorization: {str(e)}")
334
- else:
335
- st.warning("Please analyze research gaps first")
336
-
337
-
338
- # Run the dashboard
339
- if __name__ == "__main__":
340
- display_research_assistant_dashboard()
341
-
342
-
 
 
 
 
 
 
 
 
1
+ # import streamlit as st
2
+ # from openai import OpenAI
3
+ # import os
4
+ # from dotenv import load_dotenv
5
+ # from llama_index.core import (
6
+ # VectorStoreIndex,
7
+ # SimpleDirectoryReader,
8
+ # Document,
9
+ # GPTVectorStoreIndex,
10
+ # )
11
+ # from bson import ObjectId
12
+ # import requests
13
+ # import openai
14
+ # import numpy as np
15
+ # from pymongo import MongoClient
16
+ # from bson import ObjectId
17
+ # from datetime import datetime
18
+ # from llama_index.embeddings.openai import OpenAIEmbedding
19
+ # from typing import List, Dict
20
+
21
+ # # Initialize Perplexity API and OpenAI API
22
+ # load_dotenv()
23
+ # perplexity_api_key = os.getenv("PERPLEXITY_KEY")
24
+ # openai.api_key = os.getenv("OPENAI_KEY")
25
+
26
+ # # MongoDB setup
27
+ # MONGO_URI = os.getenv("MONGO_URI")
28
+ # client = MongoClient(MONGO_URI)
29
+ # db = client["novascholar_db"]
30
+ # research_papers_collection = db["research_papers"]
31
+
32
+
33
+ # def fetch_perplexity_data(api_key, topic):
34
+ # """
35
+ # Fetch research papers data from Perplexity API with proper formatting
36
+ # """
37
+ # headers = {
38
+ # "accept": "application/json",
39
+ # "content-type": "application/json",
40
+ # "authorization": f"Bearer {api_key}",
41
+ # }
42
+
43
+ # # Structured prompt to get properly formatted response
44
+ # messages = [
45
+ # {
46
+ # "role": "system",
47
+ # "content": """You are a research paper retrieval expert. For the given topic, return exactly 10 research papers in the following format:
48
+ # Title: Paper Title
49
+ # Authors: Author 1, Author 2
50
+ # Year: YYYY
51
+ # Content: Detailed paper content with abstract and key findings
52
+ # URL: DOI or paper URL
53
+ # """,
54
+ # },
55
+ # {"role": "user", "content": f"Find 10 research papers about: {topic}"},
56
+ # ]
57
+
58
+ # try:
59
+ # client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
60
+ # response = client.chat.completions.create(
61
+ # model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
62
+ # messages=messages,
63
+ # )
64
+
65
+ # # Extract and validate response
66
+ # content = response.choices[0].message.content
67
+ # st.write("Fetched Data:", content) # Debugging line to check the fetched data
68
+
69
+ # return content
70
+
71
+ # except Exception as e:
72
+ # st.error(f"Failed to fetch data from Perplexity API: {str(e)}")
73
+ # return ""
74
+
75
+
76
+ # def split_and_vectorize_papers(content: str) -> List[Dict]:
77
+ # """Split and vectorize papers using OpenAI embeddings"""
78
+ # papers = content.split("\n\n")
79
+
80
+ # # Initialize OpenAI client
81
+ # # client = OpenAI() # Uses api_key from environment variable
82
+ # vectors = []
83
+
84
+ # for paper in papers:
85
+ # try:
86
+ # # Get embedding using OpenAI's API directly
87
+ # response = openai.embeddings.create(
88
+ # model="text-embedding-ada-002", input=paper, encoding_format="float"
89
+ # )
90
+
91
+ # # Extract embedding from response
92
+ # embedding = response.data[0].embedding
93
+
94
+ # vectors.append(
95
+ # {"content": paper, "vector": embedding, "timestamp": datetime.utcnow()}
96
+ # )
97
+
98
+ # except Exception as e:
99
+ # st.error(f"Error vectorizing paper: {str(e)}")
100
+ # continue
101
+
102
+ # return vectors
103
+
104
+
105
+ # def store_papers_in_mongodb(papers):
106
+ # """Store papers with vectors in MongoDB"""
107
+ # try:
108
+ # for paper in papers:
109
+ # # Prepare MongoDB document
110
+ # mongo_doc = {
111
+ # "content": paper["content"],
112
+ # "vector": paper["vector"],
113
+ # "created_at": datetime.utcnow(),
114
+ # }
115
+
116
+ # # Insert into MongoDB
117
+ # db.papers.update_one(
118
+ # {"content": paper["content"]}, {"$set": mongo_doc}, upsert=True
119
+ # )
120
+
121
+ # st.success(f"Stored {len(papers)} papers in database")
122
+ # return True
123
+ # except Exception as e:
124
+ # st.error(f"Error storing papers: {str(e)}")
125
+
126
+
127
+ # def get_research_papers(query):
128
+ # """
129
+ # Get and store research papers with improved error handling
130
+ # """
131
+ # # Fetch papers from Perplexity
132
+ # content = fetch_perplexity_data(perplexity_api_key, query)
133
+
134
+ # if not content:
135
+ # return []
136
+
137
+ # # Split and vectorize papers
138
+ # papers = split_and_vectorize_papers(content)
139
+
140
+ # # Store papers in MongoDB
141
+ # if store_papers_in_mongodb(papers):
142
+ # return papers
143
+ # else:
144
+ # st.warning("Failed to store papers in database, but returning fetched results")
145
+ # return papers
146
+
147
+
148
+ # def analyze_research_gaps(papers):
149
+ # """
150
+ # Analyze research gaps with improved prompt and error handling
151
+ # """
152
+ # if not papers:
153
+ # return "No papers provided for analysis"
154
+
155
+ # # Prepare paper summaries for analysis
156
+ # paper_summaries = "\n\n".join(
157
+ # [
158
+ # f"Key Findings: {paper['content'][:500]}..."
159
+ # # f"Title: {paper['title']}\nYear: {paper['year']}\nKey Findings: {paper['content'][:500]}..."
160
+ # for paper in papers
161
+ # ]
162
+ # )
163
+
164
+ # headers = {
165
+ # "Authorization": f"Bearer {perplexity_api_key}",
166
+ # "Content-Type": "application/json",
167
+ # }
168
+
169
+ # data = {
170
+ # "messages": [
171
+ # {
172
+ # "role": "system",
173
+ # "content": "You are a research analysis expert. Identify specific research gaps and future research directions based on the provided papers. Format your response with clear sections: Current State, Identified Gaps, and Future Directions.",
174
+ # },
175
+ # {
176
+ # "role": "user",
177
+ # "content": f"Analyze these papers and identify research gaps:\n\n{paper_summaries}",
178
+ # },
179
+ # ]
180
+ # }
181
+
182
+ # try:
183
+ # client = OpenAI(
184
+ # api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
185
+ # )
186
+ # response = client.chat.completions.create(
187
+ # model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
188
+ # messages=data["messages"],
189
+ # )
190
+ # return response.choices[0].message.content
191
+
192
+ # except Exception as e:
193
+ # st.error(f"Failed to analyze research gaps: {str(e)}")
194
+ # return "Error analyzing research gaps"
195
+
196
+
197
+ # def create_research_paper(gaps, topic, papers):
198
+ # """
199
+ # Create a research paper that addresses the identified gaps using Perplexity API
200
+ # """
201
+ # full_texts = "\n\n".join([paper["content"] for paper in papers])
202
+ # headers = {
203
+ # "Authorization": f"Bearer {perplexity_api_key}",
204
+ # "Content-Type": "application/json",
205
+ # }
206
+ # data = {
207
+ # "messages": [
208
+ # {
209
+ # "role": "system",
210
+ # "content": "You are a research paper generation expert. Create a comprehensive research paper that addresses the identified gaps based on the provided papers. Format your response with clear sections: Introduction, Literature Review, Methodology, Results, Discussion, Conclusion, and References.",
211
+ # },
212
+ # {
213
+ # "role": "user",
214
+ # "content": f"Create a research paper on the topic '{topic}' that addresses the following research gaps:\n\n{gaps}\n\nBased on the following papers:\n\n{full_texts}",
215
+ # },
216
+ # ]
217
+ # }
218
+ # try:
219
+ # client = OpenAI(
220
+ # api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
221
+ # )
222
+ # response = client.chat.completions.create(
223
+ # model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
224
+ # messages=data["messages"],
225
+ # )
226
+ # return response.choices[0].message.content
227
+
228
+ # except Exception as e:
229
+ # st.error(f"Failed to create research paper: {str(e)}")
230
+ # return "Error creating research paper"
231
+
232
+
233
+ # def cosine_similarity(vec1, vec2):
234
+ # """Calculate the cosine similarity between two vectors"""
235
+ # vec1 = np.array(vec1)
236
+ # vec2 = np.array(vec2)
237
+ # return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
238
+
239
+
240
+ # def calculate_cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
241
+ # """Calculate cosine similarity between two vectors"""
242
+ # return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
243
+
244
+
245
+ # def display_research_assistant_dashboard():
246
+ # """Display research assistant dashboard"""
247
+ # # Initialize session state for recommendations
248
+ # if "recommendations" not in st.session_state:
249
+ # st.session_state.recommendations = None
250
+ # if "vectors" not in st.session_state:
251
+ # st.session_state.vectors = None
252
+ # if "generated_paper" not in st.session_state:
253
+ # st.session_state.generated_paper = None
254
+
255
+ # # Sidebar
256
+ # with st.sidebar:
257
+ # st.title(f"Welcome, {st.session_state.username}")
258
+ # if st.button("Logout", use_container_width=True):
259
+ # for key in st.session_state.keys():
260
+ # del st.session_state[key]
261
+ # st.rerun()
262
+
263
+ # # Main content
264
+ # st.title("Research Paper Recommendations")
265
+ # search_query = st.text_input("Enter research topic:")
266
+ # col1, col2 = st.columns(2)
267
+ # with col1:
268
+ # if st.button("Get Research Papers"):
269
+ # if search_query:
270
+ # with st.spinner("Fetching recommendations..."):
271
+ # st.session_state.recommendations = get_research_papers(search_query)
272
+ # st.session_state.vectors = [
273
+ # paper["vector"] for paper in st.session_state.recommendations
274
+ # ]
275
+ # st.markdown(
276
+ # "\n\n".join(
277
+ # [
278
+ # f"**{i+1}.**\n{paper['content']}"
279
+ # # f"**{i+1}. {paper['title']}**\n{paper['content']}"
280
+ # for i, paper in enumerate(
281
+ # st.session_state.recommendations
282
+ # )
283
+ # ]
284
+ # )
285
+ # )
286
+ # else:
287
+ # st.warning("Please enter a search query")
288
+ # with col2:
289
+ # if st.button("Analyze Research Gaps"):
290
+ # if st.session_state.recommendations:
291
+ # with st.spinner("Analyzing research gaps..."):
292
+ # gaps = analyze_research_gaps(st.session_state.recommendations)
293
+ # st.session_state.generated_paper = create_research_paper(
294
+ # gaps, search_query, st.session_state.recommendations
295
+ # )
296
+ # st.markdown("### Potential Research Gaps")
297
+ # st.markdown(gaps)
298
+ # else:
299
+ # st.warning("Please get research papers first")
300
+
301
+ # if st.button("Save and Vectorize"):
302
+ # if st.session_state.generated_paper:
303
+ # try:
304
+ # # Initialize OpenAI client
305
+
306
+ # # Get embedding for generated paper
307
+ # response = openai.embeddings.create(
308
+ # model="text-embedding-ada-002",
309
+ # input=st.session_state.generated_paper,
310
+ # encoding_format="float",
311
+ # )
312
+ # generated_vector = response.data[0].embedding
313
+
314
+ # # Calculate similarities with stored vectors
315
+ # similarities = [
316
+ # calculate_cosine_similarity(generated_vector, paper_vector)
317
+ # for paper_vector in st.session_state.vectors
318
+ # ]
319
+
320
+ # # Display results
321
+ # st.markdown("### Generated Research Paper")
322
+ # st.markdown(st.session_state.generated_paper)
323
+
324
+ # st.markdown("### Cosine Similarities with Original Papers")
325
+ # for i, similarity in enumerate(similarities):
326
+ # st.metric(
327
+ # f"Paper {i+1}",
328
+ # value=f"{similarity:.3f}",
329
+ # help="Cosine similarity (1.0 = identical, 0.0 = completely different)",
330
+ # )
331
+
332
+ # except Exception as e:
333
+ # st.error(f"Error during vectorization: {str(e)}")
334
+ # else:
335
+ # st.warning("Please analyze research gaps first")
336
+
337
+
338
+ # # Run the dashboard
339
+ # if __name__ == "__main__":
340
+ # display_research_assistant_dashboard()
341
+
342
+ import research_combine2
343
+ # if __name__ == "__main__":
344
+ # display_research_assistant_dashboard()
345
def display_research_assistant_dashboard():
    """Delegate to ``research_combine2.display_research_assistant_dashboard``.

    Keeps this module's original entry-point name available to callers while
    the implementation lives in ``research_combine2``.
    """
    research_combine2.display_research_assistant_dashboard()
347
+
348
+
349
+
research_combine.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import json
4
+ import requests
5
+ from dotenv import load_dotenv
6
+ from pymongo import MongoClient
7
+ from typing import Dict, Any
8
+
9
# Load environment variables
load_dotenv()
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
# SECURITY: the connection string must come from the environment. A URI with
# an embedded username/password must never be committed as a fallback
# default (the credentials previously hard-coded here should be rotated).
# "MONGODB_UR" is the (misspelled) name this module has always read, so it is
# still honoured first; the correctly spelled "MONGODB_URI" is accepted too.
MONGODB_URI = os.getenv("MONGODB_UR") or os.getenv("MONGODB_URI")

# MongoDB setup
# NOTE: when neither variable is set, MongoClient(None) falls back to the
# driver's default host instead of a remote cluster.
client = MongoClient(MONGODB_URI)
db = client["novascholar_db"]
collection = db["research_papers"]
22
+
23
+
24
def search_papers(topic: str, num_papers: int) -> "str | None":
    """Ask Perplexity for papers on ``topic`` and store them in MongoDB.

    Args:
        topic: Research topic to search for.
        num_papers: Number of papers requested in the prompt.

    Returns:
        The JSON array text returned by the model (trimmed to the outermost
        ``[...]`` span) after the parsed papers were inserted into
        ``collection``; ``"[]"`` when the array is empty; ``None`` when the
        request, JSON parsing or database insert fails (the error is shown
        with ``st.error``).
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    prompt = f"""Find {num_papers} recent research papers about {topic}.
    Return ONLY a valid JSON array with the following structure for each paper, no additional text:
    [
        {{
            "Title": "paper title",
            "Publication": "publication name",
            "Journal_Conference": "venue name",
            "Abstract": "abstract text",
            "Keywords": "key terms",
            "Author": "author names",
            "Date_of_Publication": "publication date",
            "Intro": "introduction summary",
            "Literature_Review": "literature review summary",
            "Research_Models_Used": "models description",
            "Methodology": "methodology description",
            "Discussion": "discussion summary",
            "Future_Scope": "future work",
            "Theory": "theoretical framework",
            "Independent_Variables": "list of variables",
            "nof_Independent_Variables": 0,
            "Dependent_Variables": "list of variables",
            "nof_Dependent_Variables": 0,
            "Control_Variables": "list of variables",
            "nof_Control_Variables": 0,
            "Extraneous_Variables": "list of variables",
            "nof_Extraneous_Variables": 0
        }}
    ]"""

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [
            {
                "role": "system",
                "content": "You are a research paper analyzer that returns only valid JSON arrays.",
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.1,
    }

    try:
        # requests has no default timeout; avoid hanging the UI forever.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=60
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]

        # Models often wrap the array in prose or code fences: keep only the
        # span from the first "[" to the last "]".
        start = content.find("[")
        end = content.rfind("]")
        if start == -1 or end < start:
            raise ValueError("Response does not contain a JSON array")
        content = content[start : end + 1]

        # Validate JSON
        papers = json.loads(content)
        if not isinstance(papers, list):
            raise ValueError("Response is not a JSON array")

        # Insert into MongoDB (insert_many rejects an empty list, hence the
        # guard); the JSON text is returned rather than the parsed documents.
        if papers:
            collection.insert_many(papers)
            return content
        return "[]"

    except json.JSONDecodeError as e:
        st.error(f"Invalid JSON response: {str(e)}")
        return None
    except Exception as e:
        st.error(f"Error: {str(e)}")
        return None
100
+
101
+
102
+ import research22
103
+ import keywords_database_download
104
+ import new_keywords
105
+ import infranew
106
+ import loldude
107
+ import new_research_paper
108
+ import research3
109
+ import entire_download
110
+
111
+
112
def main():
    """Render the "Research Papers" hub page and dispatch to sub-pages.

    The sidebar radio selects a feature. "Search Papers" is handled here
    (search via ``search_papers``, list the results, optional database ping);
    the other options call the ``main()`` of the corresponding module, and
    any option without its own module (e.g. "Upload Paper") falls through to
    ``research22.main()``.
    """
    st.set_page_config(page_title="Research Papers", layout="wide")

    st.title("Research Papers")

    # Sidebar radio
    option = st.sidebar.radio(
        "Select an option",
        [
            "Search Papers",
            "Upload Paper",
            "Single Keyword Search",
            "Multiple Keywords Search",
            "Knowledge Graph",
            "Cosine Similarity",
            "Paper Generator",
            "Paper from Topic",
            "Download Entire Corpus",
        ],
    )

    if option == "Search Papers":
        st.subheader("Search and Store Papers")

        topic = st.text_input("Enter research topic")
        num_papers = st.number_input(
            "Number of papers", min_value=1, max_value=10, value=5
        )

        if st.button("Search and Store"):
            if topic:
                with st.spinner(f"Searching and storing papers about {topic}..."):
                    results = search_papers(topic, num_papers)
                if results:
                    papers = json.loads(results)
                    if papers:
                        # Report what was actually stored, not what was asked for.
                        st.success(
                            f"Successfully stored {len(papers)} papers in MongoDB"
                        )
                    else:
                        st.info("No papers were returned for this topic.")
                    # Display results
                    for paper in papers:
                        # The model may omit "Title"; don't crash the page.
                        with st.expander(paper.get("Title", "Untitled")):
                            for key, value in paper.items():
                                if key != "Title":
                                    st.write(f"**{key}:** {value}")
            else:
                st.warning("Please enter a research topic")

        # Add MongoDB connection status
        if st.sidebar.button("Check Database Connection"):
            try:
                client.admin.command("ping")
                # The connection URI is deliberately not printed: it can
                # contain credentials and would end up in the server logs.
                st.sidebar.success("Connected to MongoDB")
            except Exception as e:
                st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
        return

    # Sub-pages implemented in their own modules.
    pages = {
        "Single Keyword Search": keywords_database_download.main,
        "Multiple Keywords Search": new_keywords.main,
        "Knowledge Graph": infranew.main,
        "Cosine Similarity": loldude.main,
        "Paper Generator": new_research_paper.main,
        "Paper from Topic": research3.main,
        "Download Entire Corpus": entire_download.main,
    }
    # Anything not listed (currently "Upload Paper") is served by research22.
    pages.get(option, research22.main)()
185
+
186
+
187
+ if __name__ == "__main__":
188
+ main()
research_combine2.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import new_research_paper
2
+ import research3
3
+ import entire_download
4
+ import streamlit as st
5
+ import os
6
+ import json
7
+ import requests
8
+ from dotenv import load_dotenv
9
+ from pymongo import MongoClient
10
+ from typing import Dict, Any
11
+ import research22
12
+ import keywords_database_download
13
+ import new_keywords
14
+ import infranew
15
+ import loldude
16
+ import new_research_paper
17
+ import research3
18
+ import entire_download
19
+ import sciclone
20
+ import extract
21
+
22
# Load environment variables
load_dotenv()
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"

# The connection string is read from MONGODB_URI first. The lookup key used to
# be misspelled ("MONGODB_UR"), which meant a correctly named variable was
# never honoured; the misspelled key is still consulted as a fallback so that
# any deployment relying on it keeps working.
# NOTE(review): the final fallback embeds credentials in source control --
# rotate them and drop the default once every deployment sets MONGODB_URI.
MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv(
    "MONGODB_UR",
    "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
)

# MongoDB setup
client = MongoClient(MONGODB_URI)
db = client["novascholar_db"]
34
+
35
+
36
def _extract_json_array(content: str) -> str:
    """Return the outermost ``[...]`` span found in *content*.

    Text before the first ``[`` and after the last ``]`` (explanations,
    markdown fences, ...) is discarded.

    Raises:
        ValueError: if *content* contains no bracketed span.
    """
    start = content.find("[")
    end = content.rfind("]")
    if start == -1 or end < start:
        raise ValueError("Response does not contain a JSON array")
    return content[start : end + 1]


def search_papers(topic: str, num_papers: int, paper_type: str) -> str:
    """Ask Perplexity for papers about *topic* and store them in MongoDB.

    Args:
        topic: Research topic inserted into the prompt.
        num_papers: Number of papers requested from the model.
        paper_type: Key of the attribute table below. It selects the fields
            requested for each paper and the target collection (the paper
            type lower-cased, with spaces replaced by underscores).

    Returns:
        The JSON array text returned by the model (``"[]"`` when the array is
        empty). ``None`` is returned, after the problem has been reported with
        ``st.error``, when the request, the parsing or the insert fails.

    Raises:
        KeyError: if *paper_type* is not one of the known paper types.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    # Fields shared by every paper type, in the order they are requested.
    common = [
        "Title",
        "Publication",
        "Journal_Conference",
        "Abstract",
        "Keywords",
        "Author",
        "Date_of_Publication",
        "Intro",
        "Literature_Review",
    ]
    attributes = {
        "Review Based Paper": common
        + [
            "Body",
            "Protocol",
            "Search String",
            "Included Studies",
            "Data Collection and Analysis Methods",
            "Data Extraction Table",
            "Synthesis and Analysis",
            "Conclusion",
            "Limitations",
            "Results",
            "References",
            "Risk of Bias Assessment",
        ],
        "Opinion/Perspective Based Paper": common
        + [
            "Introduction",
            "Body",
            "Results and Discussion",
            "Conclusion",
            "References",
        ],
        "Empirical Research Paper": common
        + [
            "Introduction",
            "Body",
            "Methodology",
            "Participants",
            "Survey Instrument",
            "Data Collection",
            "Data Analysis",
            "Results and Discussion",
            "Conclusion",
            "References",
        ],
        "Research Paper (Other)": common
        + [
            "Research_Models_Used",
            "Methodology",
            "Discussion",
            "Future_Scope",
            "Theory",
            "Independent_Variables",
            "nof_Independent_Variables",
            "Dependent_Variables",
            "nof_Dependent_Variables",
            "Control_Variables",
            "Extraneous_Variables",
            "nof_Control_Variables",
            "nof_Extraneous_Variables",
        ],
    }

    selected_attributes = attributes[paper_type]
    field_template = ", ".join(f'"{attr}": "value"' for attr in selected_attributes)
    prompt = f"""Find {num_papers} recent research papers about {topic}.
    Return ONLY a valid JSON array with the following structure for each paper, no additional text:
    [{{
        {field_template}
    }}]"""

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [
            {
                "role": "system",
                "content": "You are a research paper analyzer that returns only valid JSON arrays.",
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.1,
    }

    try:
        # Without a timeout a stalled connection would block the Streamlit
        # script run indefinitely; the bound is generous because the model
        # may take a while to answer.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=120
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]

        # Clean response and ensure it's valid JSON
        content = _extract_json_array(content.strip())
        papers = json.loads(content)
        # insert_many only accepts documents, so reject arrays of scalars early
        # with a readable message instead of a driver error.
        if not isinstance(papers, list) or not all(
            isinstance(paper, dict) for paper in papers
        ):
            raise ValueError("Response is not a JSON array of objects")

        # Insert into MongoDB
        collection = db[paper_type.replace(" ", "_").lower()]
        if papers:
            collection.insert_many(papers)
            return content
        return "[]"

    except json.JSONDecodeError as e:
        st.error(f"Invalid JSON response: {str(e)}")
        return None
    except Exception as e:
        # UI boundary: any failure is shown to the user rather than raised.
        st.error(f"Error: {str(e)}")
        return None
178
+
179
+
180
def _render_search_papers_page():
    """Render the "Search Papers" page: search form, results and DB check."""
    st.subheader("Search and Store Papers")

    topic = st.text_input("Enter research topic")
    num_papers = st.number_input(
        "Number of papers", min_value=1, max_value=10, value=5
    )
    paper_type = st.selectbox(
        "Select type of research paper",
        [
            "Review Based Paper",
            "Opinion/Perspective Based Paper",
            "Empirical Research Paper",
            "Research Paper (Other)",
        ],
    )

    if st.button("Search and Store"):
        if topic:
            with st.spinner(f"Searching and storing papers about {topic}..."):
                results = search_papers(topic, num_papers, paper_type)
                if results:
                    papers = json.loads(results)
                    if papers:
                        # Report what was actually stored, which may differ
                        # from the number that was requested.
                        st.success(
                            f"Successfully stored {len(papers)} papers in MongoDB"
                        )
                    else:
                        st.warning("No papers were returned for this topic")
                    # Display results
                    for paper in papers:
                        # The model may omit the title; do not crash on it.
                        with st.expander(str(paper.get("Title", "Untitled"))):
                            for key, value in paper.items():
                                if key != "Title":
                                    st.write(f"**{key}:** {value}")
        else:
            st.warning("Please enter a research topic")

    # Add MongoDB connection status
    if st.sidebar.button("Check Database Connection"):
        try:
            client.admin.command("ping")
            # The connection string is deliberately not printed: it carries
            # credentials and stdout ends up in the server logs.
            st.sidebar.success("Connected to MongoDB")
        except Exception as e:
            st.sidebar.error(f"MongoDB Connection Error: {str(e)}")


def display_research_assistant_dashboard():
    """Show the sidebar menu and render the selected research tool.

    "Search Papers" is rendered inline; every other entry delegates to the
    ``main()`` of the module that implements it. Options without a dedicated
    entry (currently "Upload Paper") fall back to ``research22.main()``.
    """
    # st.set_page_config(page_title="Research Papers", layout="wide")

    # st.title("Research Papers")

    # Sidebar radio
    option = st.sidebar.radio(
        "Select an option",
        [
            "Search Papers",
            "Upload Paper",
            "Single Keyword Search",
            "Multiple Keywords Search",
            "Knowledge Graph",
            "Cosine Similarity",
            "Paper Generator",
            "Paper from Topic",
            "Download Entire Corpus",
            "Research Copilot",
            "Research Paper Analysis Tool",
        ],
    )

    if option == "Search Papers":
        _render_search_papers_page()
        return

    pages = {
        "Single Keyword Search": keywords_database_download.main,
        "Multiple Keywords Search": new_keywords.main,
        "Knowledge Graph": infranew.main,
        "Cosine Similarity": loldude.main,
        "Paper Generator": new_research_paper.main,
        "Paper from Topic": research3.main,
        "Download Entire Corpus": entire_download.main,
        "Research Copilot": sciclone.main,
        "Research Paper Analysis Tool": extract.main,
    }
    pages.get(option, research22.main)()
266
+
267
+
268
+ if __name__ == "__main__":
269
+ display_research_assistant_dashboard()
rubrics.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from pymongo import MongoClient
3
+ from openai import OpenAI
4
+ from bson import ObjectId
5
+ import json
6
+ from dotenv import load_dotenv
7
+ import os
8
+
9
+ load_dotenv()
10
+ MONGO_URI = os.getenv('MONGO_URI')
11
+ OPENAI_API_KEY = os.getenv('OPENAI_KEY')
12
+
13
+ client = MongoClient(MONGO_URI)
14
+ db = client['novascholar_db']
15
+ # db.create_collection("rubrics")
16
+ rubrics_collection = db['rubrics']
17
+ resources_collection = db['resources']
18
+ courses_collection = db['courses']
19
+
20
def generate_rubrics(api_key, session_title, outcome_description, taxonomy, pre_class_material):
    """Ask the OpenAI chat API for a rubric and return it as JSON text.

    Args:
        api_key: OpenAI API key used to create the client.
        session_title: Title of the session the rubric is for.
        outcome_description: Learning-outcome text inserted into the prompt.
        taxonomy: Bloom's Taxonomy level inserted into the prompt.
        pre_class_material: Pre-class material text inserted into the prompt.

    Returns:
        The model's answer reduced to its outermost ``{...}`` span (so that
        stray markdown fences or commentary do not break ``json.loads`` in the
        caller), the unmodified answer when it contains no braces, or ``None``
        after ``st.error`` when the request fails.
    """
    prompt = f"""
    You are an expert educational AI assistant specializing in instructional design. Generate a detailed rubric for the session titled "{session_title}". The rubric should be aligned with Bloom's Taxonomy level "{taxonomy}" and use numerical scoring levels (4,3,2,1) instead of descriptive levels. Use the following context:

    Session Outcome Description:
    {outcome_description}

    Pre-class Material:
    {pre_class_material}

    Please generate the rubric in JSON format with these specifications:
    1. Use numerical levels (4=Highest, 1=Lowest) instead of descriptive levels
    2. Include 4-5 relevant criteria based on the session outcome
    3. Each criterion should have clear descriptors for each numerical level
    4. Focus on objectively measurable aspects for evaluation
    5. Structure should be suitable for evaluating assignments and test answers

    ***IMPORTANT: DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.***
    """

    messages = [
        {
            "role": "system",
            "content": "You are an expert educational AI assistant specializing in instructional design.",
        },
        {
            "role": "user",
            "content": prompt
        },
    ]

    try:
        # Named distinctly so it does not shadow the module-level Mongo client.
        openai_client = OpenAI(api_key=api_key)
        response = openai_client.chat.completions.create(
            model="gpt-4-0125-preview",
            messages=messages
        )
        content = response.choices[0].message.content
    except Exception as e:
        st.error(f"Failed to generate rubrics: {e}")
        return None

    if not content:
        return content

    # The prompt forbids fences and extra text, but the instruction is not
    # always followed; keep only the JSON object when one can be located.
    start = content.find("{")
    end = content.rfind("}")
    if start != -1 and end > start:
        return content[start:end + 1]
    return content
61
+
62
def display_rubrics_tab(session, course_id):
    """Render the rubric tab for one session: show context, generate, save.

    Args:
        session: Mapping that provides at least ``'session_id'`` and
            ``'title'`` for the session being displayed.
        course_id: Identifier used to look the course up in the courses
            collection.

    The generated rubric is kept in ``st.session_state`` because a Streamlit
    button is only ``True`` during the run triggered by its own click: with
    "Save Rubric" nested inside the "Generate Rubric" branch, clicking it
    re-ran the script with "Generate Rubric" false and the save code was
    never reached.
    """
    st.subheader("Generated Rubrics")

    # Fetch session details from the courses collection
    course_data = courses_collection.find_one(
        {"course_id": course_id, "sessions.session_id": session['session_id']},
        {"sessions.$": 1}
    )

    if not (course_data and 'sessions' in course_data and len(course_data['sessions']) > 0):
        st.error("Session data not found")
        return

    session_data = course_data['sessions'][0]

    # Extract session learning outcomes
    if not ('session_learning_outcomes' in session_data and len(session_data['session_learning_outcomes']) > 0):
        st.error("No learning outcomes found for this session")
        return

    # Only the first learning outcome is used for the rubric.
    outcome = session_data['session_learning_outcomes'][0]
    outcome_description = outcome.get('outcome_description', '')
    taxonomy_level = outcome.get('bloom_taxonomy_level', '')

    # Display fetched information
    st.markdown("### Session Information")
    st.markdown(f"**Session Title:** {session['title']}")
    st.markdown(f"**Learning Outcome:** {outcome_description}")
    st.markdown(f"**Taxonomy Level:** {taxonomy_level}")

    # Fetch pre-class material
    pre_class_material_docs = resources_collection.find({"session_id": session['session_id']})
    pre_class_material = "\n".join([f"{doc.get('title', 'No Title')}: {doc.get('url', 'No URL')}" for doc in pre_class_material_docs])

    # One slot per session so rubrics of different sessions do not mix.
    state_key = f"generated_rubric_{session['session_id']}"

    if st.button("Generate Rubric"):
        rubric = generate_rubrics(
            OPENAI_API_KEY,
            session['title'],
            outcome_description,
            taxonomy_level,
            pre_class_material
        )
        if rubric:
            st.session_state[state_key] = rubric

    rubric = st.session_state.get(state_key)
    if rubric:
        st.json(rubric)
        if st.button("Save Rubric"):
            try:
                parsed_rubric = json.loads(rubric)
            except json.JSONDecodeError as e:
                st.error(f"Generated rubric is not valid JSON: {e}")
            else:
                rubric_data = {
                    "course_id": course_id,
                    "session_id": session['session_id'],
                    "rubric": parsed_rubric
                }
                rubrics_collection.insert_one(rubric_data)
                st.success("Rubric saved successfully!")
sciclone.py ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import PyPDF2
4
+ from typing import Optional, Dict, List
5
+ import json
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ import xml.etree.ElementTree as ET
9
+ import re
10
+ from datetime import datetime
11
+ import time
12
+ from dotenv import load_dotenv
13
+ import os
14
+ import pandas as pd
15
+
16
+ # Load environment variables
17
+ load_dotenv()
18
+ PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
19
+ PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
20
+ SAPLING_API_KEY = os.getenv("SAPLING_API_KEY")
21
+
22
+
23
def call_perplexity_api(prompt: str) -> str:
    """Send *prompt* to the Perplexity chat-completions endpoint.

    Args:
        prompt: Text sent as the single user message.

    Returns:
        The content of the first choice's message, or ``""`` after reporting
        the failure with ``st.error`` when the request fails or the response
        does not have the expected structure.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }

    try:
        # Bounded wait, like the arXiv request in this module: without a
        # timeout a stalled connection blocks the script run forever. The
        # limit is generous because completions can take a while.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=120
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # UI boundary: callers expect a string, never an exception.
        st.error(f"API Error: {str(e)}")
        return ""
43
+
44
+
45
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of *pdf_file*, one newline after each page.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts as its source.
    """
    document = PyPDF2.PdfReader(pdf_file)
    # Each page contributes its extracted text followed by a line break.
    return "".join(page.extract_text() + "\n" for page in document.pages)
52
+
53
+
54
def analyze_paper(text: str, category: str) -> str:
    """Build the question for *category* and ask the Perplexity API about *text*.

    Args:
        text: Full paper text; only the first 5000 characters are sent.
        category: One of the keys of the instruction table below.

    Returns:
        Whatever ``call_perplexity_api`` returns for the assembled prompt.

    Raises:
        KeyError: if *category* is not in the instruction table.
    """
    instructions = dict(
        [
            ("Summarized Abstract", "Extract and summarize the abstract from this research paper:"),
            ("Results", "What are the main results and findings from this research paper:"),
            ("Summarized Introduction", "Summarize the introduction section of this research paper:"),
            ("Methods Used", "What are the main methods and methodologies used in this research:"),
            ("Literature Survey", "Summarize the literature review or related work from this paper:"),
            ("Limitations", "What are the limitations mentioned in this research:"),
            ("Contributions", "What are the main contributions of this research:"),
            ("Practical Implications", "What are the practical implications of this research:"),
            ("Objectives", "What are the main objectives of this research:"),
            ("Findings", "What are the key findings from this research:"),
            ("Future Research", "What future research directions are suggested in this paper:"),
            ("Dependent Variables", "What are the dependent variables studied in this research:"),
            ("Independent Variables", "What are the independent variables studied in this research:"),
            ("Dataset", "What dataset(s) were used in this research:"),
            ("Problem Statement", "What is the main problem statement or research question:"),
            ("Challenges", "What challenges were faced or addressed in this research:"),
            ("Applications", "What are the potential applications of this research:"),
        ]
    )

    question = instructions[category]
    excerpt = text[:5000]  # Limit text to avoid token limits
    return call_perplexity_api(question + "\n\nPaper text: " + excerpt)
78
+
79
+
80
class ResearchAssistant:
    """Research helpers behind the "Research Copilot" tabs.

    Most public methods return ``{"choices": [{"message": {"content": ...}}]}``
    on success and ``{"error": message}`` on failure, which is the shape the
    UI code checks for. ``generate_citation`` returns ``{"citation": ...}``
    and ``refine_response`` returns plain text.
    """

    # XML namespace prefix of the Atom elements in arXiv API responses.
    _ATOM = "{http://www.w3.org/2005/Atom}"

    def __init__(self, perplexity_key: str):
        """Store the API key on the instance.

        The requests themselves go through ``call_perplexity_api``.
        """
        self.perplexity_key = perplexity_key

    def chat_with_pdf(self, pdf_text: str, query: str) -> Dict:
        """Answer *query* using the chunks of *pdf_text* that best match it."""
        chunks = self._split_text(pdf_text)
        relevant_chunks = self._get_relevant_chunks(chunks, query)

        prompt = f"Context from PDF:\n\n{relevant_chunks}\n\nQuestion: {query}"
        response_text = call_perplexity_api(prompt)
        return {"choices": [{"message": {"content": response_text}}]}

    def generate_literature_review(self, topic: str) -> Dict:
        """Write a literature review on *topic* based on papers found on arXiv.

        Returns an ``{"error": ...}`` dict when no papers are found or when
        anything fails along the way.
        """
        try:
            # Search arXiv for papers
            papers = self._search_arxiv(topic)
            if not papers:
                return {"error": "No papers found on the topic"}

            # Format paper information
            papers_summary = "\n\n".join(
                f"Paper: {p['title']}\nAuthors: {', '.join(p['authors'])}\nSummary: {p['summary']}"
                for p in papers
            )

            prompt = f"""Generate a comprehensive literature review on '{topic}'. Based on these papers:

            {papers_summary}

            Structure the review as follows:
            1. Introduction and Background
            2. Current Research Trends
            3. Key Findings and Themes
            4. Research Gaps
            5. Future Directions"""

            response_text = call_perplexity_api(prompt)
            return {"choices": [{"message": {"content": response_text}}]}
        except Exception as e:
            return {"error": f"Literature review generation failed: {str(e)}"}

    def ai_writer(self, outline: str, references: List[str]) -> Dict:
        """Draft a paper that follows *outline* and incorporates *references*."""
        prompt = f"""Write a research paper following this structure:

        Outline:
        {outline}

        References to incorporate:
        {json.dumps(references)}

        Instructions:
        - Follow academic writing style
        - Include appropriate citations
        - Maintain logical flow
        - Include introduction and conclusion"""

        response_text = call_perplexity_api(prompt)
        return {"choices": [{"message": {"content": response_text}}]}

    def refine_response(self, response: str, column: str) -> str:
        """Ask the model to rewrite *response* so it fits the CSV *column*."""
        prompt = f"""Refine the following response to fit the '{column}' column in a research paper CSV format:

        Response: {response}

        Ensure the response is clear, concise, and fits the context of the column."""

        return call_perplexity_api(prompt)

    def paraphrase(self, text: str) -> Dict:
        """Paraphrase *text*, asking the model to keep an academic tone."""
        prompt = f"""Paraphrase the following text while:
        - Maintaining academic tone
        - Preserving key meaning
        - Improving clarity

        Text: {text}"""

        response_text = call_perplexity_api(prompt)
        return {"choices": [{"message": {"content": response_text}}]}

    def generate_citation(self, paper_info: Dict, style: str = "APA") -> Dict:
        """Generate a citation in *style* from ``title``, ``authors`` and ``year``.

        Returns ``{"citation": text}``.
        """
        prompt = f"""Generate a {style} citation for:
        Title: {paper_info['title']}
        Authors: {', '.join(paper_info['authors'])}
        Year: {paper_info['year']}

        Follow exact {style} format guidelines."""

        response_text = call_perplexity_api(prompt)
        return {"citation": response_text}

    def detect_ai_content(self, text: str) -> Dict:
        """Score *text* with the Sapling AI-detection endpoint.

        On success the parsed JSON body of the response is returned as the
        message content. Network failures and non-200 responses are returned
        as ``{"error": ...}`` so the UI can display them.
        """
        # The detection is done entirely by Sapling; the LLM prompt that used
        # to be assembled here was never sent anywhere and has been removed.
        try:
            response = requests.post(
                "https://api.sapling.ai/api/v1/aidetect",
                json={"key": SAPLING_API_KEY, "text": text},
                timeout=30,
            )
        except requests.RequestException as e:
            return {"error": f"Sapling API Error: {str(e)}"}

        st.info(
            "A score from 0 to 1 will be returned, with 0 indicating the maximum confidence that the text is human-written, and 1 indicating the maximum confidence that the text is AI-generated."
        )

        if response.status_code == 200:
            return {"choices": [{"message": {"content": response.json()}}]}
        return {
            "error": f"Sapling API Error: {response.status_code} - {response.text}"
        }

    def _split_text(self, text: str) -> List[str]:
        """Split *text* into 1000-character chunks with 200 characters of overlap."""
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " ", ""]
        )
        return splitter.split_text(text)

    def _get_relevant_chunks(self, chunks: List[str], query: str) -> str:
        """Return the three chunks sharing the most words with *query*.

        The selected chunks are joined with blank lines, best match first.
        """
        # Simple keyword-based relevance scoring
        query_words = set(query.lower().split())
        scored_chunks = [
            (len(query_words.intersection(chunk.lower().split())), chunk)
            for chunk in chunks
        ]

        # Tuples sort by score first, then by chunk text, both descending.
        scored_chunks.sort(reverse=True)
        return "\n\n".join(chunk for _, chunk in scored_chunks[:3])

    def _search_arxiv(self, topic: str) -> List[Dict]:
        """Query the arXiv API for up to five papers matching every word of *topic*.

        Returns an empty list when *topic* is blank or the request fails.
        """
        words = topic.split()
        if not words:
            return []
        try:
            # Let requests build the query string so that characters such as
            # "&" or "#" in the topic are encoded instead of corrupting the URL.
            response = requests.get(
                "http://export.arxiv.org/api/query",
                params={
                    "search_query": "all:" + " AND ".join(words),
                    "start": 0,
                    "max_results": 5,
                },
                timeout=10,
            )
            response.raise_for_status()
            return self._parse_arxiv_response(response.text)
        except Exception as e:
            print(f"arXiv search failed: {str(e)}")
            return []

    def _parse_arxiv_response(self, response_text: str) -> List[Dict]:
        """Convert an arXiv Atom feed into a list of paper dicts.

        Each dict has ``id``, ``title``, ``summary``, ``authors`` (list of
        names) and ``published`` (first ten characters of the timestamp).
        Elements missing from an entry become empty values, so one incomplete
        entry no longer discards the whole result. Unparseable XML yields an
        empty list.
        """
        try:
            root = ET.fromstring(response_text)
        except ET.ParseError as e:
            print(f"arXiv response parsing failed: {str(e)}")
            return []

        ns = self._ATOM
        papers = []
        for entry in root.findall(f"{ns}entry"):
            author_names = (
                author.findtext(f"{ns}name", default="").strip()
                for author in entry.findall(f"{ns}author")
            )
            papers.append(
                {
                    "id": entry.findtext(f"{ns}id", default=""),
                    "title": entry.findtext(f"{ns}title", default="").strip(),
                    "summary": entry.findtext(f"{ns}summary", default="").strip(),
                    "authors": [name for name in author_names if name],
                    "published": entry.findtext(f"{ns}published", default="")[:10],
                }
            )
        return papers
256
+
257
+
258
# Columns produced by the "Extract Data" tab, one API call per entry and file.
_EXTRACTION_CATEGORIES = (
    "Summarized Abstract",
    "Results",
    "Summarized Introduction",
    "Methods Used",
    "Literature Survey",
    "Limitations",
    "Contributions",
    "Practical Implications",
    "Objectives",
    "Findings",
    "Future Research",
    "Dependent Variables",
    "Independent Variables",
    "Dataset",
    "Problem Statement",
    "Challenges",
    "Applications",
)


def _show_response(response: Dict) -> None:
    """Display the error of an assistant result, or its message content."""
    if "error" in response:
        st.error(response["error"])
    else:
        st.write(response["choices"][0]["message"]["content"])


def _render_chat_tab(assistant) -> None:
    """Upload a PDF, cache its text in the session and answer questions on it."""
    st.header("Chat with PDF")

    # File uploader with clear button
    col1, col2 = st.columns([3, 1])
    with col1:
        uploaded_file = st.file_uploader("Upload PDF", type="pdf", key="pdf_chat")
    with col2:
        if st.button("Clear PDF"):
            st.session_state.pop("pdf_text", None)
            st.session_state.pop("pdf_source", None)
            st.rerun()

    if not uploaded_file:
        return

    # Remember which upload the cached text belongs to; otherwise replacing
    # the file would keep answering from the previous document.
    source = (uploaded_file.name, uploaded_file.size)
    if (
        "pdf_text" not in st.session_state
        or st.session_state.get("pdf_source") != source
    ):
        with st.spinner("Processing PDF..."):
            reader = PyPDF2.PdfReader(uploaded_file)
            st.session_state.pdf_text = "".join(
                page.extract_text() for page in reader.pages
            )
            st.session_state.pdf_source = source
        st.success("PDF processed successfully!")

    query = st.text_input("Ask a question about the PDF")
    if query:
        with st.spinner("Analyzing..."):
            response = assistant.chat_with_pdf(st.session_state.pdf_text, query)
            _show_response(response)


def _render_literature_review_tab(assistant) -> None:
    """Generate a literature review for a topic typed by the user."""
    st.header("Literature Review")
    topic = st.text_input("Enter research topic")
    if st.button("Generate Review") and topic:
        with st.spinner("Generating literature review..."):
            _show_response(assistant.generate_literature_review(topic))


def _render_ai_writer_tab(assistant) -> None:
    """Draft a paper from an outline and a list of references."""
    st.header("AI Writer")
    outline = st.text_area("Enter paper outline")
    references = st.text_area("Enter references (one per line)")
    if st.button("Generate Paper") and outline:
        # Blank lines are not references; do not forward them to the model.
        reference_list = [
            line.strip() for line in references.split("\n") if line.strip()
        ]
        with st.spinner("Writing paper..."):
            _show_response(assistant.ai_writer(outline, reference_list))


def _render_extract_data_tab(assistant) -> None:
    """Analyse uploaded PDFs per category and offer the table as a CSV.

    *assistant* is accepted only so every tab renderer has the same signature.
    """
    st.header("Extract Data")

    uploaded_files = st.file_uploader(
        "Upload multiple PDF files", type="pdf", accept_multiple_files=True
    )

    if not (uploaded_files and st.button("Process Papers")):
        return

    # Initialize progress bar
    progress_bar = st.progress(0)
    status_text = st.empty()

    results = []
    total_steps = len(uploaded_files) * len(_EXTRACTION_CATEGORIES)

    # Process each file
    for i, file in enumerate(uploaded_files):
        status_text.text(f"Processing {file.name}...")

        # Extract text from PDF
        text = extract_text_from_pdf(file)

        paper_results = {"Filename": file.name}

        # Analyze each category
        for j, category in enumerate(_EXTRACTION_CATEGORIES):
            status_text.text(f"Processing {file.name} - {category}")
            paper_results[category] = analyze_paper(text, category)

            # Update progress
            done = i * len(_EXTRACTION_CATEGORIES) + j + 1
            progress_bar.progress(done / total_steps)

            # Add small delay to avoid API rate limits
            time.sleep(1)

        results.append(paper_results)

    df = pd.DataFrame(results)

    st.download_button(
        label="Download Results as CSV",
        data=df.to_csv(index=False),
        file_name="research_papers_analysis.csv",
        mime="text/csv",
    )

    # Display results in the app
    st.subheader("Analysis Results")
    st.dataframe(df)

    status_text.text("Processing complete!")
    progress_bar.progress(1.0)


def _render_paraphraser_tab(assistant) -> None:
    """Paraphrase text typed by the user."""
    st.header("Paraphraser")
    text = st.text_area("Enter text to paraphrase")
    if st.button("Paraphrase") and text:
        with st.spinner("Paraphrasing..."):
            _show_response(assistant.paraphrase(text))


def _render_citation_tab(assistant) -> None:
    """Generate a citation from title, authors, year and style."""
    st.header("Citation Generator")
    col1, col2 = st.columns(2)
    with col1:
        title = st.text_input("Paper Title")
        authors = st.text_input("Authors (comma-separated)")
    with col2:
        year = st.text_input("Year")
        style = st.selectbox("Citation Style", ["APA", "MLA", "Chicago"])

    if st.button("Generate Citation") and title:
        with st.spinner("Generating citation..."):
            citation = assistant.generate_citation(
                {
                    "title": title,
                    # Skip empty entries such as a trailing comma or a blank field.
                    "authors": [a.strip() for a in authors.split(",") if a.strip()],
                    "year": year,
                },
                style,
            )
            if "error" in citation:
                st.error(citation["error"])
            else:
                st.code(citation["citation"], language="text")


def _render_ai_detector_tab(assistant) -> None:
    """Run AI-content detection on text typed by the user."""
    st.header("AI Detector")
    text = st.text_area("Enter text to analyze")
    if st.button("Detect AI Content") and text:
        with st.spinner("Analyzing..."):
            _show_response(assistant.detect_ai_content(text))


def main():
    """Render the "Research Copilot" page, one tab per tool.

    Shows a warning and renders nothing else when the Perplexity API key is
    not configured.
    """
    # st.set_page_config(page_title="Research Assistant", layout="wide")
    st.title("Research Copilot")

    if not PERPLEXITY_API_KEY:
        st.warning("Perplexity API key not found in environment variables.")
        return

    assistant = ResearchAssistant(PERPLEXITY_API_KEY)

    # Tab labels paired with their renderers; the order is the display order.
    pages = [
        ("Chat with PDF", _render_chat_tab),
        ("Literature Review", _render_literature_review_tab),
        ("AI Writer", _render_ai_writer_tab),
        ("Extract Data", _render_extract_data_tab),
        ("Paraphraser", _render_paraphraser_tab),
        ("Citation Generator", _render_citation_tab),
        ("AI Detector", _render_ai_detector_tab),
    ]
    tabs = st.tabs([label for label, _ in pages])
    for tab, (_, render) in zip(tabs, pages):
        with tab:
            render(assistant)
463
+
464
+
465
+ if __name__ == "__main__":
466
+ main()
session_page.py CHANGED
The diff for this file is too large to render. See raw diff
 
subjective_test_evaluation.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from datetime import datetime
3
+ from pymongo import MongoClient
4
+ import os
5
+ from openai import OpenAI
6
+ from dotenv import load_dotenv
7
+ from bson import ObjectId
8
+
9
+ load_dotenv()
10
+
11
+ # MongoDB setup
12
+ MONGO_URI = os.getenv('MONGO_URI')
13
+ client = MongoClient(MONGO_URI)
14
+ db = client["novascholar_db"]
15
+ subjective_tests_collection = db["subjective_tests"]
16
+ subjective_test_evaluation_collection = db["subjective_test_evaluation"]
17
+ resources_collection = db["resources"]
18
+ students_collection = db["students"]
19
+
20
# Number of characters of pre-class material included in each prompt; keeps
# the request comfortably inside the model's context window.
_PRE_CLASS_CHAR_LIMIT = 1000


def _build_evaluation_prompt(question_text, answer, pre_class_excerpt, rubric):
    """Return the assessor prompt for a single question/answer pair.

    Args:
        question_text: The question as shown to the student.
        answer: The student's answer to that question.
        pre_class_excerpt: Already-truncated pre-class material text.
        rubric: The rubric text the model should grade against.

    Returns:
        The full prompt string to send as the user message.
    """
    analysis_content = f"""
    Question: {question_text}
    Student Answer: {answer}
    """

    # NOTE: everything inside this literal is sent to the model verbatim, so
    # code comments must stay outside the string.
    return f"""As an educational assessor, evaluate this student's answer based on the provided rubric criteria and pre-class materials. Follow these assessment guidelines:

    1. Evaluation Process:
    - Use each rubric criterion (scored 1-4) for internal assessment
    - Compare response with pre-class materials
    - Check alignment with all rubric requirements
    - Calculate final score: sum of criteria scores converted to 10-point scale

    Pre-class Materials:
    {pre_class_excerpt}

    Rubric Criteria:
    {rubric}

    Question and Answer:
    {analysis_content}

    Provide your assessment in the following format:

    **Score and Evidence**
    - Score: [X]/10
    - Evidence for deduction: [One-line reference to most significant gap or inaccuracy]

    **Key Areas for Improvement**
    - [Concise improvement point 1]
    - [Concise improvement point 2]
    - [Concise improvement point 3]
    """


def evaluate_subjective_answers(session_id, student_id, test_id):
    """Generate and store an AI evaluation of one student's subjective test.

    Each question/answer pair of the student's submission is graded by the
    OpenAI chat model against a default rubric and the session's pre-class
    materials. The result is upserted into the evaluation collection, so
    running the evaluation again replaces the stored document instead of
    adding a second one next to it.

    Args:
        session_id: Session identifier used to look up pre-class resources
            and stored on the evaluation document.
        student_id: Identifier of the student; compared as a string against
            ``student_id`` of each submission.
        test_id: ``_id`` of the document in the subjective tests collection.

    Returns:
        The evaluation document (``test_id``, ``student_id``, ``session_id``,
        ``evaluations``, ``evaluated_at``), or ``None`` when the test or the
        student's submission does not exist or any step fails.
    """
    try:
        # Fetch test and student submission
        test = subjective_tests_collection.find_one({"_id": test_id})
        if not test:
            return None

        submission = next(
            (sub for sub in test.get('submissions', [])
             if sub['student_id'] == str(student_id)),
            None
        )
        if not submission:
            return None

        # Concatenate the text of every pre-class resource for this session.
        pre_class_content = "".join(
            material['text_content'] + "\n"
            for material in resources_collection.find({"session_id": session_id})
            if 'text_content' in material
        )
        pre_class_excerpt = pre_class_content[:_PRE_CLASS_CHAR_LIMIT]

        # Default rubric (can be customized later)
        default_rubric = """
        1. Content Understanding (1-4):
        - Demonstrates comprehensive understanding of core concepts
        - Accurately applies relevant theories and principles
        - Provides specific examples and evidence

        2. Critical Analysis (1-4):
        - Shows depth of analysis
        - Makes meaningful connections
        - Demonstrates original thinking

        3. Organization & Clarity (1-4):
        - Clear structure and flow
        - Well-developed arguments
        - Effective use of examples
        """

        # Named distinctly so it does not shadow the module-level MongoDB client.
        openai_client = OpenAI(api_key=os.getenv('OPENAI_KEY'))

        evaluations = []
        # zip() stops at the shorter sequence, so unanswered trailing
        # questions are simply not evaluated.
        question_answer_pairs = zip(test['questions'], submission['answers'])
        for number, (question, answer) in enumerate(question_answer_pairs, start=1):
            prompt = _build_evaluation_prompt(
                question['question'], answer, pre_class_excerpt, default_rubric
            )

            response = openai_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=500,
                temperature=0.4
            )

            evaluations.append({
                "question_number": number,
                "question": question['question'],
                "answer": answer,
                "evaluation": response.choices[0].message.content
            })

        evaluation_doc = {
            "test_id": test_id,
            "student_id": student_id,
            "session_id": session_id,
            "evaluations": evaluations,
            "evaluated_at": datetime.utcnow()
        }

        # Upsert keyed on (test, student, session): a regenerated evaluation
        # overwrites the previous one, so readers never pick up a stale copy.
        subjective_test_evaluation_collection.replace_one(
            {
                "test_id": test_id,
                "student_id": student_id,
                "session_id": session_id,
            },
            evaluation_doc,
            upsert=True,
        )
        return evaluation_doc

    except Exception as e:
        # Boundary handler: callers treat None as "evaluation failed".
        print(f"Error in evaluate_subjective_answers: {str(e)}")
        return None
133
+
134
def _submission_label(sub):
    """Return the dropdown label for one submission.

    Falls back to placeholder text when the student record cannot be found
    or the submission has no usable timestamp, so a single incomplete record
    does not prevent the whole page from rendering.
    """
    student = students_collection.find_one({'_id': ObjectId(sub['student_id'])})
    full_name = (student or {}).get('full_name', 'Unknown student')

    submitted_at = sub.get('submitted_at')
    if hasattr(submitted_at, 'strftime'):
        submitted = submitted_at.strftime('%Y-%m-%d %H:%M')
    else:
        submitted = 'No submission date'

    return f"{full_name} (Submitted: {submitted})"


def _render_evaluation_items(evaluations, heading_prefix):
    """Render each per-question evaluation under the given markdown heading."""
    for eval_item in evaluations:
        st.markdown(f"{heading_prefix} {eval_item['question_number']}")
        st.markdown(eval_item['evaluation'])
        st.markdown("---")


def display_evaluation_to_faculty(session_id, student_id, course_id):
    """Render the faculty page for generating and viewing test evaluations.

    The faculty member picks an active test of the session, then one of its
    submissions. The page shows the submitted answers and either the stored
    evaluation (with a "Regenerate" button) or a "Generate" button.

    Args:
        session_id: Session whose active subjective tests are listed.
        student_id: Not read on entry; it is replaced by the student chosen
            in the dropdown. Kept for interface compatibility.
        course_id: Unused; kept for interface compatibility.

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.header("Evaluate Subjective Tests")

    try:
        # Fetch available tests
        tests = list(subjective_tests_collection.find({
            "session_id": str(session_id),
            "status": "active"
        }))
        if not tests:
            st.info("No subjective tests found for this session.")
            return

        # Map human-readable label -> test _id for the dropdown.
        test_options = {
            f"{test['title']} (Created: {test['created_at'].strftime('%Y-%m-%d %H:%M')})" if 'created_at' in test else test['title']: test['_id']
            for test in tests
        }
        selected_test = st.selectbox(
            "Select Test to Evaluate",
            options=list(test_options.keys())
        )
        if not selected_test:
            return

        test_id = test_options[selected_test]
        test = subjective_tests_collection.find_one({"_id": test_id})
        if not test:
            return

        submissions = test.get('submissions', [])
        if not submissions:
            st.warning("No submissions found for this test.")
            return

        # Map human-readable label -> student id for the dropdown.
        student_options = {
            _submission_label(sub): sub['student_id']
            for sub in submissions
        }
        selected_student = st.selectbox(
            "Select Student Submission",
            options=list(student_options.keys())
        )
        if not selected_student:
            return

        student_id = student_options[selected_student]
        submission = next(sub for sub in submissions if sub['student_id'] == student_id)

        st.markdown(f"**Submission Date:** {submission.get('submitted_at', 'No submission date')}")
        st.markdown("---")

        # Display questions and answers
        st.subheader("Submission Details")
        for i, (question, answer) in enumerate(zip(test['questions'], submission['answers'])):
            st.markdown(f"**Question {i+1}:** {question['question']}")
            st.markdown(f"**Answer:** {answer}")
            st.markdown("---")

        # Newest first: if several evaluation documents exist for the same
        # submission, show the most recently generated one.
        existing_eval = subjective_test_evaluation_collection.find_one(
            {
                "test_id": test_id,
                "student_id": student_id,
                "session_id": str(session_id)
            },
            sort=[("evaluated_at", -1)]
        )

        if existing_eval:
            st.subheader("Evaluation Results")
            _render_evaluation_items(existing_eval['evaluations'], "### Evaluation for Question")

            st.success("✓ Evaluation completed")
            if st.button("Regenerate Evaluation", key=f"regenerate_{student_id}_{test_id}"):
                with st.spinner("Regenerating evaluation..."):
                    evaluation = evaluate_subjective_answers(
                        str(session_id),
                        student_id,
                        test_id
                    )
                    if evaluation:
                        st.success("Evaluation regenerated successfully!")
                        # Rerun so the page reloads the stored evaluation.
                        st.rerun()
                    else:
                        st.error("Error regenerating evaluation.")
        else:
            st.subheader("Generate Evaluation")
            if st.button("Generate Evaluation", key=f"evaluate_{student_id}_{test_id}"):
                with st.spinner("Generating evaluation..."):
                    evaluation = evaluate_subjective_answers(
                        str(session_id),
                        student_id,
                        test_id
                    )
                    if evaluation:
                        st.success("Evaluation generated successfully!")
                        st.markdown("### Generated Evaluation")
                        _render_evaluation_items(evaluation['evaluations'], "#### Question")
                        # Rerun so the page switches to the stored-evaluation view.
                        st.rerun()
                    else:
                        st.error("Error generating evaluation.")

    except Exception as e:
        # Page-level boundary: surface the failure in the UI and the log.
        st.error(f"An error occurred while loading the evaluations: {str(e)}")
        print(f"Error in display_evaluation_to_faculty: {str(e)}")
ui.py CHANGED
@@ -1,111 +1,111 @@
1
- import streamlit as st
2
- from streamlit_option_menu import option_menu
3
-
4
-
5
- # Page Configuration
6
- st.set_page_config(page_title="Enhanced Navigation Demo", layout="wide")
7
-
8
- # Top Navigation Bar using option_menu
9
- selected = option_menu(
10
- menu_title=None,
11
- options=["Home", "Documentation", "Examples", "Community", "About"],
12
- icons=["house", "book", "code", "people", "info-circle"],
13
- menu_icon="cast",
14
- default_index=0,
15
- orientation="horizontal",
16
- styles={
17
- "container": {"padding": "0!important", "background-color": "#fafafa"},
18
- "icon": {"color": "orange", "font-size": "25px"},
19
- "nav-link": {
20
- "font-size": "15px",
21
- "text-align": "center",
22
- "margin":"0px",
23
- "--hover-color": "#eee",
24
- },
25
- "nav-link-selected": {"background-color": "#0083B8"},
26
- }
27
- )
28
-
29
- # Sidebar Navigation
30
- with st.sidebar:
31
- st.header("Navigation Menu")
32
-
33
- # Main Menu Items
34
- selected_side = option_menu(
35
- menu_title="Go to",
36
- options=["Dashboard", "Analytics", "Reports", "Settings"],
37
- icons=["speedometer2", "graph-up", "file-text", "gear"],
38
- menu_icon="list",
39
- default_index=0,
40
- )
41
-
42
- # Expandable Reports Section
43
- if selected_side == "Reports":
44
- with st.expander("Reports", expanded=True):
45
- st.button("Weekly Report")
46
- st.button("Monthly Report")
47
- st.button("Annual Report")
48
-
49
- # Main Content Area based on top navigation
50
- if selected == "Home":
51
- st.title("Welcome to Home")
52
- st.write("This is the home page content.")
53
-
54
- # Dashboard Content
55
- st.header("Dashboard")
56
- col1, col2, col3 = st.columns(3)
57
- with col1:
58
- st.metric("Sales", "$12,345", "+2.5%")
59
- with col2:
60
- st.metric("Users", "1,234", "-8%")
61
- with col3:
62
- st.metric("Conversion", "3.2%", "+1.2%")
63
-
64
- elif selected == "Documentation":
65
- st.title("Documentation")
66
- st.write("Documentation content goes here.")
67
-
68
- elif selected == "Examples":
69
- st.title("Examples")
70
- st.write("Example content goes here.")
71
-
72
- elif selected == "Community":
73
- st.title("Community")
74
- st.write("Community content goes here.")
75
-
76
- elif selected == "About":
77
- st.title("About")
78
- st.write("About content goes here.")
79
-
80
- # Content based on sidebar selection
81
- if selected_side == "Analytics":
82
- st.header("Analytics")
83
- st.line_chart({"data": [1, 5, 2, 6, 2, 1]})
84
- elif selected_side == "Settings":
85
- st.header("Settings")
86
- st.toggle("Dark Mode")
87
- st.toggle("Notifications")
88
- st.slider("Volume", 0, 100, 50)
89
-
90
- # Footer
91
- st.markdown(
92
- """
93
- <style>
94
- .footer {
95
- position: fixed;
96
- left: 0;
97
- bottom: 0;
98
- width: 100%;
99
- background-color: #0E1117;
100
- color: white;
101
- text-align: center;
102
- padding: 10px;
103
- font-size: 14px;
104
- }
105
- </style>
106
- <div class='footer'>
107
- © 2024 Your App Name • Privacy Policy • Terms of Service
108
- </div>
109
- """,
110
- unsafe_allow_html=True
111
  )
 
1
+ import streamlit as st
2
+ from streamlit_option_menu import option_menu
3
+
4
+
5
+ # Page Configuration
6
+ st.set_page_config(page_title="Enhanced Navigation Demo", layout="wide")
7
+
8
+ # Top Navigation Bar using option_menu
9
+ selected = option_menu(
10
+ menu_title=None,
11
+ options=["Home", "Documentation", "Examples", "Community", "About"],
12
+ icons=["house", "book", "code", "people", "info-circle"],
13
+ menu_icon="cast",
14
+ default_index=0,
15
+ orientation="horizontal",
16
+ styles={
17
+ "container": {"padding": "0!important", "background-color": "#fafafa"},
18
+ "icon": {"color": "orange", "font-size": "25px"},
19
+ "nav-link": {
20
+ "font-size": "15px",
21
+ "text-align": "center",
22
+ "margin":"0px",
23
+ "--hover-color": "#eee",
24
+ },
25
+ "nav-link-selected": {"background-color": "#0083B8"},
26
+ }
27
+ )
28
+
29
+ # Sidebar Navigation
30
+ with st.sidebar:
31
+ st.header("Navigation Menu")
32
+
33
+ # Main Menu Items
34
+ selected_side = option_menu(
35
+ menu_title="Go to",
36
+ options=["Dashboard", "Analytics", "Reports", "Settings"],
37
+ icons=["speedometer2", "graph-up", "file-text", "gear"],
38
+ menu_icon="list",
39
+ default_index=0,
40
+ )
41
+
42
+ # Expandable Reports Section
43
+ if selected_side == "Reports":
44
+ with st.expander("Reports", expanded=True):
45
+ st.button("Weekly Report")
46
+ st.button("Monthly Report")
47
+ st.button("Annual Report")
48
+
49
+ # Main Content Area based on top navigation
50
+ if selected == "Home":
51
+ st.title("Welcome to Home")
52
+ st.write("This is the home page content.")
53
+
54
+ # Dashboard Content
55
+ st.header("Dashboard")
56
+ col1, col2, col3 = st.columns(3)
57
+ with col1:
58
+ st.metric("Sales", "$12,345", "+2.5%")
59
+ with col2:
60
+ st.metric("Users", "1,234", "-8%")
61
+ with col3:
62
+ st.metric("Conversion", "3.2%", "+1.2%")
63
+
64
+ elif selected == "Documentation":
65
+ st.title("Documentation")
66
+ st.write("Documentation content goes here.")
67
+
68
+ elif selected == "Examples":
69
+ st.title("Examples")
70
+ st.write("Example content goes here.")
71
+
72
+ elif selected == "Community":
73
+ st.title("Community")
74
+ st.write("Community content goes here.")
75
+
76
+ elif selected == "About":
77
+ st.title("About")
78
+ st.write("About content goes here.")
79
+
80
+ # Content based on sidebar selection
81
+ if selected_side == "Analytics":
82
+ st.header("Analytics")
83
+ st.line_chart({"data": [1, 5, 2, 6, 2, 1]})
84
+ elif selected_side == "Settings":
85
+ st.header("Settings")
86
+ st.toggle("Dark Mode")
87
+ st.toggle("Notifications")
88
+ st.slider("Volume", 0, 100, 50)
89
+
90
+ # Footer
91
+ st.markdown(
92
+ """
93
+ <style>
94
+ .footer {
95
+ position: fixed;
96
+ left: 0;
97
+ bottom: 0;
98
+ width: 100%;
99
+ background-color: #0E1117;
100
+ color: white;
101
+ text-align: center;
102
+ padding: 10px;
103
+ font-size: 14px;
104
+ }
105
+ </style>
106
+ <div class='footer'>
107
+ © 2024 Your App Name • Privacy Policy • Terms of Service
108
+ </div>
109
+ """,
110
+ unsafe_allow_html=True
111
  )