Spaces:
Build error
Build error
Upload 38 files
Browse files- .gitignore +21 -21
- Columns.py +247 -0
- Research Paper Attributes.txt +98 -0
- analytics.py +97 -97
- app.py +0 -0
- assignment_evaluation.py +261 -0
- chatbot.py +66 -66
- create_course.py +271 -271
- create_course2.py +330 -330
- create_course3.py +609 -0
- db.py +696 -696
- entire_download.py +90 -0
- extract.py +140 -0
- file_upload_vectorize.py +178 -178
- gen_mcqs.py +205 -205
- goals2.py +658 -658
- infranew.py +231 -0
- keywords_database_download.py +104 -0
- live_polls.py +114 -114
- loldude.py +135 -0
- modify_schema.py +221 -221
- new_keywords.py +127 -0
- new_research_paper.py +103 -0
- poll_db_operations.py +69 -69
- poll_db_setup.py +34 -34
- pre_class_analytics2.py +758 -758
- pre_class_analytics4.py +591 -591
- requirements.txt +36 -31
- research22.py +517 -0
- research3.py +110 -0
- research_assistant_dashboard.py +349 -342
- research_combine.py +188 -0
- research_combine2.py +269 -0
- rubrics.py +112 -0
- sciclone.py +466 -0
- session_page.py +0 -0
- subjective_test_evaluation.py +247 -0
- ui.py +110 -110
.gitignore
CHANGED
@@ -1,22 +1,22 @@
|
|
1 |
-
# Ignore .env file
|
2 |
-
.env
|
3 |
-
__pycache__/
|
4 |
-
newenv
|
5 |
-
backupgoal.py
|
6 |
-
backupgoal2.py
|
7 |
-
backupresearch.py
|
8 |
-
goals.py
|
9 |
-
goals3.py
|
10 |
-
research_assistant_dashboard2.py
|
11 |
-
tempCodeRunnerFile.py
|
12 |
-
all_chat_histories.json
|
13 |
-
all_chat_histories2.json
|
14 |
-
analytics.ipynb
|
15 |
-
chat_history.csv
|
16 |
-
harshal.py
|
17 |
-
course_creation.py
|
18 |
-
topics.json
|
19 |
-
new_analytics.json
|
20 |
-
new_analytics2.json
|
21 |
-
pre_class_analytics.py
|
22 |
sample_files/
|
|
|
1 |
+
# Ignore .env file
|
2 |
+
.env
|
3 |
+
__pycache__/
|
4 |
+
newenv
|
5 |
+
backupgoal.py
|
6 |
+
backupgoal2.py
|
7 |
+
backupresearch.py
|
8 |
+
goals.py
|
9 |
+
goals3.py
|
10 |
+
research_assistant_dashboard2.py
|
11 |
+
tempCodeRunnerFile.py
|
12 |
+
all_chat_histories.json
|
13 |
+
all_chat_histories2.json
|
14 |
+
analytics.ipynb
|
15 |
+
chat_history.csv
|
16 |
+
harshal.py
|
17 |
+
course_creation.py
|
18 |
+
topics.json
|
19 |
+
new_analytics.json
|
20 |
+
new_analytics2.json
|
21 |
+
pre_class_analytics.py
|
22 |
sample_files/
|
Columns.py
ADDED
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import streamlit as st
|
3 |
+
import pandas as pd
|
4 |
+
import PyPDF2
|
5 |
+
import io
|
6 |
+
import os
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
import requests
|
9 |
+
import time
|
10 |
+
from mistralai import Mistral
|
11 |
+
from typing import List, Dict
|
12 |
+
from fpdf import FPDF
|
13 |
+
|
14 |
+
load_dotenv()
|
15 |
+
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
|
16 |
+
MISTRAL_API_URL = "https://api.mistral.ai/v1/completions"
|
17 |
+
|
18 |
+
# Initialize the Mistral client
|
19 |
+
client = Mistral(api_key=MISTRAL_API_KEY)
|
20 |
+
|
21 |
+
def call_mistral_api(prompt: str) -> str:
    """Send ``prompt`` to the Mistral chat endpoint and return the reply text.

    Args:
        prompt: The user message. It is sent after a fixed
            "You are a helpful assistant." system message.

    Returns:
        The content of the first returned choice, or ``""`` when the call
        raises or the reply carries no text. Errors are printed, not re-raised,
        so callers can always concatenate the result.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    try:
        # No tools are offered, so `tools`/`tool_choice` are not sent at all:
        # tool_choice="any" asks the model to produce a tool call, which is
        # contradictory with an empty tool list and yields no text content.
        response = client.chat.complete(
            model="mistral-large-latest",
            messages=messages,
        )
        # `content` may be None (e.g. a tool-call style reply); keep the
        # declared `str` return type so `review + "\n\n"` in callers is safe.
        return response.choices[0].message.content or ""
    except Exception as e:
        print(f"API Error: {str(e)}")
        return ""
|
41 |
+
|
42 |
+
def process_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Run a two-pass Mistral analysis over every column from the third onward.

    For each such column, the non-null cells are flattened into one string in
    which every cell is prefixed with that row's first- and second-column
    values. The string is sent once for a trend analysis and a second time,
    together with the first answer, for a fact-checking pass.

    Args:
        df: Input table; the first two columns label each row, the remaining
            columns are analysed one by one.

    Returns:
        A DataFrame with one row per analysed column and the columns
        ``"Column"`` and ``"Result"`` (the fact-checked answer).
    """
    print("Processing DataFrame...")
    analysis_rows = []

    for column in df.columns[2:]:
        print(f"Processing column: {column}")
        first_label = df.columns[0]
        second_label = df.columns[1]

        # iterrows() is kept on purpose: it decides how the label values are
        # rendered (row-wise dtype), and the prompt text must not change.
        snippets = []
        for _, record in df.iterrows():
            cell = record[column]
            if pd.notna(cell):
                snippets.append(
                    f"Column1-{record[first_label]}, Column2-{record[second_label]}, {cell}"
                )
        text = " ".join(snippets)

        # First pass: trend / pattern analysis of the column.
        prompt = f"You are a Professional Researcher and Analyser with 10 yrs of Experience. Find details and Elaborate on Top Trends,Patterns ,Highlight Theories and Method in this topic.Support your answer with rightful evidence of corresponding DOI/SrNo and Frequency(how many times same topic repeated and in which papers):Make sure to limit the answer within 400 words ({column}):\n\n{text}"
        result1 = call_mistral_api(prompt)

        # Second pass: ask the model to fact-check its own answer against the data.
        prompt1 = f"""This result was the reponse of an earlier prompt Result -{result1}, Fact check the result with my original data -({column}):\n\n{text}. Return the refined Result(after careful fact checking and finding adequate evidence within the original data) , Make sure the meaning/structure of the Result doesnt change,only false/low evidence statements get eliminated.Limit the response to 400 words.MAKE SURE THERE IS NO CONTEXT CHANGE AND MEANING REMAINS SAME JUST WITH GOOD EVIDENCE AND REFINED RESULT. """
        refined = call_mistral_api(prompt1)

        analysis_rows.append({"Column": column, "Result": refined})

    results_df = pd.DataFrame(analysis_rows)
    print("DataFrame processing complete.")
    return results_df
|
72 |
+
|
73 |
+
def split_dataframe(df: pd.DataFrame, max_rows: int = 52) -> List[pd.DataFrame]:
    """Split ``df`` into consecutive chunks of at most ``max_rows`` rows.

    Args:
        df: The DataFrame to split.
        max_rows: Maximum number of data rows per chunk; must be positive.

    Returns:
        The chunks in original row order, each with its index reset to
        ``0..len(chunk)-1``. An empty ``df`` yields an empty list.

    Raises:
        ValueError: If ``max_rows`` is zero or negative. Previously zero
            raised ``ZeroDivisionError`` and negatives silently returned ``[]``.
    """
    if max_rows <= 0:
        raise ValueError(f"max_rows must be a positive integer, got {max_rows}")

    print("Splitting DataFrame...")
    # Stepping the start offset by max_rows gives ceil(len(df) / max_rows) chunks.
    split_dfs = [
        df.iloc[start:start + max_rows].reset_index(drop=True)
        for start in range(0, len(df), max_rows)
    ]
    print(f"DataFrame split into {len(split_dfs)} parts.")
    return split_dfs
|
92 |
+
|
93 |
+
def generate_professional_review(df1: pd.DataFrame) -> str:
    """Ask Mistral for a short, evidence-backed review of a results table.

    The table is rendered with ``DataFrame.to_string(index=True)`` and appended
    to a fixed instruction prompt (literature review, trends, TCM-ADO, gaps,
    theories and frameworks, limited to 500 words).

    Args:
        df1: The table to analyse.

    Returns:
        The text returned by ``call_mistral_api`` for that prompt.
    """
    print("Generating professional review...")

    # Plain-text rendering of the table, including the index column.
    table_text = df1.to_string(index=True)

    review_prompt = f"""Generate a professional literature review, trends analysis, TCM ADO (Theories,Context,Method ,Ancedents,Decisions,Outcomes), gaps, theories, and frameworks
based on the following data , If you find evidence as proper DOI make sure you analyze the whole
table with more DOI,Serial No and find more evidence.Always give supporting evidence for your literature review,TCM ADO analysis,trends ,frameworks,
check DOIs and find more evidence as inference again.Make sure the review is as professional as possible.Limit the answer to 500 words and only highlight the most imp trends with supporting evidence of DOI/SrNo and frequency(how many papers used that and top 2 DOI of that),Limit it to 500 words.Make sure all important details/frequently repeating trends/methods are highlighted.:\n\n{table_text}."""

    review = call_mistral_api(review_prompt)
    print("Professional review generated.")
    return review
|
123 |
+
|
124 |
+
|
125 |
+
def main():
    """Streamlit entry point for the research-corpus synthesis tool.

    Flow: show a logout button and a CSV uploader; once "Process CSV" is
    pressed, read the CSV, split it into parts of at most 52 rows, run the
    per-column analysis and a review for each part, display each review, and
    finally ask the model for one consolidated literature review built from
    all part reviews.
    """
    st.title("Research Corpus Synthesis Tool")

    # Logout button: wipe the session and restart the script.
    if st.button("Logout", use_container_width=True):
        for key in st.session_state.keys():
            del st.session_state[key]
        st.rerun()

    uploaded_file = st.file_uploader("Upload CSV file", type="csv")

    # Nothing to do until a file is uploaded and processing is requested.
    if not uploaded_file:
        return
    if not st.button("Process CSV"):
        return

    print("CSV file uploaded.")
    progress_bar = st.progress(0)
    status_text = st.empty()

    df = pd.read_csv(uploaded_file)
    print("CSV file read into DataFrame.")

    split_dfs = split_dataframe(df, max_rows=52)

    # Collected per-part reviews; joined once at the end.
    reviews = []

    for i, split_df in enumerate(split_dfs):
        status_text.text(f"Processing part {i + 1} of {len(split_dfs)}")
        print(f"Processing part {i + 1} of {len(split_dfs)}")

        processed_df = process_dataframe(split_df)
        review = generate_professional_review(processed_df)
        reviews.append(review + "\n\n")

        progress_bar.progress((i + 1) / len(split_dfs))
        st.write(i)
        st.write(review)

    concatenated_reviews = "".join(reviews)

    # The instructions alone give the model nothing to analyse, so the
    # collected reviews are appended to the prompt as its context.
    final_prompt = _build_final_prompt(concatenated_reviews)

    final_result = call_mistral_api(final_prompt)
    print("Final analysis generated.")

    st.subheader("Final Analysis")
    st.write(final_result)

    status_text.text("Processing complete!")
    progress_bar.progress(1.0)
    print("Processing complete.")


def _build_final_prompt(concatenated_reviews: str) -> str:
    """Return the final-synthesis instructions followed by the part reviews."""
    instructions = """
Given is a consolidated research review of a huge number of research papers (evidence is DOI, Serial No). Perform this:
Given as a context is a table of analyzing trends/frameworks analysis of a huge corpus of papers specific to the columns.
Analyze the table properly and create a professional and accurate literature review (Ensure to cite DOI as evidence).

Subheadings for Literature Review :
1. Introduction
○ Overview of the main topic or concept.
○ Key research questions or objectives.
2. Theoretical Foundations
○ Exploration of dominant theories related to the topic.
○ Domain-specific theoretical applications.
3. Contextual Analysis
○ Geographic contexts and challenges.
○ Sectoral applications and digital infrastructure readiness.
4. Methodological Approaches
○ Qualitative, quantitative, and mixed-methods approaches used in research.
5. Discussion and Future Research
○ Current challenges and limitations.
○ Potential areas for future study.
6. Conclusion
○ Summary of findings.
○ Implications and future directions.

TCM-ADO Framework in Research Analysis and Literature Review:
Theory
Theoretical foundations driving the research.
● Focus on identifying and analyzing the conceptual models or frameworks that underpin the study.
● Establish the intellectual basis and rationale for the research direction.
Context
Situational and environmental factors shaping the research.
● Emphasis on geographic, sectoral, cultural, and infrastructural dimensions influencing the implementation or findings.
● Examples include urban versus rural settings, digital infrastructure readiness, or policy landscapes.
● Objective: To understand how external conditions impact the dynamics and applicability of the research.
Method
Research methodologies and analytical approaches utilized.
● Covers the selection of qualitative, quantitative, or mixed-method approaches, along with tools and techniques employed.
● Objective: To ensure methodological rigor and the validity of findings.
Antecedents
Pre-existing conditions enabling or constraining research or implementation.
● Includes factors such as technological infrastructure, stakeholder preparedness, and
regulatory frameworks.
● To identify critical prerequisites that influence the starting point of the research or
initiative.
Decisions
Strategic choices made throughout the implementation or research process.
● Involves critical decision points in areas like technology adoption, governance
frameworks, and operational strategies.
● analyze how informed decision-making shapes the trajectory and success of the project.
Outcomes
Results and impacts observed as a consequence of the initiative or study.
● Evaluates direct and indirect contributions to the research objectives or broader societal
goals.
● assess the effectiveness and long-term implications of the research or project outcomes.
"""
    return f"{instructions}\nConsolidated reviews to analyze:\n\n{concatenated_reviews}"
|
245 |
+
|
246 |
+
if __name__ == "__main__":
|
247 |
+
main()
|
Research Paper Attributes.txt
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Review Based Paper
|
2 |
+
Title TEXT,
|
3 |
+
Publication TEXT,
|
4 |
+
Journal_Conference TEXT,
|
5 |
+
Abstract TEXT,
|
6 |
+
Keywords TEXT,
|
7 |
+
Author TEXT
|
8 |
+
Date_of_Publication TEXT,
|
9 |
+
Intro TEXT,
|
10 |
+
Literature_Review TEXT,
|
11 |
+
Body: TEXT
|
12 |
+
Protocol: TEXT
|
13 |
+
Search String: TEXT
|
14 |
+
Included Studies: TEXT
|
15 |
+
Data Collection and Analysis Methods: TEXT
|
16 |
+
Data Extraction Table: TEXT
|
17 |
+
Synthesis and Analysis: TEXT
|
18 |
+
Conclusion
|
19 |
+
Limitations
|
20 |
+
Results
|
21 |
+
References
|
22 |
+
|
23 |
+
Risk of Bias Assessment:
Opinion/Perspective Based Paper
|
24 |
+
Title TEXT,
|
25 |
+
Publication TEXT,
|
26 |
+
Journal_Conference TEXT,
|
27 |
+
Abstract TEXT,
|
28 |
+
Keywords TEXT,
|
29 |
+
Author TEXT,
|
30 |
+
Date_of_Publication TEXT,
|
31 |
+
Intro TEXT,
|
32 |
+
Literature_Review TEXT
|
33 |
+
Introduction: TEXT
|
34 |
+
Body: TEXT
|
35 |
+
Results and Discussion:TEXT
|
36 |
+
Conclusion: TEXT
|
37 |
+
References: TEXT
|
38 |
+
|
39 |
+
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
|
45 |
+
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
|
50 |
+
|
51 |
+
|
52 |
+
|
53 |
+
|
54 |
+
Empirical Research Paper
|
55 |
+
Title TEXT,
|
56 |
+
Publication TEXT,
|
57 |
+
Journal_Conference TEXT,
|
58 |
+
Abstract TEXT,
|
59 |
+
Keywords TEXT,
|
60 |
+
Author TEXT,
|
61 |
+
Date_of_Publication TEXT,
|
62 |
+
Intro TEXT,
|
63 |
+
Literature_Review TEXT
|
64 |
+
Introduction: TEXT
|
65 |
+
Body: TEXT
|
66 |
+
Methodology: TEXT
|
67 |
+
Participants: TEXT - Describes the sample and the sampling methods used.
|
68 |
+
Survey Instrument: TEXT - Describes the design and development of the survey questionnaire.
|
69 |
+
Data Collection: TEXT - Explains how the survey data was collected.
|
70 |
+
Data Analysis: TEXT - Details the statistical techniques used to analyze the data.
|
71 |
+
|
72 |
+
|
73 |
+
Results and Discussion:TEXT
|
74 |
+
Conclusion: TEXT
|
75 |
+
References: TEXT
|
76 |
+
Research Paper (Other)
|
77 |
+
Title TEXT,
|
78 |
+
Publication TEXT,
|
79 |
+
Journal_Conference TEXT,
|
80 |
+
Abstract TEXT,
|
81 |
+
Keywords TEXT,
|
82 |
+
Author TEXT,
|
83 |
+
Date_of_Publication TEXT,
|
84 |
+
Intro TEXT,
|
85 |
+
Literature_Review TEXT,
|
86 |
+
Research_Models_Used TEXT,
|
87 |
+
Methodology TEXT,
|
88 |
+
Discussion TEXT,
|
89 |
+
Future_Scope TEXT,
|
90 |
+
Theory TEXT,
|
91 |
+
Independent_Variables TEXT,
|
92 |
+
nof_Independent_Variables INTEGER,
|
93 |
+
Dependent_Variables TEXT,
|
94 |
+
nof_Dependent_Variables INTEGER,
|
95 |
+
Control_Variables TEXT,
|
96 |
+
Extraneous_Variables TEXT,
|
97 |
+
nof_Control_Variables INTEGER,
|
98 |
+
nof_Extraneous_Variables INTEGER
|
analytics.py
CHANGED
@@ -1,97 +1,97 @@
|
|
1 |
-
import os
|
2 |
-
import pandas as pd
|
3 |
-
import numpy as np
|
4 |
-
from numpy.linalg import norm
|
5 |
-
from pymongo import MongoClient
|
6 |
-
import openai
|
7 |
-
from openai import OpenAI
|
8 |
-
import streamlit as st
|
9 |
-
from datetime import datetime
|
10 |
-
|
11 |
-
# MongoDB connection
|
12 |
-
MONGO_URI = os.getenv('MONGO_URI')
|
13 |
-
|
14 |
-
client = MongoClient(MONGO_URI)
|
15 |
-
db = client['digital_nova']
|
16 |
-
themes_collection = db['themes']
|
17 |
-
corpus_collection = db['corpus']
|
18 |
-
vectors_collection = db['vectors'] # Reference to 'vectors' collection
|
19 |
-
users_collection = db['users']
|
20 |
-
|
21 |
-
# Function to create embeddings
|
22 |
-
def create_embeddings(text, openai_api_key):
|
23 |
-
client = OpenAI(api_key=openai_api_key)
|
24 |
-
response = client.embeddings.create(
|
25 |
-
input=text,
|
26 |
-
model="text-embedding-3-small"
|
27 |
-
)
|
28 |
-
return response.data[0].embedding
|
29 |
-
|
30 |
-
# Function to calculate cosine similarity
|
31 |
-
def cosine_similarity(v1, v2):
|
32 |
-
v1 = np.array(v1)
|
33 |
-
v2 = np.array(v2)
|
34 |
-
dot_product = np.dot(v1, v2)
|
35 |
-
norm_product = norm(v1) * norm(v2)
|
36 |
-
return dot_product / norm_product if norm_product != 0 else 0
|
37 |
-
|
38 |
-
def derive_analytics(goal, reference_text, openai_api_key, context=None, synoptic=None):
|
39 |
-
"""
|
40 |
-
Analyze subjective answers with respect to pre-class materials and synoptic, and provide detailed feedback
|
41 |
-
|
42 |
-
Args:
|
43 |
-
goal (str): Analysis objective
|
44 |
-
reference_text (str): Student's answer text
|
45 |
-
openai_api_key (str): OpenAI API key
|
46 |
-
context (str, optional): Pre-class material content for comparison
|
47 |
-
synoptic (str, optional): Synoptic content for evaluation
|
48 |
-
"""
|
49 |
-
template = f"""Given a student's answer to a subjective question, analyze it following these specific guidelines. Compare it with the provided pre-class materials and synoptic (if available) to assess correctness and completeness.
|
50 |
-
|
51 |
-
1. Analyze the text as an experienced educational assessor, considering:
|
52 |
-
- Conceptual understanding
|
53 |
-
- Factual accuracy
|
54 |
-
- Completeness of response
|
55 |
-
- Use of relevant terminology
|
56 |
-
- Application of concepts
|
57 |
-
|
58 |
-
2. Structure the output in markdown with two sections:
|
59 |
-
|
60 |
-
**Correctness Assessment**
|
61 |
-
- Rate overall correctness on a scale of 1-10
|
62 |
-
|
63 |
-
**Evidence-Based Feedback**
|
64 |
-
- Provide specific evidence from the student's answer to justify the score reduction
|
65 |
-
- Highlight the exact lines or phrases that need improvement
|
66 |
-
|
67 |
-
Pre-class Materials Context:
|
68 |
-
{context if context else "No reference materials provided"}
|
69 |
-
|
70 |
-
Synoptic:
|
71 |
-
{synoptic if synoptic else "No synoptic provided"}
|
72 |
-
|
73 |
-
Student's Answer:
|
74 |
-
{reference_text}
|
75 |
-
|
76 |
-
Rules:
|
77 |
-
- Base assessment strictly on provided content
|
78 |
-
- Be specific in feedback and suggestions
|
79 |
-
"""
|
80 |
-
|
81 |
-
# Initialize OpenAI client
|
82 |
-
client = OpenAI(api_key=openai_api_key)
|
83 |
-
|
84 |
-
try:
|
85 |
-
response = client.chat.completions.create(
|
86 |
-
model="gpt-4-0125-preview",
|
87 |
-
messages=[
|
88 |
-
{"role": "system", "content": "You are an educational assessment expert."},
|
89 |
-
{"role": "user", "content": template}
|
90 |
-
],
|
91 |
-
temperature=0.7
|
92 |
-
)
|
93 |
-
analysis = response.choices[0].message.content
|
94 |
-
return analysis
|
95 |
-
except Exception as e:
|
96 |
-
print(f"Error in generating analysis with OpenAI: {str(e)}")
|
97 |
-
return "Error generating analysis"
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from numpy.linalg import norm
|
5 |
+
from pymongo import MongoClient
|
6 |
+
import openai
|
7 |
+
from openai import OpenAI
|
8 |
+
import streamlit as st
|
9 |
+
from datetime import datetime
|
10 |
+
|
11 |
+
# MongoDB connection
|
12 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
13 |
+
|
14 |
+
client = MongoClient(MONGO_URI)
|
15 |
+
db = client['digital_nova']
|
16 |
+
themes_collection = db['themes']
|
17 |
+
corpus_collection = db['corpus']
|
18 |
+
vectors_collection = db['vectors'] # Reference to 'vectors' collection
|
19 |
+
users_collection = db['users']
|
20 |
+
|
21 |
+
# Function to create embeddings
|
22 |
+
def create_embeddings(text, openai_api_key):
    """Return the OpenAI ``text-embedding-3-small`` embedding for ``text``.

    Args:
        text: Input passed as ``input`` to the embeddings endpoint.
        openai_api_key: API key used to build a fresh ``OpenAI`` client.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    embedding_client = OpenAI(api_key=openai_api_key)
    result = embedding_client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    first_item = result.data[0]
    return first_item.embedding
|
29 |
+
|
30 |
+
# Function to calculate cosine similarity
|
31 |
+
def cosine_similarity(v1, v2):
    """Return the cosine of the angle between two vectors.

    Args:
        v1: First vector (any sequence accepted by ``np.array``).
        v2: Second vector, same length as ``v1``.

    Returns:
        ``dot(v1, v2) / (|v1| * |v2|)``, or ``0`` when either vector has
        zero length (the product of norms is zero).
    """
    a = np.array(v1)
    b = np.array(v2)
    numerator = np.dot(a, b)
    denominator = norm(a) * norm(b)
    # Guard against division by zero for all-zero vectors.
    if denominator == 0:
        return 0
    return numerator / denominator
|
37 |
+
|
38 |
+
def derive_analytics(goal, reference_text, openai_api_key, context=None, synoptic=None):
    """Ask an OpenAI chat model to assess a student's subjective answer.

    The prompt embeds the student's answer plus, when given, the pre-class
    material and the synoptic; a fixed placeholder sentence is used for
    whichever of the two is missing or empty.

    Args:
        goal (str): Analysis objective. Accepted but not used in the prompt.
        reference_text (str): Student's answer text.
        openai_api_key (str): OpenAI API key.
        context (str, optional): Pre-class material content for comparison.
        synoptic (str, optional): Synoptic content for evaluation.

    Returns:
        str: The model's markdown assessment, or the literal
        ``"Error generating analysis"`` if the chat call raises.
    """
    # Fall back to placeholder text when no material / synoptic is supplied.
    materials_section = context or "No reference materials provided"
    synoptic_section = synoptic or "No synoptic provided"

    template = f"""Given a student's answer to a subjective question, analyze it following these specific guidelines. Compare it with the provided pre-class materials and synoptic (if available) to assess correctness and completeness.

1. Analyze the text as an experienced educational assessor, considering:
- Conceptual understanding
- Factual accuracy
- Completeness of response
- Use of relevant terminology
- Application of concepts

2. Structure the output in markdown with two sections:

**Correctness Assessment**
- Rate overall correctness on a scale of 1-10

**Evidence-Based Feedback**
- Provide specific evidence from the student's answer to justify the score reduction
- Highlight the exact lines or phrases that need improvement

Pre-class Materials Context:
{materials_section}

Synoptic:
{synoptic_section}

Student's Answer:
{reference_text}

Rules:
- Base assessment strictly on provided content
- Be specific in feedback and suggestions
"""

    chat_messages = [
        {"role": "system", "content": "You are an educational assessment expert."},
        {"role": "user", "content": template},
    ]

    # The client is built outside the try block, so construction errors propagate.
    client = OpenAI(api_key=openai_api_key)

    try:
        completion = client.chat.completions.create(
            model="gpt-4-0125-preview",
            messages=chat_messages,
            temperature=0.7,
        )
        return completion.choices[0].message.content
    except Exception as e:
        print(f"Error in generating analysis with OpenAI: {str(e)}")
        return "Error generating analysis"
|
app.py
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
assignment_evaluation.py
ADDED
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# assignment_evaluation.py
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
from datetime import datetime
|
5 |
+
from pymongo import MongoClient
|
6 |
+
import os
|
7 |
+
from openai import OpenAI
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
from bson import ObjectId
|
10 |
+
|
11 |
+
load_dotenv()
|
12 |
+
|
13 |
+
# MongoDB setup
|
14 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
15 |
+
client = MongoClient(MONGO_URI)
|
16 |
+
db = client["novascholar_db"]
|
17 |
+
assignments_collection = db["assignments"]
|
18 |
+
assignment_evaluation_collection = db["assignment_evaluation"]
|
19 |
+
resources_collection = db["resources"]
|
20 |
+
students_collection = db["students"]
|
21 |
+
|
22 |
+
def evaluate_assignment(session_id, student_id, assignment_id):
    """Grade one student's submission with an OpenAI model and store the result.

    Looks up the assignment by ``_id``, picks the submission whose
    ``student_id`` equals ``str(student_id)``, sends it with a fixed
    three-criterion rubric to the chat model, and inserts the reply into
    ``assignment_evaluation_collection``.

    Args:
        session_id: Session identifier, stored with the evaluation.
        student_id: Student identifier; compared as a string to each submission.
        assignment_id: ``_id`` of the assignment document.

    Returns:
        The inserted evaluation document, or ``None`` when the assignment or
        the submission is missing, or when any step raises (the error is printed).
    """
    try:
        assignment = assignments_collection.find_one({"_id": assignment_id})
        if not assignment:
            return None

        # Locate this student's submission (first match wins).
        wanted_student = str(student_id)
        submission = None
        for candidate in assignment.get('submissions', []):
            if candidate['student_id'] == wanted_student:
                submission = candidate
                break
        if not submission:
            return None

        # Default rubric for assignment evaluation
        default_rubric = """
1. Understanding & Implementation (1-4):
- Demonstrates understanding of assignment requirements
- Implements required components correctly
- Shows attention to detail

2. Quality & Completeness (1-4):
- Work is complete and thorough
- Meets all assignment objectives
- Shows evidence of effort and care

3. Presentation & Organization (1-4):
- Clear and professional presentation
- Well-structured and organized
- Follows required format and guidelines
"""

        # Local OpenAI client; the module-level Mongo `client` is not affected.
        client = OpenAI(api_key=os.getenv('OPENAI_KEY'))

        prompt_template = f"""As an assignment evaluator, assess this student's submission based on the provided rubric criteria. Follow these guidelines:

1. Evaluation Process:
- Use each rubric criterion (scored 1-4)
- Evaluate completeness and quality
- Check alignment with assignment requirements
- Calculate final score: sum of criteria scores converted to 10-point scale

Assignment Title: {assignment['title']}
Due Date: {assignment['due_date']}

Submission Content:
{submission.get('text_content', 'No text content available')}

Rubric Criteria:
{default_rubric}

Provide your assessment in the following format:

**Overall Score and Summary**
- Score: [X]/10
- Overall Assessment: [2-3 sentence summary]

**Strengths**
- [Key strength 1]
- [Key strength 2]
- [Key strength 3]

**Areas for Improvement**
- [Improvement point 1]
- [Improvement point 2]
- [Improvement point 3]

**Specific Recommendations**
[2-3 sentences with actionable feedback]
"""

        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt_template}],
            max_tokens=1000,
            temperature=0.4,
        )
        evaluation_text = completion.choices[0].message.content

        # Persist the evaluation alongside the identifiers it belongs to.
        evaluation_doc = {
            "assignment_id": assignment_id,
            "student_id": student_id,
            "session_id": session_id,
            "evaluation": evaluation_text,
            "evaluated_at": datetime.utcnow(),
        }
        assignment_evaluation_collection.insert_one(evaluation_doc)
        return evaluation_doc

    except Exception as e:
        print(f"Error in evaluate_assignment: {str(e)}")
        return None
|
123 |
+
|
124 |
+
def display_evaluation_to_faculty(session_id, student_id, course_id):
    """
    Display interface for faculty to generate and view assignment evaluations.

    Renders, in order: an assignment picker, a submission picker, the chosen
    submission's details (with a download button when file content is
    stored), and then either the stored evaluation with a "Regenerate" button
    or a "Generate" button when no evaluation exists yet.

    Args:
        session_id: Session identifier; converted to ``str`` for every query.
        student_id: Not read by this function. The student is taken from the
            submission dropdown; the parameter is kept so existing callers
            continue to work.
        course_id: Course identifier used to filter the assignments.

    Returns:
        None. All output goes to the Streamlit page; failures are shown with
        ``st.error`` and printed.
    """
    st.header("Evaluate Assignments")

    try:
        # Fetch available assignments
        assignments = list(assignments_collection.find({
            "session_id": str(session_id),
            "course_id": course_id
        }))

        if not assignments:
            st.info("No assignments found for this session.")
            return

        # Map a human-readable label to each assignment's _id
        assignment_options = {
            (
                f"{assignment['title']} (Due: {assignment['due_date'].strftime('%Y-%m-%d')})"
                if 'due_date' in assignment
                else assignment['title']
            ): assignment['_id']
            for assignment in assignments
        }

        selected_assignment = st.selectbox(
            "Select Assignment to Evaluate",
            options=list(assignment_options.keys())
        )
        if not selected_assignment:
            return

        assignment_id = assignment_options[selected_assignment]
        assignment = assignments_collection.find_one({"_id": assignment_id})
        if not assignment:
            return

        submissions = assignment.get('submissions', [])
        if not submissions:
            st.warning("No submissions found for this assignment.")
            return

        # Create a dropdown for student submissions
        student_options = {}
        for sub in submissions:
            student = students_collection.find_one({'_id': ObjectId(sub['student_id'])})
            # The student record may be missing; fall back to the raw id
            # instead of failing the whole page on a None lookup.
            if student and student.get('full_name'):
                student_name = student['full_name']
            else:
                student_name = f"Unknown student ({sub['student_id']})"
            submitted = sub['submitted_at'].strftime('%Y-%m-%d %H:%M')
            student_options[f"{student_name} (Submitted: {submitted})"] = sub['student_id']

        selected_student = st.selectbox(
            "Select Student Submission",
            options=list(student_options.keys())
        )
        if not selected_student:
            return

        # Separate name so the (unused) student_id parameter is not shadowed
        chosen_student_id = student_options[selected_student]
        submission = next(sub for sub in submissions if sub['student_id'] == chosen_student_id)

        # Display submission details
        st.subheader("Submission Details")
        st.markdown(f"**Submitted:** {submission['submitted_at'].strftime('%Y-%m-%d %H:%M')}")
        st.markdown(f"**File Name:** {submission['file_name']}")

        # Add download button for submitted file
        if 'file_content' in submission:
            st.download_button(
                label="Download Submission",
                data=submission['file_content'],
                file_name=submission['file_name'],
                mime=submission['file_type']
            )

        # Check for existing evaluation. evaluate_assignment inserts a new
        # document on every run, so sort newest-first; otherwise a regenerated
        # evaluation would never replace the one shown here.
        existing_eval = assignment_evaluation_collection.find_one(
            {
                "assignment_id": assignment_id,
                "student_id": chosen_student_id,
                "session_id": str(session_id)
            },
            sort=[("evaluated_at", -1)]
        )

        if existing_eval:
            st.subheader("Evaluation Results")
            st.markdown(existing_eval['evaluation'])
            st.success("✓ Evaluation completed")

            if st.button("Regenerate Evaluation"):
                with st.spinner("Regenerating evaluation..."):
                    evaluation = evaluate_assignment(
                        str(session_id),
                        chosen_student_id,
                        assignment_id
                    )
                if evaluation:
                    st.success("Evaluation regenerated successfully!")
                    st.rerun()
                else:
                    st.error("Error regenerating evaluation.")
        else:
            if st.button("Generate Evaluation"):
                with st.spinner("Generating evaluation..."):
                    evaluation = evaluate_assignment(
                        str(session_id),
                        chosen_student_id,
                        assignment_id
                    )
                if evaluation:
                    st.success("Evaluation generated successfully!")
                    st.markdown("### Generated Evaluation")
                    st.markdown(evaluation['evaluation'])
                    # The rerun re-enters this function, which then renders
                    # the stored evaluation through the branch above.
                    st.rerun()
                else:
                    st.error("Error generating evaluation.")

    except Exception as e:
        st.error(f"An error occurred while loading the evaluations: {str(e)}")
        print(f"Error in display_evaluation_to_faculty: {str(e)}")
|
235 |
+
|
236 |
+
def display_assignment_results(assignment_id, student_id):
    """
    Display assignment results and analysis for a student.

    Args:
        assignment_id: Identifier of the assignment, matched as given.
        student_id: Identifier of the student; converted to ``str`` for the
            lookup.

    Returns:
        None. Shows an info message when no evaluation is stored, otherwise
        renders the evaluation text and, when present, its timestamp.
    """
    try:
        # Fetch the newest evaluation: evaluate_assignment inserts a new
        # document on every run, so an unsorted lookup could keep returning
        # an outdated one after a regeneration.
        analysis = assignment_evaluation_collection.find_one(
            {
                "assignment_id": assignment_id,
                "student_id": str(student_id)
            },
            sort=[("evaluated_at", -1)]
        )

        if not analysis:
            st.info("Evaluation will be available soon. Please check back later.")
            return

        st.header("Assignment Evaluation")

        # Display evaluation content
        st.markdown(analysis["evaluation"])

        # Display evaluation timestamp; skip it rather than fail the whole
        # view when a document carries no timestamp.
        evaluated_at = analysis.get("evaluated_at")
        if evaluated_at is not None:
            st.caption(f"Evaluation generated on: {evaluated_at.strftime('%Y-%m-%d %H:%M:%S UTC')}")

    except Exception as e:
        st.error("An error occurred while loading the evaluation. Please try again later.")
        print(f"Error in display_assignment_results: {str(e)}")
|
chatbot.py
CHANGED
@@ -1,67 +1,67 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import datetime
|
3 |
-
from db import courses_collection2, faculty_collection, students_collection, vectors_collection, chat_history_collection
|
4 |
-
from PIL import Image
|
5 |
-
from dotenv import load_dotenv
|
6 |
-
import os
|
7 |
-
from datetime import datetime
|
8 |
-
from bson import ObjectId
|
9 |
-
from file_upload_vectorize import model
|
10 |
-
from gen_mcqs import generate_mcqs, quizzes_collection
|
11 |
-
|
12 |
-
load_dotenv()
|
13 |
-
MONGO_URI = os.getenv('MONGO_URI')
|
14 |
-
OPENAI_KEY = os.getenv('OPENAI_KEY')
|
15 |
-
GEMINI_KEY = os.getenv('GEMINI_KEY')
|
16 |
-
|
17 |
-
def insert_chat_message(user_id, session_id, role, content):
    """Append one message to the stored chat history of a user/session pair.

    The history document is matched on ``user_id`` (wrapped in ``ObjectId``)
    and ``session_id``; it is created when absent (upsert). The document's
    own ``timestamp`` field is refreshed on every call.
    """
    entry = {
        "role": role,
        "content": content,
        "timestamp": datetime.utcnow()
    }

    selector = {"user_id": ObjectId(user_id), "session_id": session_id}
    changes = {
        "$push": {"messages": entry},
        "$set": {"timestamp": datetime.utcnow()},
    }
    chat_history_collection.update_one(selector, changes, upsert=True)
|
29 |
-
|
30 |
-
def give_chat_response(user_id, session_id, question, title, description, context):
    """Ask the model to answer ``question`` using the session's details.

    Builds a prompt from the title, description and context, sends it to
    ``model.generate_content`` and, when text comes back, stores the stripped
    answer as an "assistant" message before returning it.

    Returns:
        The stripped answer text, or "No response received from the model"
        when the response or its text is empty.
    """
    prompt = f"""
Based on the following session title, description, and context, answer the user's question in 3-4 lines:

Title: {title}
Description: {description}
Context: {context}

Question: {question}

Please provide a clear and concise answer based on the information provided.
"""

    reply = model.generate_content(prompt)
    if reply and reply.text:
        answer = reply.text.strip()
        # Save the chat message
        insert_chat_message(user_id, session_id, "assistant", answer)
        return answer

    return "No response received from the model"
|
53 |
-
|
54 |
-
def create_quiz_by_context(user_id, session_id, context, length, session_title, session_description):
    """Generate a quiz from ``context`` via ``generate_mcqs`` and store it.

    Returns:
        "No quiz generated" when ``generate_mcqs`` yields nothing; otherwise
        the quiz is inserted into ``quizzes_collection`` and
        "Quiz created successfully" is returned.
    """
    questions = generate_mcqs(context, length, session_title, session_description)
    if questions:
        # Save the quiz
        record = {
            "user_id": ObjectId(user_id),
            "session_id": ObjectId(session_id),
            "questions": questions,
            "timestamp": datetime.utcnow()
        }
        quizzes_collection.insert_one(record)
        return "Quiz created successfully"

    return "No quiz generated"
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import datetime
|
3 |
+
from db import courses_collection2, faculty_collection, students_collection, vectors_collection, chat_history_collection
|
4 |
+
from PIL import Image
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
import os
|
7 |
+
from datetime import datetime
|
8 |
+
from bson import ObjectId
|
9 |
+
from file_upload_vectorize import model
|
10 |
+
from gen_mcqs import generate_mcqs, quizzes_collection
|
11 |
+
|
12 |
+
load_dotenv()
|
13 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
14 |
+
OPENAI_KEY = os.getenv('OPENAI_KEY')
|
15 |
+
GEMINI_KEY = os.getenv('GEMINI_KEY')
|
16 |
+
|
17 |
+
def insert_chat_message(user_id, session_id, role, content):
    """Append one message to the stored chat history of a user/session pair.

    The history document is matched on ``user_id`` (wrapped in ``ObjectId``)
    and ``session_id``; it is created when absent (upsert). The document's
    own ``timestamp`` field is refreshed on every call.
    """
    entry = {
        "role": role,
        "content": content,
        "timestamp": datetime.utcnow()
    }

    selector = {"user_id": ObjectId(user_id), "session_id": session_id}
    changes = {
        "$push": {"messages": entry},
        "$set": {"timestamp": datetime.utcnow()},
    }
    chat_history_collection.update_one(selector, changes, upsert=True)
|
29 |
+
|
30 |
+
def give_chat_response(user_id, session_id, question, title, description, context):
    """Ask the model to answer ``question`` using the session's details.

    Builds a prompt from the title, description and context, sends it to
    ``model.generate_content`` and, when text comes back, stores the stripped
    answer as an "assistant" message before returning it.

    Returns:
        The stripped answer text, or "No response received from the model"
        when the response or its text is empty.
    """
    prompt = f"""
Based on the following session title, description, and context, answer the user's question in 3-4 lines:

Title: {title}
Description: {description}
Context: {context}

Question: {question}

Please provide a clear and concise answer based on the information provided.
"""

    reply = model.generate_content(prompt)
    if reply and reply.text:
        answer = reply.text.strip()
        # Save the chat message
        insert_chat_message(user_id, session_id, "assistant", answer)
        return answer

    return "No response received from the model"
|
53 |
+
|
54 |
+
def create_quiz_by_context(user_id, session_id, context, length, session_title, session_description):
    """Generate a quiz from ``context`` via ``generate_mcqs`` and store it.

    Returns:
        "No quiz generated" when ``generate_mcqs`` yields nothing; otherwise
        the quiz is inserted into ``quizzes_collection`` and
        "Quiz created successfully" is returned.
    """
    questions = generate_mcqs(context, length, session_title, session_description)
    if questions:
        # Save the quiz
        record = {
            "user_id": ObjectId(user_id),
            "session_id": ObjectId(session_id),
            "questions": questions,
            "timestamp": datetime.utcnow()
        }
        quizzes_collection.insert_one(record)
        return "Quiz created successfully"

    return "No quiz generated"
|
create_course.py
CHANGED
@@ -1,272 +1,272 @@
|
|
1 |
-
from datetime import datetime, timedelta
|
2 |
-
import os
|
3 |
-
from typing import Dict, List, Any
|
4 |
-
from pymongo import MongoClient
|
5 |
-
import requests
|
6 |
-
import uuid
|
7 |
-
import openai
|
8 |
-
from openai import OpenAI
|
9 |
-
import streamlit as st
|
10 |
-
from bson import ObjectId
|
11 |
-
from dotenv import load_dotenv
|
12 |
-
import json
|
13 |
-
|
14 |
-
load_dotenv()
|
15 |
-
MONGODB_URI = os.getenv("MONGO_URI")
|
16 |
-
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
|
17 |
-
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
|
18 |
-
|
19 |
-
client = MongoClient(MONGODB_URI)
|
20 |
-
db = client['novascholar_db']
|
21 |
-
courses_collection = db['courses']
|
22 |
-
|
23 |
-
def generate_perplexity_response(api_key, course_name):
    """Request a JSON course structure for ``course_name`` from Perplexity.

    The request goes through the OpenAI-compatible client pointed at
    ``https://api.perplexity.ai``.

    Args:
        api_key: Perplexity API key.
        course_name: Course name interpolated into the prompt.

    Returns:
        The raw text content of the model's first choice (expected to be a
        JSON object, but not validated here), or ``""`` when the request
        fails; the failure is reported with ``st.error``.
    """
    # Literal braces are doubled because this is an f-string. The example must
    # use exactly the keys named in the schema, since the model tends to copy it.
    prompt = f"""
You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate comprehensive, academically rigorous course structures for undergraduate level education.

Please generate a detailed course structure for the course {course_name} in JSON format following these specifications:

1. The course structure should be appropriate for a full semester (14-16 weeks)
2. Each module should be designed for 2-4 weeks of instruction
3. Follow standard academic practices and nomenclature
4. Ensure progressive complexity from foundational to advanced concepts
5. The course_title should exactly match the course name provided in the prompt. No additional information should be included in the course_title field.
6. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
7. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**


The JSON response should follow this structure:
{{
    "course_title": "string",
    "course_description": "string",
    "modules": [
        {{
            "module_title": "string",
            "sub_modules": [
                {{
                    "title": "string",
                    "topics": ["string"]
                }}
            ]
        }}
    ]
}}

Example response:
{{
    "course_title": "Advanced Natural Language Processing",
    "course_description": "An advanced course covering modern approaches to NLP using deep learning, with focus on transformer architectures and their applications.",
    "modules": [
        {{
            "module_title": "Foundations of Modern NLP",
            "sub_modules": [
                {{
                    "title": "Attention Mechanism",
                    "topics": [
                        "Self-attention",
                        "Multi-head attention",
                        "Positional encoding"
                    ]
                }}
            ]
        }}
    ]
}}
"""

    messages = [
        {
            "role": "system",
            "content": (
                "You are an expert educational AI assistant specializing in course design and curriculum planning. "
                "Your task is to generate accurate, detailed, and structured educational content for undergraduate-level and post-graduate-level courses. "
                "Provide detailed and accurate information tailored to the user's prompt. "
                "Ensure that the responses are logical, follow standard academic practices, and include realistic concepts relevant to the course."
            ),
        },
        {
            "role": "user",
            "content": prompt
        },
    ]
    try:
        # Named llm_client so it cannot be confused with the module-level
        # MongoDB ``client``.
        llm_client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        response = llm_client.chat.completions.create(
            model="llama-3.1-sonar-small-128k-online",
            messages=messages
        )
        return response.choices[0].message.content
    except Exception as e:
        st.error(f"Failed to fetch data from Perplexity API: {e}")
        return ""
|
109 |
-
|
110 |
-
def get_new_course_id():
    """Generate a new course ID one above the highest existing course number.

    Course IDs are a two-character prefix followed by a number (this function
    always emits the ``CS`` prefix). The maximum is computed numerically
    rather than by sorting the ID strings, because string order places
    "CS1000" below "CS999" and would hand out the same ID repeatedly once the
    numbers gain a digit.

    Returns:
        ``"CS<n + 1>"`` where ``n`` is the largest numeric suffix found, or
        ``"CS101"`` when no stored ID has a numeric suffix.
    """
    highest = 0
    for course in courses_collection.find({}, {"course_id": 1}):
        suffix = str(course.get("course_id", ""))[2:]
        # Ignore IDs without a purely numeric suffix instead of crashing on int()
        if suffix.isdecimal():
            highest = max(highest, int(suffix))

    if highest:
        return f"CS{highest + 1}"
    return "CS101"
|
119 |
-
|
120 |
-
|
121 |
-
def create_course(course_name, start_date, duration_weeks):
    """Generate a course plan and build one weekly session per topic.

    The plan is requested from ``generate_perplexity_response`` and parsed as
    JSON; every topic of every sub-module becomes one session document built
    by ``create_session``.

    Args:
        course_name: Course name sent to the plan generator.
        start_date: ``datetime`` of the first session; each following session
            is scheduled seven days after the previous one.
        duration_weeks: Currently not used when scheduling sessions.

    Returns:
        The list of session documents in plan order. An empty list is
        returned (after reporting via ``st.error`` where applicable) when the
        plan could not be fetched or is not a JSON object.
    """
    course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
    if not course_plan:
        # generate_perplexity_response returns "" after reporting an API
        # failure; json.loads("") would raise, so stop here.
        return []

    try:
        course_plan_json = json.loads(course_plan)
    except json.JSONDecodeError as e:
        st.error(f"Course plan is not valid JSON: {e}")
        return []

    if not isinstance(course_plan_json, dict):
        st.error("Course plan is not a JSON object.")
        return []

    # Generate sessions for each module
    all_sessions = []
    session_date = start_date
    for module in course_plan_json.get('modules', []):
        for sub_module in module.get('sub_modules', []):
            for topic in sub_module.get('topics', []):
                all_sessions.append(
                    create_session(
                        title=topic,
                        date=session_date,
                        module_name=module.get('module_title', '')
                    )
                )
                session_date += timedelta(days=7)  # Next session after a week

    print("Number of sessions:", len(all_sessions))

    # TODO: persisting the course document (course_id, title, description,
    # faculty, duration, sessions) to courses_collection is not implemented.
    return all_sessions
|
248 |
-
|
249 |
-
def create_session(title: str, date: datetime, module_name: str):
    """Build a new "upcoming" session document with empty activity sections.

    The document gets a fresh ``ObjectId`` and the current UTC time as
    ``created_at``, plus empty pre-class, in-class and post-class containers.
    ``module_name`` is accepted but not stored in the document.
    """
    session = {
        "session_id": ObjectId(),
        "title": title,
        "date": date,
        "status": "upcoming",
        "created_at": datetime.utcnow(),
    }
    session["pre_class"] = {"resources": [], "completion_required": True}
    session["in_class"] = {"quiz": [], "polls": []}
    session["post_class"] = {"assignments": []}
    return session
|
269 |
-
|
270 |
-
# Usage example: build a two-week plan starting now when run as a script.
if __name__ == "__main__":
    create_course(
        course_name="Introduction to Data Analytics",
        start_date=datetime.now(),
        duration_weeks=2,
    )
|
|
|
1 |
+
from datetime import datetime, timedelta
|
2 |
+
import os
|
3 |
+
from typing import Dict, List, Any
|
4 |
+
from pymongo import MongoClient
|
5 |
+
import requests
|
6 |
+
import uuid
|
7 |
+
import openai
|
8 |
+
from openai import OpenAI
|
9 |
+
import streamlit as st
|
10 |
+
from bson import ObjectId
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
import json
|
13 |
+
|
14 |
+
load_dotenv()
|
15 |
+
MONGODB_URI = os.getenv("MONGO_URI")
|
16 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
|
17 |
+
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
|
18 |
+
|
19 |
+
client = MongoClient(MONGODB_URI)
|
20 |
+
db = client['novascholar_db']
|
21 |
+
courses_collection = db['courses']
|
22 |
+
|
23 |
+
def generate_perplexity_response(api_key, course_name):
    """Ask the Perplexity chat API for a JSON course structure.

    The request goes through the OpenAI client pointed at
    ``https://api.perplexity.ai`` and uses the
    ``llama-3.1-sonar-small-128k-online`` model.

    Args:
        api_key: Perplexity API key.
        course_name: Name of the course to generate a structure for.

    Returns:
        The raw message content returned by the model (expected, but not
        verified here, to be a JSON string), or ``""`` when the request fails.
        Failures are reported through ``st.error``.
    """
    # NOTE: doubled braces ({{ }}) are literal braces inside this f-string.
    prompt = f"""
    You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate comprehensive, academically rigorous course structures for undergraduate level education.

    Please generate a detailed course structure for the course {course_name} in JSON format following these specifications:

    1. The course structure should be appropriate for a full semester (14-16 weeks)
    2. Each module should be designed for 2-4 weeks of instruction
    3. Follow standard academic practices and nomenclature
    4. Ensure progressive complexity from foundational to advanced concepts
    5. The course_title should exactly match the course name provided in the prompt. No additional information should be included in the course_title field.
    6. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
    7. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**


    The JSON response should follow this structure:
    {{
        "course_title": "string",
        "course_description": "string",
        "modules": [
            {{
                "module_title": "string",
                "sub_modules": [
                    {{
                        "title": "string",
                        "topics": ["string"]
                    }}
                ]
            }}
        ]
    }}

    Example response:
    {{
        "course_title": "Advanced Natural Language Processing",
        "course_description": "An advanced course covering modern approaches to NLP using deep learning, with focus on transformer architectures and their applications.",
        "modules": [
            {{
                "module_title": "Foundations of Modern NLP",
                "sub_modules": [
                    {{
                        "title": "Attention Mechanism",
                        "topics": [
                            "Self-attention",
                            "Multi-head attention",
                            "Positional encoding"
                        ]
                    }}
                ]
            }}
        ]
    }}
    """

    messages = [
        {
            "role": "system",
            "content": (
                "You are an expert educational AI assistant specializing in course design and curriculum planning. "
                "Your task is to generate accurate, detailed, and structured educational content for undergraduate-level and post-graduate-level courses. "
                "Provide detailed and accurate information tailored to the user's prompt. "
                "Ensure that the responses are logical, follow standard academic practices, and include realistic concepts relevant to the course."
            ),
        },
        {
            "role": "user",
            "content": prompt,
        },
    ]
    try:
        # Named distinctly so it is not confused with the module-level Mongo client.
        perplexity_client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        response = perplexity_client.chat.completions.create(
            model="llama-3.1-sonar-small-128k-online",
            messages=messages,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort boundary: report the problem in the UI and hand back an
        # empty string so callers can detect the failure.
        st.error(f"Failed to fetch data from Perplexity API: {e}")
        return ""
|
109 |
+
|
110 |
+
def get_new_course_id():
    """Generate a new course ID by incrementing the highest existing course ID.

    Course IDs have the form ``CS<number>``. The highest number is determined
    numerically: a descending *string* sort on ``course_id`` would rank
    ``"CS999"`` above ``"CS1000"`` and hand out the same ID again once the
    series grows past three digits.

    Returns:
        ``"CS101"`` when no course with a numeric ID suffix exists, otherwise
        ``"CS<highest + 1>"``.
    """
    highest = None
    # Only the course_id field is needed; this reads every course document,
    # which is acceptable for a collection of courses.
    for course in courses_collection.find({}, {"course_id": 1}):
        suffix = str(course.get("course_id", ""))[2:]
        if not suffix.isdigit():
            # Skip documents with a missing or malformed ID instead of crashing.
            continue
        number = int(suffix)
        if highest is None or number > highest:
            highest = number

    if highest is None:
        return "CS101"
    return f"CS{highest + 1}"
|
119 |
+
|
120 |
+
|
121 |
+
def create_course(course_name, start_date, duration_weeks):
    """Generate a course plan and build one weekly session per topic.

    The plan is requested from ``generate_perplexity_response`` and parsed as
    JSON. Every topic of every sub-module becomes a session; the first session
    is scheduled on ``start_date`` and each following one seven days later.

    Args:
        course_name: Name of the course to plan.
        start_date: Date of the first session.
        duration_weeks: Currently not used; the number of sessions follows from
            the topics in the generated plan.

    Returns:
        The list of session documents, or an empty list when no usable plan
        could be obtained (the problem is reported through ``st.error``).
    """
    course_plan = generate_perplexity_response(PERPLEXITY_API_KEY, course_name)
    if not course_plan:
        # The helper returns "" after reporting an API failure; json.loads("")
        # would only add a confusing JSONDecodeError on top of that.
        return []

    try:
        course_plan_json = json.loads(course_plan)
    except json.JSONDecodeError as e:
        st.error(f"Failed to parse the generated course plan as JSON: {e}")
        return []

    # Generate sessions for each module
    all_sessions = []
    session_date = start_date
    for module in course_plan_json['modules']:
        for sub_module in module['sub_modules']:
            for topic in sub_module['topics']:
                all_sessions.append(
                    create_session(
                        title=topic,
                        date=session_date,
                        module_name=module['module_title'],
                    )
                )
                session_date += timedelta(days=7)  # Next session after a week

    print("Number of sessions:", len(all_sessions))

    # NOTE: the course document is not written to courses_collection here. The
    # earlier insert was disabled; it needed faculty_name / faculty_id, which
    # this function does not receive.
    return all_sessions
|
248 |
+
|
249 |
+
def create_session(title: str, date: datetime, module_name: str):
    """Create a session document with pre-class, in-class, and post-class components.

    Args:
        title: Title of the session.
        date: Scheduled date of the session.
        module_name: Title of the module the session belongs to.

    Returns:
        A dict describing the session. It gets a freshly generated ``ObjectId``
        as ``session_id``, the status ``"upcoming"``, and empty containers for
        the pre-class, in-class and post-class material.
    """
    return {
        "session_id": ObjectId(),
        "title": title,
        "date": date,
        "status": "upcoming",
        "created_at": datetime.utcnow(),
        # Previously the module_name argument was accepted but dropped; store it
        # so a session can be traced back to its module.
        "module_name": module_name,
        "pre_class": {
            "resources": [],
            "completion_required": True,
        },
        "in_class": {
            "quiz": [],
            "polls": [],
        },
        "post_class": {
            "assignments": [],
        },
    }
|
269 |
+
|
270 |
+
# Usage example:
|
271 |
+
if __name__ == "__main__":
    # Manual run: build a demo course starting now.
    create_course(
        course_name="Introduction to Data Analytics",
        start_date=datetime.now(),
        duration_weeks=2,
    )
|
create_course2.py
CHANGED
@@ -1,331 +1,331 @@
|
|
1 |
-
from datetime import datetime, timedelta
|
2 |
-
import os
|
3 |
-
from typing import Dict, List, Any
|
4 |
-
from pymongo import MongoClient
|
5 |
-
import requests
|
6 |
-
import uuid
|
7 |
-
import openai
|
8 |
-
from openai import OpenAI
|
9 |
-
import streamlit as st
|
10 |
-
from bson import ObjectId
|
11 |
-
from dotenv import load_dotenv
|
12 |
-
import json
|
13 |
-
|
14 |
-
load_dotenv()
|
15 |
-
MONGODB_URI = os.getenv("MONGO_URI")
|
16 |
-
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
|
17 |
-
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
|
18 |
-
|
19 |
-
client = MongoClient(MONGODB_URI)
|
20 |
-
db = client['novascholar_db']
|
21 |
-
courses_collection = db['courses']
|
22 |
-
|
23 |
-
def generate_perplexity_response(api_key, course_name, duration_weeks, sessions_per_week):
    """Ask the Perplexity chat API for a JSON course plan sized to the schedule.

    The request goes through the OpenAI client pointed at
    ``https://api.perplexity.ai`` and uses the
    ``llama-3.1-sonar-small-128k-online`` model. The reply is parsed and the
    sum of ``module_duration_sessions`` over all modules is compared with the
    requested number of sessions; a difference of up to 5 sessions is accepted.

    Args:
        api_key: Perplexity API key.
        course_name: Name of the course to plan.
        duration_weeks: Length of the course in weeks.
        sessions_per_week: Number of sessions held each week.

    Returns:
        The raw JSON string returned by the model, or ``""`` when the request
        fails or the reply does not pass validation. Both cases are reported
        through ``st.error``.
    """
    # Calculate sessions based on duration
    total_sessions = duration_weeks * sessions_per_week

    # NOTE: doubled braces ({{ }}) are literal braces inside this f-string.
    prompt = f"""
    You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate a comprehensive, academically rigorous course structure for the course {course_name} that fits exactly within {duration_weeks} weeks with {total_sessions} total sessions ({sessions_per_week} sessions per week).

    Please generate a detailed course structure in JSON format following these specifications:

    1. The course structure must be designed for exactly {duration_weeks} weeks with {total_sessions} total sessions
    2. Each module should contain an appropriate number of sessions that sum up to exactly {total_sessions}
    3. Each session should be designed for a 1-1.5-hour class duration
    4. Follow standard academic practices and nomenclature
    5. Ensure progressive complexity from foundational to advanced concepts
    6. The course_title should exactly match the course name provided
    7. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
    8. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**

    The JSON response should follow this structure:
    {{
        "course_title": "string",
        "course_description": "string",
        "total_duration_weeks": {duration_weeks},
        "sessions_per_week": {sessions_per_week},
        "total_sessions": {total_sessions},
        "modules": [
            {{
                "module_title": "string",
                "module_duration_sessions": number,
                "sub_modules": [
                    {{
                        "title": "string",
                        "topics": [
                            {{
                                "title": "string",
                                "short_description": "string",
                                "concise_learning_objectives": ["string"]
                            }}
                        ]
                    }}
                ]
            }}
        ]
    }}

    Ensure that:
    1. The sum of all module_duration_sessions equals exactly {total_sessions}
    2. Each topic has clear learning objectives
    3. Topics build upon each other logically
    4. Content is distributed evenly across the available sessions
    5. **This Instruction is Strictly followed: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.****

    """

    messages = [
        {
            "role": "system",
            "content": (
                "You are an expert educational AI assistant specializing in course design and curriculum planning. "
                "Your task is to generate accurate, detailed, and structured educational content that precisely fits "
                "the specified duration."
            ),
        },
        {
            "role": "user",
            "content": prompt,
        },
    ]

    # Step 1: talk to the API. Anything that goes wrong here is a fetch failure.
    try:
        perplexity_client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        response = perplexity_client.chat.completions.create(
            model="llama-3.1-sonar-small-128k-online",
            messages=messages,
        )
        content = response.choices[0].message.content
    except Exception as e:
        st.error(f"Failed to fetch data from Perplexity API: {e}")
        return ""

    # Step 2: validate the reply. Reported separately so a malformed plan is
    # not mislabelled as an API failure.
    try:
        course_plan = json.loads(content)
        total_planned_sessions = sum(
            module.get('module_duration_sessions', 0)
            for module in course_plan.get('modules', [])
        )
        if abs(total_planned_sessions - total_sessions) > 5:
            raise ValueError(
                f"Generated plan has {total_planned_sessions} sessions, but {total_sessions} were requested"
            )
    except (ValueError, TypeError, AttributeError) as e:
        # ValueError covers json.JSONDecodeError; TypeError/AttributeError cover
        # replies whose JSON does not have the expected object/number shapes.
        st.error(f"Generated course plan is invalid: {e}")
        return ""

    return content
|
120 |
-
|
121 |
-
def generate_session_resources(api_key, session_titles: List[str]):
    """
    Request learning resources (readings, books, additional resources) for
    the given session titles from the Perplexity chat API.

    Returns the raw message content produced by the model, or None when the
    request fails (the failure is reported through st.error).
    """
    resources_prompt = f"""
    You are an expert educational content curator. For each session title provided, suggest highly relevant and accurate learning resources.
    Please provide resources for these sessions: {session_titles}

    For each session, provide resources in this JSON format:
    {{
        "session_resources": [
            {{
                "session_title": "string",
                "resources": {{
                    "readings": [
                        {{
                            "title": "string",
                            "url": "string",
                            "type": "string",
                            "estimated_read_time": "string"
                        }}
                    ],
                    "books": [
                        {{
                            "title": "string",
                            "author": "string",
                            "isbn": "string",
                            "chapters": "string"
                        }}
                    ],
                    "additional_resources": [
                        {{
                            "title": "string",
                            "url": "string",
                            "type": "string",
                            "description": "string"
                        }}
                    ]
                }}
            }}
        ]
    }}

    Guidelines:
    1. Ensure all URLs are real and currently active
    2. Prioritize high-quality, authoritative sources
    3. Include 1-2 resources of each type
    5. For readings, include a mix of academic and practical resources. It can exceed to 3-4 readings
    6. Book references should be real, recently published works
    7. Additional resources can include tools, documentation, or practice platforms
    8. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
    9. ***NOTE: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
    """

    system_message = {
        "role": "system",
        "content": "You are an expert educational content curator, focused on providing accurate and relevant learning resources.",
    }
    user_message = {
        "role": "user",
        "content": resources_prompt,
    }

    try:
        api = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        completion = api.chat.completions.create(
            model="llama-3.1-sonar-small-128k-online",
            messages=[system_message, user_message],
        )
        # Echo the raw reply to stdout before handing it back unparsed.
        print("Response is: \n", completion.choices[0].message.content)
        return completion.choices[0].message.content
    except Exception as e:
        st.error(f"Failed to generate resources: {e}")
        return None
|
202 |
-
|
203 |
-
def validate_course_plan(course_plan):
    """Check that a course plan has the expected top-level and module keys.

    Args:
        course_plan: Mapping expected to contain ``course_title``,
            ``course_description`` and ``modules``; each entry of ``modules``
            must contain ``module_title`` and ``sub_modules``.

    Raises:
        ValueError: If a top-level key is missing ("Invalid course plan
            structure") or a module lacks one of its keys ("Invalid module
            structure").
    """
    for field in ('course_title', 'course_description', 'modules'):
        if field not in course_plan:
            raise ValueError("Invalid course plan structure")

    for entry in course_plan['modules']:
        has_title = 'module_title' in entry
        if not (has_title and 'sub_modules' in entry):
            raise ValueError("Invalid module structure")
|
211 |
-
|
212 |
-
def create_session(title: str, date: datetime, module_name: str, resources: dict):
    """Build the document for a single session.

    The document carries a new ObjectId, the scheduling data, empty
    pre-class / in-class / post-class containers, and the external resources
    taken from ``resources`` (missing resource kinds default to an empty list).
    """
    session = {
        "session_id": ObjectId(),
        "title": title,
        "date": date,
        "status": "upcoming",
        "created_at": datetime.utcnow(),
        "module_name": module_name,
    }
    session["pre_class"] = {"resources": [], "completion_required": True}
    session["in_class"] = {"quiz": [], "polls": []}
    session["post_class"] = {"assignments": []}
    # Copy over only the three known resource kinds, in a fixed order.
    session["external_resources"] = {
        kind: resources.get(kind, [])
        for kind in ("readings", "books", "additional_resources")
    }
    return session
|
238 |
-
|
239 |
-
def _parse_session_resources(raw_response):
    """Turn the raw resources JSON into a ``{session_title: resources}`` map."""
    payload = json.loads(raw_response)
    if not isinstance(payload, dict):
        raise ValueError("session resources response is not a JSON object")
    return {
        entry['session_title']: entry.get('resources', {})
        for entry in payload.get('session_resources', [])
        if isinstance(entry, dict) and 'session_title' in entry
    }


def create_course(course_name: str, start_date: datetime, duration_weeks: int, sessions_per_week: int):
    """Build the sessions of a course from the sample course plan.

    The plan is read from ``sample_files/sample_course.json``. Resources for
    all topic titles are requested in one call to
    ``generate_session_resources`` and attached to the matching sessions.
    Sessions are spaced evenly: ``7 / sessions_per_week`` days apart, starting
    at ``start_date``.

    Args:
        course_name: Currently not used; the plan comes from the sample file.
        start_date: Date of the first session.
        duration_weeks: Currently not used; the number of sessions follows from
            the topics in the plan.
        sessions_per_week: Number of sessions per week, used for the spacing.
            Non-positive values fall back to 2 (the former fixed 3.5-day gap).

    Returns:
        A ``(course_plan_json, all_sessions)`` tuple.
    """
    # Load the course plan JSON
    with open('sample_files/sample_course.json', 'r', encoding='utf-8') as file:
        course_plan_json = json.load(file)

    # Extract all session titles
    session_titles = [
        topic['title']
        for module in course_plan_json['modules']
        for sub_module in module['sub_modules']
        for topic in sub_module['topics']
    ]

    # Generate resources for all sessions
    session_resources = generate_session_resources(PERPLEXITY_API_KEY, session_titles)

    # Create a mapping of session titles to their resources. The helper returns
    # None on failure (already reported), in which case sessions are created
    # without external resources instead of crashing in json.loads.
    resources_map = {}
    if session_resources:
        try:
            resources_map = _parse_session_resources(session_resources)
        except (ValueError, TypeError, AttributeError) as e:
            st.error(f"Failed to parse session resources: {e}")
    print("Resources Map is: \n", resources_map)

    # Spread the weekly sessions evenly across the week.
    per_week = sessions_per_week if sessions_per_week and sessions_per_week > 0 else 2
    session_gap = timedelta(days=7 / per_week)

    # Generate sessions with their corresponding resources
    all_sessions = []
    current_date = start_date
    for module in course_plan_json['modules']:
        for sub_module in module['sub_modules']:
            for topic in sub_module['topics']:
                session = create_session(
                    title=topic['title'],
                    date=current_date,
                    module_name=module['module_title'],
                    resources=resources_map.get(topic['title'], {}),
                )
                all_sessions.append(session)
                current_date += session_gap

    print("All Sessions are: \n", all_sessions)
    return course_plan_json, all_sessions
|
316 |
-
|
317 |
-
def get_new_course_id():
    """Generate a new course ID by incrementing the highest existing one.

    Course IDs are expected to look like ``CS101``: a two-character prefix
    followed by a decimal number. The numeric parts are compared as
    integers, because sorting the ``course_id`` strings in the database is
    lexicographic ("CS999" sorts after "CS1000") and would hand out
    duplicate IDs once the number of digits grows.

    Returns:
        str: ``"CS"`` followed by the highest stored number plus one, or
        ``"CS101"`` when no stored course has a parsable ID.
    """
    highest_number = None
    # Only course_id is needed, so project every other field away.
    for course in courses_collection.find({}, {"course_id": 1}):
        course_id = course.get("course_id")
        if not isinstance(course_id, str):
            continue
        suffix = course_id[2:]
        # Skip malformed IDs instead of crashing in int().
        if not (suffix.isascii() and suffix.isdigit()):
            continue
        number = int(suffix)
        if highest_number is None or number > highest_number:
            highest_number = number

    if highest_number is None:
        return "CS101"
    return f"CS{highest_number + 1}"
|
326 |
-
|
327 |
-
# if __name__ == "__main__":
|
328 |
-
# course_name = "Introduction to Machine Learning"
|
329 |
-
# start_date = datetime(2022, 9, 1)
|
330 |
-
# duration_weeks = 4
|
331 |
# create_course(course_name, start_date, duration_weeks, 3)
|
|
|
1 |
+
from datetime import datetime, timedelta
|
2 |
+
import os
|
3 |
+
from typing import Dict, List, Any
|
4 |
+
from pymongo import MongoClient
|
5 |
+
import requests
|
6 |
+
import uuid
|
7 |
+
import openai
|
8 |
+
from openai import OpenAI
|
9 |
+
import streamlit as st
|
10 |
+
from bson import ObjectId
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
import json
|
13 |
+
|
14 |
+
# Load variables from a local .env file into the process environment.
load_dotenv()
# NOTE(review): create_course3.py reads "MONGODB_URI" and "PERPLEXITY_API_KEY"
# instead of the names used here - confirm which names the .env file defines.
MONGODB_URI = os.getenv("MONGO_URI")
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_KEY")

# Module-level MongoDB handles used by the functions below.
client = MongoClient(MONGODB_URI)
db = client['novascholar_db']
courses_collection = db['courses']
|
22 |
+
|
23 |
+
def generate_perplexity_response(api_key, course_name, duration_weeks, sessions_per_week):
    """Ask the Perplexity API for a JSON course plan and sanity-check it.

    Args:
        api_key: Perplexity API key.
        course_name: Name of the course; interpolated into the prompt.
        duration_weeks: Length of the course in weeks.
        sessions_per_week: Number of sessions held each week.

    Returns:
        str: The raw JSON text returned by the model, or ``""`` when the
        request fails, the reply is not valid JSON, or the planned session
        count differs from the requested one by more than 5 (the failure is
        reported through ``st.error``).
    """
    # Total number of sessions the plan has to cover
    total_sessions = duration_weeks * sessions_per_week

    prompt = f"""
You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate a comprehensive, academically rigorous course structure for the course {course_name} that fits exactly within {duration_weeks} weeks with {total_sessions} total sessions ({sessions_per_week} sessions per week).

Please generate a detailed course structure in JSON format following these specifications:

1. The course structure must be designed for exactly {duration_weeks} weeks with {total_sessions} total sessions
2. Each module should contain an appropriate number of sessions that sum up to exactly {total_sessions}
3. Each session should be designed for a 1-1.5-hour class duration
4. Follow standard academic practices and nomenclature
5. Ensure progressive complexity from foundational to advanced concepts
6. The course_title should exactly match the course name provided
7. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
8. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**

The JSON response should follow this structure:
{{
"course_title": "string",
"course_description": "string",
"total_duration_weeks": {duration_weeks},
"sessions_per_week": {sessions_per_week},
"total_sessions": {total_sessions},
"modules": [
{{
"module_title": "string",
"module_duration_sessions": number,
"sub_modules": [
{{
"title": "string",
"topics": [
{{
"title": "string",
"short_description": "string",
"concise_learning_objectives": ["string"]
}}
]
}}
]
}}
]
}}

Ensure that:
1. The sum of all module_duration_sessions equals exactly {total_sessions}
2. Each topic has clear learning objectives
3. Topics build upon each other logically
4. Content is distributed evenly across the available sessions
5. **This Instruction is Strictly followed: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.****

"""

    messages = [
        {
            "role": "system",
            "content": (
                "You are an expert educational AI assistant specializing in course design and curriculum planning. "
                "Your task is to generate accurate, detailed, and structured educational content that precisely fits "
                "the specified duration."
            ),
        },
        {
            "role": "user",
            "content": prompt
        },
    ]

    try:
        # Perplexity exposes an OpenAI-compatible endpoint. The local name
        # deliberately differs from the module-level MongoDB ``client``.
        perplexity_client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        response = perplexity_client.chat.completions.create(
            model="llama-3.1-sonar-small-128k-online",
            messages=messages
        )
        content = response.choices[0].message.content

        # Validate session count
        course_plan = json.loads(content)
        total_planned_sessions = sum(
            module.get('module_duration_sessions', 0)
            for module in course_plan.get('modules', [])
        )

        # A deviation of up to 5 sessions is tolerated before the plan is rejected.
        if abs(total_planned_sessions - total_sessions) > 5:
            raise ValueError(f"Generated plan has {total_planned_sessions} sessions, but {total_sessions} were requested")

        return content
    except Exception as e:
        # Deliberately broad: any failure is surfaced in the UI and mapped to "".
        st.error(f"Failed to fetch data from Perplexity API: {e}")
        return ""
|
120 |
+
|
121 |
+
def generate_session_resources(api_key, session_titles: List[str]):
    """Ask the Perplexity API for learning resources for each session title.

    Args:
        api_key: Perplexity API key.
        session_titles: Titles of the sessions that need resources.

    Returns:
        The raw JSON text returned by the model (expected to contain a
        top-level ``"session_resources"`` list), or ``None`` when the
        request fails (the failure is reported through ``st.error``). For
        an empty ``session_titles`` no request is made and an empty
        ``{"session_resources": []}`` document is returned.
    """
    if not session_titles:
        # Nothing to look up: skip the API round trip entirely.
        return json.dumps({"session_resources": []})

    resources_prompt = f"""
You are an expert educational content curator. For each session title provided, suggest highly relevant and accurate learning resources.
Please provide resources for these sessions: {session_titles}

For each session, provide resources in this JSON format:
{{
"session_resources": [
{{
"session_title": "string",
"resources": {{
"readings": [
{{
"title": "string",
"url": "string",
"type": "string",
"estimated_read_time": "string"
}}
],
"books": [
{{
"title": "string",
"author": "string",
"isbn": "string",
"chapters": "string"
}}
],
"additional_resources": [
{{
"title": "string",
"url": "string",
"type": "string",
"description": "string"
}}
]
}}
}}
]
}}

Guidelines:
1. Ensure all URLs are real and currently active
2. Prioritize high-quality, authoritative sources
3. Include 1-2 resources of each type
5. For readings, include a mix of academic and practical resources. It can exceed to 3-4 readings
6. Book references should be real, recently published works
7. Additional resources can include tools, documentation, or practice platforms
8. Ensure that the property names are enclosed in double quotes (") and followed by a colon (:), and the values are enclosed in double quotes (").
9. ***NOTE: **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
"""

    messages = [
        {
            "role": "system",
            "content": "You are an expert educational content curator, focused on providing accurate and relevant learning resources.",
        },
        {
            "role": "user",
            "content": resources_prompt
        },
    ]

    try:
        # Perplexity exposes an OpenAI-compatible endpoint. The local name
        # deliberately differs from the module-level MongoDB ``client``.
        perplexity_client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        response = perplexity_client.chat.completions.create(
            model="llama-3.1-sonar-small-128k-online",
            messages=messages
        )
        content = response.choices[0].message.content
        print("Response is: \n", content)
        # The text is returned unparsed; decoding is left to the caller.
        return content
    except Exception as e:
        # Deliberately broad: any failure is surfaced in the UI and mapped to None.
        st.error(f"Failed to generate resources: {e}")
        return None
|
202 |
+
|
203 |
+
def validate_course_plan(course_plan):
    """Check that a decoded course plan has the minimum expected structure.

    Args:
        course_plan: Decoded course plan. It must be a dict containing
            ``course_title``, ``course_description`` and a ``modules``
            list whose items are dicts with ``module_title`` and
            ``sub_modules``.

    Raises:
        ValueError: If the plan or any of its modules is malformed. Wrong
            container types (for example ``None`` or a non-dict module)
            are reported the same way instead of leaking a ``TypeError``.
    """
    required_fields = ('course_title', 'course_description', 'modules')
    if not isinstance(course_plan, dict):
        raise ValueError("Invalid course plan structure")
    if any(field not in course_plan for field in required_fields):
        raise ValueError("Invalid course plan structure")

    modules = course_plan['modules']
    if not isinstance(modules, (list, tuple)):
        raise ValueError("Invalid course plan structure")

    for module in modules:
        if not isinstance(module, dict):
            raise ValueError("Invalid module structure")
        if 'module_title' not in module or 'sub_modules' not in module:
            raise ValueError("Invalid module structure")
|
211 |
+
|
212 |
+
def create_session(title: str, date: datetime, module_name: str, resources: dict):
    """Create a session document with pre-class, in-class, and post-class components.

    Args:
        title: Session title.
        date: Scheduled date of the session.
        module_name: Title of the module the session belongs to.
        resources: Mapping that may contain ``readings``, ``books`` and
            ``additional_resources``. ``None`` or any non-dict value is
            treated as "no resources", and a resource entry whose value is
            ``None`` becomes an empty list.

    Returns:
        dict: The session document, with a freshly generated ``ObjectId``
        as ``session_id`` and status ``"upcoming"``.
    """
    if not isinstance(resources, dict):
        # Model-generated data may carry null (or another type) here.
        resources = {}

    def _resource_list(key):
        # A missing key and an explicit null both mean "nothing to attach".
        value = resources.get(key)
        return [] if value is None else value

    return {
        "session_id": ObjectId(),
        "title": title,
        "date": date,
        "status": "upcoming",
        "created_at": datetime.utcnow(),
        "module_name": module_name,
        "pre_class": {
            "resources": [],
            "completion_required": True
        },
        "in_class": {
            "quiz": [],
            "polls": []
        },
        "post_class": {
            "assignments": []
        },
        "external_resources": {
            "readings": _resource_list("readings"),
            "books": _resource_list("books"),
            "additional_resources": _resource_list("additional_resources")
        }
    }
|
238 |
+
|
239 |
+
def create_course(course_name: str, start_date: datetime, duration_weeks: int, sessions_per_week: int):
    """Build the session documents for a course from the bundled sample plan.

    NOTE: Generating the plan through ``generate_perplexity_response`` is
    currently disabled; the plan is read from
    ``sample_files/sample_course.json``, so ``course_name`` and
    ``duration_weeks`` are accepted but not used yet.

    Args:
        course_name: Name of the course (currently unused, see note).
        start_date: Date of the first session.
        duration_weeks: Course length in weeks (currently unused, see note).
        sessions_per_week: Sessions per week; sessions are spaced
            ``7 / sessions_per_week`` days apart (3.5 days for 2 per week).
            A non-positive value falls back to 3.5 days.

    Returns:
        tuple: ``(course_plan_json, all_sessions)`` - the loaded plan and
        the list of session documents, one per topic in plan order.
    """
    # Load the course plan JSON
    with open('sample_files/sample_course.json', 'r', encoding='utf-8') as file:
        course_plan_json = json.load(file)

    # Flatten module -> sub_module -> topic into (module title, topic title) pairs
    planned_topics = [
        (module['module_title'], topic['title'])
        for module in course_plan_json['modules']
        for sub_module in module['sub_modules']
        for topic in sub_module['topics']
    ]
    session_titles = [topic_title for _, topic_title in planned_topics]

    # Generate resources for all sessions
    session_resources = generate_session_resources(PERPLEXITY_API_KEY, session_titles)

    # Create a mapping of session titles to their resources. The generator
    # returns None on failure and the model may return malformed JSON; in
    # both cases the sessions are still created, just without resources.
    try:
        resources = json.loads(session_resources)
        resources_map = {
            resource['session_title']: resource['resources']
            for resource in resources['session_resources']
        }
    except (TypeError, ValueError, KeyError) as e:
        st.warning(f"Could not use generated session resources: {e}")
        resources_map = {}
    print("Resources Map is: \n", resources_map)

    # Space the sessions evenly across each week.
    days_between_sessions = 7 / sessions_per_week if sessions_per_week > 0 else 3.5
    session_gap = timedelta(days=days_between_sessions)

    # Generate sessions with their corresponding resources
    all_sessions = []
    current_date = start_date
    for module_title, topic_title in planned_topics:
        session = create_session(
            title=topic_title,
            date=current_date,
            module_name=module_title,
            resources=resources_map.get(topic_title, {})
        )
        all_sessions.append(session)
        current_date += session_gap

    print("All Sessions are: \n", all_sessions)
    return course_plan_json, all_sessions
|
316 |
+
|
317 |
+
def get_new_course_id():
    """Generate a new course ID by incrementing the highest existing one.

    Course IDs are expected to look like ``CS101``: a two-character prefix
    followed by a decimal number. The numeric parts are compared as
    integers, because sorting the ``course_id`` strings in the database is
    lexicographic ("CS999" sorts after "CS1000") and would hand out
    duplicate IDs once the number of digits grows.

    Returns:
        str: ``"CS"`` followed by the highest stored number plus one, or
        ``"CS101"`` when no stored course has a parsable ID.
    """
    highest_number = None
    # Only course_id is needed, so project every other field away.
    for course in courses_collection.find({}, {"course_id": 1}):
        course_id = course.get("course_id")
        if not isinstance(course_id, str):
            continue
        suffix = course_id[2:]
        # Skip malformed IDs instead of crashing in int().
        if not (suffix.isascii() and suffix.isdigit()):
            continue
        number = int(suffix)
        if highest_number is None or number > highest_number:
            highest_number = number

    if highest_number is None:
        return "CS101"
    return f"CS{highest_number + 1}"
|
326 |
+
|
327 |
+
# if __name__ == "__main__":
|
328 |
+
# course_name = "Introduction to Machine Learning"
|
329 |
+
# start_date = datetime(2022, 9, 1)
|
330 |
+
# duration_weeks = 4
|
331 |
# create_course(course_name, start_date, duration_weeks, 3)
|
create_course3.py
ADDED
@@ -0,0 +1,609 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import datetime, timedelta
|
2 |
+
import os
|
3 |
+
from typing import Dict, List, Any
|
4 |
+
from pymongo import MongoClient
|
5 |
+
import requests
|
6 |
+
import uuid
|
7 |
+
import openai
|
8 |
+
from openai import OpenAI
|
9 |
+
import streamlit as st
|
10 |
+
from bson import ObjectId
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
import json
|
13 |
+
import google.generativeai as genai
|
14 |
+
from mistralai import Mistral
|
15 |
+
|
16 |
+
# Load variables from a local .env file into the process environment.
load_dotenv()
# NOTE(review): create_course2.py reads "MONGO_URI" and "PERPLEXITY_KEY"
# instead of the names used here - confirm which names the .env file defines.
MONGODB_URI = os.getenv("MONGODB_URI")
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_KEY")
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")

# Module-level MongoDB handles.
client = MongoClient(MONGODB_URI)
db = client['novascholar_db']
courses_collection = db['courses']

# Shared Gemini model used by the outcome generators below.
genai.configure(api_key=GEMINI_API_KEY)
model = genai.GenerativeModel("gemini-1.5-flash")
|
29 |
+
|
30 |
+
|
31 |
+
def generate_course_outcomes(api_key, course_name, duration_weeks, sessions_per_week):
    """Generate Course Learning Outcomes (CLOs) for a course with Gemini.

    Args:
        api_key: Unused; the module-level Gemini ``model`` is already
            configured. Kept so existing callers keep working.
        course_name: Name of the course; interpolated into the prompt.
        duration_weeks: Unused; kept for interface compatibility.
        sessions_per_week: Unused; kept for interface compatibility.

    Returns:
        The raw JSON text returned by the model, or ``None`` when that text
        is not valid JSON (the decoding error is printed).
    """
    prompt = f"""
You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to generate a comprehensive, academically rigorous set of Course Learning Outcomes (CLOs) for the course {course_name}. These CLOs will serve as a foundation for instructional design and assessment planning.

Please generate a detailed list of CLOs in JSON format following these specifications:

1. The CLOs should be clear, concise, and aligned with Bloom's Taxonomy, progressively covering lower-order to higher-order cognitive skills.
2. Each CLO must explicitly define the skills, knowledge, or abilities the student is expected to acquire upon completing the course.
3. The CLOs must align with the overall course objective and encompass foundational to advanced concepts.
4. Use academic language appropriate for higher education or professional training.
5. Ensure the CLOs are measurable and actionable.
6. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**

The JSON response should follow this structure:
{{
"course_title": "string",
"course_description": "string",
"learning_outcomes": [
{{
"outcome_number": CO + number,
"outcome_description": "string",
"aligned_blooms_taxonomy_level": "string"
}}
]
}}

Ensure that:
1. Each outcome has a unique outcome_number starting from 1.
2. The aligned_blooms_taxonomy_level must be one of the following: "Remember", "Understand", "Apply", "Analyze", "Evaluate", or "Create".
3. The total number of CLOs should appropriately cover the breadth and depth of the course content, typically 5-7 CLOs.
4. **This Instruction is Strictly followed: DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**

"""

    response = model.generate_content(
        prompt,
        generation_config=genai.GenerationConfig(
            response_mime_type="application/json"
        )
    )
    try:
        # Decode only to validate; callers receive the raw text.
        json.loads(response.text)
    except json.JSONDecodeError as e:
        print("Error decoding COs JSON response:", e)
        return None
    return response.text
|
90 |
+
|
91 |
+
def generate_module_outcomes(course_name, course_outcomes, duration_weeks, sessions_per_week):
    """Break course outcomes into modules with Module Learning Outcomes (MLOs).

    Args:
        course_name: Name of the course; interpolated into the prompt.
        course_outcomes: Course Learning Outcomes, interpolated verbatim
            into the prompt (the prompt describes them as JSON).
        duration_weeks: Length of the course in weeks.
        sessions_per_week: Number of sessions held each week.

    Returns:
        The raw JSON text returned by the model, or ``None`` when that text
        is not valid JSON (the decoding error is printed).
    """
    total_sessions = duration_weeks * sessions_per_week
    prompt = f"""
You are an expert educational AI assistant specializing in curriculum design and instructional planning. Your task is to break down the provided Course Learning Outcomes (CLOs) for the course {course_name} into logically structured modules and corresponding Module Learning Outcomes (MLOs). The structure must fit exactly within {duration_weeks} weeks with {total_sessions} total sessions ({sessions_per_week} sessions per week). Each module will be designed to align with specific CLOs and distribute content evenly across the available sessions.

Here are the Course Learning Outcomes (CLOs) for the course {course_name} in JSON format:
{course_outcomes}

Please generate the module structure in JSON format following these specifications:

1. Break the CLOs into logically grouped **modules**, ensuring that each module has a clear focus and progresses from foundational to advanced concepts.
2. Each module must include:
- A **module title** summarizing its focus.
- A list of aligned CLOs that are covered within the module.
- Module Learning Outcomes (MLOs) that are measurable and actionable, aligned with the CLOs.
- The number of sessions allocated to the module (module_duration_sessions), such that the total sessions across all modules sum up to {total_sessions}.
3. Ensure that the module_duration_sessions are evenly distributed while allowing for some variation based on the complexity of the module.
4. Progressively distribute content so that earlier modules cover foundational concepts, and later modules cover advanced topics.
5. The number of sessions allocated to each module must reflect the relative depth and complexity of its content.
6. Ensure all modules fit within {duration_weeks} weeks and {sessions_per_week} sessions per week.

The JSON response should follow this structure:
{{
"course_title": "string",
"course_description": "string",
"total_duration_weeks": {duration_weeks},
"sessions_per_week": {sessions_per_week},
"total_sessions": {total_sessions},
"modules": [
{{
"module_title": "string",
"module_duration_sessions": number,
"aligned_CLOs": ["CLO1", "CLO2", ...],
"module_learning_outcomes": [
{{
"outcome_number": "MLO + number",
"outcome_description": "string",
"aligned_blooms_taxonomy_level": "string"
}}
]
}}
]
}}

Ensure that:
1. The sum of all module_duration_sessions equals exactly {total_sessions}.
2. Each MLO is aligned with its respective CLOs and measurable within the allocated sessions.
3. Modules are well-distributed and follow a logical progression of topics.
4. **This Instruction is Strictly followed: DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
"""
    response = model.generate_content(
        prompt,
        generation_config=genai.GenerationConfig(
            response_mime_type="application/json"
        )
    )
    try:
        # Decode only to validate; callers receive the raw text.
        json.loads(response.text)
    except json.JSONDecodeError as e:
        print("Error decoding Modules JSON response:", e)
        return None
    return response.text
|
152 |
+
|
153 |
+
def generate_submodule_outcomes(course_name, course_outcomes, module_outcomes, duration_weeks, sessions_per_week):
    """Break modules into submodules with Submodule Learning Outcomes (SMLOs).

    Args:
        course_name: Name of the course; interpolated into the prompt.
        course_outcomes: Course Learning Outcomes, interpolated verbatim.
        module_outcomes: Module Learning Outcomes, interpolated verbatim.
        duration_weeks: Unused; kept for interface compatibility.
        sessions_per_week: Unused; kept for interface compatibility.

    Returns:
        The raw text returned by the model, or ``None`` when
        ``parse_model_response`` cannot parse it (the error is printed).
    """
    prompt = f"""
You are an expert educational AI assistant specializing in instructional design. Your task is to further break down each module from the given course structure into submodules. Each submodule will cover specific concepts or topics within the module, along with corresponding Submodule Learning Outcomes (SMLOs).

Here are the Course Learning Outcomes (CLOs) for the course {course_name}:
{course_outcomes}
and the Module Learning Outcomes (MLOs) for each module:
{module_outcomes}

Please follow these guidelines for creating submodules and SMLOs:

1. For each module, create 2-3 submodules depending on its scope and duration.
2. Assign each submodule a clear, concise title summarizing its focus.
3. Each submodule must align with at least one Module Learning Outcome (MLO) and, by extension, its parent CLO(s).
4. For each submodule, define 1-2 Submodule Learning Outcomes (SMLOs) that are measurable, actionable, and aligned with Bloom's Taxonomy.
5. Distribute the total allocated sessions (module_duration_sessions) evenly among submodules, allowing slight variations for complex topics.
6. Ensure that submodules progress logically within the module, starting with foundational concepts and advancing to more complex topics.
7. Align the submodules with the total sessions allocated to the module to ensure they fit within the course timeline.

The JSON response should follow this structure:
{{
"module_title": "string",
"submodules": [
{{
"submodule_title": "string",
"submodule_duration_sessions": number,
"aligned_MLOs": ["MLO1", "MLO2", ...],
"submodule_learning_outcomes": [
{{
"outcome_number": "SMLO + number",
"outcome_description": "string",
"aligned_blooms_taxonomy_level": "string"
}}
]
}}
]
}}

Ensure that:
1. The sum of all submodule_duration_sessions within a module equals the module's allocated sessions (module_duration_sessions).
2. SMLOs are specific, measurable, and actionable, aligning with their respective MLOs and CLOs.
3. Submodules are logically ordered, with earlier submodules focusing on foundational concepts and later ones covering advanced topics.
4. **This Instruction is Strictly followed: DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
"""
    response = model.generate_content(
        prompt,
        generation_config=genai.GenerationConfig(
            response_mime_type="application/json"
        )
    )
    try:
        # Parse only to validate; callers receive the raw text.
        parse_model_response(response.text)
    except ValueError as e:
        # parse_model_response signals failure with ValueError;
        # json.JSONDecodeError is a subclass, so it is covered as well.
        print("Error decoding Submodules JSON response:", e)
        return None
    return response.text
|
208 |
+
|
209 |
+
import json
|
210 |
+
import ast
|
211 |
+
import re
|
212 |
+
import time
|
213 |
+
def parse_model_response(response_text):
    """Parse a model reply into a dict or list, tolerating common formatting noise.

    Markdown code fences are stripped first. Several strategies are then
    tried in order: strict JSON, a Python literal, the outermost ``{...}``
    span, the outermost ``[...]`` span, and finally JSON with single quotes
    swapped for double quotes. The first strategy that yields a non-empty
    dict or list wins.

    Args:
        response_text (str): Raw response text from the model

    Returns:
        dict or list: Parsed, non-empty response object

    Raises:
        ValueError: If ``response_text`` is not a string, or no strategy
            produces a non-empty dict or list
    """
    if not isinstance(response_text, str):
        raise ValueError(
            "Could not parse the model's response: expected a string, "
            f"got {type(response_text).__name__}"
        )

    # Remove markdown formatting and whitespace
    cleaned_text = re.sub(r'```[a-zA-Z]*\n', '', response_text)
    cleaned_text = cleaned_text.replace('```', '').strip()

    def _parse_embedded(pattern, text):
        # Parse the widest span matching pattern, ignoring surrounding prose.
        match = re.search(pattern, text, re.DOTALL)
        if match is None:
            raise ValueError(f"no text matching {pattern!r}")
        return json.loads(match.group())

    # Try multiple parsing methods
    parsing_methods = [
        # Method 1: Direct JSON parsing
        json.loads,

        # Method 2: AST literal evaluation
        ast.literal_eval,

        # Method 3: Extract and parse content between curly braces
        lambda x: _parse_embedded(r'\{.*\}', x),

        # Method 4: Extract and parse content between square brackets
        lambda x: _parse_embedded(r'\[.*\]', x),

        # Method 5: Fix single-quoted JSON. strict=False accepts raw newlines
        # inside strings without corrupting newlines between tokens.
        lambda x: json.loads(x.replace("'", '"'), strict=False),
    ]

    last_error = None
    for parse_method in parsing_methods:
        try:
            result = parse_method(cleaned_text)
        except Exception as e:
            # Deliberately broad: each strategy is best-effort and the next
            # one may still succeed.
            last_error = e
            continue
        # Ensure we have actual content of the documented shape
        if isinstance(result, (dict, list)) and result:
            return result
        last_error = ValueError(
            f"parsed value is empty or not a dict/list ({type(result).__name__})"
        )

    raise ValueError(f"Could not parse the model's response: {last_error}")
|
259 |
+
|
260 |
+
def extract_session_titles_concepts(session_data):
    """Extracts session titles and key concepts from the session data.

    Args:
        session_data (dict or list): Parsed JSON data containing session
            information: either one object with a ``"submodules"`` list, or
            a list of such objects. Any other value, null containers, and
            non-dict entries are skipped instead of raising.

    Returns:
        list: List of dictionaries with session titles (``"title"``) and
        key concepts (``"key_concepts"``), in input order
    """
    def _as_list(value):
        # Model output may carry null or a scalar where a list is expected.
        return value if isinstance(value, list) else []

    if isinstance(session_data, dict):
        containers = [session_data]
    else:
        containers = _as_list(session_data)

    session_info = []
    for container in containers:
        if not isinstance(container, dict):
            continue
        for submodule in _as_list(container.get("submodules")):
            if not isinstance(submodule, dict):
                continue
            for session in _as_list(submodule.get("sessions")):
                if not isinstance(session, dict):
                    continue
                session_info.append({
                    "title": session.get("session_title", ""),
                    "key_concepts": session.get("key_concepts", [])
                })
    return session_info
|
277 |
+
|
278 |
+
def generate_session_outcomes(course_name, course_outcomes, module_outcomes, submodule_outcomes, duration_weeks, sessions_per_week):
    """Ask the model for Session-Level Learning Outcomes (SLOs) as JSON text.

    Args:
        course_name: Course title interpolated into the prompt.
        course_outcomes: Course-level outcomes (CLOs) interpolated into the prompt.
        module_outcomes: Module-level outcomes (MLOs) interpolated into the prompt.
        submodule_outcomes: Submodule-level outcomes (SMLOs) interpolated into the prompt.
        duration_weeks: Accepted for signature parity; not used by this function.
        sessions_per_week: Accepted for signature parity; not used by this function.

    Returns:
        str | None: The raw response text when ``parse_model_response`` accepts
        it, otherwise ``None`` after printing the parse error.
    """
    prompt = f"""
    You are an expert educational AI assistant specializing in instructional design and assessment. Your task is to create highly focused and measurable Session-Level Learning Outcomes (SLOs) that are aligned with their parent Submodule Learning Outcomes (SMLOs) and ready to serve as the foundation for rubric-based evaluations.

    ### Context:
    Course Name: {course_name}
    Course Outcomes (CLOs): {course_outcomes}
    Module Outcomes (MLOs): {module_outcomes}
    Submodule Outcomes (SMLOs): {submodule_outcomes}

    ### Instructions for SLO Generation:
    1. **For Each Submodule**: Break down its allocated sessions (submodule_duration_sessions) into Session-Level Learning Outcomes (SLOs) that:
       - Are immediately actionable, measurable, and achievable within a single session (60-90 minutes).
       - Are directly aligned with their parent SMLO, contributing to its achievement.
       - Include observable behaviors or outputs using **action-oriented verbs** from Bloom's Taxonomy (e.g., analyze, demonstrate, create, justify).
       - Are specific and detailed enough to support rubric development.

    2. **Structure for Each Session**:
       - **Session Title**: A concise and clear session title that captures its focus.
       - **Prerequisites**: Any prerequisite knowledge or skills required.
       - **Key Concepts**: Specific concepts or skills that will be covered.
       - **Session Learning Outcomes (SLOs)**: Include 2-3 outcomes that:
         - Define precise tasks or objectives for the session.
         - Specify the expected level of understanding, skill, or performance.
         - Directly support the parent SMLO while promoting progressive learning.

    3. **Progressive Learning**: Ensure that earlier sessions address foundational knowledge, while later sessions build on this foundation, leading to higher-order skills and integration of concepts.

    4. **Output Format**: Ensure the output follows this strict JSON structure:

    "submodules": [
        {{
            "submodule_title": "string",
            "sessions": [
                {{
                    "session_number": number,
                    "session_title": "string",
                    "prerequisites": ["string"],
                    "key_concepts": ["string"],
                    "session_learning_outcomes": [
                        {{
                            "outcome_number": "SLO + number",
                            "outcome_description": "string",
                            "aligned_smlo": "SMLO + number",
                            "bloom_taxonomy_level": "string"
                        }}
                    ]
                }}
            ]
        }}
    ]

    ### Example of Rubric-Ready SLOs:
    - **Poor Example**: "Understand agile methodologies."
    - **Good Example**: "Identify and describe the key principles of the Agile Manifesto, providing examples of how each principle applies to software development."

    **Important Instructions**:
    1. Ensure every SLO can be directly translated into rubric criteria (e.g., clarity, accuracy, application).
    2. **DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT USE BACKTICKS (```), AND DO NOT INCLUDE ANY OTHER TEXT EXCEPT THE JSON RESPONSE. START WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.**
    3. Make sure every SLO is relevant to its parent SMLO.
    """

    response = model.generate_content(
        prompt,
        generation_config=genai.GenerationConfig(
            response_mime_type="application/json"
        )
    )
    try:
        # Validation only: the parsed value is discarded and the raw text is
        # returned, so callers keep receiving a string.
        parse_model_response(response.text)
        return response.text
    except ValueError as e:
        # parse_model_response raises a plain ValueError when every parsing
        # strategy fails; json.JSONDecodeError is a ValueError subclass, so
        # this clause covers both, whereas catching only JSONDecodeError let
        # the parse failure escape.
        print("Error decoding Session Learning Outcomes JSON response:", e)
        return None
|
351 |
+
|
352 |
+
|
353 |
+
def merge_course_structure(cos, module_outcomes, submodules_los, sample_sessions_se):
    """Nest sessions into submodules, submodules into modules, and modules into the course.

    Matching is done by exact title: ``submodule_title`` links sessions to
    submodules and ``module_title`` links submodules to modules. Entries with
    no match are left without the corresponding key.

    Note: the merge is performed in place. The submodule dicts inside
    ``submodules_los``, the module dicts inside ``module_outcomes`` and ``cos``
    itself are all mutated, and the returned object is ``cos``.

    Args:
        cos (dict): Course-level data; receives the ``'modules'`` key.
        module_outcomes (dict): Must contain ``'modules'``, each with ``'module_title'``.
        submodules_los (dict): Must contain ``'modules'``, each with
            ``'module_title'`` and ``'submodules'``.
        sample_sessions_se (dict): Must contain ``'submodules'``, each with
            ``'submodule_title'`` and ``'sessions'``.

    Returns:
        dict: ``cos`` with the merged ``'modules'`` hierarchy attached.

    Raises:
        KeyError: If any of the required keys above is missing.
    """
    sessions_by_submodule = {
        submodule['submodule_title']: submodule['sessions']
        for submodule in sample_sessions_se['submodules']
    }
    submodules_by_module = {
        module['module_title']: module['submodules']
        for module in submodules_los['modules']
    }

    for module in module_outcomes['modules']:
        module_title = module['module_title']
        if module_title not in submodules_by_module:
            continue
        submodules = submodules_by_module[module_title]
        for submodule in submodules:
            submodule_title = submodule['submodule_title']
            if submodule_title in sessions_by_submodule:
                submodule['sessions'] = sessions_by_submodule[submodule_title]
        module['submodules'] = submodules

    cos['modules'] = module_outcomes['modules']
    return cos
|
386 |
+
|
387 |
+
def generate_session_resources(api_key, course_title, session_titles: List[str]):
    """
    Request readings, videos and reference books for one batch of sessions.

    Builds a curation prompt for ``course_title`` and ``session_titles`` and
    sends it as a single chat-completion request through an ``OpenAI`` client
    pointed at the Perplexity endpoint.

    Returns the raw message content of the first choice (not parsed here), or
    ``None`` after reporting the failure through ``st.error`` when anything in
    the request raises.
    """
    resources_prompt = f"""
    You are an expert educational content curator with deep knowledge of instructional design and high-quality resource selection. Your task is to provide session-specific learning resources and course-level reference books for the course: {course_title}.

    Guidelines for Resource Curation:
    1. For each session, suggest **highly relevant and accurate learning resources** based on the session title and key concepts provided.
    2. For the course as a whole, provide at most two **top reference books** that comprehensively cover the course objectives, including both academic and practical perspectives.
    3. Resources can include:
       - **Web articles or blogs** (ensure they are from authoritative and credible sources)
       - **Videos** (e.g., YouTube or other educational platforms)
       - **PDFs, PPTs, or other downloadable formats**
       - **Official documentation** for tools, platforms, or technologies
    4. Provide **multiple resources per session**, tailored to the topic's depth and complexity. Collectively, the number should not exceed 3.
    5. **IMPORTANT: MAKE SURE READINGS AND VIDEOS ARE GIVEN SEPARATELY. READINGS SHOULD NOT CONTAIN VIDEOS, IT SHOULD ONLY CONTAIN READING MATERIAL AND VICE-VERSA FOR VIDEOS**
    6. Ensure all URLs are **active and accessible**. Resources must be up-to-date, and links should work reliably.
    7. Reference books for the course should be **real, recently published works** and relevant to the course-level outcomes.

    Output Format:
    {{
        "course_reference_books": [
            {{
                "title": "string",
                "author": "string",
                "publisher": "string",
                "year": number,
                "description": "string"
            }}
        ],
        "session_resources": [
            {{
                "session_title": "string",
                "resources": {{
                    "readings": [
                        {{
                            "title": "string",
                            "url": "string",
                            "type": "string",
                            "estimated_read_time": "string"
                        }}
                    ],
                    "videos": [
                        {{
                            "title": "string",
                            "url": "string",
                            "type": "string",
                            "duration": "string"
                        }}
                    ]
                }}
            }}
        ]
    }}

    Additional Instructions:
    - Ensure **property names are enclosed in double quotes (")** and values are properly formatted.
    - Reference books should include **a brief description** to explain why they are relevant to the course.
    - Responses should be concise, structured, and focused exclusively on the requested information.
    - ***IMPORTANT: DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.***

    Here are the session titles and key concepts for which you need to generate resources: {session_titles}.
    """

    system_message = {
        "role": "system",
        "content": "You are an expert educational content curator, focused on providing accurate and relevant learning resources.",
    }
    user_message = {
        "role": "user",
        "content": resources_prompt
    }

    try:
        perplexity = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
        completion = perplexity.chat.completions.create(
            model="llama-3.1-sonar-small-128k-online",
            messages=[system_message, user_message]
        )
        content = completion.choices[0].message.content
        print("Response is: \n", content)
        # The raw text is returned unparsed; decoding is left to the caller.
        return content
    except Exception as e:
        st.error(f"Failed to generate resources: {e}")
        return None
|
479 |
+
|
480 |
+
def generate_resources_by_titles_chunking(
    session_titles,
    course_title,
    chunk_size=10,
    output_file_path='sample_files/session_resources2.json',
    pause_seconds=2,
):
    """Generate session resources in batches and merge them into one dict.

    ``session_titles`` is split into batches of ``chunk_size``; each batch is
    sent to ``generate_session_resources`` and the reply is parsed with
    ``parse_model_response``. A batch that fails or cannot be parsed is
    reported and skipped instead of aborting the run.

    Args:
        session_titles (list): Session titles to generate resources for.
        course_title (str): Course title passed to the resource generator.
        chunk_size (int): Number of titles per request (must be positive).
        output_file_path (str): Where the merged result is written as JSON.
        pause_seconds (float): Sleep after each request.

    Returns:
        dict: ``{"course_reference_books": [...], "session_resources": [...]}``.
        Reference books come from the first batch that supplies any and
        default to an empty list.
    """
    import os  # local import: only needed to create the output directory

    def chunk_list(lst, size):
        for start in range(0, len(lst), size):
            yield lst[start:start + size]

    all_session_resources = []
    course_reference_books = []

    for chunk in chunk_list(session_titles, chunk_size):
        raw_chunk = generate_session_resources(PERPLEXITY_API_KEY, course_title, chunk)

        # The generator returns raw text (or None on failure). Parse it before
        # any key lookup: `in` on a str is a substring test and str has no .get.
        parsed_chunk = None
        if isinstance(raw_chunk, dict):
            parsed_chunk = raw_chunk
        elif raw_chunk:
            try:
                parsed_chunk = parse_model_response(raw_chunk)
            except ValueError as e:
                print(f"Could not parse session resources chunk: {e}")

        if isinstance(parsed_chunk, dict):
            if "session_resources" in parsed_chunk:
                all_session_resources.extend(parsed_chunk["session_resources"] or [])
            else:
                print("Some problem occured. Session resources chunk:", raw_chunk)
            # Keep the first non-empty book list, so a failed first batch does
            # not leave the course without reference books.
            if not course_reference_books:
                course_reference_books = parsed_chunk.get("course_reference_books") or []
        elif raw_chunk:
            print("Some problem occured. Session resources chunk:", raw_chunk)

        time.sleep(pause_seconds)

    complete_session_resources = {
        "course_reference_books": course_reference_books,
        "session_resources": all_session_resources
    }

    # Saving is best-effort: a missing directory or unserialisable value is
    # reported but must not discard the resources already generated.
    try:
        output_dir = os.path.dirname(output_file_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_file_path, 'w') as outfile:
            json.dump(complete_session_resources, outfile, indent=4)
    except (OSError, TypeError, ValueError) as e:
        print(f"Failed to save session resources to file: {e}")

    print("Type of complete_session_resources:", type(complete_session_resources))
    print("Content of complete_session_resources:", complete_session_resources)

    return complete_session_resources
|
525 |
+
|
526 |
+
if __name__ == "__main__":
    # Demo run: generate resources for the bundled Software Engineering sample
    # sessions only.
    #
    # The full generation pipeline is currently disabled. When re-enabled it
    # runs, in order (intermediate results can instead be loaded from
    # sample_files/cos.json, module_outcomes.json and submodules_los.json):
    #   course_outcomes = generate_course_outcomes(GEMINI_API_KEY, course_name, duration_weeks, sessions_per_week)
    #   module_outcomes = generate_module_outcomes(course_name, course_outcomes, duration_weeks, sessions_per_week)
    #   submodules_outcomes = generate_submodule_outcomes(course_name, course_outcomes, module_outcomes, duration_weeks, sessions_per_week)
    #   session_outcomes = generate_session_outcomes(course_name, course_outcomes, module_outcomes, submodules_outcomes, duration_weeks, sessions_per_week)
    #   course_structure = merge_course_structure(course_outcomes, module_outcomes, submodules_outcomes, session_outcomes)
    #   (then dump course_structure to sample_files/course_structure2.json)
    with open('sample_files/sample_sessions_se.json', 'r') as sessions_file:
        session_data = json.load(sessions_file)

    session_titles_concepts = extract_session_titles_concepts(session_data)
    print(session_titles_concepts)

    # Only the titles are forwarded to the resource generator.
    session_titles = [entry["title"] for entry in session_titles_concepts]

    resources = generate_resources_by_titles_chunking(session_titles, "Software Engineering")
    print(resources)
|
606 |
+
|
607 |
+
|
608 |
+
|
609 |
+
|
db.py
CHANGED
@@ -1,696 +1,696 @@
|
|
1 |
-
# Setup for MongoDB
|
2 |
-
from pymongo import MongoClient
|
3 |
-
from datetime import datetime
|
4 |
-
from werkzeug.security import generate_password_hash
|
5 |
-
import os
|
6 |
-
from dotenv import load_dotenv
|
7 |
-
|
8 |
-
# Load environment variables and open the shared MongoDB connection at import time.
load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")
if not MONGO_URI:
    # MongoClient(None) does not fail: it falls back to PyMongo's default
    # host, so a missing variable would otherwise go unnoticed.
    print("Warning: MONGO_URI is not set; MongoClient will use its default host")

client = MongoClient(MONGO_URI)
try:
    # MongoClient connects lazily; the ping forces a round trip so that
    # connection problems are reported here rather than on first query.
    client.admin.command("ping")
    print("MongoDB connection successful")
except Exception as e:
    # Best-effort check: the failure is reported but import continues.
    print(f"MongoDB connection failed: {e}")

db = client["novascholar_db"]
|
19 |
-
|
20 |
-
########
|
21 |
-
# Research Assistant Schema
|
22 |
-
# JSON-schema style description of a research-assistant document.
research_assistant_schema = {
    "bsonType": "object",
    "required": ["full_name", "password", "email", "courses_assisted"],
    "properties": {
        "full_name": {"bsonType": "string", "description": "Full name of the research assistant"},
        "password": {"bsonType": "string", "description": "Hashed password of the research assistant"},
        "email": {"bsonType": "string", "description": "Email address of the research assistant"},
        "courses_assisted": {
            "bsonType": "array",
            "description": "List of courses the research assistant is assisting",
            "items": {
                "bsonType": "object",
                "required": ["course_id"],
                "properties": {
                    "course_id": {"bsonType": "string", "description": "ID of the course"},
                },
            },
        },
    },
}

# Collection handle for research assistants.
research_assistants_collection = db["research_assistants"]

# Both the full name and the email must be unique across research assistants.
research_assistants_collection.create_index("full_name", unique=True)
research_assistants_collection.create_index("email", unique=True)
|
61 |
-
|
62 |
-
|
63 |
-
# Optional: Sample data insertion function
|
64 |
-
def insert_sample_research_assistants():
    """Insert one demo research-assistant document and print the outcome.

    Any exception raised by the insert is caught and printed rather than
    propagated.
    """
    demo_assistant = {
        "full_name": "John Doe RA",
        "password": generate_password_hash("password123"),
        "email": "[email protected]",
        "courses_assisted": [{"course_id": course_id} for course_id in ("CS101", "CS102")],
    }

    try:
        research_assistants_collection.insert_many([demo_assistant])
    except Exception as e:
        print(f"Error inserting sample research assistants: {e}")
    else:
        print("Sample research assistants inserted successfully!")
|
79 |
-
|
80 |
-
|
81 |
-
###########
|
82 |
-
|
83 |
-
###############
|
84 |
-
# Add after research assistant schema
|
85 |
-
|
86 |
-
# Analyst Schema
|
87 |
-
# JSON-schema style description of an analyst document.
analyst_schema = {
    "bsonType": "object",
    "required": ["full_name", "password", "email", "courses_analyzed"],
    "properties": {
        "full_name": {
            "bsonType": "string",
            "description": "Full name of the analyst",
        },
        "password": {"bsonType": "string", "description": "Hashed password of the analyst"},
        "email": {
            "bsonType": "string",
            "description": "Email address of the analyst",
        },
        "courses_analyzed": {
            "bsonType": "array",
            "description": "List of courses the analyst is analyzing",
            "items": {
                "bsonType": "object",
                "required": ["course_id"],
                "properties": {
                    "course_id": {"bsonType": "string", "description": "ID of the course"},
                },
            },
        },
    },
}

# Collection handle for analysts.
analysts_collection = db["analysts"]

# Both the full name and the email must be unique across analysts.
analysts_collection.create_index("full_name", unique=True)
analysts_collection.create_index("email", unique=True)
|
120 |
-
|
121 |
-
|
122 |
-
def insert_sample_analysts():
    """Insert one demo analyst document and print the outcome.

    Any exception raised by the insert is caught and printed rather than
    propagated.
    """
    demo_analyst = {
        "full_name": "jane",
        "password": generate_password_hash("jane"),
        "email": "[email protected]",
        "courses_analyzed": [{"course_id": course_id} for course_id in ("CS101", "CS102")],
    }

    try:
        analysts_collection.insert_many([demo_analyst])
    except Exception as e:
        print(f"Error inserting sample analysts: {e}")
    else:
        print("Sample analysts inserted successfully!")
|
137 |
-
|
138 |
-
|
139 |
-
##############@
|
140 |
-
|
141 |
-
|
142 |
-
# Define the course schema
|
143 |
-
# JSON-schema style description of a course document: course metadata plus a
# list of sessions, each split into pre-class, in-class and post-class data.
course_schema = {
    "bsonType": "object",
    "required": [
        "course_id",
        "title",
        "description",
        "faculty",
        "faculty_id",
        "duration",
        "created_at",
    ],
    "properties": {
        "course_id": {
            "bsonType": "string",
            "description": "Unique identifier for the course",
        },
        "title": {"bsonType": "string", "description": "Title of the course"},
        "description": {
            "bsonType": "string",
            "description": "Description of the course",
        },
        "faculty": {"bsonType": "string", "description": "Name of the faculty"},
        # Listed in "required" above, so it gets a property definition like
        # every other required field.
        "faculty_id": {
            "bsonType": "string",
            "description": "Unique identifier for the faculty",
        },
        "duration": {"bsonType": "string", "description": "Duration of the course"},
        "created_at": {
            "bsonType": "date",
            "description": "Date when the course was created",
        },
        "sessions": {
            "bsonType": "array",
            "description": "List of sessions associated with the course",
            "items": {
                "bsonType": "object",
                "required": ["session_id", "title", "date", "status", "created_at"],
                "properties": {
                    "session_id": {
                        "bsonType": "string",
                        "description": "Unique identifier for the session",
                    },
                    "title": {
                        "bsonType": "string",
                        "description": "Title of the session",
                    },
                    "date": {"bsonType": "date", "description": "Date of the session"},
                    "status": {
                        "bsonType": "string",
                        "description": "Status of the session (e.g., completed, upcoming)",
                    },
                    "created_at": {
                        "bsonType": "date",
                        "description": "Date when the session was created",
                    },
                    "pre_class": {
                        "bsonType": "object",
                        "description": "Pre-class segment data",
                        "properties": {
                            "resources": {
                                "bsonType": "array",
                                "description": "List of pre-class resources",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["type", "title", "url"],
                                    "properties": {
                                        "type": {
                                            "bsonType": "string",
                                            "description": "Type of resource (e.g., pdf, video)",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the resource",
                                        },
                                        "url": {
                                            "bsonType": "string",
                                            "description": "URL of the resource",
                                        },
                                        "vector": {
                                            "bsonType": "array",
                                            "description": "Vector representation of the resource",
                                            "items": {"bsonType": "double"},
                                        },
                                    },
                                },
                            },
                            "completion_required": {
                                "bsonType": "bool",
                                "description": "Indicates if completion of pre-class resources is required",
                            },
                        },
                    },
                    "in_class": {
                        "bsonType": "object",
                        "description": "In-class segment data",
                        "properties": {
                            "topics": {
                                "bsonType": "array",
                                "description": "List of topics covered in the session",
                                "items": {"bsonType": "string"},
                            },
                            "quiz": {
                                "bsonType": "object",
                                "description": "Quiz data",
                                "properties": {
                                    "title": {
                                        "bsonType": "string",
                                        "description": "Title of the quiz",
                                    },
                                    "questions": {
                                        "bsonType": "int",
                                        "description": "Number of questions in the quiz",
                                    },
                                    "duration": {
                                        "bsonType": "int",
                                        "description": "Duration of the quiz in minutes",
                                    },
                                },
                            },
                            "polls": {
                                "bsonType": "array",
                                "description": "List of polls conducted during the session",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["question", "options"],
                                    "properties": {
                                        "question": {
                                            "bsonType": "string",
                                            "description": "Poll question",
                                        },
                                        "options": {
                                            "bsonType": "array",
                                            "description": "List of poll options",
                                            "items": {"bsonType": "string"},
                                        },
                                        "responses": {
                                            "bsonType": "object",
                                            "description": "Responses to the poll",
                                            "additionalProperties": {"bsonType": "int"},
                                        },
                                    },
                                },
                            },
                        },
                    },
                    "post_class": {
                        "bsonType": "object",
                        "description": "Post-class segment data",
                        "properties": {
                            "assignments": {
                                "bsonType": "array",
                                "description": "List of assignments",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["id", "title", "due_date", "status"],
                                    "properties": {
                                        "id": {
                                            "bsonType": "int",
                                            "description": "Assignment ID",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the assignment",
                                        },
                                        "due_date": {
                                            "bsonType": "date",
                                            "description": "Due date of the assignment",
                                        },
                                        "status": {
                                            "bsonType": "string",
                                            "description": "Status of the assignment (e.g., pending, completed)",
                                        },
                                        "submissions": {
                                            "bsonType": "array",
                                            "description": "List of submissions",
                                            "items": {
                                                "bsonType": "object",
                                                "required": [
                                                    "student_id",
                                                    "file_url",
                                                    "submitted_at",
                                                ],
                                                "properties": {
                                                    "student_id": {
                                                        "bsonType": "string",
                                                        "description": "ID of the student who submitted the assignment",
                                                    },
                                                    "file_url": {
                                                        "bsonType": "string",
                                                        "description": "URL of the submitted file",
                                                    },
                                                    "submitted_at": {
                                                        "bsonType": "date",
                                                        "description": "Date when the assignment was submitted",
                                                    },
                                                },
                                            },
                                        },
                                    },
                                },
                            }
                        },
                    },
                },
            },
        },
    },
}
|
347 |
-
|
348 |
-
# Create the collection with the schema
|
349 |
-
# db.create_collection("courses_collection2", validator={"$jsonSchema": course_schema})
|
350 |
-
|
351 |
-
# sample_course = {
|
352 |
-
# "course_id": "CS101",
|
353 |
-
# "title": "Introduction to Computer Science",
|
354 |
-
# "description": "This course covers the basics of computer science and programming.",
|
355 |
-
# "faculty": "Dr. John Doe",
|
356 |
-
# "faculty_id": "F101",
|
357 |
-
# "duration": "10 weeks",
|
358 |
-
# "created_at": datetime.utcnow(),
|
359 |
-
# "sessions": [
|
360 |
-
# {
|
361 |
-
# "session_id": "S101",
|
362 |
-
# "title": "Introduction to Programming Fundamentals",
|
363 |
-
# "date": datetime.utcnow() - timedelta(days=7),
|
364 |
-
# "status": "completed",
|
365 |
-
# "created_at": datetime.utcnow() - timedelta(days=7),
|
366 |
-
# "pre_class": {
|
367 |
-
# "resources": [
|
368 |
-
# {
|
369 |
-
# "type": "pdf",
|
370 |
-
# "title": "Introduction to Python Basics",
|
371 |
-
# "url": "/assets/python_basics.pdf",
|
372 |
-
# "vector": [0.1, 0.2, 0.3] # Example vector
|
373 |
-
# }
|
374 |
-
# ],
|
375 |
-
# "completion_required": True
|
376 |
-
# },
|
377 |
-
# "in_class": {
|
378 |
-
# "topics": ["Variables", "Data Types", "Basic Operations"],
|
379 |
-
# "quiz": {
|
380 |
-
# "title": "Python Basics Quiz",
|
381 |
-
# "questions": 5,
|
382 |
-
# "duration": 15
|
383 |
-
# },
|
384 |
-
# "polls": [
|
385 |
-
# {
|
386 |
-
# "question": "How comfortable are you with Python syntax?",
|
387 |
-
# "options": ["Very", "Somewhat", "Not at all"],
|
388 |
-
# "responses": {"Very": 10, "Somewhat": 5, "Not at all": 2}
|
389 |
-
# }
|
390 |
-
# ]
|
391 |
-
# },
|
392 |
-
# "post_class": {
|
393 |
-
# "assignments": [
|
394 |
-
# {
|
395 |
-
# "id": 1,
|
396 |
-
# "title": "Basic Python Programs",
|
397 |
-
# "due_date": datetime.utcnow() + timedelta(days=2),
|
398 |
-
# "status": "pending",
|
399 |
-
# "submissions": []
|
400 |
-
# }
|
401 |
-
# ]
|
402 |
-
# }
|
403 |
-
# },
|
404 |
-
# {
|
405 |
-
# "session_id": "S102",
|
406 |
-
# "title": "Control Flow and Functions",
|
407 |
-
# "date": datetime.utcnow() - timedelta(days=3),
|
408 |
-
# "status": "completed",
|
409 |
-
# "created_at": datetime.utcnow() - timedelta(days=3),
|
410 |
-
# "pre_class": {
|
411 |
-
# "resources": [
|
412 |
-
# {
|
413 |
-
# "type": "pdf",
|
414 |
-
# "title": "Control Flow in Python",
|
415 |
-
# "url": "/assets/control_flow.pdf",
|
416 |
-
# "vector": [0.4, 0.5, 0.6] # Example vector
|
417 |
-
# }
|
418 |
-
# ],
|
419 |
-
# "completion_required": True
|
420 |
-
# },
|
421 |
-
# "in_class": {
|
422 |
-
# "topics": ["If-else statements", "Loops", "Function definitions"],
|
423 |
-
# "quiz": {
|
424 |
-
# "title": "Control Flow Quiz",
|
425 |
-
# "questions": 8,
|
426 |
-
# "duration": 20
|
427 |
-
# },
|
428 |
-
# "polls": [
|
429 |
-
# {
|
430 |
-
# "question": "Which loop type do you find more intuitive?",
|
431 |
-
# "options": ["For loops", "While loops", "Both"],
|
432 |
-
# "responses": {"For loops": 12, "While loops": 8, "Both": 10}
|
433 |
-
# }
|
434 |
-
# ]
|
435 |
-
# },
|
436 |
-
# "post_class": {
|
437 |
-
# "assignments": [
|
438 |
-
# {
|
439 |
-
# "id": 2,
|
440 |
-
# "title": "Function Implementation Exercise",
|
441 |
-
# "due_date": datetime.utcnow() + timedelta(days=4),
|
442 |
-
# "status": "pending",
|
443 |
-
# "submissions": []
|
444 |
-
# }
|
445 |
-
# ]
|
446 |
-
# }
|
447 |
-
# }
|
448 |
-
# ]
|
449 |
-
# }
|
450 |
-
# Collection handle for course documents.
courses_collection2 = db["courses_collection2"]


# JSON-schema style description of a user document.
users_schema = {
    "bsonType": "object",
    "required": ["user_id", "username", "password", "role", "created_at"],
    "properties": {
        "user_id": {"bsonType": "string", "description": "Unique identifier for the user"},
        "username": {
            "bsonType": "string",
            "description": "Name of the User",
        },
        "password": {
            "bsonType": "string",
            "description": "Password of the user",
        },
        "role": {"bsonType": "string", "description": "Type of user (e.g., student, faculty)"},
        "created_at": {"bsonType": "date", "description": "Date when the user was created"},
    },
}
# The schema is not applied here; to enforce it, create the collection with:
# db.create_collection("users", validator={"$jsonSchema": users_schema})
users_collection = db["users"]
|
477 |
-
|
478 |
-
|
479 |
-
# Defining the Student Collection
|
480 |
-
student_schema = {
|
481 |
-
"bsonType": "object",
|
482 |
-
"required": ["SID", "full_name", "password", "enrolled_courses", "created_at"],
|
483 |
-
"properties": {
|
484 |
-
"SID": {
|
485 |
-
"bsonType": "string",
|
486 |
-
"description": "Unique identifier for the student",
|
487 |
-
},
|
488 |
-
"full_name": {"bsonType": "string", "description": "Full name of the student"},
|
489 |
-
"password": {
|
490 |
-
"bsonType": "string",
|
491 |
-
"description": "Hashed password of the student",
|
492 |
-
},
|
493 |
-
"enrolled_courses": {
|
494 |
-
"bsonType": "array",
|
495 |
-
"description": "List of courses the student is enrolled in",
|
496 |
-
"items": {
|
497 |
-
"bsonType": "object",
|
498 |
-
"required": ["course_id", "title"],
|
499 |
-
"properties": {
|
500 |
-
"course_id": {
|
501 |
-
"bsonType": "string",
|
502 |
-
"description": "Unique identifier for the course",
|
503 |
-
},
|
504 |
-
"title": {
|
505 |
-
"bsonType": "string",
|
506 |
-
"description": "Title of the course",
|
507 |
-
},
|
508 |
-
},
|
509 |
-
},
|
510 |
-
},
|
511 |
-
"created_at": {
|
512 |
-
"bsonType": "date",
|
513 |
-
"description": "Date when the student was created",
|
514 |
-
},
|
515 |
-
},
|
516 |
-
}
|
517 |
-
# Defining the Faculty Collection
|
518 |
-
faculty_schema = {
|
519 |
-
"bsonType": "object",
|
520 |
-
"required": ["TID", "full_name", "password", "courses_taught", "created_at"],
|
521 |
-
"properties": {
|
522 |
-
"TID": {
|
523 |
-
"bsonType": "string",
|
524 |
-
"description": "Unique identifier for the faculty",
|
525 |
-
},
|
526 |
-
"full_name": {"bsonType": "string", "description": "Full name of the faculty"},
|
527 |
-
"password": {
|
528 |
-
"bsonType": "string",
|
529 |
-
"description": "Hashed password of the faculty",
|
530 |
-
},
|
531 |
-
"courses_taught": {
|
532 |
-
"bsonType": "array",
|
533 |
-
"description": "List of courses the faculty is teaching",
|
534 |
-
"items": {
|
535 |
-
"bsonType": "object",
|
536 |
-
"required": ["course_id", "title"],
|
537 |
-
"properties": {
|
538 |
-
"course_id": {
|
539 |
-
"bsonType": "string",
|
540 |
-
"description": "Unique identifier for the course",
|
541 |
-
},
|
542 |
-
"title": {
|
543 |
-
"bsonType": "string",
|
544 |
-
"description": "Title of the course",
|
545 |
-
},
|
546 |
-
},
|
547 |
-
},
|
548 |
-
},
|
549 |
-
"created_at": {
|
550 |
-
"bsonType": "date",
|
551 |
-
"description": "Date when the faculty was created",
|
552 |
-
},
|
553 |
-
},
|
554 |
-
}
|
555 |
-
# Creating the Collections
|
556 |
-
# db.create_collection("students", validator={"$jsonSchema": student_schema})
|
557 |
-
# db.create_collection("faculty", validator={"$jsonSchema": faculty_schema})
|
558 |
-
|
559 |
-
students_collection = db["students"]
|
560 |
-
faculty_collection = db["faculty"]
|
561 |
-
|
562 |
-
# Defining the Vector Collection Schema
|
563 |
-
vector_schema = {
|
564 |
-
"bsonType": "object",
|
565 |
-
"required": ["resource_id", "vector"],
|
566 |
-
"properties": {
|
567 |
-
"resource_id": {
|
568 |
-
"bsonType": "objectId",
|
569 |
-
"description": "Unique identifier for the resource",
|
570 |
-
},
|
571 |
-
"vector": {
|
572 |
-
"bsonType": "array",
|
573 |
-
"description": "Vector representation of the resource",
|
574 |
-
"items": {"bsonType": "double"},
|
575 |
-
},
|
576 |
-
"text": {"bsonType": "string", "description": "Text content of the resource"},
|
577 |
-
"created_at": {
|
578 |
-
"bsonType": "date",
|
579 |
-
"description": "Date when the vector was created",
|
580 |
-
},
|
581 |
-
},
|
582 |
-
}
|
583 |
-
# Creating the Vector Collection
|
584 |
-
# db.create_collection("vectors", validator={"$jsonSchema": vector_schema})
|
585 |
-
vectors_collection = db["vectors"]
|
586 |
-
|
587 |
-
|
588 |
-
# Creating a Chat-History Collection
|
589 |
-
# Creating a Chat-History Collection
|
590 |
-
chat_history_schema = {
|
591 |
-
"bsonType": "object",
|
592 |
-
"required": ["user_id", "session_id", "messages", "timestamp"],
|
593 |
-
"properties": {
|
594 |
-
"user_id": {
|
595 |
-
"bsonType": "objectId",
|
596 |
-
"description": "Unique identifier for the user",
|
597 |
-
},
|
598 |
-
"session_id": {
|
599 |
-
"bsonType": "string",
|
600 |
-
"description": "Identifier for the session",
|
601 |
-
},
|
602 |
-
"timestamp": {
|
603 |
-
"bsonType": "date",
|
604 |
-
"description": "Timestamp when the chat session started",
|
605 |
-
},
|
606 |
-
"messages": {
|
607 |
-
"bsonType": "array",
|
608 |
-
"description": "List of chat messages",
|
609 |
-
"items": {
|
610 |
-
"bsonType": "object",
|
611 |
-
"properties": {
|
612 |
-
"prompt": {
|
613 |
-
"bsonType": "string",
|
614 |
-
"description": "User's question or prompt",
|
615 |
-
},
|
616 |
-
"response": {
|
617 |
-
"bsonType": "string",
|
618 |
-
"description": "Assistant's response",
|
619 |
-
},
|
620 |
-
"timestamp": {
|
621 |
-
"bsonType": "date",
|
622 |
-
"description": "Timestamp of the message",
|
623 |
-
},
|
624 |
-
},
|
625 |
-
},
|
626 |
-
},
|
627 |
-
},
|
628 |
-
}
|
629 |
-
|
630 |
-
# Create the collection with the schema
|
631 |
-
# db.create_collection("chat_history", validator={"$jsonSchema": chat_history_schema})
|
632 |
-
chat_history_collection = db["chat_history"]
|
633 |
-
|
634 |
-
|
635 |
-
# Database setup for Research Assistant
|
636 |
-
# Research Assistant Schema
|
637 |
-
research_assistant_schema = {
|
638 |
-
"bsonType": "object",
|
639 |
-
"required": ["full_name", "password", "email", "courses_assisted"],
|
640 |
-
"properties": {
|
641 |
-
"full_name": {
|
642 |
-
"bsonType": "string",
|
643 |
-
"description": "Full name of the research assistant",
|
644 |
-
},
|
645 |
-
"password": {
|
646 |
-
"bsonType": "string",
|
647 |
-
"description": "Hashed password of the research assistant",
|
648 |
-
},
|
649 |
-
"email": {
|
650 |
-
"bsonType": "string",
|
651 |
-
"description": "Email address of the research assistant",
|
652 |
-
},
|
653 |
-
"courses_assisted": {
|
654 |
-
"bsonType": "array",
|
655 |
-
"description": "List of courses the research assistant is assisting",
|
656 |
-
"items": {
|
657 |
-
"bsonType": "object",
|
658 |
-
"required": ["course_id"],
|
659 |
-
"properties": {
|
660 |
-
"course_id": {
|
661 |
-
"bsonType": "string",
|
662 |
-
"description": "ID of the course",
|
663 |
-
}
|
664 |
-
},
|
665 |
-
},
|
666 |
-
},
|
667 |
-
},
|
668 |
-
}
|
669 |
-
|
670 |
-
# Create research assistants collection
|
671 |
-
research_assistants_collection = db["research_assistants"]
|
672 |
-
|
673 |
-
# Create indexes
|
674 |
-
research_assistants_collection.create_index("full_name", unique=True)
|
675 |
-
research_assistants_collection.create_index("email", unique=True)
|
676 |
-
|
677 |
-
|
678 |
-
# Optional: Sample data insertion function
|
679 |
-
# def insert_sample_research_assistants():
|
680 |
-
# sample_research_assistants = [
|
681 |
-
# {
|
682 |
-
# "full_name": "John Doe RA",
|
683 |
-
# "password": generate_password_hash("password123"),
|
684 |
-
# "email": "[email protected]",
|
685 |
-
# "courses_assisted": [{"course_id": "CS101"}, {"course_id": "CS102"}],
|
686 |
-
# }
|
687 |
-
# ]
|
688 |
-
|
689 |
-
# try:
|
690 |
-
# research_assistants_collection.insert_many(sample_research_assistants)
|
691 |
-
# print("Sample research assistants inserted successfully!")
|
692 |
-
# except Exception as e:
|
693 |
-
# print(f"Error inserting sample research assistants: {e}")
|
694 |
-
|
695 |
-
# if __name__ == "__main__":
|
696 |
-
# insert_sample_analysts()
|
|
|
1 |
+
# Setup for MongoDB
|
2 |
+
from pymongo import MongoClient
|
3 |
+
from datetime import datetime
|
4 |
+
from werkzeug.security import generate_password_hash
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
load_dotenv()
|
9 |
+
MONGO_URI = os.getenv("MONGO_URI")
|
10 |
+
|
11 |
+
client = MongoClient(MONGO_URI)
|
12 |
+
try:
|
13 |
+
client.admin.command("ping")
|
14 |
+
print("MongoDB connection successful")
|
15 |
+
except Exception as e:
|
16 |
+
print(f"MongoDB connection failed: {e}")
|
17 |
+
|
18 |
+
db = client["novascholar_db"]
|
19 |
+
|
20 |
+
########
|
21 |
+
# Research Assistant Schema
|
22 |
+
# $jsonSchema-style description of a research-assistant document.
# All four top-level fields are required; ``courses_assisted`` is an array of
# objects that must each carry a string ``course_id``.
research_assistant_schema = {
    "bsonType": "object",
    "required": ["full_name", "password", "email", "courses_assisted"],
    "properties": {
        # The three scalar fields are all strings and differ only in wording.
        **{
            field: {"bsonType": "string", "description": text}
            for field, text in (
                ("full_name", "Full name of the research assistant"),
                ("password", "Hashed password of the research assistant"),
                ("email", "Email address of the research assistant"),
            )
        },
        "courses_assisted": {
            "bsonType": "array",
            "description": "List of courses the research assistant is assisting",
            "items": {
                "bsonType": "object",
                "required": ["course_id"],
                "properties": {
                    "course_id": {"bsonType": "string", "description": "ID of the course"}
                },
            },
        },
    },
}
|
54 |
+
|
55 |
+
# Create research assistants collection
|
56 |
+
research_assistants_collection = db["research_assistants"]
|
57 |
+
|
58 |
+
# Create indexes
|
59 |
+
research_assistants_collection.create_index("full_name", unique=True)
|
60 |
+
research_assistants_collection.create_index("email", unique=True)
|
61 |
+
|
62 |
+
|
63 |
+
# Optional: Sample data insertion function
|
64 |
+
def insert_sample_research_assistants():
    """Insert one demo research-assistant record into ``research_assistants_collection``.

    The password is hashed with ``generate_password_hash`` before insertion.
    Best-effort: any exception raised while inserting is printed rather than
    propagated.
    """
    demo_record = dict(
        full_name="John Doe RA",
        password=generate_password_hash("password123"),
        email="[email protected]",
        courses_assisted=[{"course_id": code} for code in ("CS101", "CS102")],
    )

    try:
        research_assistants_collection.insert_many([demo_record])
        print("Sample research assistants inserted successfully!")
    except Exception as e:
        print(f"Error inserting sample research assistants: {e}")
|
79 |
+
|
80 |
+
|
81 |
+
###########
|
82 |
+
|
83 |
+
###############
|
84 |
+
# Add after research assistant schema
|
85 |
+
|
86 |
+
# Analyst Schema
|
87 |
+
# $jsonSchema-style description of an analyst document.
# ``courses_analyzed`` is an array of objects that must each carry a string
# ``course_id``.
analyst_schema = {
    "bsonType": "object",
    "required": ["full_name", "password", "email", "courses_analyzed"],
    "properties": {
        # Scalar string fields, in their original order.
        **{
            field: {"bsonType": "string", "description": text}
            for field, text in (
                ("full_name", "Full name of the analyst"),
                ("password", "Hashed password of the analyst"),
                ("email", "Email address of the analyst"),
            )
        },
        "courses_analyzed": {
            "bsonType": "array",
            "description": "List of courses the analyst is analyzing",
            "items": {
                "bsonType": "object",
                "required": ["course_id"],
                "properties": {
                    "course_id": {"bsonType": "string", "description": "ID of the course"}
                },
            },
        },
    },
}
|
113 |
+
|
114 |
+
# Create analysts collection
|
115 |
+
analysts_collection = db["analysts"]
|
116 |
+
|
117 |
+
# Create indexes for analysts
|
118 |
+
analysts_collection.create_index("full_name", unique=True)
|
119 |
+
analysts_collection.create_index("email", unique=True)
|
120 |
+
|
121 |
+
|
122 |
+
def insert_sample_analysts():
    """Insert one demo analyst record into ``analysts_collection``.

    The password is hashed with ``generate_password_hash`` before insertion.
    Best-effort: any exception raised while inserting is printed rather than
    propagated.
    """
    demo_analyst = dict(
        full_name="jane",
        password=generate_password_hash("jane"),
        email="[email protected]",
        courses_analyzed=[{"course_id": code} for code in ("CS101", "CS102")],
    )

    try:
        analysts_collection.insert_many([demo_analyst])
        print("Sample analysts inserted successfully!")
    except Exception as e:
        print(f"Error inserting sample analysts: {e}")
|
137 |
+
|
138 |
+
|
139 |
+
##############@
|
140 |
+
|
141 |
+
|
142 |
+
# Define the course schema
|
143 |
+
# $jsonSchema-style description of a course document.
#
# A course carries its own metadata plus an optional ``sessions`` array. Each
# session may hold three optional segments: ``pre_class`` (resources),
# ``in_class`` (topics, quiz, polls) and ``post_class`` (assignments with
# their submissions).
#
# Every name listed under ``required`` has a matching entry in ``properties``
# so that required fields are also type-checked. ``faculty_id`` was previously
# required but undescribed.
course_schema = {
    "bsonType": "object",
    "required": [
        "course_id",
        "title",
        "description",
        "faculty",
        "faculty_id",
        "duration",
        "created_at",
    ],
    "properties": {
        "course_id": {
            "bsonType": "string",
            "description": "Unique identifier for the course",
        },
        "title": {"bsonType": "string", "description": "Title of the course"},
        "description": {
            "bsonType": "string",
            "description": "Description of the course",
        },
        "faculty": {"bsonType": "string", "description": "Name of the faculty"},
        "faculty_id": {
            "bsonType": "string",
            "description": "Unique identifier for the faculty",
        },
        "duration": {"bsonType": "string", "description": "Duration of the course"},
        "created_at": {
            "bsonType": "date",
            "description": "Date when the course was created",
        },
        "sessions": {
            "bsonType": "array",
            "description": "List of sessions associated with the course",
            "items": {
                "bsonType": "object",
                "required": ["session_id", "title", "date", "status", "created_at"],
                "properties": {
                    "session_id": {
                        "bsonType": "string",
                        "description": "Unique identifier for the session",
                    },
                    "title": {
                        "bsonType": "string",
                        "description": "Title of the session",
                    },
                    "date": {"bsonType": "date", "description": "Date of the session"},
                    "status": {
                        "bsonType": "string",
                        "description": "Status of the session (e.g., completed, upcoming)",
                    },
                    "created_at": {
                        "bsonType": "date",
                        "description": "Date when the session was created",
                    },
                    # --- Pre-class segment: study resources ---
                    "pre_class": {
                        "bsonType": "object",
                        "description": "Pre-class segment data",
                        "properties": {
                            "resources": {
                                "bsonType": "array",
                                "description": "List of pre-class resources",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["type", "title", "url"],
                                    "properties": {
                                        "type": {
                                            "bsonType": "string",
                                            "description": "Type of resource (e.g., pdf, video)",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the resource",
                                        },
                                        "url": {
                                            "bsonType": "string",
                                            "description": "URL of the resource",
                                        },
                                        "vector": {
                                            "bsonType": "array",
                                            "description": "Vector representation of the resource",
                                            "items": {"bsonType": "double"},
                                        },
                                    },
                                },
                            },
                            "completion_required": {
                                "bsonType": "bool",
                                "description": "Indicates if completion of pre-class resources is required",
                            },
                        },
                    },
                    # --- In-class segment: topics, quiz and live polls ---
                    "in_class": {
                        "bsonType": "object",
                        "description": "In-class segment data",
                        "properties": {
                            "topics": {
                                "bsonType": "array",
                                "description": "List of topics covered in the session",
                                "items": {"bsonType": "string"},
                            },
                            "quiz": {
                                "bsonType": "object",
                                "description": "Quiz data",
                                "properties": {
                                    "title": {
                                        "bsonType": "string",
                                        "description": "Title of the quiz",
                                    },
                                    "questions": {
                                        "bsonType": "int",
                                        "description": "Number of questions in the quiz",
                                    },
                                    "duration": {
                                        "bsonType": "int",
                                        "description": "Duration of the quiz in minutes",
                                    },
                                },
                            },
                            "polls": {
                                "bsonType": "array",
                                "description": "List of polls conducted during the session",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["question", "options"],
                                    "properties": {
                                        "question": {
                                            "bsonType": "string",
                                            "description": "Poll question",
                                        },
                                        "options": {
                                            "bsonType": "array",
                                            "description": "List of poll options",
                                            "items": {"bsonType": "string"},
                                        },
                                        # Free-form keys (one per option), each
                                        # mapped to an integer count.
                                        "responses": {
                                            "bsonType": "object",
                                            "description": "Responses to the poll",
                                            "additionalProperties": {"bsonType": "int"},
                                        },
                                    },
                                },
                            },
                        },
                    },
                    # --- Post-class segment: assignments and submissions ---
                    "post_class": {
                        "bsonType": "object",
                        "description": "Post-class segment data",
                        "properties": {
                            "assignments": {
                                "bsonType": "array",
                                "description": "List of assignments",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["id", "title", "due_date", "status"],
                                    "properties": {
                                        "id": {
                                            "bsonType": "int",
                                            "description": "Assignment ID",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the assignment",
                                        },
                                        "due_date": {
                                            "bsonType": "date",
                                            "description": "Due date of the assignment",
                                        },
                                        "status": {
                                            "bsonType": "string",
                                            "description": "Status of the assignment (e.g., pending, completed)",
                                        },
                                        "submissions": {
                                            "bsonType": "array",
                                            "description": "List of submissions",
                                            "items": {
                                                "bsonType": "object",
                                                "required": [
                                                    "student_id",
                                                    "file_url",
                                                    "submitted_at",
                                                ],
                                                "properties": {
                                                    "student_id": {
                                                        "bsonType": "string",
                                                        "description": "ID of the student who submitted the assignment",
                                                    },
                                                    "file_url": {
                                                        "bsonType": "string",
                                                        "description": "URL of the submitted file",
                                                    },
                                                    "submitted_at": {
                                                        "bsonType": "date",
                                                        "description": "Date when the assignment was submitted",
                                                    },
                                                },
                                            },
                                        },
                                    },
                                },
                            }
                        },
                    },
                },
            },
        },
    },
}
|
347 |
+
|
348 |
+
# Create the collection with the schema
|
349 |
+
# db.create_collection("courses_collection2", validator={"$jsonSchema": course_schema})
|
350 |
+
|
351 |
+
# sample_course = {
|
352 |
+
# "course_id": "CS101",
|
353 |
+
# "title": "Introduction to Computer Science",
|
354 |
+
# "description": "This course covers the basics of computer science and programming.",
|
355 |
+
# "faculty": "Dr. John Doe",
|
356 |
+
# "faculty_id": "F101",
|
357 |
+
# "duration": "10 weeks",
|
358 |
+
# "created_at": datetime.utcnow(),
|
359 |
+
# "sessions": [
|
360 |
+
# {
|
361 |
+
# "session_id": "S101",
|
362 |
+
# "title": "Introduction to Programming Fundamentals",
|
363 |
+
# "date": datetime.utcnow() - timedelta(days=7),
|
364 |
+
# "status": "completed",
|
365 |
+
# "created_at": datetime.utcnow() - timedelta(days=7),
|
366 |
+
# "pre_class": {
|
367 |
+
# "resources": [
|
368 |
+
# {
|
369 |
+
# "type": "pdf",
|
370 |
+
# "title": "Introduction to Python Basics",
|
371 |
+
# "url": "/assets/python_basics.pdf",
|
372 |
+
# "vector": [0.1, 0.2, 0.3] # Example vector
|
373 |
+
# }
|
374 |
+
# ],
|
375 |
+
# "completion_required": True
|
376 |
+
# },
|
377 |
+
# "in_class": {
|
378 |
+
# "topics": ["Variables", "Data Types", "Basic Operations"],
|
379 |
+
# "quiz": {
|
380 |
+
# "title": "Python Basics Quiz",
|
381 |
+
# "questions": 5,
|
382 |
+
# "duration": 15
|
383 |
+
# },
|
384 |
+
# "polls": [
|
385 |
+
# {
|
386 |
+
# "question": "How comfortable are you with Python syntax?",
|
387 |
+
# "options": ["Very", "Somewhat", "Not at all"],
|
388 |
+
# "responses": {"Very": 10, "Somewhat": 5, "Not at all": 2}
|
389 |
+
# }
|
390 |
+
# ]
|
391 |
+
# },
|
392 |
+
# "post_class": {
|
393 |
+
# "assignments": [
|
394 |
+
# {
|
395 |
+
# "id": 1,
|
396 |
+
# "title": "Basic Python Programs",
|
397 |
+
# "due_date": datetime.utcnow() + timedelta(days=2),
|
398 |
+
# "status": "pending",
|
399 |
+
# "submissions": []
|
400 |
+
# }
|
401 |
+
# ]
|
402 |
+
# }
|
403 |
+
# },
|
404 |
+
# {
|
405 |
+
# "session_id": "S102",
|
406 |
+
# "title": "Control Flow and Functions",
|
407 |
+
# "date": datetime.utcnow() - timedelta(days=3),
|
408 |
+
# "status": "completed",
|
409 |
+
# "created_at": datetime.utcnow() - timedelta(days=3),
|
410 |
+
# "pre_class": {
|
411 |
+
# "resources": [
|
412 |
+
# {
|
413 |
+
# "type": "pdf",
|
414 |
+
# "title": "Control Flow in Python",
|
415 |
+
# "url": "/assets/control_flow.pdf",
|
416 |
+
# "vector": [0.4, 0.5, 0.6] # Example vector
|
417 |
+
# }
|
418 |
+
# ],
|
419 |
+
# "completion_required": True
|
420 |
+
# },
|
421 |
+
# "in_class": {
|
422 |
+
# "topics": ["If-else statements", "Loops", "Function definitions"],
|
423 |
+
# "quiz": {
|
424 |
+
# "title": "Control Flow Quiz",
|
425 |
+
# "questions": 8,
|
426 |
+
# "duration": 20
|
427 |
+
# },
|
428 |
+
# "polls": [
|
429 |
+
# {
|
430 |
+
# "question": "Which loop type do you find more intuitive?",
|
431 |
+
# "options": ["For loops", "While loops", "Both"],
|
432 |
+
# "responses": {"For loops": 12, "While loops": 8, "Both": 10}
|
433 |
+
# }
|
434 |
+
# ]
|
435 |
+
# },
|
436 |
+
# "post_class": {
|
437 |
+
# "assignments": [
|
438 |
+
# {
|
439 |
+
# "id": 2,
|
440 |
+
# "title": "Function Implementation Exercise",
|
441 |
+
# "due_date": datetime.utcnow() + timedelta(days=4),
|
442 |
+
# "status": "pending",
|
443 |
+
# "submissions": []
|
444 |
+
# }
|
445 |
+
# ]
|
446 |
+
# }
|
447 |
+
# }
|
448 |
+
# ]
|
449 |
+
# }
|
450 |
+
courses_collection2 = db["courses_collection2"]
|
451 |
+
|
452 |
+
|
453 |
+
# Define the users schema
|
454 |
+
# $jsonSchema-style description of a user document: five required flat
# fields, generated from a (name, bsonType, description) table.
users_schema = {
    "bsonType": "object",
    "required": ["user_id", "username", "password", "role", "created_at"],
    "properties": {
        field: {"bsonType": bson_type, "description": text}
        for field, bson_type, text in (
            ("user_id", "string", "Unique identifier for the user"),
            ("username", "string", "Name of the User"),
            ("password", "string", "Password of the user"),
            ("role", "string", "Type of user (e.g., student, faculty)"),
            ("created_at", "date", "Date when the user was created"),
        )
    },
}
|
474 |
+
# Create the collection with the schema
|
475 |
+
# db.create_collection("users", validator={"$jsonSchema": users_schema})
|
476 |
+
users_collection = db["users"]
|
477 |
+
|
478 |
+
|
479 |
+
# Defining the Student Collection
|
480 |
+
# $jsonSchema-style description of a student document.
# ``enrolled_courses`` is an array of {course_id, title} objects, both strings
# and both required.
student_schema = {
    "bsonType": "object",
    "required": ["SID", "full_name", "password", "enrolled_courses", "created_at"],
    "properties": {
        "SID": {"bsonType": "string", "description": "Unique identifier for the student"},
        "full_name": {"bsonType": "string", "description": "Full name of the student"},
        "password": {"bsonType": "string", "description": "Hashed password of the student"},
        "enrolled_courses": {
            "bsonType": "array",
            "description": "List of courses the student is enrolled in",
            "items": {
                "bsonType": "object",
                "required": ["course_id", "title"],
                "properties": {
                    key: {"bsonType": "string", "description": text}
                    for key, text in (
                        ("course_id", "Unique identifier for the course"),
                        ("title", "Title of the course"),
                    )
                },
            },
        },
        "created_at": {"bsonType": "date", "description": "Date when the student was created"},
    },
}
|
517 |
+
# Defining the Faculty Collection
|
518 |
+
# $jsonSchema-style description of a faculty document.
# ``courses_taught`` is an array of {course_id, title} objects, both strings
# and both required.
faculty_schema = {
    "bsonType": "object",
    "required": ["TID", "full_name", "password", "courses_taught", "created_at"],
    "properties": {
        "TID": {"bsonType": "string", "description": "Unique identifier for the faculty"},
        "full_name": {"bsonType": "string", "description": "Full name of the faculty"},
        "password": {"bsonType": "string", "description": "Hashed password of the faculty"},
        "courses_taught": {
            "bsonType": "array",
            "description": "List of courses the faculty is teaching",
            "items": {
                "bsonType": "object",
                "required": ["course_id", "title"],
                "properties": {
                    key: {"bsonType": "string", "description": text}
                    for key, text in (
                        ("course_id", "Unique identifier for the course"),
                        ("title", "Title of the course"),
                    )
                },
            },
        },
        "created_at": {"bsonType": "date", "description": "Date when the faculty was created"},
    },
}
|
555 |
+
# Creating the Collections
|
556 |
+
# db.create_collection("students", validator={"$jsonSchema": student_schema})
|
557 |
+
# db.create_collection("faculty", validator={"$jsonSchema": faculty_schema})
|
558 |
+
|
559 |
+
students_collection = db["students"]
|
560 |
+
faculty_collection = db["faculty"]
|
561 |
+
|
562 |
+
# Defining the Vector Collection Schema
|
563 |
+
# $jsonSchema-style description of a stored embedding: the resource it
# belongs to and its vector are required; the source text and creation
# date are optional.
vector_schema = dict(
    bsonType="object",
    required=["resource_id", "vector"],
    properties=dict(
        resource_id=dict(
            bsonType="objectId",
            description="Unique identifier for the resource",
        ),
        vector=dict(
            bsonType="array",
            description="Vector representation of the resource",
            items=dict(bsonType="double"),
        ),
        text=dict(bsonType="string", description="Text content of the resource"),
        created_at=dict(
            bsonType="date",
            description="Date when the vector was created",
        ),
    ),
)
|
583 |
+
# Creating the Vector Collection
|
584 |
+
# db.create_collection("vectors", validator={"$jsonSchema": vector_schema})
|
585 |
+
vectors_collection = db["vectors"]
|
586 |
+
|
587 |
+
|
588 |
+
# Creating a Chat-History Collection
|
589 |
+
# Schema for one chat session and its list of messages
|
590 |
+
# $jsonSchema-style description of a chat session document: who chatted, in
# which session, when it started, and the prompt/response pairs exchanged.
chat_history_schema = {
    "bsonType": "object",
    "required": ["user_id", "session_id", "messages", "timestamp"],
    "properties": {
        "user_id": {"bsonType": "objectId", "description": "Unique identifier for the user"},
        "session_id": {"bsonType": "string", "description": "Identifier for the session"},
        "timestamp": {
            "bsonType": "date",
            "description": "Timestamp when the chat session started",
        },
        "messages": {
            "bsonType": "array",
            "description": "List of chat messages",
            "items": {
                "bsonType": "object",
                # No field of an individual message is marked as required.
                "properties": {
                    key: {"bsonType": bson_type, "description": text}
                    for key, bson_type, text in (
                        ("prompt", "string", "User's question or prompt"),
                        ("response", "string", "Assistant's response"),
                        ("timestamp", "date", "Timestamp of the message"),
                    )
                },
            },
        },
    },
}
|
629 |
+
|
630 |
+
# Create the collection with the schema
|
631 |
+
# db.create_collection("chat_history", validator={"$jsonSchema": chat_history_schema})
|
632 |
+
chat_history_collection = db["chat_history"]
|
633 |
+
|
634 |
+
|
635 |
+
# Database setup for Research Assistant
|
636 |
+
# Research Assistant Schema
|
637 |
+
# $jsonSchema-style description of a research-assistant document.
# NOTE(review): this name is also assigned earlier in the same file with the
# same content; this later assignment is the one that remains bound.
research_assistant_schema = dict(
    bsonType="object",
    required=["full_name", "password", "email", "courses_assisted"],
    properties=dict(
        full_name=dict(
            bsonType="string",
            description="Full name of the research assistant",
        ),
        password=dict(
            bsonType="string",
            description="Hashed password of the research assistant",
        ),
        email=dict(
            bsonType="string",
            description="Email address of the research assistant",
        ),
        courses_assisted=dict(
            bsonType="array",
            description="List of courses the research assistant is assisting",
            items=dict(
                bsonType="object",
                required=["course_id"],
                properties=dict(
                    course_id=dict(bsonType="string", description="ID of the course"),
                ),
            ),
        ),
    ),
)
|
669 |
+
|
670 |
+
# Create research assistants collection
|
671 |
+
research_assistants_collection = db["research_assistants"]
|
672 |
+
|
673 |
+
# Create indexes
|
674 |
+
research_assistants_collection.create_index("full_name", unique=True)
|
675 |
+
research_assistants_collection.create_index("email", unique=True)
|
676 |
+
|
677 |
+
|
678 |
+
# Optional: Sample data insertion function
|
679 |
+
# def insert_sample_research_assistants():
|
680 |
+
# sample_research_assistants = [
|
681 |
+
# {
|
682 |
+
# "full_name": "John Doe RA",
|
683 |
+
# "password": generate_password_hash("password123"),
|
684 |
+
# "email": "[email protected]",
|
685 |
+
# "courses_assisted": [{"course_id": "CS101"}, {"course_id": "CS102"}],
|
686 |
+
# }
|
687 |
+
# ]
|
688 |
+
|
689 |
+
# try:
|
690 |
+
# research_assistants_collection.insert_many(sample_research_assistants)
|
691 |
+
# print("Sample research assistants inserted successfully!")
|
692 |
+
# except Exception as e:
|
693 |
+
# print(f"Error inserting sample research assistants: {e}")
|
694 |
+
|
695 |
+
# if __name__ == "__main__":
|
696 |
+
# insert_sample_analysts()
|
entire_download.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from pymongo import MongoClient
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import os
|
6 |
+
|
7 |
+
# 1. Load environment variables
|
8 |
+
load_dotenv()
|
9 |
+
# Connection-string lookup order:
#   1. MONGODB_URI - the correctly spelled variable name.
#   2. MONGODB_UR  - the misspelled name this script originally read; still
#      honoured so deployments that set it keep working.
#   3. The built-in default below.
# NOTE(security): the default embeds account credentials in source control.
# It is retained only for backward compatibility; supply the URI through the
# environment instead and rotate the embedded password.
MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv(
    "MONGODB_UR",
    "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
)
|
13 |
+
|
14 |
+
# 2. Create MongoDB connection
|
15 |
+
client = MongoClient(MONGODB_URI)
|
16 |
+
db = client["novascholar_db"]
|
17 |
+
collection = db["research_papers"]
|
18 |
+
|
19 |
+
|
20 |
+
def get_collection_data(paper_type: str):
    """Return every document stored for the given paper type.

    The collection name is ``paper_type`` with spaces replaced by underscores
    and lower-cased. Each document's ``_id`` is converted to ``str``.

    Returns:
        A list of documents, or ``None`` if any exception occurred (the error
        is shown with ``st.error``).
    """
    try:
        source = db[paper_type.replace(" ", "_").lower()]

        records = []
        for record in source.find():
            # Stringify the id before collecting the document.
            record["_id"] = str(record["_id"])
            records.append(record)

        return records
    except Exception as e:
        st.error(f"Database Error: {str(e)}")
        return None
|
40 |
+
|
41 |
+
|
42 |
+
def main():
    """
    Render the Streamlit page that fetches one research-paper collection,
    shows it as a table and offers it as a CSV download.

    A database failure (``get_collection_data`` returning ``None``) is kept
    distinct from an empty collection, so the "No documents found" warning is
    shown only when the query actually succeeded and returned nothing.
    """
    st.title("MongoDB Collection Download")
    st.write("Download all documents from the selected research paper collection")

    # Dropdown to select the type of research paper
    paper_type = st.selectbox(
        "Select type of research paper:",
        [
            "Review Based Paper",
            "Opinion/Perspective Based Paper",
            "Empirical Research Paper",
            "Research Paper (Other)",
        ],
    )

    if not st.button("Fetch Data"):
        return

    with st.spinner("Retrieving documents from MongoDB..."):
        docs = get_collection_data(paper_type)

    if docs is None:
        # The database error has already been reported by get_collection_data.
        return

    if not docs:
        st.warning(f"No documents found in the '{paper_type}' collection.")
        return

    df = pd.DataFrame(docs)

    # Convert lists to comma-separated strings for consistency
    for col in df.columns:
        if df[col].apply(lambda x: isinstance(x, list)).any():
            df[col] = df[col].apply(
                lambda x: ", ".join(map(str, x)) if isinstance(x, list) else x
            )

    st.success(
        f"Successfully retrieved {len(df)} documents from '{paper_type}' collection."
    )
    st.dataframe(df)

    # Provide option to download the data as CSV
    csv = df.to_csv(index=False).encode("utf-8")
    # "/" (as in "Opinion/Perspective Based Paper") is a path separator and
    # must not end up inside the suggested file name.
    safe_name = paper_type.replace(" ", "_").replace("/", "_").lower()
    st.download_button(
        label="Download CSV",
        data=csv,
        file_name=f"{safe_name}_papers.csv",
        mime="text/csv",
    )
|
87 |
+
|
88 |
+
|
89 |
+
if __name__ == "__main__":
|
90 |
+
main()
|
extract.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import PyPDF2
|
4 |
+
import io
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import requests
|
8 |
+
import time
|
9 |
+
|
10 |
+
# Load environment variables
|
11 |
+
load_dotenv()
|
12 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
13 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
14 |
+
|
15 |
+
def call_perplexity_api(prompt: str, timeout: float = 60.0) -> str:
    """
    Send ``prompt`` to the Perplexity chat-completions endpoint.

    Args:
        prompt: The user message to send.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` waits indefinitely, which would
            freeze the app on a stalled connection.

    Returns:
        The text of the first choice, or ``""`` if the request failed for any
        reason (the error is reported through ``st.error``).
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }

    try:
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Covers network errors, timeouts, HTTP errors and unexpected payloads.
        st.error(f"API Error: {str(e)}")
        return ""
|
35 |
+
|
36 |
+
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of ``pdf_file``, each followed by a newline."""
    reader = PyPDF2.PdfReader(pdf_file)
    return "".join(page.extract_text() + "\n" for page in reader.pages)
|
43 |
+
|
44 |
+
def analyze_paper(text: str, category: str) -> str:
    """
    Ask the Perplexity API about one aspect (``category``) of a paper.

    The instruction for the category is looked up in a fixed table (an
    unknown category raises ``KeyError``) and combined with the first 5000
    characters of ``text``.
    """
    instruction_by_category = {
        "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
        "Results": "What are the main results and findings from this research paper:",
        "Summarized Introduction": "Summarize the introduction section of this research paper:",
        "Methods Used": "What are the main methods and methodologies used in this research:",
        "Literature Survey": "Summarize the literature review or related work from this paper:",
        "Limitations": "What are the limitations mentioned in this research:",
        "Contributions": "What are the main contributions of this research:",
        "Practical Implications": "What are the practical implications of this research:",
        "Objectives": "What are the main objectives of this research:",
        "Findings": "What are the key findings from this research:",
        "Future Research": "What future research directions are suggested in this paper:",
        "Dependent Variables": "What are the dependent variables studied in this research:",
        "Independent Variables": "What are the independent variables studied in this research:",
        "Dataset": "What dataset(s) were used in this research:",
        "Problem Statement": "What is the main problem statement or research question:",
        "Challenges": "What challenges were faced or addressed in this research:",
        "Applications": "What are the potential applications of this research:",
    }

    instruction = instruction_by_category[category]
    excerpt = text[:5000]  # Limit text to avoid token limits
    return call_perplexity_api(f"{instruction}\n\nPaper text: {excerpt}")
|
68 |
+
|
69 |
+
def main():
    """
    Render the Streamlit page that analyses uploaded PDFs.

    For every uploaded file the text is extracted once, then each category is
    analysed in turn (with a one-second pause between API calls). The results
    are offered as a CSV download and shown as a table.
    """
    st.title("Research Paper Analysis Tool")

    # File uploader
    uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)

    if not uploaded_files:
        return
    if not st.button("Process Papers"):
        return

    # Progress widgets
    progress_bar = st.progress(0)
    status_text = st.empty()

    # One result row per paper
    results = []

    categories = [
        "Summarized Abstract", "Results", "Summarized Introduction",
        "Methods Used", "Literature Survey", "Limitations",
        "Contributions", "Practical Implications", "Objectives",
        "Findings", "Future Research", "Dependent Variables",
        "Independent Variables", "Dataset", "Problem Statement",
        "Challenges", "Applications"
    ]
    total_steps = len(uploaded_files) * len(categories)

    for file_index, file in enumerate(uploaded_files):
        status_text.text(f"Processing {file.name}...")

        text = extract_text_from_pdf(file)

        paper_results = {"Filename": file.name}
        steps_before = file_index * len(categories)

        for step, category in enumerate(categories, start=1):
            status_text.text(f"Processing {file.name} - {category}")
            paper_results[category] = analyze_paper(text, category)

            # Fraction of all (file, category) pairs completed so far
            progress_bar.progress((steps_before + step) / total_steps)

            # Add small delay to avoid API rate limits
            time.sleep(1)

        results.append(paper_results)

    df = pd.DataFrame(results)
    csv = df.to_csv(index=False)

    st.download_button(
        label="Download Results as CSV",
        data=csv,
        file_name="research_papers_analysis.csv",
        mime="text/csv"
    )

    # Display results in the app
    st.subheader("Analysis Results")
    st.dataframe(df)

    status_text.text("Processing complete!")
    progress_bar.progress(1.0)
|
138 |
+
|
139 |
+
if __name__ == "__main__":
|
140 |
+
main()
|
file_upload_vectorize.py
CHANGED
@@ -1,179 +1,179 @@
|
|
1 |
-
from pymongo import MongoClient
|
2 |
-
from datetime import datetime
|
3 |
-
import openai
|
4 |
-
import google.generativeai as genai
|
5 |
-
import streamlit as st
|
6 |
-
from db import courses_collection2, faculty_collection, students_collection, vectors_collection
|
7 |
-
from PIL import Image
|
8 |
-
import PyPDF2, docx, io
|
9 |
-
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
|
10 |
-
from bson import ObjectId
|
11 |
-
from dotenv import load_dotenv
|
12 |
-
import os
|
13 |
-
from create_course import courses_collection
|
14 |
-
|
15 |
-
load_dotenv()
|
16 |
-
MONGO_URI = os.getenv('MONGO_URI')
|
17 |
-
OPENAI_KEY = os.getenv('OPENAI_KEY')
|
18 |
-
GEMINI_KEY = os.getenv('GEMINI_KEY')
|
19 |
-
|
20 |
-
|
21 |
-
client = MongoClient(MONGO_URI)
|
22 |
-
db = client['novascholar_db']
|
23 |
-
resources_collection = db['resources']
|
24 |
-
|
25 |
-
# Configure APIs
|
26 |
-
openai.api_key = OPENAI_KEY
|
27 |
-
genai.configure(api_key=GEMINI_KEY)
|
28 |
-
model = genai.GenerativeModel('gemini-pro')
|
29 |
-
|
30 |
-
def upload_resource(course_id, session_id, file_name, file_content, material_type):
|
31 |
-
# material_data = {
|
32 |
-
# "session_id": session_id,
|
33 |
-
# "course_id": course_id,
|
34 |
-
# "file_name": file_name,
|
35 |
-
# "file_content": file_content,
|
36 |
-
# "material_type": material_type,
|
37 |
-
# "uploaded_at": datetime.utcnow()
|
38 |
-
# }
|
39 |
-
# return resources_collection.insert_one(material_data)
|
40 |
-
# resource_id = ObjectId()
|
41 |
-
|
42 |
-
# Extract text content from the file
|
43 |
-
text_content = extract_text_from_file(file_content)
|
44 |
-
|
45 |
-
# Check if a resource with this file name already exists
|
46 |
-
existing_resource = resources_collection.find_one({
|
47 |
-
"session_id": session_id,
|
48 |
-
"file_name": file_name
|
49 |
-
})
|
50 |
-
|
51 |
-
if existing_resource:
|
52 |
-
return existing_resource["_id"]
|
53 |
-
|
54 |
-
# Read the file content
|
55 |
-
file_content.seek(0) # Reset the file pointer to the beginning
|
56 |
-
original_file_content = file_content.read()
|
57 |
-
|
58 |
-
|
59 |
-
resource_data = {
|
60 |
-
"_id": ObjectId(),
|
61 |
-
"course_id": course_id,
|
62 |
-
"session_id": session_id,
|
63 |
-
"file_name": file_name,
|
64 |
-
"file_type": file_content.type,
|
65 |
-
"text_content": text_content,
|
66 |
-
"file_content": original_file_content, # Store the original file content
|
67 |
-
"material_type": material_type,
|
68 |
-
"uploaded_at": datetime.utcnow()
|
69 |
-
}
|
70 |
-
|
71 |
-
resources_collection.insert_one(resource_data)
|
72 |
-
resource_id = resource_data["_id"]
|
73 |
-
|
74 |
-
courses_collection.update_one(
|
75 |
-
{
|
76 |
-
"course_id": course_id,
|
77 |
-
"sessions.session_id": session_id
|
78 |
-
},
|
79 |
-
{
|
80 |
-
"$push": {"sessions.$.pre_class.resources": resource_id}
|
81 |
-
}
|
82 |
-
)
|
83 |
-
# print("End of Upload Resource, Resource ID is: ", resource_id)
|
84 |
-
# return resource_id
|
85 |
-
if text_content:
|
86 |
-
create_vector_store(text_content, resource_id)
|
87 |
-
return resource_id
|
88 |
-
|
89 |
-
def assignment_submit(student_id, course_id, session_id, assignment_id, file_name, file_content, text_content, material_type):
|
90 |
-
# Read the file content
|
91 |
-
file_content.seek(0) # Reset the file pointer to the beginning
|
92 |
-
original_file_content = file_content.read()
|
93 |
-
|
94 |
-
assignment_data = {
|
95 |
-
"student_id": student_id,
|
96 |
-
"course_id": course_id,
|
97 |
-
"session_id": session_id,
|
98 |
-
"assignment_id": assignment_id,
|
99 |
-
"file_name": file_name,
|
100 |
-
"file_type": file_content.type,
|
101 |
-
"file_content": original_file_content, # Store the original file content
|
102 |
-
"text_content": text_content,
|
103 |
-
"material_type": material_type,
|
104 |
-
"submitted_at": datetime.utcnow(),
|
105 |
-
"file_url": "sample_url"
|
106 |
-
}
|
107 |
-
try:
|
108 |
-
courses_collection2.update_one(
|
109 |
-
{
|
110 |
-
"course_id": course_id,
|
111 |
-
"sessions.session_id": session_id,
|
112 |
-
"sessions.post_class.assignments.id": assignment_id
|
113 |
-
},
|
114 |
-
{
|
115 |
-
"$push": {"sessions.$.post_class.assignments.$[assignment].submissions": assignment_data}
|
116 |
-
},
|
117 |
-
array_filters=[{"assignment.id": assignment_id}]
|
118 |
-
)
|
119 |
-
return True
|
120 |
-
except Exception as db_error:
|
121 |
-
print(f"Error saving submission: {str(db_error)}")
|
122 |
-
return False
|
123 |
-
|
124 |
-
def extract_text_from_file(uploaded_file):
|
125 |
-
text = ""
|
126 |
-
file_type = uploaded_file.type
|
127 |
-
|
128 |
-
try:
|
129 |
-
if file_type == "text/plain":
|
130 |
-
text = uploaded_file.getvalue().decode("utf-8")
|
131 |
-
elif file_type == "application/pdf":
|
132 |
-
pdf_reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue()))
|
133 |
-
for page in pdf_reader.pages:
|
134 |
-
text += page.extract_text() + "\n"
|
135 |
-
elif file_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
|
136 |
-
doc = docx.Document(io.BytesIO(uploaded_file.getvalue()))
|
137 |
-
for para in doc.paragraphs:
|
138 |
-
text += para.text + "\n"
|
139 |
-
return text
|
140 |
-
except Exception as e:
|
141 |
-
st.error(f"Error processing file: {str(e)}")
|
142 |
-
return None
|
143 |
-
|
144 |
-
def get_embedding(text):
|
145 |
-
response = openai.embeddings.create(
|
146 |
-
model="text-embedding-ada-002",
|
147 |
-
input=text
|
148 |
-
)
|
149 |
-
return response.data[0].embedding
|
150 |
-
|
151 |
-
def create_vector_store(text, resource_id):
|
152 |
-
# resource_object_id = ObjectId(resource_id)
|
153 |
-
# Ensure resource_id is an ObjectId
|
154 |
-
# if not isinstance(resource_id, ObjectId):
|
155 |
-
# resource_id = ObjectId(resource_id)
|
156 |
-
|
157 |
-
existing_vector = vectors_collection.find_one({
|
158 |
-
"resource_id": resource_id,
|
159 |
-
"text": text
|
160 |
-
})
|
161 |
-
|
162 |
-
if existing_vector:
|
163 |
-
print(f"Vector already exists for Resource ID: {resource_id}")
|
164 |
-
return
|
165 |
-
|
166 |
-
print(f"In Vector Store method, Resource ID is: {resource_id}")
|
167 |
-
document = Document(text=text)
|
168 |
-
embedding = get_embedding(text)
|
169 |
-
|
170 |
-
vector_data = {
|
171 |
-
"resource_id": resource_id,
|
172 |
-
"vector": embedding,
|
173 |
-
"text": text,
|
174 |
-
"created_at": datetime.utcnow()
|
175 |
-
}
|
176 |
-
|
177 |
-
vectors_collection.insert_one(vector_data)
|
178 |
-
|
179 |
# return VectorStoreIndex.from_documents([document])
|
|
|
1 |
+
from pymongo import MongoClient
|
2 |
+
from datetime import datetime
|
3 |
+
import openai
|
4 |
+
import google.generativeai as genai
|
5 |
+
import streamlit as st
|
6 |
+
from db import courses_collection2, faculty_collection, students_collection, vectors_collection
|
7 |
+
from PIL import Image
|
8 |
+
import PyPDF2, docx, io
|
9 |
+
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
|
10 |
+
from bson import ObjectId
|
11 |
+
from dotenv import load_dotenv
|
12 |
+
import os
|
13 |
+
from create_course import courses_collection
|
14 |
+
|
15 |
+
load_dotenv()
|
16 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
17 |
+
OPENAI_KEY = os.getenv('OPENAI_KEY')
|
18 |
+
GEMINI_KEY = os.getenv('GEMINI_KEY')
|
19 |
+
|
20 |
+
|
21 |
+
client = MongoClient(MONGO_URI)
|
22 |
+
db = client['novascholar_db']
|
23 |
+
resources_collection = db['resources']
|
24 |
+
|
25 |
+
# Configure APIs
|
26 |
+
openai.api_key = OPENAI_KEY
|
27 |
+
genai.configure(api_key=GEMINI_KEY)
|
28 |
+
model = genai.GenerativeModel('gemini-pro')
|
29 |
+
|
30 |
+
def upload_resource(course_id, session_id, file_name, file_content, material_type):
    """
    Store an uploaded file as a resource of a session and vectorize its text.

    If a resource with the same ``session_id`` and ``file_name`` already
    exists, its ``_id`` is returned and nothing is written. Otherwise the
    resource is inserted, its id is pushed onto the session's
    ``pre_class.resources`` list, and, when text could be extracted, a vector
    entry is created for it.

    Returns:
        The ``_id`` of the existing or newly inserted resource.
    """
    # Extract text content from the file
    text_content = extract_text_from_file(file_content)

    # A file name is stored at most once per session
    duplicate = resources_collection.find_one(
        {"session_id": session_id, "file_name": file_name}
    )
    if duplicate:
        return duplicate["_id"]

    # Rewind before reading the raw bytes
    file_content.seek(0)
    raw_bytes = file_content.read()

    resource_id = ObjectId()
    resources_collection.insert_one(
        {
            "_id": resource_id,
            "course_id": course_id,
            "session_id": session_id,
            "file_name": file_name,
            "file_type": file_content.type,
            "text_content": text_content,
            "file_content": raw_bytes,  # Store the original file content
            "material_type": material_type,
            "uploaded_at": datetime.utcnow(),
        }
    )

    session_filter = {"course_id": course_id, "sessions.session_id": session_id}
    courses_collection.update_one(
        session_filter,
        {"$push": {"sessions.$.pre_class.resources": resource_id}},
    )

    if text_content:
        create_vector_store(text_content, resource_id)
    return resource_id
|
88 |
+
|
89 |
+
def assignment_submit(student_id, course_id, session_id, assignment_id, file_name, file_content, text_content, material_type):
    """
    Append a student's submission to an assignment of a course session.

    Returns:
        ``True`` only if a course document was actually modified. ``False`` if
        the database call raised, or if no course/session/assignment matched
        (in which case nothing was stored).
    """
    # Read the file content
    file_content.seek(0)  # Reset the file pointer to the beginning
    original_file_content = file_content.read()

    assignment_data = {
        "student_id": student_id,
        "course_id": course_id,
        "session_id": session_id,
        "assignment_id": assignment_id,
        "file_name": file_name,
        "file_type": file_content.type,
        "file_content": original_file_content,  # Store the original file content
        "text_content": text_content,
        "material_type": material_type,
        "submitted_at": datetime.utcnow(),
        "file_url": "sample_url"
    }
    try:
        result = courses_collection2.update_one(
            {
                "course_id": course_id,
                "sessions.session_id": session_id,
                "sessions.post_class.assignments.id": assignment_id
            },
            {
                "$push": {"sessions.$.post_class.assignments.$[assignment].submissions": assignment_data}
            },
            array_filters=[{"assignment.id": assignment_id}]
        )
    except Exception as db_error:
        print(f"Error saving submission: {str(db_error)}")
        return False

    # update_one does not raise when nothing matches, so success must be
    # confirmed from the result rather than assumed.
    if result.modified_count == 0:
        print("Error saving submission: no matching course/session/assignment was updated")
        return False
    return True
|
123 |
+
|
124 |
+
def extract_text_from_file(uploaded_file):
    """
    Extract plain text from an uploaded file according to its MIME type.

    Plain text is decoded as UTF-8; PDF pages and DOCX paragraphs are each
    followed by a newline. Any other MIME type yields ``""``.

    Returns:
        The extracted text, or ``None`` if extraction raised (the error is
        reported through ``st.error``).
    """
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    mime = uploaded_file.type

    try:
        if mime == "text/plain":
            return uploaded_file.getvalue().decode("utf-8")
        if mime == "application/pdf":
            reader = PyPDF2.PdfReader(io.BytesIO(uploaded_file.getvalue()))
            return "".join(page.extract_text() + "\n" for page in reader.pages)
        if mime == docx_mime:
            document = docx.Document(io.BytesIO(uploaded_file.getvalue()))
            return "".join(para.text + "\n" for para in document.paragraphs)
        # Unsupported type: nothing to extract
        return ""
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None
|
143 |
+
|
144 |
+
def get_embedding(text):
    """Return the OpenAI ``text-embedding-ada-002`` embedding of ``text``."""
    result = openai.embeddings.create(
        input=text,
        model="text-embedding-ada-002",
    )
    first_item = result.data[0]
    return first_item.embedding
|
150 |
+
|
151 |
+
def create_vector_store(text, resource_id):
    """
    Embed ``text`` and store the vector for ``resource_id``.

    Nothing is written if a vector with the same ``resource_id`` and ``text``
    already exists. Returns ``None`` in every case.
    """
    # NOTE(review): the whole text is embedded in a single request; very long
    # documents may exceed the embedding model's input limit — confirm
    # whether chunking is needed.
    existing_vector = vectors_collection.find_one({
        "resource_id": resource_id,
        "text": text
    })

    if existing_vector:
        print(f"Vector already exists for Resource ID: {resource_id}")
        return

    print(f"In Vector Store method, Resource ID is: {resource_id}")
    embedding = get_embedding(text)

    vector_data = {
        "resource_id": resource_id,
        "vector": embedding,
        "text": text,
        "created_at": datetime.utcnow()
    }

    vectors_collection.insert_one(vector_data)
|
gen_mcqs.py
CHANGED
@@ -1,206 +1,206 @@
|
|
1 |
-
import ast
|
2 |
-
from pymongo import MongoClient
|
3 |
-
from datetime import datetime
|
4 |
-
import openai
|
5 |
-
import google.generativeai as genai
|
6 |
-
from google.generativeai import GenerativeModel
|
7 |
-
from dotenv import load_dotenv
|
8 |
-
import os
|
9 |
-
from file_upload_vectorize import resources_collection, vectors_collection, courses_collection2, faculty_collection
|
10 |
-
|
11 |
-
# Load environment variables
|
12 |
-
load_dotenv()
|
13 |
-
MONGO_URI = os.getenv('MONGO_URI')
|
14 |
-
OPENAI_KEY = os.getenv('OPENAI_KEY')
|
15 |
-
GEMINI_KEY = os.getenv('GEMINI_KEY')
|
16 |
-
|
17 |
-
# Configure APIs
|
18 |
-
openai.api_key = OPENAI_KEY
|
19 |
-
genai.configure(api_key=GEMINI_KEY)
|
20 |
-
model = genai.GenerativeModel('gemini-pro')
|
21 |
-
|
22 |
-
# Connect to MongoDB
|
23 |
-
client = MongoClient(MONGO_URI)
|
24 |
-
db = client['novascholar_db']
|
25 |
-
quizzes_collection = db["quizzes"]
|
26 |
-
|
27 |
-
def strip_code_markers(response_text):
|
28 |
-
"""Strip off the markers ``` and python from a LLM model's response"""
|
29 |
-
if response_text.startswith("```python"):
|
30 |
-
response_text = response_text[len("```python"):].strip()
|
31 |
-
if response_text.startswith("```"):
|
32 |
-
response_text = response_text[len("```"):].strip()
|
33 |
-
if response_text.endswith("```"):
|
34 |
-
response_text = response_text[:-len("```")].strip()
|
35 |
-
return response_text
|
36 |
-
|
37 |
-
|
38 |
-
# New function to generate MCQs using Gemini
|
39 |
-
def generate_mcqs(context, num_questions, session_title, session_description):
    """
    Generate multiple-choice questions with the Gemini model.

    When ``context`` is non-empty the questions are based on it; otherwise
    they are based on ``session_title`` and ``session_description``.

    Returns:
        The parsed list of question dictionaries, or ``None`` if generation or
        parsing failed, or if no questions were produced (the error is
        printed).
    """
    try:
        if context:
            prompt = f"""
            Based on the following content, generate {num_questions} multiple choice questions.
            Format each question as a Python dictionary with the following structure:
            {{
                "question": "Question text here",
                "options": ["A) option1", "B) option2", "C) option3", "D) option4"],
                "correct_option": "A) option1" or "B) option2" or "C) option3" or "D) option4"
            }}

            Content:
            {context}

            Generate challenging but clear questions that test understanding of key concepts.
            Return only the Python list of dictionaries.
            """
        else:
            # Every literal brace in an f-string must be doubled. The sample
            # response below previously used single braces, which made Python
            # treat the sample dictionary as a replacement field and fail.
            prompt = f"""
            Generate {num_questions} multiple choice questions about the topic:
            Title: {session_title}
            Description: {session_description}

            Format each question as a Python dictionary with the following structure:
            {{
                "question": "Question text here",
                "options": ["A) option1", "B) option2", "C) option3", "D) option4"],
                "correct_option": "A" or "B" or "C" or "D"
            }}

            Generate challenging but clear questions.
            Return only the Python list of dictionaries without any additional formatting or markers
            Do not write any other text, do not start the response with (```python), do not end the response with backticks(```)
            A Sample response should look like this: Response Text: [
            {{
                "question": "Which of the following is NOT a valid data type in C++?",
                "options": ["int", "double", "boolean", "char"],
                "correct_option": "C"
            }}
            ] (Notice that there are no backticks(```) around the response and no (```python))
            .
            """

        response = model.generate_content(prompt)
        response_text = response.text.strip()
        print("Response Text:", response_text)
        modified_response_text = strip_code_markers(response_text)
        print("Response Text Modified to:", modified_response_text)
        # literal_eval only accepts Python literals, so model output is never executed
        mcqs = ast.literal_eval(modified_response_text)
        print(mcqs)
        if not mcqs:
            raise ValueError("No questions generated")
        return mcqs
    except Exception as e:
        print(f"Error generating MCQs: , error: {e}")
        return None
|
99 |
-
|
100 |
-
# New function to save quiz to database
|
101 |
-
def save_quiz(course_id, session_id, title, questions, user_id):
    """
    Insert a new active quiz with no submissions.

    Returns:
        The inserted document's id, or ``None`` if the insert raised (the
        error is printed).
    """
    try:
        inserted = quizzes_collection.insert_one(
            {
                "user_id": user_id,
                "course_id": course_id,
                "session_id": session_id,
                "title": title,
                "questions": questions,
                "created_at": datetime.utcnow(),
                "status": "active",
                "submissions": [],
            }
        )
        return inserted.inserted_id
    except Exception as e:
        print(f"Error saving quiz: {e}")
        return None
|
119 |
-
|
120 |
-
|
121 |
-
def get_student_quiz_score(quiz_id, student_id):
    """
    Look up the score a student obtained on a quiz.

    Returns:
        The ``score`` of the first matching submission, or ``None`` when the
        quiz or a submission by that student is not found.
    """
    query = {"_id": quiz_id, "submissions.student_id": student_id}
    # Positional projection: only the submission that matched the query
    projection = {"submissions.$": 1}

    quiz = quizzes_collection.find_one(query, projection)
    if not quiz or not quiz.get('submissions'):
        return None
    return quiz['submissions'][0].get('score')
|
133 |
-
|
134 |
-
# def submit_quiz_answers(quiz_id, student_id, student_answers):
|
135 |
-
# """Submit and score student's quiz answers"""
|
136 |
-
# quiz = quizzes_collection.find_one({"_id": quiz_id})
|
137 |
-
# if not quiz:
|
138 |
-
# return None
|
139 |
-
|
140 |
-
# # Calculate score
|
141 |
-
# correct_answers = 0
|
142 |
-
# total_questions = len(quiz['questions'])
|
143 |
-
|
144 |
-
# for q_idx, question in enumerate(quiz['questions']):
|
145 |
-
# if student_answers.get(str(q_idx)) == question['correct_option']:
|
146 |
-
# correct_answers += 1
|
147 |
-
|
148 |
-
# score = (correct_answers / total_questions) * 100
|
149 |
-
|
150 |
-
# # Store submission
|
151 |
-
# submission_data = {
|
152 |
-
# "student_id": student_id,
|
153 |
-
# "answers": student_answers,
|
154 |
-
# "score": score,
|
155 |
-
# "submitted_at": datetime.utcnow()
|
156 |
-
# }
|
157 |
-
|
158 |
-
# # Update quiz with submission
|
159 |
-
# quizzes_collection.update_one(
|
160 |
-
# {"_id": quiz_id},
|
161 |
-
# {
|
162 |
-
# "$push": {"submissions": submission_data}
|
163 |
-
# }
|
164 |
-
# )
|
165 |
-
|
166 |
-
# return score
|
167 |
-
def submit_quiz_answers(quiz_id, student_id, student_answers):
    """
    Score a student's answers and append the submission to the quiz.

    Args:
        quiz_id: ``_id`` of the quiz document.
        student_id: Identifier stored with the submission.
        student_answers: Mapping from question index (as a string) to the
            chosen option text, e.g. ``{"0": "A) option1"}``.

    Returns:
        The percentage score, or ``None`` if the quiz does not exist, has no
        questions, could not be updated, or an error occurred.
    """
    def option_letter(option):
        # "A) option1" -> "A"; a bare "A" stays "A".
        return str(option).split(')')[0].strip()

    try:
        quiz = quizzes_collection.find_one({"_id": quiz_id})
        if not quiz:
            return None

        questions = quiz['questions']
        if not questions:
            # Nothing to score; avoids dividing by zero below.
            print("Error submitting quiz: quiz has no questions")
            return None

        # Calculate score
        correct_answers = 0
        for q_idx, question in enumerate(questions):
            student_answer = student_answers.get(str(q_idx))
            if not student_answer:  # Only check if answer was provided
                continue
            # The stored correct_option may be a bare letter ("A") or the full
            # option text ("A) option1"), so both sides are reduced to the
            # letter before comparing.
            if option_letter(student_answer) == option_letter(question['correct_option']):
                correct_answers += 1

        score = (correct_answers / len(questions)) * 100

        # Store submission
        submission_data = {
            "student_id": student_id,
            "answers": student_answers,
            "score": score,
            "submitted_at": datetime.utcnow()
        }

        # Update quiz with submission
        result = quizzes_collection.update_one(
            {"_id": quiz_id},
            {"$push": {"submissions": submission_data}}
        )

        return score if result.modified_count > 0 else None

    except Exception as e:
        print(f"Error submitting quiz: {e}")
        return None
|
|
|
1 |
+
import ast
|
2 |
+
from pymongo import MongoClient
|
3 |
+
from datetime import datetime
|
4 |
+
import openai
|
5 |
+
import google.generativeai as genai
|
6 |
+
from google.generativeai import GenerativeModel
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
import os
|
9 |
+
from file_upload_vectorize import resources_collection, vectors_collection, courses_collection2, faculty_collection
|
10 |
+
|
11 |
+
# Load environment variables
|
12 |
+
load_dotenv()
|
13 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
14 |
+
OPENAI_KEY = os.getenv('OPENAI_KEY')
|
15 |
+
GEMINI_KEY = os.getenv('GEMINI_KEY')
|
16 |
+
|
17 |
+
# Configure APIs
|
18 |
+
openai.api_key = OPENAI_KEY
|
19 |
+
genai.configure(api_key=GEMINI_KEY)
|
20 |
+
model = genai.GenerativeModel('gemini-pro')
|
21 |
+
|
22 |
+
# Connect to MongoDB
|
23 |
+
client = MongoClient(MONGO_URI)
|
24 |
+
db = client['novascholar_db']
|
25 |
+
quizzes_collection = db["quizzes"]
|
26 |
+
|
27 |
+
def strip_code_markers(response_text):
    """Remove a leading ```python / ``` fence and a trailing ``` fence from an LLM response.

    Surrounding whitespace is trimmed only after a fence has been removed;
    text without fences is returned unchanged.
    """
    fence = "```"
    # Checked in this order so that "```python" is consumed as a whole
    for opener in (fence + "python", fence):
        if response_text.startswith(opener):
            response_text = response_text[len(opener):].strip()
    if response_text.endswith(fence):
        response_text = response_text[:-len(fence)].strip()
    return response_text
|
36 |
+
|
37 |
+
|
38 |
+
# New function to generate MCQs using Gemini
|
39 |
+
def generate_mcqs(context, num_questions, session_title, session_description):
    """Generate multiple-choice questions with the module-level Gemini model.

    When ``context`` is truthy the questions are based on that content;
    otherwise they are based on the session title and description.

    Args:
        context: Source material to quiz on, or a falsy value to fall back
            to the session details.
        num_questions: Number of questions to request from the model.
        session_title: Session title used when no context is given.
        session_description: Session description used when no context is given.

    Returns:
        The parsed Python literal produced by the model (expected to be a
        list of question dictionaries), or ``None`` when the model call
        fails, the reply cannot be parsed, or the parsed result is empty.
    """
    try:
        if context:
            prompt = f"""
            Based on the following content, generate {num_questions} multiple choice questions.
            Format each question as a Python dictionary with the following structure:
            {{
                "question": "Question text here",
                "options": ["A) option1", "B) option2", "C) option3", "D) option4"],
                "correct_option": "A) option1" or "B) option2" or "C) option3" or "D) option4"
            }}

            Content:
            {context}

            Generate challenging but clear questions that test understanding of key concepts.
            Return only the Python list of dictionaries.
            """
        else:
            # Literal braces in the sample below are doubled: inside an
            # f-string a single "{" would open a replacement field and the
            # prompt could never be built.
            prompt = f"""
            Generate {num_questions} multiple choice questions about the topic:
            Title: {session_title}
            Description: {session_description}

            Format each question as a Python dictionary with the following structure:
            {{
                "question": "Question text here",
                "options": ["A) option1", "B) option2", "C) option3", "D) option4"],
                "correct_option": "A" or "B" or "C" or "D"
            }}

            Generate challenging but clear questions.
            Return only the Python list of dictionaries without any additional formatting or markers
            Do not write any other text, do not start the response with (```python), do not end the response with backticks(```)
            A Sample response should look like this: Response Text: [
                {{
                    "question": "Which of the following is NOT a valid data type in C++?",
                    "options": ["int", "double", "boolean", "char"],
                    "correct_option": "C"
                }}
            ] (Notice that there are no backticks(```) around the response and no (```python))
            .
            """

        response = model.generate_content(prompt)
        response_text = response.text.strip()
        print("Response Text:", response_text)
        modified_response_text = strip_code_markers(response_text)
        print("Response Text Modified to:", modified_response_text)

        # literal_eval only accepts Python literals, so model output is
        # parsed without being executed.
        mcqs = ast.literal_eval(modified_response_text)
        print(mcqs)
        if not mcqs:
            raise ValueError("No questions generated")
        return mcqs
    except Exception as e:
        print(f"Error generating MCQs: , error: {e}")
        return None
|
99 |
+
|
100 |
+
# New function to save quiz to database
|
101 |
+
def save_quiz(course_id, session_id, title, questions, user_id):
    """Insert a new quiz document into ``quizzes_collection``.

    The stored document starts with status ``"active"``, an empty
    ``submissions`` list and a ``created_at`` timestamp taken from
    ``datetime.utcnow()``.

    Returns:
        The ``inserted_id`` reported by the insert, or ``None`` if any
        exception is raised (the error is printed).
    """
    try:
        new_quiz = dict(
            user_id=user_id,
            course_id=course_id,
            session_id=session_id,
            title=title,
            questions=questions,
            created_at=datetime.utcnow(),
            status="active",
            submissions=[],
        )
        return quizzes_collection.insert_one(new_quiz).inserted_id
    except Exception as e:
        print(f"Error saving quiz: {e}")
        return None
|
119 |
+
|
120 |
+
|
121 |
+
def get_student_quiz_score(quiz_id, student_id):
    """Return the score stored for ``student_id`` on the quiz ``quiz_id``.

    The query matches the quiz by ``_id`` and by a submission carrying the
    student's id, projecting only the matched submission.

    Returns:
        The ``score`` value of the first projected submission, or ``None``
        when no quiz matches or it has no submissions.
    """
    query = {"_id": quiz_id, "submissions.student_id": student_id}
    projection = {"submissions.$": 1}

    match = quizzes_collection.find_one(query, projection)
    if not match:
        return None

    submissions = match.get('submissions')
    if not submissions:
        return None

    return submissions[0].get('score')
|
133 |
+
|
134 |
+
# def submit_quiz_answers(quiz_id, student_id, student_answers):
|
135 |
+
# """Submit and score student's quiz answers"""
|
136 |
+
# quiz = quizzes_collection.find_one({"_id": quiz_id})
|
137 |
+
# if not quiz:
|
138 |
+
# return None
|
139 |
+
|
140 |
+
# # Calculate score
|
141 |
+
# correct_answers = 0
|
142 |
+
# total_questions = len(quiz['questions'])
|
143 |
+
|
144 |
+
# for q_idx, question in enumerate(quiz['questions']):
|
145 |
+
# if student_answers.get(str(q_idx)) == question['correct_option']:
|
146 |
+
# correct_answers += 1
|
147 |
+
|
148 |
+
# score = (correct_answers / total_questions) * 100
|
149 |
+
|
150 |
+
# # Store submission
|
151 |
+
# submission_data = {
|
152 |
+
# "student_id": student_id,
|
153 |
+
# "answers": student_answers,
|
154 |
+
# "score": score,
|
155 |
+
# "submitted_at": datetime.utcnow()
|
156 |
+
# }
|
157 |
+
|
158 |
+
# # Update quiz with submission
|
159 |
+
# quizzes_collection.update_one(
|
160 |
+
# {"_id": quiz_id},
|
161 |
+
# {
|
162 |
+
# "$push": {"submissions": submission_data}
|
163 |
+
# }
|
164 |
+
# )
|
165 |
+
|
166 |
+
# return score
|
167 |
+
def _option_letter(value):
    """Return the option label in front of ')' ("B) text" -> "B", "B" -> "B")."""
    return str(value).split(')')[0].strip()


def submit_quiz_answers(quiz_id, student_id, student_answers):
    """Score a student's answers and append the submission to the quiz.

    Args:
        quiz_id: ``_id`` of the quiz document.
        student_id: Identifier stored with the submission.
        student_answers: Mapping from the question index as a string
            ("0", "1", ...) to the option text the student chose.

    Returns:
        The percentage score (0-100) when the submission was stored, or
        ``None`` when the quiz is missing, has no questions, the update did
        not modify a document, or an exception occurred (it is printed).
    """
    try:
        quiz = quizzes_collection.find_one({"_id": quiz_id})
        if not quiz:
            return None

        questions = quiz['questions']
        total_questions = len(questions)
        if total_questions == 0:
            # Avoid dividing by zero below; there is nothing to score.
            print("Error submitting quiz: quiz has no questions")
            return None

        correct_answers = 0
        for q_idx, question in enumerate(questions):
            student_answer = student_answers.get(str(q_idx))
            if not student_answer:
                continue  # unanswered questions earn no credit
            # Compare labels on both sides: the stored correct option may be
            # just "A" or the full "A) option text", depending on the prompt
            # that produced the quiz.
            if _option_letter(student_answer) == _option_letter(question['correct_option']):
                correct_answers += 1

        score = (correct_answers / total_questions) * 100

        submission_data = {
            "student_id": student_id,
            "answers": student_answers,
            "score": score,
            "submitted_at": datetime.utcnow()
        }

        result = quizzes_collection.update_one(
            {"_id": quiz_id},
            {"$push": {"submissions": submission_data}}
        )

        return score if result.modified_count > 0 else None

    except Exception as e:
        print(f"Error submitting quiz: {e}")
        return None
|
goals2.py
CHANGED
@@ -1,658 +1,658 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
from typing import List, Dict
|
3 |
-
import httpx
|
4 |
-
from pathlib import Path
|
5 |
-
import os
|
6 |
-
from dotenv import load_dotenv
|
7 |
-
import json
|
8 |
-
import numpy as np
|
9 |
-
from pymongo import MongoClient
|
10 |
-
from openai import OpenAI
|
11 |
-
from datetime import datetime
|
12 |
-
import asyncio
|
13 |
-
import pandas as pd
|
14 |
-
|
15 |
-
# Load environment variables
|
16 |
-
load_dotenv()
|
17 |
-
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
|
18 |
-
MONGODB_URI = os.getenv("MONGO_URI")
|
19 |
-
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
|
20 |
-
|
21 |
-
# Initialize MongoDB client
|
22 |
-
client = MongoClient(MONGODB_URI)
|
23 |
-
db = client["document_analysis"]
|
24 |
-
vectors_collection = db["document_vectors"]
|
25 |
-
|
26 |
-
# Initialize OpenAI client
|
27 |
-
openai_client = OpenAI(api_key=OPENAI_API_KEY)
|
28 |
-
|
29 |
-
|
30 |
-
class GoalAnalyzer:
    """Analyzes document text against goals through the Perplexity chat API."""

    def __init__(self):
        self.api_key = PERPLEXITY_API_KEY
        self.base_url = "https://api.perplexity.ai/chat/completions"

    def clean_json_string(self, content: str) -> str:
        """Extract the JSON object embedded in a model reply.

        Strips a Markdown code fence, keeps the span from the first "{" to
        the last "}", and removes newlines. Single quotes are converted to
        double quotes only when the result is not already valid JSON, so
        apostrophes inside valid string values are preserved.
        """
        # Remove markdown formatting
        if "```json" in content:
            content = content.split("```json")[1].split("```")[0]
        elif "```" in content:
            content = content.split("```")[1]

        # Find the JSON object boundaries
        start_idx = content.find("{")
        end_idx = content.rfind("}") + 1
        if start_idx != -1 and end_idx > start_idx:
            content = content[start_idx:end_idx]

        content = content.strip().replace("\n", "")

        try:
            json.loads(content)
        except ValueError:
            # Fall back to the quote swap for pseudo-JSON such as {'a': 1}.
            content = content.replace("'", '"')

        return content

    async def get_perplexity_analysis(self, text: str, goal: str) -> Dict:
        """Request a structured analysis of ``text`` with respect to ``goal``.

        Returns:
            A dict with "themes", "subthemes", "keywords" and
            "relevance_score" (missing keys are filled with empty defaults);
            a placeholder dict with error markers when the reply is not
            valid JSON; or ``None`` when the request itself fails. Errors
            are reported through ``st.error``.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        prompt = f"""
        Analyze the following text in context of the goal: {goal}

        Text: {text}

        Provide analysis in the following JSON format:
        {{
            "themes": ["theme1", "theme2"],
            "subthemes": {{"theme1": ["subtheme1", "subtheme2"], "theme2": ["subtheme3"]}},
            "keywords": ["keyword1", "keyword2"],
            "relevance_score": 0-100
        }}
        """

        try:
            async with httpx.AsyncClient() as client:
                payload = {
                    "model": "llama-3.1-sonar-small-128k-chat",
                    "messages": [
                        {
                            "role": "system",
                            "content": "You are an AI assistant that analyzes documents and provides structured analysis.",
                        },
                        {"role": "user", "content": prompt},
                    ],
                    "max_tokens": 1024,
                }

                # Debug info using expander
                with st.expander("Debug Info", expanded=False):
                    st.write("Request payload:", payload)

                response = await client.post(
                    self.base_url, headers=headers, json=payload, timeout=30.0
                )

                # Debug response info
                with st.expander("Response Info", expanded=False):
                    st.write("Response status:", response.status_code)
                    st.write("Response headers:", dict(response.headers))
                    st.write("Response content:", response.text)

                if response.status_code != 200:
                    try:
                        error_detail = (
                            response.json() if response.content else "No error details"
                        )
                    except ValueError:
                        # Error body is not JSON; keep the status visible
                        # instead of failing on the decode.
                        error_detail = response.text
                    raise Exception(
                        f"API returned status code {response.status_code}. Details: {error_detail}"
                    )

                result = response.json()
                content = (
                    result.get("choices", [{}])[0].get("message", {}).get("content", "")
                )

                # Clean and parse JSON
                cleaned_content = self.clean_json_string(content)

                try:
                    analysis = json.loads(cleaned_content)

                    # Fill in required fields; "subthemes" is a mapping from
                    # theme to list, matching the format the prompt asks for.
                    defaults = {
                        "themes": [],
                        "subthemes": {},
                        "keywords": [],
                        "relevance_score": 0,
                    }
                    for field, default in defaults.items():
                        if field not in analysis:
                            analysis[field] = default

                    return analysis

                except json.JSONDecodeError as e:
                    st.error(f"JSON parsing error: {str(e)}")
                    st.error(f"Failed content: {cleaned_content}")
                    return {
                        "themes": ["Error parsing themes"],
                        "subthemes": {"Error": ["Failed to parse subthemes"]},
                        "keywords": ["parsing-error"],
                        "relevance_score": 0,
                    }

        except Exception as e:
            st.error(f"API Error: {str(e)}")
            return None

    def extract_text_from_file(self, file) -> str:
        """Return the text of an uploaded txt, pdf or docx file.

        ``file`` must expose ``type`` (MIME type) and, for plain text,
        ``getvalue()``. Unsupported types yield an empty string; extraction
        errors are shown with ``st.error`` and also yield an empty string.
        """
        try:
            text = ""
            file_type = file.type

            if file_type == "text/plain":
                text = file.getvalue().decode("utf-8")
            elif file_type == "application/pdf":
                import PyPDF2

                pdf_reader = PyPDF2.PdfReader(file)
                # A page without extractable text may give None; treat it
                # as empty rather than failing the whole document.
                text = "".join(
                    (page.extract_text() or "") for page in pdf_reader.pages
                )
            elif (
                file_type
                == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            ):
                import docx

                doc = docx.Document(file)
                text = " ".join([paragraph.text for paragraph in doc.paragraphs])

            return text
        except Exception as e:
            st.error(f"Error extracting text: {str(e)}")
            return ""
|
179 |
-
|
180 |
-
|
181 |
-
class DocumentVectorizer:
    """Creates OpenAI embeddings and stores/searches them in MongoDB."""

    def __init__(self):
        self.model = "text-embedding-ada-002"
        self.client = MongoClient(MONGODB_URI)
        self.db = self.client["document_analysis"]
        self.vectors_collection = self.db["document_vectors"]

        # Best-effort index creation: any failure only produces a warning.
        # NOTE(review): the options dict is passed positionally here; confirm
        # it against pymongo's create_index signature. vector_search below
        # computes similarities in Python and does not depend on this index.
        try:
            self.vectors_collection.create_index(
                [("vector", "2dsphere")],
                {
                    "vectorSearchConfig": {
                        "dimensions": 1536,  # OpenAI embedding dimensions
                        "similarity": "cosine",
                    }
                },
            )
        except Exception:
            st.warning("Vector index may already exist")

    def get_embedding(self, text: str) -> list:
        """Return the embedding of ``text``, or ``None`` if the call fails."""
        try:
            response = openai_client.embeddings.create(model=self.model, input=text)
            return response.data[0].embedding
        except Exception as e:
            st.error(f"Error getting embedding: {str(e)}")
            return None

    def vector_exists(self, doc_name: str) -> bool:
        """Return True when a record named ``doc_name`` is already stored."""
        return self.vectors_collection.count_documents({"name": doc_name}) > 0

    def store_vector(self, doc_name: str, vector: list, text: str, goal: str = None):
        """Upsert the vector record for ``doc_name``.

        The record type is "document" when ``goal`` is None and "goal"
        otherwise. ``created_at`` is written only on insert; ``updated_at``
        on every call. Failures are reported through ``st.error``.
        """
        try:
            vector_doc = {
                "name": doc_name,
                "vector": vector,
                "text": text,
                "type": "document" if goal is None else "goal",
                "goal": goal,
                "updated_at": datetime.utcnow(),
            }

            self.vectors_collection.update_one(
                {"name": doc_name},
                {"$set": vector_doc, "$setOnInsert": {"created_at": datetime.utcnow()}},
                upsert=True,
            )

        except Exception as e:
            st.error(f"Error storing vector: {str(e)}")

    def vector_search(self, query_vector: List[float], limit: int = 5) -> List[Dict]:
        """Return the ``limit`` stored documents most similar to ``query_vector``.

        Every record of type "document" is loaded and scored in Python.
        Each result has "name", "text", "similarity" (float) and
        "similarity_display" (percentage string). Records without a stored
        vector are skipped. On error an empty list is returned.
        """
        try:
            similarities = []
            for doc in self.vectors_collection.find({"type": "document"}):
                stored_vector = doc.get("vector")
                if not stored_vector:
                    # One malformed record should not abort the whole search.
                    continue
                similarity = self.calculate_similarity(query_vector, stored_vector)
                similarities.append(
                    {
                        "name": doc["name"],
                        "text": doc["text"],
                        "similarity": similarity,  # Keep as float
                        "similarity_display": f"{similarity*100:.1f}%",
                    }
                )

            return sorted(
                similarities,
                key=lambda x: x["similarity"],
                reverse=True,
            )[:limit]

        except Exception as e:
            st.error(f"Vector search error: {str(e)}")
            return []

    def find_similar_documents(self, text: str, limit: int = 5) -> List[Dict]:
        """Embed ``text`` and return its most similar stored documents."""
        vector = self.get_embedding(text)
        if vector:
            return self.vector_search(vector, limit)
        return []

    def calculate_similarity(self, vector1: list, vector2: list) -> float:
        """Return the cosine similarity of two vectors.

        A zero-length vector has no direction, so the similarity is
        reported as 0.0 instead of dividing by zero.
        """
        denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
        if denominator == 0:
            return 0.0
        return float(np.dot(vector1, vector2) / denominator)
|
284 |
-
|
285 |
-
|
286 |
-
def display_analysis_results(analysis: Dict):
    """Render an analysis dict (themes, subthemes, keywords, score) in Streamlit.

    Does nothing when ``analysis`` is empty or None. Reads the keys
    "themes", "subthemes", "keywords" and "relevance_score", each with an
    empty default.
    """
    if not analysis:
        return

    # "subthemes" is looked up per theme, so it must be a mapping; anything
    # else (e.g. a list) is treated as "no subthemes" instead of crashing.
    subthemes_by_theme = analysis.get("subthemes", {})
    if not isinstance(subthemes_by_theme, dict):
        subthemes_by_theme = {}

    # Display Themes
    st.subheader("Themes")
    for theme in analysis.get("themes", []):
        with st.expander(f"🎯 {theme}"):
            subthemes = subthemes_by_theme.get(theme, [])
            if subthemes:
                st.write("**Subthemes:**")
                for subtheme in subthemes:
                    st.write(f"- {subtheme}")

    # Display Keywords
    st.subheader("Keywords")
    keywords = analysis.get("keywords", [])
    st.write(" | ".join([f"🔑 {keyword}" for keyword in keywords]))

    # Display Relevance Score
    score = analysis.get("relevance_score", 0)
    st.metric("Relevance Score", f"{score}%")
|
310 |
-
|
311 |
-
|
312 |
-
def display_analyst_dashboard():
    """Render the analyst dashboard: goal/document analysis and similarity search.

    The sidebar collects goals, uploaded documents, a similarity query and
    a logout button. "Analyze Documents" embeds goals and documents, shows
    per-document goal relevance plus the Perplexity analysis, and then the
    similarity matrices. "Search Similar" lists stored documents closest to
    the query text.
    """
    st.title("Multi-Goal Document Analysis")

    with st.sidebar:
        st.markdown("### Input Section")
        tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])

        with tab1:
            # Multiple goals input
            num_goals = st.number_input("Number of goals:", min_value=1, value=1)
            goals = []
            for i in range(num_goals):
                goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
                if goal:
                    goals.append(goal)

            uploaded_files = st.file_uploader(
                "Upload documents",
                accept_multiple_files=True,
                type=["txt", "pdf", "docx"],
            )
            analyze_button = (
                st.button("Analyze Documents") if goals and uploaded_files else None
            )

        with tab2:
            search_text = st.text_area("Enter text to find similar documents:")
            search_limit = st.slider("Number of results", 1, 10, 5)
            search_button = st.button("Search Similar") if search_text else None

        if st.button("Logout", use_container_width=True):
            # Snapshot the keys first: deleting entries while iterating the
            # live key view can raise a RuntimeError.
            for key in list(st.session_state.keys()):
                del st.session_state[key]
            st.rerun()

    if analyze_button:
        analyzer = GoalAnalyzer()
        vectorizer = DocumentVectorizer()

        doc_vectors = {}
        goal_vectors = {}

        # Process goals first
        with st.spinner("Processing goals..."):
            for i, goal in enumerate(goals):
                vector = vectorizer.get_embedding(goal)
                if vector:
                    goal_vectors[f"Goal {i+1}"] = vector
                    vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)

        # Process documents
        with st.spinner("Processing documents..."):
            for file in uploaded_files:
                st.markdown(f"### Analysis for {file.name}")

                if vectorizer.vector_exists(file.name):
                    st.info(f"Vector already exists for {file.name}")
                    existing_doc = vectorizer.vectors_collection.find_one(
                        {"name": file.name}
                    )
                    doc_vectors[file.name] = existing_doc["vector"]
                    # The analysis below needs this document's own text;
                    # reuse the stored copy, falling back to extraction.
                    text = existing_doc.get("text") or analyzer.extract_text_from_file(file)
                else:
                    text = analyzer.extract_text_from_file(file)
                    if not text:
                        st.warning(f"Could not extract text from {file.name}")
                        continue

                    vector = vectorizer.get_embedding(text)
                    if not vector:
                        # Without a vector there is nothing to compare.
                        st.warning(f"Could not create an embedding for {file.name}")
                        continue
                    doc_vectors[file.name] = vector
                    vectorizer.store_vector(file.name, vector, text)

                # Display goal similarities
                st.subheader("Goal Relevance Scores")
                col1, col2 = st.columns([1, 2])

                with col1:
                    for goal_name, goal_vector in goal_vectors.items():
                        similarity = (
                            vectorizer.calculate_similarity(
                                doc_vectors[file.name], goal_vector
                            )
                            * 100
                        )
                        st.metric(f"{goal_name}", f"{similarity:.1f}%")

                with col2:
                    # Get analysis for all goals combined
                    analysis = asyncio.run(
                        analyzer.get_perplexity_analysis(text, " | ".join(goals))
                    )
                    display_analysis_results(analysis)

                st.divider()

        # Document similarity matrix
        if len(doc_vectors) > 1:
            st.markdown("### Document Similarity Matrix")
            files = list(doc_vectors.keys())
            similarity_matrix = [
                [
                    vectorizer.calculate_similarity(doc_vectors[file1], doc_vectors[file2])
                    for file2 in files
                ]
                for file1 in files
            ]

            df = pd.DataFrame(similarity_matrix, columns=files, index=files)
            st.dataframe(df.style.background_gradient(cmap="RdYlGn"))

            # Add goal-document similarity matrix
            st.markdown("### Goal-Document Similarity Matrix")
            goal_names = list(goal_vectors.keys())
            goal_doc_matrix = [
                [
                    vectorizer.calculate_similarity(doc_vectors[file], goal_vectors[goal])
                    for goal in goal_names
                ]
                for file in files
            ]

            df_goals = pd.DataFrame(
                goal_doc_matrix, columns=goal_names, index=files
            )
            st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))

    elif search_button:
        vectorizer = DocumentVectorizer()
        with st.spinner("Searching similar documents..."):
            query_vector = vectorizer.get_embedding(search_text)
            if query_vector:
                similar_docs = vectorizer.vector_search(query_vector, search_limit)

                if similar_docs:
                    st.markdown("### Similar Documents Found")

                    # Create DataFrame with numeric similarities
                    df = pd.DataFrame(similar_docs)

                    # Apply gradient to numeric column
                    styled_df = df[["name", "similarity"]].style.background_gradient(
                        cmap="RdYlGn", subset=["similarity"]
                    )

                    # Format display after styling
                    styled_df = styled_df.format({"similarity": "{:.1%}"})

                    st.dataframe(styled_df)

                    # Show a short preview of each document
                    for doc in similar_docs:
                        with st.expander(
                            f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
                        ):
                            st.text(
                                doc["text"][:20] + "..."
                                if len(doc["text"]) > 20
                                else doc["text"]
                            )
                else:
                    st.info("No similar documents found")
            else:
                st.error("Could not process search query")
|
485 |
-
|
486 |
-
|
487 |
-
def main():
    """Standalone entry point: goal/document analysis and similarity search.

    The sidebar collects goals, uploaded documents and a similarity query.
    "Analyze Documents" embeds goals and documents, shows per-document goal
    relevance plus the Perplexity analysis, and then the similarity
    matrices. "Search Similar" lists stored documents closest to the query.
    """
    st.title("Multi-Goal Document Analysis")

    with st.sidebar:
        st.markdown("### Input Section")
        tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])

        with tab1:
            # Multiple goals input
            num_goals = st.number_input("Number of goals:", min_value=1, value=1)
            goals = []
            for i in range(num_goals):
                goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
                if goal:
                    goals.append(goal)

            uploaded_files = st.file_uploader(
                "Upload documents",
                accept_multiple_files=True,
                type=["txt", "pdf", "docx"],
            )
            analyze_button = (
                st.button("Analyze Documents") if goals and uploaded_files else None
            )

        with tab2:
            search_text = st.text_area("Enter text to find similar documents:")
            search_limit = st.slider("Number of results", 1, 10, 5)
            search_button = st.button("Search Similar") if search_text else None

    if analyze_button:
        analyzer = GoalAnalyzer()
        vectorizer = DocumentVectorizer()

        doc_vectors = {}
        goal_vectors = {}

        # Process goals first
        with st.spinner("Processing goals..."):
            for i, goal in enumerate(goals):
                vector = vectorizer.get_embedding(goal)
                if vector:
                    goal_vectors[f"Goal {i+1}"] = vector
                    vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)

        # Process documents
        with st.spinner("Processing documents..."):
            for file in uploaded_files:
                st.markdown(f"### Analysis for {file.name}")

                if vectorizer.vector_exists(file.name):
                    st.info(f"Vector already exists for {file.name}")
                    existing_doc = vectorizer.vectors_collection.find_one(
                        {"name": file.name}
                    )
                    doc_vectors[file.name] = existing_doc["vector"]
                    # The analysis below needs this document's own text;
                    # reuse the stored copy, falling back to extraction.
                    text = existing_doc.get("text") or analyzer.extract_text_from_file(file)
                else:
                    text = analyzer.extract_text_from_file(file)
                    if not text:
                        st.warning(f"Could not extract text from {file.name}")
                        continue

                    vector = vectorizer.get_embedding(text)
                    if not vector:
                        # Without a vector there is nothing to compare.
                        st.warning(f"Could not create an embedding for {file.name}")
                        continue
                    doc_vectors[file.name] = vector
                    vectorizer.store_vector(file.name, vector, text)

                # Display goal similarities
                st.subheader("Goal Relevance Scores")
                col1, col2 = st.columns([1, 2])

                with col1:
                    for goal_name, goal_vector in goal_vectors.items():
                        similarity = (
                            vectorizer.calculate_similarity(
                                doc_vectors[file.name], goal_vector
                            )
                            * 100
                        )
                        st.metric(f"{goal_name}", f"{similarity:.1f}%")

                with col2:
                    # Get analysis for all goals combined
                    analysis = asyncio.run(
                        analyzer.get_perplexity_analysis(text, " | ".join(goals))
                    )
                    display_analysis_results(analysis)

                st.divider()

        # Document similarity matrix
        if len(doc_vectors) > 1:
            st.markdown("### Document Similarity Matrix")
            files = list(doc_vectors.keys())
            similarity_matrix = [
                [
                    vectorizer.calculate_similarity(doc_vectors[file1], doc_vectors[file2])
                    for file2 in files
                ]
                for file1 in files
            ]

            df = pd.DataFrame(similarity_matrix, columns=files, index=files)
            st.dataframe(df.style.background_gradient(cmap="RdYlGn"))

            # Add goal-document similarity matrix
            st.markdown("### Goal-Document Similarity Matrix")
            goal_names = list(goal_vectors.keys())
            goal_doc_matrix = [
                [
                    vectorizer.calculate_similarity(doc_vectors[file], goal_vectors[goal])
                    for goal in goal_names
                ]
                for file in files
            ]

            df_goals = pd.DataFrame(
                goal_doc_matrix, columns=goal_names, index=files
            )
            st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))

    elif search_button:
        vectorizer = DocumentVectorizer()
        with st.spinner("Searching similar documents..."):
            query_vector = vectorizer.get_embedding(search_text)
            if query_vector:
                similar_docs = vectorizer.vector_search(query_vector, search_limit)

                if similar_docs:
                    st.markdown("### Similar Documents Found")

                    # Create DataFrame with numeric similarities
                    df = pd.DataFrame(similar_docs)

                    # Apply gradient to numeric column
                    styled_df = df[["name", "similarity"]].style.background_gradient(
                        cmap="RdYlGn", subset=["similarity"]
                    )

                    # Format display after styling
                    styled_df = styled_df.format({"similarity": "{:.1%}"})

                    st.dataframe(styled_df)

                    # Show a short preview of each document
                    for doc in similar_docs:
                        with st.expander(
                            f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
                        ):
                            st.text(
                                doc["text"][:20] + "..."
                                if len(doc["text"]) > 20
                                else doc["text"]
                            )
                else:
                    st.info("No similar documents found")
            else:
                st.error("Could not process search query")
|
655 |
-
|
656 |
-
|
657 |
-
if __name__ == "__main__":
|
658 |
-
main()
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from typing import List, Dict
|
3 |
+
import httpx
|
4 |
+
from pathlib import Path
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import json
|
8 |
+
import numpy as np
|
9 |
+
from pymongo import MongoClient
|
10 |
+
from openai import OpenAI
|
11 |
+
from datetime import datetime
|
12 |
+
import asyncio
|
13 |
+
import pandas as pd
|
14 |
+
|
15 |
+
# Load environment variables
|
16 |
+
load_dotenv()
|
17 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_KEY")
|
18 |
+
MONGODB_URI = os.getenv("MONGO_URI")
|
19 |
+
OPENAI_API_KEY = os.getenv("OPENAI_KEY")
|
20 |
+
|
21 |
+
# Initialize MongoDB client
|
22 |
+
client = MongoClient(MONGODB_URI)
|
23 |
+
db = client["document_analysis"]
|
24 |
+
vectors_collection = db["document_vectors"]
|
25 |
+
|
26 |
+
# Initialize OpenAI client
|
27 |
+
openai_client = OpenAI(api_key=OPENAI_API_KEY)
|
28 |
+
|
29 |
+
|
30 |
+
class GoalAnalyzer:
|
31 |
+
def __init__(self):
    """Store the Perplexity endpoint URL and the API key used to call it."""
    self.base_url = "https://api.perplexity.ai/chat/completions"
    self.api_key = PERPLEXITY_API_KEY
|
34 |
+
|
35 |
+
def clean_json_string(self, content: str) -> str:
    """Extract the JSON object embedded in a model reply.

    Strips a Markdown code fence, keeps the span from the first "{" to the
    last "}", and removes newlines. Single quotes are converted to double
    quotes only when the result is not already valid JSON, so apostrophes
    inside valid string values are preserved.
    """
    # Remove markdown formatting
    if "```json" in content:
        content = content.split("```json")[1].split("```")[0]
    elif "```" in content:
        content = content.split("```")[1]

    # Find the JSON object boundaries
    start_idx = content.find("{")
    end_idx = content.rfind("}") + 1
    if start_idx != -1 and end_idx > start_idx:
        content = content[start_idx:end_idx]

    content = content.strip().replace("\n", "")

    try:
        json.loads(content)
    except ValueError:
        # Fall back to the quote swap for pseudo-JSON such as {'a': 1}.
        content = content.replace("'", '"')

    return content
|
56 |
+
|
57 |
+
async def get_perplexity_analysis(self, text: str, goal: str) -> Dict:
|
58 |
+
"""Get analysis from Perplexity API"""
|
59 |
+
headers = {
|
60 |
+
"Authorization": f"Bearer {self.api_key}",
|
61 |
+
"Content-Type": "application/json",
|
62 |
+
}
|
63 |
+
|
64 |
+
prompt = f"""
|
65 |
+
Analyze the following text in context of the goal: {goal}
|
66 |
+
|
67 |
+
Text: {text}
|
68 |
+
|
69 |
+
Provide analysis in the following JSON format:
|
70 |
+
{{
|
71 |
+
"themes": ["theme1", "theme2"],
|
72 |
+
"subthemes": {{"theme1": ["subtheme1", "subtheme2"], "theme2": ["subtheme3"]}},
|
73 |
+
"keywords": ["keyword1", "keyword2"],
|
74 |
+
"relevance_score": 0-100
|
75 |
+
}}
|
76 |
+
"""
|
77 |
+
|
78 |
+
try:
|
79 |
+
async with httpx.AsyncClient() as client:
|
80 |
+
payload = {
|
81 |
+
"model": "llama-3.1-sonar-small-128k-chat", # Updated to supported model
|
82 |
+
"messages": [
|
83 |
+
{
|
84 |
+
"role": "system",
|
85 |
+
"content": "You are an AI assistant that analyzes documents and provides structured analysis.",
|
86 |
+
},
|
87 |
+
{"role": "user", "content": prompt},
|
88 |
+
],
|
89 |
+
"max_tokens": 1024,
|
90 |
+
}
|
91 |
+
|
92 |
+
# Debug info using expander
|
93 |
+
with st.expander("Debug Info", expanded=False):
|
94 |
+
st.write("Request payload:", payload)
|
95 |
+
|
96 |
+
response = await client.post(
|
97 |
+
self.base_url, headers=headers, json=payload, timeout=30.0
|
98 |
+
)
|
99 |
+
|
100 |
+
# Debug response info
|
101 |
+
with st.expander("Response Info", expanded=False):
|
102 |
+
st.write("Response status:", response.status_code)
|
103 |
+
st.write("Response headers:", dict(response.headers))
|
104 |
+
st.write("Response content:", response.text)
|
105 |
+
|
106 |
+
if response.status_code != 200:
|
107 |
+
error_detail = (
|
108 |
+
response.json() if response.content else "No error details"
|
109 |
+
)
|
110 |
+
raise Exception(
|
111 |
+
f"API returned status code {response.status_code}. Details: {error_detail}"
|
112 |
+
)
|
113 |
+
|
114 |
+
result = response.json()
|
115 |
+
content = (
|
116 |
+
result.get("choices", [{}])[0].get("message", {}).get("content", "")
|
117 |
+
)
|
118 |
+
|
119 |
+
# Clean and parse JSON
|
120 |
+
cleaned_content = self.clean_json_string(content)
|
121 |
+
|
122 |
+
try:
|
123 |
+
analysis = json.loads(cleaned_content)
|
124 |
+
|
125 |
+
# Validate required fields
|
126 |
+
required_fields = [
|
127 |
+
"themes",
|
128 |
+
"subthemes",
|
129 |
+
"keywords",
|
130 |
+
"relevance_score",
|
131 |
+
]
|
132 |
+
for field in required_fields:
|
133 |
+
if field not in analysis:
|
134 |
+
analysis[field] = [] if field != "relevance_score" else 0
|
135 |
+
|
136 |
+
return analysis
|
137 |
+
|
138 |
+
except json.JSONDecodeError as e:
|
139 |
+
st.error(f"JSON parsing error: {str(e)}")
|
140 |
+
st.error(f"Failed content: {cleaned_content}")
|
141 |
+
return {
|
142 |
+
"themes": ["Error parsing themes"],
|
143 |
+
"subthemes": {"Error": ["Failed to parse subthemes"]},
|
144 |
+
"keywords": ["parsing-error"],
|
145 |
+
"relevance_score": 0,
|
146 |
+
}
|
147 |
+
|
148 |
+
except Exception as e:
|
149 |
+
st.error(f"API Error: {str(e)}")
|
150 |
+
return None
|
151 |
+
|
152 |
+
def extract_text_from_file(self, file) -> str:
|
153 |
+
"""Extract text content from uploaded file"""
|
154 |
+
try:
|
155 |
+
text = ""
|
156 |
+
file_type = file.type
|
157 |
+
|
158 |
+
if file_type == "text/plain":
|
159 |
+
text = file.getvalue().decode("utf-8")
|
160 |
+
elif file_type == "application/pdf":
|
161 |
+
import PyPDF2
|
162 |
+
|
163 |
+
pdf_reader = PyPDF2.PdfReader(file)
|
164 |
+
for page in pdf_reader.pages:
|
165 |
+
text += page.extract_text()
|
166 |
+
elif (
|
167 |
+
file_type
|
168 |
+
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
169 |
+
):
|
170 |
+
import docx
|
171 |
+
|
172 |
+
doc = docx.Document(file)
|
173 |
+
text = " ".join([paragraph.text for paragraph in doc.paragraphs])
|
174 |
+
|
175 |
+
return text
|
176 |
+
except Exception as e:
|
177 |
+
st.error(f"Error extracting text: {str(e)}")
|
178 |
+
return ""
|
179 |
+
|
180 |
+
|
181 |
+
class DocumentVectorizer:
    """Create OpenAI embeddings and store/search them in MongoDB.

    Similarity search is computed client-side (cosine similarity over all
    stored document vectors), so no server-side vector index is needed.
    """

    def __init__(self):
        self.model = "text-embedding-ada-002"
        # Reuse the module-level MongoClient instead of opening a new
        # connection pool every time an instance is built.
        self.client = client
        self.db = self.client["document_analysis"]
        self.vectors_collection = self.db["document_vectors"]

        # Every lookup and upsert in this class filters on "name", so index
        # that field. The previous call passed an options dict in the
        # `session` position with a "2dsphere" key on the embedding array,
        # which failed on every construction.
        try:
            self.vectors_collection.create_index("name")
        except Exception as e:
            st.warning(f"Could not create index on document_vectors: {e}")

    def get_embedding(self, text: str) -> list:
        """Get the embedding vector for ``text`` using OpenAI.

        Returns:
            The embedding as a list of floats, or ``None`` when ``text`` is
            blank or the API call fails (the error is shown in the UI).
        """
        if not text or not text.strip():
            # Blank input would only produce an API error; skip the round trip.
            return None
        try:
            response = openai_client.embeddings.create(model=self.model, input=text)
            return response.data[0].embedding
        except Exception as e:
            st.error(f"Error getting embedding: {str(e)}")
            return None

    def vector_exists(self, doc_name: str) -> bool:
        """Return True if a vector is stored under ``doc_name``."""
        return self.vectors_collection.count_documents({"name": doc_name}) > 0

    def store_vector(self, doc_name: str, vector: list, text: str, goal: str = None):
        """Upsert a document or goal vector keyed by ``doc_name``.

        Args:
            doc_name: Unique name used as the upsert key.
            vector: Embedding to store.
            text: Source text the embedding was computed from.
            goal: When given, the record is stored with type ``"goal"``;
                otherwise with type ``"document"``.
        """
        try:
            vector_doc = {
                "name": doc_name,
                "vector": vector,
                "text": text,
                "type": "document" if goal is None else "goal",
                "goal": goal,
                "updated_at": datetime.utcnow(),
            }

            # created_at is written only when the record is first inserted.
            self.vectors_collection.update_one(
                {"name": doc_name},
                {"$set": vector_doc, "$setOnInsert": {"created_at": datetime.utcnow()}},
                upsert=True,
            )

        except Exception as e:
            st.error(f"Error storing vector: {str(e)}")

    def vector_search(self, query_vector: List[float], limit: int = 5) -> List[Dict]:
        """Return the ``limit`` stored documents most similar to ``query_vector``.

        Each result has ``name``, ``text``, ``similarity`` (float) and
        ``similarity_display`` (percentage string). Records without a stored
        vector are skipped. On error an empty list is returned and the error
        is shown in the UI.
        """
        try:
            documents = self.vectors_collection.find({"type": "document"})

            similarities = []
            for doc in documents:
                vector = doc.get("vector")
                if not vector:
                    continue
                similarity = self.calculate_similarity(query_vector, vector)
                similarities.append(
                    {
                        "name": doc.get("name", ""),
                        "text": doc.get("text", ""),
                        "similarity": similarity,  # Keep as float
                        "similarity_display": f"{similarity*100:.1f}%",
                    }
                )

            # Sort by the numeric similarity and keep the top results.
            similarities.sort(key=lambda item: item["similarity"], reverse=True)
            return similarities[:limit]

        except Exception as e:
            st.error(f"Vector search error: {str(e)}")
            return []

    def find_similar_documents(self, text: str, limit: int = 5) -> List[Dict]:
        """Embed ``text`` and return the most similar stored documents."""
        vector = self.get_embedding(text)
        if vector:
            return self.vector_search(vector, limit)
        return []

    def calculate_similarity(self, vector1: list, vector2: list) -> float:
        """Calculate the cosine similarity between two vectors.

        Returns ``0.0`` when either vector has zero length, instead of
        dividing by zero and producing NaN.
        """
        a = np.asarray(vector1, dtype=float)
        b = np.asarray(vector2, dtype=float)
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return float(np.dot(a, b) / denominator)
|
284 |
+
|
285 |
+
|
286 |
+
def display_analysis_results(analysis: Dict):
    """Render an analysis dict (themes, subthemes, keywords, score) in Streamlit.

    Does nothing when ``analysis`` is empty or ``None``. Malformed fields
    (for example ``subthemes`` delivered as a list, or ``themes`` set to
    ``None``) are treated as empty rather than raising.
    """
    if not analysis:
        return

    subtheme_map = analysis.get("subthemes")
    if not isinstance(subtheme_map, dict):
        # The model sometimes returns a list here; a per-theme lookup needs a dict.
        subtheme_map = {}

    # Display Themes
    st.subheader("Themes")
    for theme in analysis.get("themes") or []:
        with st.expander(f"🎯 {theme}"):
            # Only string themes can be used as lookup keys.
            subthemes = subtheme_map.get(theme, []) if isinstance(theme, str) else []
            if subthemes:
                st.write("**Subthemes:**")
                for subtheme in subthemes:
                    st.write(f"- {subtheme}")

    # Display Keywords
    st.subheader("Keywords")
    keywords = analysis.get("keywords") or []
    st.write(" | ".join(f"🔑 {keyword}" for keyword in keywords))

    # Display Relevance Score
    score = analysis.get("relevance_score", 0)
    st.metric("Relevance Score", f"{score}%")
|
310 |
+
|
311 |
+
|
312 |
+
def display_analyst_dashboard():
    """Render the multi-goal document analysis dashboard, with a logout button.

    The sidebar collects goals and uploads (tab 1) or a free-text similarity
    query (tab 2). "Analyze Documents" embeds goals and documents, shows
    per-document goal relevance and a Perplexity analysis, then similarity
    matrices. "Search Similar" lists the stored documents closest to the query.
    """
    st.title("Multi-Goal Document Analysis")

    with st.sidebar:
        st.markdown("### Input Section")
        tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])

        with tab1:
            # Multiple goals input
            num_goals = st.number_input("Number of goals:", min_value=1, value=1)
            goals = []
            for i in range(num_goals):
                goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
                if goal:
                    goals.append(goal)

            uploaded_files = st.file_uploader(
                "Upload documents",
                accept_multiple_files=True,
                type=["txt", "pdf", "docx"],
            )
            analyze_button = (
                st.button("Analyze Documents") if goals and uploaded_files else None
            )

        with tab2:
            search_text = st.text_area("Enter text to find similar documents:")
            search_limit = st.slider("Number of results", 1, 10, 5)
            search_button = st.button("Search Similar") if search_text else None

        if st.button("Logout", use_container_width=True):
            # Snapshot the keys first: deleting entries while iterating the
            # live key view raises RuntimeError.
            for key in list(st.session_state.keys()):
                del st.session_state[key]
            st.rerun()

    if analyze_button:
        analyzer = GoalAnalyzer()
        vectorizer = DocumentVectorizer()

        doc_vectors = {}
        goal_vectors = {}

        # Process goals first
        with st.spinner("Processing goals..."):
            for i, goal in enumerate(goals):
                vector = vectorizer.get_embedding(goal)
                if vector:
                    goal_vectors[f"Goal {i+1}"] = vector
                    vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)

        # Process documents
        with st.spinner("Processing documents..."):
            for file in uploaded_files:
                st.markdown(f"### Analysis for {file.name}")

                existing_doc = vectorizer.vectors_collection.find_one(
                    {"name": file.name}
                )
                if existing_doc and existing_doc.get("vector"):
                    st.info(f"Vector already exists for {file.name}")
                    doc_vectors[file.name] = existing_doc["vector"]
                    # Use the stored text for the analysis below; previously
                    # `text` was unbound (or left over from the previous
                    # file) on this branch.
                    text = existing_doc.get("text", "")
                else:
                    text = analyzer.extract_text_from_file(file)
                    if not text:
                        st.warning(f"Could not extract text from {file.name}")
                        continue

                    vector = vectorizer.get_embedding(text)
                    if not vector:
                        # Without a vector there is nothing to compare.
                        st.warning(f"Could not create embedding for {file.name}")
                        continue
                    doc_vectors[file.name] = vector
                    vectorizer.store_vector(file.name, vector, text)

                # Display goal similarities
                st.subheader("Goal Relevance Scores")
                col1, col2 = st.columns([1, 2])

                with col1:
                    for goal_name, goal_vector in goal_vectors.items():
                        similarity = (
                            vectorizer.calculate_similarity(
                                doc_vectors[file.name], goal_vector
                            )
                            * 100
                        )
                        st.metric(f"{goal_name}", f"{similarity:.1f}%")

                with col2:
                    # Get analysis for all goals combined
                    analysis = asyncio.run(
                        analyzer.get_perplexity_analysis(text, " | ".join(goals))
                    )
                    display_analysis_results(analysis)

                st.divider()

        # Similarity matrices need at least two documents to be meaningful.
        if len(doc_vectors) > 1:
            st.markdown("### Document Similarity Matrix")
            files = list(doc_vectors.keys())
            similarity_matrix = [
                [
                    vectorizer.calculate_similarity(doc_vectors[f1], doc_vectors[f2])
                    for f2 in files
                ]
                for f1 in files
            ]

            df = pd.DataFrame(similarity_matrix, columns=files, index=files)
            st.dataframe(df.style.background_gradient(cmap="RdYlGn"))

            # Goal-document similarity matrix
            st.markdown("### Goal-Document Similarity Matrix")
            goal_names = list(goal_vectors.keys())
            goal_doc_matrix = [
                [
                    vectorizer.calculate_similarity(doc_vectors[f], goal_vectors[g])
                    for g in goal_names
                ]
                for f in files
            ]

            df_goals = pd.DataFrame(
                goal_doc_matrix, columns=goal_names, index=files
            )
            st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))

    elif search_button:
        vectorizer = DocumentVectorizer()
        with st.spinner("Searching similar documents..."):
            query_vector = vectorizer.get_embedding(search_text)
            if query_vector:
                similar_docs = vectorizer.vector_search(query_vector, search_limit)

                if similar_docs:
                    st.markdown("### Similar Documents Found")

                    # Create DataFrame with numeric similarities
                    df = pd.DataFrame(similar_docs)

                    # Apply gradient to the numeric column, then format it.
                    styled_df = df[["name", "similarity"]].style.background_gradient(
                        cmap="RdYlGn", subset=["similarity"]
                    )
                    styled_df = styled_df.format({"similarity": "{:.1%}"})

                    st.dataframe(styled_df)

                    # Show a short preview of each document
                    for doc in similar_docs:
                        with st.expander(
                            f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
                        ):
                            st.text(
                                doc["text"][:20] + "..."
                                if len(doc["text"]) > 20
                                else doc["text"]
                            )
                else:
                    st.info("No similar documents found")
            else:
                st.error("Could not process search query")
|
485 |
+
|
486 |
+
|
487 |
+
def main():
    """Stand-alone entry point for the multi-goal document analysis page.

    The sidebar collects goals and uploads (tab 1) or a free-text similarity
    query (tab 2). "Analyze Documents" embeds goals and documents, shows
    per-document goal relevance and a Perplexity analysis, then similarity
    matrices. "Search Similar" lists the stored documents closest to the query.
    """
    st.title("Multi-Goal Document Analysis")

    with st.sidebar:
        st.markdown("### Input Section")
        tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])

        with tab1:
            # Multiple goals input
            num_goals = st.number_input("Number of goals:", min_value=1, value=1)
            goals = []
            for i in range(num_goals):
                goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
                if goal:
                    goals.append(goal)

            uploaded_files = st.file_uploader(
                "Upload documents",
                accept_multiple_files=True,
                type=["txt", "pdf", "docx"],
            )
            analyze_button = (
                st.button("Analyze Documents") if goals and uploaded_files else None
            )

        with tab2:
            search_text = st.text_area("Enter text to find similar documents:")
            search_limit = st.slider("Number of results", 1, 10, 5)
            search_button = st.button("Search Similar") if search_text else None

    if analyze_button:
        analyzer = GoalAnalyzer()
        vectorizer = DocumentVectorizer()

        doc_vectors = {}
        goal_vectors = {}

        # Process goals first
        with st.spinner("Processing goals..."):
            for i, goal in enumerate(goals):
                vector = vectorizer.get_embedding(goal)
                if vector:
                    goal_vectors[f"Goal {i+1}"] = vector
                    vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)

        # Process documents
        with st.spinner("Processing documents..."):
            for file in uploaded_files:
                st.markdown(f"### Analysis for {file.name}")

                existing_doc = vectorizer.vectors_collection.find_one(
                    {"name": file.name}
                )
                if existing_doc and existing_doc.get("vector"):
                    st.info(f"Vector already exists for {file.name}")
                    doc_vectors[file.name] = existing_doc["vector"]
                    # Use the stored text for the analysis below; previously
                    # `text` was unbound (or left over from the previous
                    # file) on this branch.
                    text = existing_doc.get("text", "")
                else:
                    text = analyzer.extract_text_from_file(file)
                    if not text:
                        st.warning(f"Could not extract text from {file.name}")
                        continue

                    vector = vectorizer.get_embedding(text)
                    if not vector:
                        # Without a vector there is nothing to compare.
                        st.warning(f"Could not create embedding for {file.name}")
                        continue
                    doc_vectors[file.name] = vector
                    vectorizer.store_vector(file.name, vector, text)

                # Display goal similarities
                st.subheader("Goal Relevance Scores")
                col1, col2 = st.columns([1, 2])

                with col1:
                    for goal_name, goal_vector in goal_vectors.items():
                        similarity = (
                            vectorizer.calculate_similarity(
                                doc_vectors[file.name], goal_vector
                            )
                            * 100
                        )
                        st.metric(f"{goal_name}", f"{similarity:.1f}%")

                with col2:
                    # Get analysis for all goals combined
                    analysis = asyncio.run(
                        analyzer.get_perplexity_analysis(text, " | ".join(goals))
                    )
                    display_analysis_results(analysis)

                st.divider()

        # Similarity matrices need at least two documents to be meaningful.
        if len(doc_vectors) > 1:
            st.markdown("### Document Similarity Matrix")
            files = list(doc_vectors.keys())
            similarity_matrix = [
                [
                    vectorizer.calculate_similarity(doc_vectors[f1], doc_vectors[f2])
                    for f2 in files
                ]
                for f1 in files
            ]

            df = pd.DataFrame(similarity_matrix, columns=files, index=files)
            st.dataframe(df.style.background_gradient(cmap="RdYlGn"))

            # Goal-document similarity matrix
            st.markdown("### Goal-Document Similarity Matrix")
            goal_names = list(goal_vectors.keys())
            goal_doc_matrix = [
                [
                    vectorizer.calculate_similarity(doc_vectors[f], goal_vectors[g])
                    for g in goal_names
                ]
                for f in files
            ]

            df_goals = pd.DataFrame(
                goal_doc_matrix, columns=goal_names, index=files
            )
            st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))

    elif search_button:
        vectorizer = DocumentVectorizer()
        with st.spinner("Searching similar documents..."):
            query_vector = vectorizer.get_embedding(search_text)
            if query_vector:
                similar_docs = vectorizer.vector_search(query_vector, search_limit)

                if similar_docs:
                    st.markdown("### Similar Documents Found")

                    # Create DataFrame with numeric similarities
                    df = pd.DataFrame(similar_docs)

                    # Apply gradient to the numeric column, then format it.
                    styled_df = df[["name", "similarity"]].style.background_gradient(
                        cmap="RdYlGn", subset=["similarity"]
                    )
                    styled_df = styled_df.format({"similarity": "{:.1%}"})

                    st.dataframe(styled_df)

                    # Show a short preview of each document
                    for doc in similar_docs:
                        with st.expander(
                            f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
                        ):
                            st.text(
                                doc["text"][:20] + "..."
                                if len(doc["text"]) > 20
                                else doc["text"]
                            )
                else:
                    st.info("No similar documents found")
            else:
                st.error("Could not process search query")
|
655 |
+
|
656 |
+
|
657 |
+
if __name__ == "__main__":
|
658 |
+
main()
|
infranew.py
ADDED
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import networkx as nx
|
4 |
+
from bokeh.models import HoverTool
|
5 |
+
from bokeh.plotting import figure, from_networkx
|
6 |
+
import requests
|
7 |
+
import json
|
8 |
+
import google.generativeai as genai
|
9 |
+
|
10 |
+
import os

# Credentials must not live in source control: read the Perplexity key from
# the environment. The key that was previously hard-coded here has been
# published and should be revoked.
# NOTE(review): the PERPLEXITY_KEY fallback mirrors the `OPENAI_KEY` naming used
# elsewhere in this project — confirm which variable the deployment sets.
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY") or os.getenv("PERPLEXITY_KEY", "")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
12 |
+
|
13 |
+
|
14 |
+
def extract_edges(keywords):
    """Return every unordered pair of terms from a comma-separated string.

    Terms are whitespace-stripped. Pairs are emitted in input order, each term
    paired with every term that follows it.
    """
    terms = [part.strip() for part in keywords.split(",")]
    pairs = []
    for position, left in enumerate(terms):
        for right in terms[position + 1 :]:
            pairs.append((left, right))
    return pairs
|
22 |
+
|
23 |
+
|
24 |
+
def create_knowledge_graph(data):
    """Build a co-occurrence graph from the words in each row of ``data``.

    Every non-null cell is converted to text and split into tokens (on
    whitespace and commas). All tokens of one row are connected pairwise.
    Each node carries ``title`` (the token) and ``value`` (its length).

    Args:
        data: pandas DataFrame; every column is used.

    Returns:
        networkx.Graph with one node per distinct token.
    """
    G = nx.Graph()

    for _, row in data.iterrows():
        tokens = []
        for col in data.columns:
            if pd.notnull(row[col]):
                # Convert to string and handle numeric values
                cell_value = str(row[col]).strip()
                for word in cell_value.split():
                    # extract_edges splits on commas, so tokenise the same way
                    # here; otherwise "a,b" becomes edge endpoints "a"/"b"
                    # plus a stray isolated node "a,b".
                    tokens.extend(part.strip() for part in word.split(","))
        tokens = [token for token in tokens if token]

        if not tokens:
            continue

        G.add_edges_from(extract_edges(",".join(tokens)))

        # add_node updates attributes on nodes that already exist, so set them
        # unconditionally. Guarding with `not in G` skipped every node that
        # add_edges_from had just created.
        for token in tokens:
            G.add_node(token, title=token, value=len(token))

    return G
|
46 |
+
|
47 |
+
|
48 |
+
def render_graph_bokeh(G):
    """Return a Bokeh figure showing ``G`` with a spring layout.

    Nodes are blue circles with a black outline and a hover tooltip showing
    the node index; edges are thin gray lines.
    """
    fig = figure(
        title="Interactive Knowledge Graph",
        x_range=(-1.5, 1.5),
        y_range=(-1.5, 1.5),
        tools="pan,wheel_zoom,box_zoom,reset,tap",
        active_scroll="wheel_zoom",
    )
    fig.add_tools(HoverTool(tooltips="@index"))

    renderer = from_networkx(G, nx.spring_layout, scale=1, center=(0, 0))

    # Node appearance
    node_glyph = renderer.node_renderer.glyph
    node_glyph.size = 10
    node_glyph.fill_color = "blue"
    node_glyph.line_color = "black"

    # Edge appearance
    edge_glyph = renderer.edge_renderer.glyph
    edge_glyph.line_width = 1
    edge_glyph.line_color = "gray"

    fig.renderers.append(renderer)
    return fig
|
70 |
+
|
71 |
+
|
72 |
+
import re
|
73 |
+
|
74 |
+
|
75 |
+
def search_papers(topic: str, num_papers: int) -> list:
    """Ask the Perplexity API for recent papers on ``topic``.

    Args:
        topic: Subject to search for.
        num_papers: Number of papers to request.

    Returns:
        A list of dicts as returned by the model (requested keys: ``Title``,
        ``Abstract``, ``Keywords``). Returns ``[]`` after showing an error in
        the UI when the request fails or the reply is not a JSON array.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    prompt = f"""Find {num_papers} recent research papers about {topic}.
    Return ONLY a valid JSON array with the following structure for each paper:
    [
        {{
            "Title": "paper title",
            "Abstract": "abstract text",
            "Keywords": "key terms"
        }}
    ]"""

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [
            {
                "role": "system",
                "content": "You are a research paper analyzer that returns valid JSON arrays.",
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.1,
    }

    try:
        # A timeout keeps a stalled connection from hanging the page.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=30
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"]

        # Narrow to the outermost [...] span; the model may wrap it in prose.
        start = content.find("[")
        end = content.rfind("]")
        if start == -1 or end < start:
            raise ValueError("Response is not a JSON array")
        content = content[start : end + 1]

        try:
            papers = json.loads(content)
        except json.JSONDecodeError:
            # Repair trailing commas only when the text does not parse as-is,
            # so valid JSON is never altered by the regexes.
            repaired = re.sub(r",\s*]", "]", content)
            repaired = re.sub(r",\s*}", "}", repaired)
            papers = json.loads(repaired)

        if not isinstance(papers, list):
            raise ValueError("Response is not a JSON array")
        return papers
    except requests.exceptions.RequestException as e:
        st.error(f"API Request Error: {str(e)}")
        return []
    except json.JSONDecodeError as e:
        st.error(f"Invalid JSON response: {str(e)}")
        st.error(f"Response content: {response.text}")
        return []
    except (KeyError, IndexError, TypeError, AttributeError) as e:
        # The payload did not have the expected choices[0].message.content shape.
        st.error(f"Unexpected API response format: {str(e)}")
        return []
    except ValueError as e:
        st.error(f"Error: {str(e)}")
        return []
|
133 |
+
|
134 |
+
|
135 |
+
import os
|
136 |
+
|
137 |
+
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
138 |
+
GEMINI_API_URL = "https://api.openai.com/v1/engines/davinci-codex/completions"
|
139 |
+
|
140 |
+
|
141 |
+
def call_gemini_api(prompt: str) -> str:
    """Generate text for ``prompt`` with the ``gemini-pro`` model.

    Returns:
        The generated text, or ``""`` after showing an error in the UI when
        the call fails.
    """
    try:
        # Hand the key to the SDK explicitly; nothing else in this module
        # configures it. The unused HTTP headers/payload that used to be
        # built here were never sent and have been removed.
        if GEMINI_API_KEY:
            genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel("gemini-pro")
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        st.error(f"Gemini API Error: {str(e)}")
        return ""
|
160 |
+
|
161 |
+
|
162 |
+
def generate_gaps_paragraph(gaps):
    """Ask Gemini for a short paragraph describing the given research gaps.

    ``gaps`` is an iterable of strings; they are joined with ", " into the prompt.
    """
    gap_list = ", ".join(gaps)
    request = (
        "Generate a brief paragraph about the gaps in the research "
        f"based on the following gaps: {gap_list}"
    )
    return call_gemini_api(request)
|
165 |
+
|
166 |
+
|
167 |
+
def generate_insights(G, topic):
    """Show paper suggestions for ``topic`` and degree-based insights for ``G``.

    Graph insights:
      * Strong Points - nodes whose degree exceeds 10% of the node count.
      * Weak Points   - nodes whose degree is below 5% of the node count.
      * Gaps          - nodes with no neighbours.
    When gaps exist, a Gemini-generated paragraph about them is shown.
    """
    papers = search_papers(topic, 5)
    if papers:
        st.write("### Research Insights from Perplexity API")
        for paper in papers:
            if not isinstance(paper, dict):
                # Skip entries that are not JSON objects.
                continue
            # The model does not always return every key; show a placeholder
            # instead of raising KeyError and aborting the page.
            st.write(f"**Title:** {paper.get('Title', 'N/A')}")
            st.write(f"**Abstract:** {paper.get('Abstract', 'N/A')}")
            st.write(f"**Keywords:** {paper.get('Keywords', 'N/A')}")
            st.write("---")

    node_count = len(G.nodes)
    nodes = list(G.nodes)
    insights = {
        "Strong Points": [n for n in nodes if G.degree(n) > node_count * 0.1],
        "Weak Points": [n for n in nodes if G.degree(n) < node_count * 0.05],
        "Gaps": [n for n in nodes if not any(True for _ in nx.neighbors(G, n))],
    }

    st.write("### Graph-Based Insights")
    st.write("**Strong Points:**", insights["Strong Points"])
    st.write("**Weak Points:**", insights["Weak Points"])
    st.write("**Gaps:**", insights["Gaps"])

    if insights["Gaps"]:
        with st.spinner("Generating insights about gaps..."):
            gaps_paragraph = generate_gaps_paragraph(insights["Gaps"])
            if gaps_paragraph:
                st.write("### Gaps in Research")
                st.write(gaps_paragraph)
|
196 |
+
|
197 |
+
|
198 |
+
def main():
    """Streamlit page: upload a CSV, draw its knowledge graph, show insights."""
    st.title("Advanced Interactive Knowledge Graph")
    st.write(
        "Upload a CSV file to generate a fully interactive and insightful knowledge graph."
    )

    csv_upload = st.file_uploader("Choose a CSV file", type="csv")

    # Nothing to do until a file has been supplied.
    if csv_upload is None:
        st.info("Please upload a CSV file to get started.")
        return

    try:
        frame = pd.read_csv(csv_upload)
        st.write("Preview of the uploaded data:")
        st.dataframe(frame.head())

        graph = create_knowledge_graph(frame)

        st.write("Generated Knowledge Graph:")
        st.bokeh_chart(render_graph_bokeh(graph), use_container_width=True)

        topic = st.text_input(
            "Enter a topic for additional insights:", "knowledge graphs"
        )
        if topic:
            generate_insights(graph, topic)

    except Exception as e:
        st.error(f"An error occurred while processing the file: {e}")
|
228 |
+
|
229 |
+
|
230 |
+
if __name__ == "__main__":
|
231 |
+
main()
|
keywords_database_download.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from pymongo import MongoClient
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import os
|
6 |
+
import json
|
7 |
+
import re
|
8 |
+
|
9 |
+
# 1. Load environment variables
|
10 |
+
load_dotenv()
|
11 |
+
# The connection string must come from the environment: a fallback URI with
# embedded credentials does not belong in source control. The misspelled
# legacy variable name "MONGODB_UR" is still honoured so existing deployments
# keep working.
MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv("MONGODB_UR")
|
15 |
+
# 2. Create MongoDB connection
|
16 |
+
client = MongoClient(MONGODB_URI)
|
17 |
+
db = client["novascholar_db"]
|
18 |
+
collection = db["research_papers"]
|
19 |
+
|
20 |
+
|
21 |
+
def convert_mixed_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Flatten list-valued columns to text so the frame exports cleanly to CSV.

    For each column holding at least one list, every cell is rewritten: lists
    become comma-separated strings, missing values become "", and anything
    else is passed through ``str``. Columns without lists are left untouched.
    The frame is modified in place and also returned.
    """

    def _as_text(cell):
        if isinstance(cell, list):
            return ", ".join(map(str, cell))
        if pd.notna(cell):
            return str(cell)
        return ""

    for name in df.columns:
        present = df[name].dropna()
        holds_list = any(isinstance(cell, list) for cell in present)
        if not holds_list:
            continue
        df[name] = df[name].apply(_as_text)
    return df
|
36 |
+
|
37 |
+
|
38 |
+
def filter_and_export_collection_to_csv(keyword: str, doc_collection=None):
    """Export documents whose 'Keywords' field contains ``keyword`` to CSV.

    The match is a case-insensitive literal substring match: ``keyword`` is
    regex-escaped before being sent to MongoDB, so input such as "c++" or
    "a.b" is matched as typed rather than interpreted as a pattern (or
    rejected as an invalid one).

    Args:
        keyword: Text to look for in the 'Keywords' field.
        doc_collection: Collection to query; defaults to the module-level
            ``collection`` when ``None``.

    Returns:
        ``(DataFrame, csv_filename)`` when documents matched and the file
        "papers_filtered_export.csv" was written to the working directory,
        otherwise ``(empty DataFrame, None)``.
    """
    # Use the default 'research_papers' collection if none provided
    if doc_collection is None:
        doc_collection = collection

    pattern = re.escape(keyword)
    docs = list(doc_collection.find({"Keywords": {"$regex": pattern, "$options": "i"}}))
    if not docs:
        # Return an empty DataFrame if no documents found
        return pd.DataFrame(), None

    df = convert_mixed_columns(pd.DataFrame(docs))
    csv_filename = "papers_filtered_export.csv"
    df.to_csv(csv_filename, index=False)
    return df, csv_filename
|
58 |
+
|
59 |
+
|
60 |
+
def main():
    """Render the Streamlit page that filters papers by keyword and exports them.

    The selected paper type is turned into a collection name by replacing
    spaces with underscores and lower-casing it; documents of that collection
    whose 'Keywords' field matches the entered keyword are exported to CSV and
    previewed on the page.
    """
    st.title("Filter and Export Papers by Keyword")

    # Let user select the paper type
    paper_type = st.selectbox(
        "Select type of research paper:",
        [
            "Review Based Paper",
            "Opinion/Perspective Based Paper",
            "Empirical Research Paper",
            "Research Paper (Other)",
        ],
    )

    # 5. Let user enter the keyword to filter
    keyword_input = st.text_input(
        "Enter the exact keyword to filter papers by 'Keywords' field:"
    )

    # When user clicks button, use the collection for the selected paper type
    if not st.button("Export Filtered Papers to CSV"):
        return

    keyword = keyword_input.strip()
    if not keyword:
        # An empty pattern matches every document, which would silently
        # export the entire collection.
        st.warning("Please enter a keyword.")
        return

    with st.spinner("Exporting filtered documents..."):
        try:
            # Determine dynamic collection based on paper type
            collection_name = paper_type.replace(" ", "_").lower()
            doc_collection = db[collection_name]

            df, csv_filename = filter_and_export_collection_to_csv(
                keyword, doc_collection
            )
            if not df.empty and csv_filename:
                st.success(
                    f"Successfully exported filtered papers to {csv_filename}!"
                )
                st.write("Preview of the filtered DataFrame:")
                st.dataframe(df)
            else:
                st.warning("No matching documents found for that keyword.")
        except Exception as e:
            # Top-level UI boundary: surface any failure on the page.
            st.error(f"Error exporting filtered papers: {str(e)}")


if __name__ == "__main__":
    main()
|
live_polls.py
CHANGED
@@ -1,115 +1,115 @@
|
|
1 |
-
# live_poll_feature.py
|
2 |
-
|
3 |
-
import streamlit as st
|
4 |
-
import pandas as pd
|
5 |
-
from datetime import datetime
|
6 |
-
from poll_db_operations import PollDatabase
|
7 |
-
|
8 |
-
class LivePollFeature:
    """Streamlit views for live polls: faculty create/close, students vote.

    All persistence goes through the ``PollDatabase`` instance in ``self.db``.
    The views read ``selected_course``, ``user_id`` and ``user_type`` from
    ``st.session_state``.
    """

    def __init__(self):
        self.db = PollDatabase()

    def display_faculty_interface(self, session_id):
        """Display the faculty interface for managing polls.

        Args:
            session_id: Session whose polls are created, listed and closed.
        """
        st.subheader("Live Polls Management")

        # Create new poll
        with st.expander("Create New Poll", expanded=False):
            question = st.text_input("Poll Question").strip()

            num_options = st.number_input(
                "Number of Options", min_value=2, max_value=6, value=4
            )

            # Keep only the option boxes that were actually filled in;
            # whitespace-only entries do not count as options.
            options = []
            for i in range(int(num_options)):
                option = st.text_input(f"Option {i+1}", key=f"option_{i}").strip()
                if option:
                    options.append(option)

            if st.button("Create Poll"):
                if question and len(options) >= 2:
                    self.db.create_poll(
                        st.session_state.selected_course,
                        session_id,
                        question,
                        options,
                        st.session_state.user_id,
                    )
                    st.success("Poll created successfully!")
                    st.rerun()
                else:
                    # An incomplete form used to be ignored without feedback.
                    st.warning(
                        "Enter a question and at least two options to create a poll."
                    )

        # Display active polls
        active_polls = self.db.get_active_polls(session_id)
        if active_polls:
            st.subheader("Active Polls")
            for poll in active_polls:
                with st.expander(f"Poll: {poll['question']}", expanded=True):
                    # Display results
                    self._display_poll_results(poll)

                    if st.button("Close Poll", key=f"close_{str(poll['_id'])}"):
                        self.db.close_poll(poll['_id'])
                        st.success("Poll closed successfully!")
                        st.rerun()

    def display_student_interface(self, session_id):
        """Display the student interface for participating in polls.

        Args:
            session_id: Session whose active polls are offered for voting.
        """
        st.subheader("Live Polls")

        active_polls = self.db.get_active_polls(session_id)
        if not active_polls:
            st.info("No active polls at the moment.")
            return

        for poll in active_polls:
            with st.expander(f"Poll: {poll['question']}", expanded=True):
                selected_option = st.radio(
                    "Your response:",
                    options=poll['options'],
                    key=f"poll_{str(poll['_id'])}",
                )

                if st.button("Submit Response", key=f"submit_{str(poll['_id'])}"):
                    success, message = self.db.submit_response(
                        poll['_id'],
                        st.session_state.user_id,
                        selected_option,
                    )
                    if success:
                        st.success(message)
                    else:
                        st.warning(message)
                    # NOTE(review): the source's indentation was lost; the
                    # rerun is assumed to follow both branches - confirm.
                    st.rerun()

    def _display_poll_results(self, poll):
        """Show vote totals, a bar chart and (for faculty) the vote table.

        Args:
            poll: Poll document; ``poll['responses']`` maps option -> votes.
        """
        responses_df = pd.DataFrame(
            list(poll['responses'].items()),
            columns=['Option', 'Votes'],
        )

        # st.metric is documented for int/float/str values, so pass a plain
        # int rather than the NumPy scalar returned by Series.sum().
        total_votes = int(responses_df['Votes'].sum())

        # Calculate percentages
        if total_votes > 0:
            responses_df['Percentage'] = (
                responses_df['Votes'] / total_votes * 100
            ).round(1)
        else:
            responses_df['Percentage'] = 0.0

        # Display metrics
        st.metric("Total Responses", total_votes)

        # Display charts
        st.bar_chart(responses_df.set_index('Option')['Votes'])

        # Display detailed statistics; .get() avoids an AttributeError when
        # user_type has not been stored in the session yet.
        if st.session_state.get("user_type") == 'faculty':
            st.dataframe(responses_df)
|
|
|
1 |
+
# live_poll_feature.py
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
import pandas as pd
|
5 |
+
from datetime import datetime
|
6 |
+
from poll_db_operations import PollDatabase
|
7 |
+
|
8 |
+
class LivePollFeature:
    """Streamlit views for live polls: faculty create/close, students vote.

    All persistence goes through the ``PollDatabase`` instance in ``self.db``.
    The views read ``selected_course``, ``user_id`` and ``user_type`` from
    ``st.session_state``.
    """

    def __init__(self):
        self.db = PollDatabase()

    def display_faculty_interface(self, session_id):
        """Display the faculty interface for managing polls.

        Args:
            session_id: Session whose polls are created, listed and closed.
        """
        st.subheader("Live Polls Management")

        # Create new poll
        with st.expander("Create New Poll", expanded=False):
            question = st.text_input("Poll Question").strip()

            num_options = st.number_input(
                "Number of Options", min_value=2, max_value=6, value=4
            )

            # Keep only the option boxes that were actually filled in;
            # whitespace-only entries do not count as options.
            options = []
            for i in range(int(num_options)):
                option = st.text_input(f"Option {i+1}", key=f"option_{i}").strip()
                if option:
                    options.append(option)

            if st.button("Create Poll"):
                if question and len(options) >= 2:
                    self.db.create_poll(
                        st.session_state.selected_course,
                        session_id,
                        question,
                        options,
                        st.session_state.user_id,
                    )
                    st.success("Poll created successfully!")
                    st.rerun()
                else:
                    # An incomplete form used to be ignored without feedback.
                    st.warning(
                        "Enter a question and at least two options to create a poll."
                    )

        # Display active polls
        active_polls = self.db.get_active_polls(session_id)
        if active_polls:
            st.subheader("Active Polls")
            for poll in active_polls:
                with st.expander(f"Poll: {poll['question']}", expanded=True):
                    # Display results
                    self._display_poll_results(poll)

                    if st.button("Close Poll", key=f"close_{str(poll['_id'])}"):
                        self.db.close_poll(poll['_id'])
                        st.success("Poll closed successfully!")
                        st.rerun()

    def display_student_interface(self, session_id):
        """Display the student interface for participating in polls.

        Args:
            session_id: Session whose active polls are offered for voting.
        """
        st.subheader("Live Polls")

        active_polls = self.db.get_active_polls(session_id)
        if not active_polls:
            st.info("No active polls at the moment.")
            return

        for poll in active_polls:
            with st.expander(f"Poll: {poll['question']}", expanded=True):
                selected_option = st.radio(
                    "Your response:",
                    options=poll['options'],
                    key=f"poll_{str(poll['_id'])}",
                )

                if st.button("Submit Response", key=f"submit_{str(poll['_id'])}"):
                    success, message = self.db.submit_response(
                        poll['_id'],
                        st.session_state.user_id,
                        selected_option,
                    )
                    if success:
                        st.success(message)
                    else:
                        st.warning(message)
                    # NOTE(review): the source's indentation was lost; the
                    # rerun is assumed to follow both branches - confirm.
                    st.rerun()

    def _display_poll_results(self, poll):
        """Show vote totals, a bar chart and (for faculty) the vote table.

        Args:
            poll: Poll document; ``poll['responses']`` maps option -> votes.
        """
        responses_df = pd.DataFrame(
            list(poll['responses'].items()),
            columns=['Option', 'Votes'],
        )

        # st.metric is documented for int/float/str values, so pass a plain
        # int rather than the NumPy scalar returned by Series.sum().
        total_votes = int(responses_df['Votes'].sum())

        # Calculate percentages
        if total_votes > 0:
            responses_df['Percentage'] = (
                responses_df['Votes'] / total_votes * 100
            ).round(1)
        else:
            responses_df['Percentage'] = 0.0

        # Display metrics
        st.metric("Total Responses", total_votes)

        # Display charts
        st.bar_chart(responses_df.set_index('Option')['Votes'])

        # Display detailed statistics; .get() avoids an AttributeError when
        # user_type has not been stored in the session yet.
        if st.session_state.get("user_type") == 'faculty':
            st.dataframe(responses_df)
|
loldude.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
6 |
+
import plotly.express as px
|
7 |
+
import plotly.graph_objects as go
|
8 |
+
from collections import defaultdict
|
9 |
+
|
10 |
+
def load_and_preprocess_data(uploaded_file):
    """Load the papers CSV and add a ``combined_text`` column.

    ``combined_text`` is ``Title + ' ' + Abstract + ' ' + Keywords``. Missing
    cells are treated as empty strings for the concatenation only, so a paper
    lacking an abstract or keywords still gets usable text instead of NaN;
    the original columns are left as read.

    Args:
        uploaded_file: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        The loaded DataFrame with the extra ``combined_text`` column.

    Raises:
        KeyError: If 'Title', 'Abstract' or 'Keywords' is not a column.
    """
    df = pd.read_csv(uploaded_file)
    # Combine relevant text fields for similarity comparison
    parts = df[['Title', 'Abstract', 'Keywords']].fillna('').astype(str)
    df['combined_text'] = (
        parts['Title'] + ' ' + parts['Abstract'] + ' ' + parts['Keywords']
    )
    return df
|
16 |
+
|
17 |
+
def calculate_similarity_matrix(df):
    """Calculate the cosine similarity matrix of the papers' combined text.

    Args:
        df: DataFrame with a ``combined_text`` column.

    Returns:
        The pairwise cosine similarities of the TF-IDF vectors, as returned
        by ``cosine_similarity``.
    """
    # The vectorizer rejects NaN documents, so coerce every entry to text.
    documents = df['combined_text'].fillna('').astype(str)
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(documents)
    return cosine_similarity(tfidf_matrix)
|
23 |
+
|
24 |
+
def find_similar_papers(similarity_matrix, df, threshold=0.7):
    """Find pairs of papers with similarity at or above ``threshold``.

    Each unordered pair is reported once, in the same (i < j, row-major)
    order as a nested loop over the upper triangle.

    Args:
        similarity_matrix: Square matrix of pairwise similarities.
        df: DataFrame whose 'Title' column is aligned with the matrix rows.
        threshold: Minimum similarity for a pair to be reported.

    Returns:
        DataFrame with columns 'Paper 1', 'Paper 2' and 'Similarity'; it has
        those columns even when no pair qualifies.
    """
    columns = ['Paper 1', 'Paper 2', 'Similarity']
    sim = np.asarray(similarity_matrix)
    if sim.ndim != 2 or sim.shape[0] < 2:
        return pd.DataFrame(columns=columns)

    # Strict upper triangle: every pair once, self-pairs excluded.
    rows, cols = np.triu_indices(sim.shape[0], k=1)
    scores = sim[rows, cols]
    keep = scores >= threshold

    titles = df['Title'].to_numpy()
    return pd.DataFrame({
        'Paper 1': titles[rows[keep]],
        'Paper 2': titles[cols[keep]],
        'Similarity': scores[keep],
    })
|
37 |
+
|
38 |
+
def find_outliers(similarity_matrix, df, threshold=0.3):
    """Find papers with low average similarity to the other papers.

    The average is taken over the *other* papers only: a paper's similarity
    to itself (the matrix diagonal) is excluded so it cannot inflate the
    score and hide genuine outliers.

    Args:
        similarity_matrix: Square matrix of pairwise similarities.
        df: DataFrame whose 'Title' column is aligned with the matrix rows.
        threshold: Papers whose average similarity is below this are returned.

    Returns:
        DataFrame with columns 'Title' and 'Average Similarity'. It is empty
        when there are fewer than two papers (no "others" to compare with).
    """
    columns = ['Title', 'Average Similarity']
    sim = np.asarray(similarity_matrix, dtype=float)
    if sim.ndim != 2 or sim.shape[0] < 2:
        return pd.DataFrame(columns=columns)

    n_papers = sim.shape[0]
    avg_similarities = (sim.sum(axis=1) - np.diag(sim)) / (n_papers - 1)
    is_outlier = avg_similarities < threshold

    return pd.DataFrame({
        'Title': df['Title'].to_numpy()[is_outlier],
        'Average Similarity': avg_similarities[is_outlier],
    })
|
49 |
+
|
50 |
+
def create_similarity_heatmap(similarity_matrix, df):
    """Create a heatmap of the similarity matrix labelled by paper title.

    Args:
        similarity_matrix: Square matrix of pairwise similarities.
        df: DataFrame whose 'Title' column labels both axes.

    Returns:
        The configured Plotly ``Figure``.
    """
    heatmap = go.Heatmap(
        z=similarity_matrix,
        x=df['Title'],
        y=df['Title'],
        colorscale='Viridis',
    )
    fig = go.Figure(data=heatmap)
    fig.update_layout(
        title='Paper Similarity Heatmap',
        xaxis_tickangle=-45,
        height=800,
    )
    return fig
|
64 |
+
|
65 |
+
def analyze_keywords(df):
    """Analyze keyword frequency across papers.

    Each 'Keywords' cell is split on commas; tokens are stripped and empty
    tokens (from trailing or doubled commas) are ignored. Non-string cells
    such as NaN are skipped.

    Args:
        df: DataFrame with a 'Keywords' column.

    Returns:
        DataFrame with columns 'Keyword' and 'Frequency', most frequent
        first. Ties keep first-seen order. The columns exist even when no
        keyword was found, so callers can always sort or plot the result.
    """
    keyword_freq = defaultdict(int)
    for keywords in df['Keywords']:
        if not isinstance(keywords, str):
            continue
        for keyword in keywords.split(','):
            keyword = keyword.strip()
            if keyword:
                keyword_freq[keyword] += 1

    # Naming the columns up front keeps sort_values() from raising KeyError
    # when there are no keywords at all.
    keyword_df = pd.DataFrame(
        list(keyword_freq.items()), columns=['Keyword', 'Frequency']
    )
    return keyword_df.sort_values('Frequency', ascending=False, kind='stable')
|
80 |
+
|
81 |
+
def main():
    """Render the Streamlit page for research-paper similarity analysis.

    After a CSV is uploaded the page shows a similarity heatmap, the pairs
    above a user-chosen threshold, the outlier papers, a keyword frequency
    chart and a few summary metrics.
    """
    st.title('Research Papers Similarity Analysis')

    uploaded_file = st.file_uploader("Upload your research papers CSV file", type=['csv'])
    if uploaded_file is None:
        return

    df = load_and_preprocess_data(uploaded_file)
    if df.empty:
        # Nothing to vectorise; the TF-IDF step cannot run on zero documents.
        st.warning("The uploaded file contains no papers.")
        return
    similarity_matrix = calculate_similarity_matrix(df)

    st.header('Document Similarity Analysis')

    # Similarity Heatmap
    st.subheader('Similarity Heatmap')
    heatmap = create_similarity_heatmap(similarity_matrix, df)
    st.plotly_chart(heatmap, use_container_width=True)

    # Similar Papers
    st.subheader('Similar Papers')
    similarity_threshold = st.slider('Similarity Threshold', 0.0, 1.0, 0.7)
    similar_papers = find_similar_papers(similarity_matrix, df, similarity_threshold)
    if not similar_papers.empty:
        st.dataframe(similar_papers)
    else:
        st.write("No papers found above the similarity threshold.")

    # Outliers
    st.subheader('Outlier Papers')
    outlier_threshold = st.slider('Outlier Threshold', 0.0, 1.0, 0.3)
    outliers = find_outliers(similarity_matrix, df, outlier_threshold)
    if not outliers.empty:
        st.dataframe(outliers)
    else:
        st.write("No outliers found below the threshold.")

    # Keyword Analysis
    st.header('Keyword Analysis')
    keyword_freq = analyze_keywords(df)
    if not keyword_freq.empty:
        fig = px.bar(keyword_freq, x='Keyword', y='Frequency',
                     title='Keyword Frequency Across Papers')
        fig.update_xaxes(tickangle=45)
        st.plotly_chart(fig, use_container_width=True)

    # Basic Statistics
    st.header('Basic Statistics')
    # Off-diagonal entries only: a paper's similarity to itself is always
    # the maximum and says nothing about the collection. With a single paper
    # there are no such entries, and np.max() on an empty array would raise.
    off_diagonal = similarity_matrix[~np.eye(similarity_matrix.shape[0], dtype=bool)]
    max_similarity = f"{np.max(off_diagonal):.2f}" if off_diagonal.size else "N/A"

    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Papers", len(df))
        st.metric("Average Similarity", f"{np.mean(similarity_matrix):.2f}")
    with col2:
        st.metric("Unique Keywords", len(keyword_freq))
        st.metric("Max Similarity", max_similarity)


if __name__ == "__main__":
    main()
|
modify_schema.py
CHANGED
@@ -1,222 +1,222 @@
|
|
1 |
-
from db import courses_collection2
|
2 |
-
from dotenv import load_dotenv
|
3 |
-
import os
|
4 |
-
from pymongo import MongoClient
|
5 |
-
from datetime import datetime
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
load_dotenv()
# NOTE(review): if MONGO_URI is unset this is None and MongoClient falls back
# to its built-in default host - presumably a local server; confirm that this
# is acceptable for a schema migration.
MONGO_URI = os.getenv("MONGO_URI")

client = MongoClient(MONGO_URI)
db = client["novascholar_db"]
|
14 |
-
|
15 |
-
# Define the updated course schema ($jsonSchema validator document).
# A course holds a list of sessions; each session is split into pre_class,
# in_class and post_class segments.
# NOTE: "faculty_id" is listed as required but has no entry under
# "properties", so only its presence (not its type) is validated.
updated_course_schema = {
    "bsonType": "object",
    "required": [
        "course_id",
        "title",
        "description",
        "faculty",
        "faculty_id",
        "duration",
        "created_at",
    ],
    "properties": {
        "course_id": {
            "bsonType": "string",
            "description": "Unique identifier for the course",
        },
        "title": {"bsonType": "string", "description": "Title of the course"},
        "description": {
            "bsonType": "string",
            "description": "Description of the course",
        },
        "faculty": {"bsonType": "string", "description": "Name of the faculty"},
        "duration": {"bsonType": "string", "description": "Duration of the course"},
        "created_at": {
            "bsonType": "date",
            "description": "Date when the course was created",
        },
        "sessions": {
            "bsonType": "array",
            "description": "List of sessions associated with the course",
            "items": {
                "bsonType": "object",
                "required": ["session_id", "title", "date"],
                "properties": {
                    "session_id": {
                        "bsonType": "string",
                        "description": "Unique identifier for the session",
                    },
                    "title": {
                        "bsonType": "string",
                        "description": "Title of the session",
                    },
                    "date": {"bsonType": "date", "description": "Date of the session"},
                    "status": {
                        "bsonType": "string",
                        "description": "Status of the session (e.g., completed, upcoming)",
                    },
                    "created_at": {
                        "bsonType": "date",
                        "description": "Date when the session was created",
                    },
                    # --- Pre-class segment ---
                    "pre_class": {
                        "bsonType": "object",
                        "description": "Pre-class segment data",
                        "properties": {
                            "resources": {
                                "bsonType": "array",
                                "description": "List of pre-class resources",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["type", "title", "url"],
                                    "properties": {
                                        "type": {
                                            "bsonType": "string",
                                            "description": "Type of resource (e.g., pdf, video)",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the resource",
                                        },
                                        "url": {
                                            "bsonType": "string",
                                            "description": "URL of the resource",
                                        },
                                        "vector": {
                                            "bsonType": "array",
                                            "description": "Vector representation of the resource",
                                            "items": {"bsonType": "double"},
                                        },
                                    },
                                },
                            },
                            "completion_required": {
                                "bsonType": "bool",
                                "description": "Indicates if completion of pre-class resources is required",
                            },
                        },
                    },
                    # --- In-class segment ---
                    "in_class": {
                        "bsonType": "object",
                        "description": "In-class segment data",
                        "properties": {
                            "topics": {
                                "bsonType": "array",
                                "description": "List of topics covered in the session",
                                "items": {"bsonType": "string"},
                            },
                            "quiz": {
                                "bsonType": "object",
                                "description": "Quiz data",
                                "properties": {
                                    "title": {
                                        "bsonType": "string",
                                        "description": "Title of the quiz",
                                    },
                                    "questions": {
                                        "bsonType": "int",
                                        "description": "Number of questions in the quiz",
                                    },
                                    "duration": {
                                        "bsonType": "int",
                                        "description": "Duration of the quiz in minutes",
                                    },
                                },
                            },
                            "polls": {
                                "bsonType": "array",
                                "description": "List of polls conducted during the session",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["question", "options"],
                                    "properties": {
                                        "question": {
                                            "bsonType": "string",
                                            "description": "Poll question",
                                        },
                                        "options": {
                                            "bsonType": "array",
                                            "description": "List of poll options",
                                            "items": {"bsonType": "string"},
                                        },
                                        "responses": {
                                            "bsonType": "object",
                                            "description": "Responses to the poll",
                                            "additionalProperties": {"bsonType": "int"},
                                        },
                                    },
                                },
                            },
                        },
                    },
                    # --- Post-class segment ---
                    "post_class": {
                        "bsonType": "object",
                        "description": "Post-class segment data",
                        "properties": {
                            "assignments": {
                                "bsonType": "array",
                                "description": "List of assignments",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["id", "title", "due_date", "status"],
                                    "properties": {
                                        "id": {
                                            "bsonType": ["objectId", "int"],
                                            "description": "Assignment ID",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the assignment",
                                        },
                                        "due_date": {
                                            "bsonType": "date",
                                            "description": "Due date of the assignment",
                                        },
                                        "status": {
                                            "bsonType": "string",
                                            "description": "Status of the assignment (e.g., pending, completed)",
                                        },
                                        "submissions": {
                                            "bsonType": "array",
                                            "description": "List of submissions",
                                            "items": {
                                                "bsonType": "object",
                                                "properties": {
                                                    "student_id": {
                                                        "bsonType": "objectId",
                                                        "description": "ID of the student who submitted the assignment",
                                                    },
                                                    "file_url": {
                                                        "bsonType": "string",
                                                        "description": "URL of the submitted file",
                                                    },
                                                    "submitted_at": {
                                                        "bsonType": "date",
                                                        "description": "Date when the assignment was submitted",
                                                    },
                                                },
                                            },
                                        },
                                    },
                                },
                            },
                        },
                    },
                },
            },
        },
    },
}
|
215 |
-
|
216 |
-
# Update the schema using the collMod command
# Passing the command name and its value positionally guarantees that
# "collMod" is the first key of the command document sent to the server.
db.command(
    "collMod",
    "courses_collection2",
    validator={"$jsonSchema": updated_course_schema},
)

print("Schema updated successfully!")
|
|
|
1 |
+
from db import courses_collection2
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
import os
|
4 |
+
from pymongo import MongoClient
|
5 |
+
from datetime import datetime
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
load_dotenv()
# NOTE(review): if MONGO_URI is unset this is None and MongoClient falls back
# to its built-in default host - presumably a local server; confirm that this
# is acceptable for a schema migration.
MONGO_URI = os.getenv("MONGO_URI")

client = MongoClient(MONGO_URI)
db = client["novascholar_db"]
|
14 |
+
|
15 |
+
# Define the updated course schema ($jsonSchema validator document).
# A course holds a list of sessions; each session is split into pre_class,
# in_class and post_class segments.
# NOTE: "faculty_id" is listed as required but has no entry under
# "properties", so only its presence (not its type) is validated.
updated_course_schema = {
    "bsonType": "object",
    "required": [
        "course_id",
        "title",
        "description",
        "faculty",
        "faculty_id",
        "duration",
        "created_at",
    ],
    "properties": {
        "course_id": {
            "bsonType": "string",
            "description": "Unique identifier for the course",
        },
        "title": {"bsonType": "string", "description": "Title of the course"},
        "description": {
            "bsonType": "string",
            "description": "Description of the course",
        },
        "faculty": {"bsonType": "string", "description": "Name of the faculty"},
        "duration": {"bsonType": "string", "description": "Duration of the course"},
        "created_at": {
            "bsonType": "date",
            "description": "Date when the course was created",
        },
        "sessions": {
            "bsonType": "array",
            "description": "List of sessions associated with the course",
            "items": {
                "bsonType": "object",
                "required": ["session_id", "title", "date"],
                "properties": {
                    "session_id": {
                        "bsonType": "string",
                        "description": "Unique identifier for the session",
                    },
                    "title": {
                        "bsonType": "string",
                        "description": "Title of the session",
                    },
                    "date": {"bsonType": "date", "description": "Date of the session"},
                    "status": {
                        "bsonType": "string",
                        "description": "Status of the session (e.g., completed, upcoming)",
                    },
                    "created_at": {
                        "bsonType": "date",
                        "description": "Date when the session was created",
                    },
                    # --- Pre-class segment ---
                    "pre_class": {
                        "bsonType": "object",
                        "description": "Pre-class segment data",
                        "properties": {
                            "resources": {
                                "bsonType": "array",
                                "description": "List of pre-class resources",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["type", "title", "url"],
                                    "properties": {
                                        "type": {
                                            "bsonType": "string",
                                            "description": "Type of resource (e.g., pdf, video)",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the resource",
                                        },
                                        "url": {
                                            "bsonType": "string",
                                            "description": "URL of the resource",
                                        },
                                        "vector": {
                                            "bsonType": "array",
                                            "description": "Vector representation of the resource",
                                            "items": {"bsonType": "double"},
                                        },
                                    },
                                },
                            },
                            "completion_required": {
                                "bsonType": "bool",
                                "description": "Indicates if completion of pre-class resources is required",
                            },
                        },
                    },
                    # --- In-class segment ---
                    "in_class": {
                        "bsonType": "object",
                        "description": "In-class segment data",
                        "properties": {
                            "topics": {
                                "bsonType": "array",
                                "description": "List of topics covered in the session",
                                "items": {"bsonType": "string"},
                            },
                            "quiz": {
                                "bsonType": "object",
                                "description": "Quiz data",
                                "properties": {
                                    "title": {
                                        "bsonType": "string",
                                        "description": "Title of the quiz",
                                    },
                                    "questions": {
                                        "bsonType": "int",
                                        "description": "Number of questions in the quiz",
                                    },
                                    "duration": {
                                        "bsonType": "int",
                                        "description": "Duration of the quiz in minutes",
                                    },
                                },
                            },
                            "polls": {
                                "bsonType": "array",
                                "description": "List of polls conducted during the session",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["question", "options"],
                                    "properties": {
                                        "question": {
                                            "bsonType": "string",
                                            "description": "Poll question",
                                        },
                                        "options": {
                                            "bsonType": "array",
                                            "description": "List of poll options",
                                            "items": {"bsonType": "string"},
                                        },
                                        "responses": {
                                            "bsonType": "object",
                                            "description": "Responses to the poll",
                                            "additionalProperties": {"bsonType": "int"},
                                        },
                                    },
                                },
                            },
                        },
                    },
                    # --- Post-class segment ---
                    "post_class": {
                        "bsonType": "object",
                        "description": "Post-class segment data",
                        "properties": {
                            "assignments": {
                                "bsonType": "array",
                                "description": "List of assignments",
                                "items": {
                                    "bsonType": "object",
                                    "required": ["id", "title", "due_date", "status"],
                                    "properties": {
                                        "id": {
                                            "bsonType": ["objectId", "int"],
                                            "description": "Assignment ID",
                                        },
                                        "title": {
                                            "bsonType": "string",
                                            "description": "Title of the assignment",
                                        },
                                        "due_date": {
                                            "bsonType": "date",
                                            "description": "Due date of the assignment",
                                        },
                                        "status": {
                                            "bsonType": "string",
                                            "description": "Status of the assignment (e.g., pending, completed)",
                                        },
                                        "submissions": {
                                            "bsonType": "array",
                                            "description": "List of submissions",
                                            "items": {
                                                "bsonType": "object",
                                                "properties": {
                                                    "student_id": {
                                                        "bsonType": "objectId",
                                                        "description": "ID of the student who submitted the assignment",
                                                    },
                                                    "file_url": {
                                                        "bsonType": "string",
                                                        "description": "URL of the submitted file",
                                                    },
                                                    "submitted_at": {
                                                        "bsonType": "date",
                                                        "description": "Date when the assignment was submitted",
                                                    },
                                                },
                                            },
                                        },
                                    },
                                },
                            },
                        },
                    },
                },
            },
        },
    },
}
|
215 |
+
|
216 |
+
# Update the schema using the collMod command
# Passing the command name and its value positionally guarantees that
# "collMod" is the first key of the command document sent to the server.
db.command(
    "collMod",
    "courses_collection2",
    validator={"$jsonSchema": updated_course_schema},
)

print("Schema updated successfully!")
|
new_keywords.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from pymongo import MongoClient
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import os
|
6 |
+
import json
|
7 |
+
import re
|
8 |
+
|
9 |
+
# 1. Load environment variables
load_dotenv()
# The connection string comes from the environment only. "MONGODB_UR" is the
# misspelled name this script originally looked up; it is still honoured so
# deployments that already define it keep working.
MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv("MONGODB_UR")
if not MONGODB_URI:
    # A URI with an embedded username/password must not be committed as a
    # default value; fail with an actionable message instead.
    raise RuntimeError(
        "MONGODB_URI is not set; define it in the environment or in the .env file."
    )

# 2. Create MongoDB connection
client = MongoClient(MONGODB_URI)
db = client["novascholar_db"]
collection = db["research_papers"]
|
19 |
+
|
20 |
+
|
21 |
+
def convert_mixed_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Flatten list-valued cells so every column exports cleanly to CSV.

    Any column holding at least one Python ``list`` is rewritten in place:
    lists become comma-separated strings, missing values become ``""`` and
    every other value is converted with ``str``. Columns without lists are
    left untouched.

    Args:
        df: Frame to normalise; it is modified in place.

    Returns:
        The same ``df`` object that was passed in.
    """

    def _cell_to_text(value):
        if isinstance(value, list):
            return ", ".join(map(str, value))
        # pd.isna() on an array-like returns an array, which cannot be used
        # as a condition, so only scalars are probed for missingness.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ""
        return str(value)

    for col in df.columns:
        if any(isinstance(val, list) for val in df[col].dropna()):
            df[col] = df[col].apply(_cell_to_text)
    return df
|
36 |
+
|
37 |
+
|
38 |
+
def filter_and_export_collection_to_csv(keywords_list, doc_collection):
    """Export the papers whose 'Keywords' field contains ANY given keyword.

    Keywords are matched literally and case-insensitively anywhere inside the
    'Keywords' field. Matching documents are loaded into a DataFrame,
    list-valued columns are flattened, and the result is written to
    ``filtered_papers_export.csv`` in the current working directory.

    Args:
        keywords_list: Keywords to search for; empty strings are ignored.
        doc_collection: Collection to query (any object exposing ``find``).

    Returns:
        ``(df, csv_filename)`` when at least one document matched, otherwise
        ``(empty DataFrame, None)``. With no usable keyword nothing is
        queried and the empty result is returned.
    """
    # 3. Build one alternation pattern. Each keyword is escaped so characters
    # such as "+", "(" or "|" are matched literally rather than as regex
    # syntax; $regex already searches anywhere in the value, so no ".*"
    # padding is needed.
    terms = [re.escape(keyword) for keyword in keywords_list if keyword]
    if not terms:
        # An empty pattern would match every document in the collection.
        return pd.DataFrame(), None

    docs = list(
        doc_collection.find(
            {"Keywords": {"$regex": "|".join(terms), "$options": "i"}}
        )
    )

    # Convert documents to DataFrame
    df = pd.DataFrame(docs)
    if df.empty:
        # Return an empty DataFrame and None if no documents found
        return pd.DataFrame(), None

    # 4. Convert mixed columns
    df = convert_mixed_columns(df)
    # 5. Export to CSV
    csv_filename = "filtered_papers_export.csv"
    df.to_csv(csv_filename, index=False)
    return df, csv_filename
|
66 |
+
|
67 |
+
|
68 |
+
def main():
    """
    Streamlit page that exports keyword-filtered papers to CSV.

    The user picks a paper type (which selects the MongoDB collection) and
    enters one or more comma-separated keywords. On button click the matching
    documents are exported, offered for download and previewed.
    """
    st.title("Filter and Export Papers by Keyword")

    paper_type_choices = [
        "Review Based Paper",
        "Opinion/Perspective Based Paper",
        "Empirical Research Paper",
        "Research Paper (Other)",
    ]
    paper_type = st.selectbox("Select type of research paper:", paper_type_choices)

    keyword_input = st.text_input(
        "Enter the exact keyword to filter papers by 'Keywords' field:"
    )

    # Nothing else happens until the export button is pressed.
    if not st.button("Export Filtered Papers to CSV"):
        return

    with st.spinner("Exporting filtered documents..."):
        try:
            # Collection name is derived from the paper type:
            # spaces become underscores and the result is lower-cased.
            doc_collection = db[paper_type.replace(" ", "_").lower()]

            # Comma-separated input -> list of non-empty, trimmed keywords.
            trimmed = (piece.strip() for piece in keyword_input.split(","))
            keywords_list = [piece for piece in trimmed if piece]

            if not keywords_list:
                st.warning("Please enter at least one keyword.")
                return

            df, csv_filename = filter_and_export_collection_to_csv(
                keywords_list, doc_collection
            )

            if df.empty or not csv_filename:
                st.warning("No matching documents found for the provided keyword(s).")
                return

            st.success(f"Successfully exported filtered papers to {csv_filename}!")
            st.download_button(
                label="Download CSV",
                data=df.to_csv(index=False).encode("utf-8"),
                file_name=csv_filename,
                mime="text/csv",
            )
            st.write("Preview of the filtered DataFrame:")
            st.dataframe(df)
        except Exception as e:
            st.error(f"Error exporting filtered papers: {str(e)}")
|
124 |
+
|
125 |
+
|
126 |
+
if __name__ == "__main__":
|
127 |
+
main()
|
new_research_paper.py
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import requests
|
4 |
+
import json
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
# Load environment variables
|
9 |
+
load_dotenv()
|
10 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
11 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
12 |
+
|
13 |
+
|
14 |
+
def call_perplexity_api(prompt: str, timeout: float = 60.0) -> str:
    """
    Send a single-turn prompt to the Perplexity chat-completions endpoint.

    Args:
        prompt: User message to send.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely, which
            would freeze the Streamlit page.

    Returns:
        The text of the first completion choice, or "" when the API key is
        missing or the request/response handling fails. Failures are shown
        to the user with ``st.error``; this function does not raise.
    """
    if not PERPLEXITY_API_KEY:
        # Fail fast instead of sending "Bearer None" to the API.
        st.error("API Error: PERPLEXITY_API_KEY is not set")
        return ""

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }

    try:
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # UI boundary: report any network, HTTP or response-shape problem
        # to the user and fall back to an empty string.
        st.error(f"API Error: {str(e)}")
        return ""
|
34 |
+
|
35 |
+
|
36 |
+
def generate_research_paper(df: pd.DataFrame) -> dict:
    """
    Generate one research-paper section per DataFrame column.

    For each column, up to the first 50 non-null values are joined into a
    sample and sent to Perplexity with a prompt asking for a 100-150 word
    section. Returns a dict mapping column name -> generated text; a column
    whose API call produced nothing maps to "".
    """
    max_sample_rows = 50  # keep the prompt short for large corpora
    paper_sections = {}

    for col in df.columns:
        non_null_values = df[col].dropna().astype(str).tolist()
        sample_text = " | ".join(non_null_values[:max_sample_rows])

        prompt = f"""
Topic: {col}
Data Sample: {sample_text}

Generate a professional research paper section for the above column.
The section should be at least 100 words and at most 150 words,
focusing on key insights, challenges, and potential research angles.
Integrate the data samples as context for the content.
"""
        generated = call_perplexity_api(prompt)
        if generated:
            paper_sections[col] = generated.strip()
        else:
            paper_sections[col] = ""

    return paper_sections
|
59 |
+
|
60 |
+
|
61 |
+
def format_paper(paper_dict: dict) -> str:
    """
    Render the generated sections as a single Markdown document.

    The document starts with a fixed top-level title; every entry of
    ``paper_dict`` then becomes a second-level heading (the key) followed by
    its text (the value) and a blank line, in dictionary order.
    """
    title = "# Generated Research Paper\n\n"
    sections = [
        f"## {heading}\n{body}\n\n" for heading, body in paper_dict.items()
    ]
    return title + "".join(sections)
|
70 |
+
|
71 |
+
|
72 |
+
def main():
    """
    Streamlit page that turns an uploaded CSV corpus into a Markdown paper.

    After a CSV is uploaded its head is previewed. Pressing the button
    generates one section per column via Perplexity, shows the formatted
    paper and offers it for download.
    """
    st.title("Corpus-based Research Paper Generator")

    uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")
    if not uploaded_file:
        return

    df = pd.read_csv(uploaded_file)
    st.write("### Preview of Uploaded Data")
    st.dataframe(df.head())

    if not st.button("Generate Research Paper"):
        return

    st.info("Generating paper based on the columns of your corpus...")
    with st.spinner("Calling Perplexity AI..."):
        paper = generate_research_paper(df)

    # generate_research_paper returns an entry for every column even when the
    # API call failed (the value is then ""), so the dict itself is truthy for
    # any non-empty CSV. Success means at least one section has actual text.
    if any(paper.values()):
        formatted_paper = format_paper(paper)
        st.success("Research Paper Generated Successfully!")
        st.write(formatted_paper)

        st.download_button(
            label="Download Paper as Markdown",
            data=formatted_paper,
            file_name="research_paper.md",
            mime="text/markdown",
        )
    else:
        st.error("Paper generation failed. Please check Perplexity API key.")
|
100 |
+
|
101 |
+
|
102 |
+
if __name__ == "__main__":
|
103 |
+
main()
|
poll_db_operations.py
CHANGED
@@ -1,70 +1,70 @@
|
|
1 |
-
from pymongo import MongoClient
|
2 |
-
from datetime import datetime
|
3 |
-
from bson import ObjectId
|
4 |
-
from dotenv import load_dotenv
|
5 |
-
import os
|
6 |
-
|
7 |
-
load_dotenv()
|
8 |
-
MONGO_URI = os.getenv('MONGO_URI')
|
9 |
-
class PollDatabase:
|
10 |
-
def __init__(self):
|
11 |
-
self.client = MongoClient(MONGO_URI)
|
12 |
-
self.db = self.client["novascholar_db"]
|
13 |
-
|
14 |
-
def create_poll(self, course_id, session_id, question, options, faculty_id):
|
15 |
-
"""Create a new poll"""
|
16 |
-
poll = {
|
17 |
-
"course_id": course_id,
|
18 |
-
"session_id": session_id,
|
19 |
-
"faculty_id": faculty_id,
|
20 |
-
"question": question,
|
21 |
-
"options": options,
|
22 |
-
"status": "active",
|
23 |
-
"created_at": datetime.now(),
|
24 |
-
"responses": {option: 0 for option in options}
|
25 |
-
}
|
26 |
-
return self.db.polls.insert_one(poll)
|
27 |
-
|
28 |
-
def get_active_polls(self, session_id):
|
29 |
-
"""Get all active polls for a session"""
|
30 |
-
return list(self.db.polls.find({
|
31 |
-
"session_id": session_id,
|
32 |
-
"status": "active"
|
33 |
-
}))
|
34 |
-
|
35 |
-
def submit_response(self, poll_id, student_id, selected_option):
|
36 |
-
"""Submit a student's response to a poll"""
|
37 |
-
try:
|
38 |
-
# Record individual response
|
39 |
-
response = {
|
40 |
-
"poll_id": poll_id,
|
41 |
-
"student_id": student_id,
|
42 |
-
"selected_option": selected_option,
|
43 |
-
"submitted_at": datetime.now()
|
44 |
-
}
|
45 |
-
self.db.poll_responses.insert_one(response)
|
46 |
-
|
47 |
-
# Update aggregated results
|
48 |
-
self.db.polls.update_one(
|
49 |
-
{"_id": ObjectId(poll_id)},
|
50 |
-
{"$inc": {f"responses.{selected_option}": 1}}
|
51 |
-
)
|
52 |
-
return True, "Vote recorded successfully"
|
53 |
-
|
54 |
-
except Exception as e:
|
55 |
-
if "duplicate key error" in str(e):
|
56 |
-
return False, "You have already voted in this poll"
|
57 |
-
return False, f"Error recording vote: {str(e)}"
|
58 |
-
|
59 |
-
def close_poll(self, poll_id):
|
60 |
-
"""Close a poll"""
|
61 |
-
return self.db.polls.update_one(
|
62 |
-
{"_id": ObjectId(poll_id)},
|
63 |
-
{"$set": {"status": "closed"}}
|
64 |
-
)
|
65 |
-
|
66 |
-
def get_poll_analytics(self, poll_id):
|
67 |
-
"""Get detailed analytics for a poll"""
|
68 |
-
poll = self.db.polls.find_one({"_id": ObjectId(poll_id)})
|
69 |
-
responses = self.db.poll_responses.find({"poll_id": ObjectId(poll_id)})
|
70 |
return poll, list(responses)
|
|
|
1 |
+
from pymongo import MongoClient
|
2 |
+
from datetime import datetime
|
3 |
+
from bson import ObjectId
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
import os
|
6 |
+
|
7 |
+
load_dotenv()
|
8 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
9 |
+
class PollDatabase:
    """Data-access helper for live polls stored in the ``novascholar_db`` database.

    Polls live in the ``polls`` collection (with per-option vote counters in
    ``responses``); individual votes live in ``poll_responses``.
    """

    def __init__(self):
        """Connect to MongoDB using the ``MONGO_URI`` environment setting."""
        self.client = MongoClient(MONGO_URI)
        self.db = self.client["novascholar_db"]

    def create_poll(self, course_id, session_id, question, options, faculty_id):
        """Create a new active poll and return the ``insert_one`` result.

        ``responses`` is initialised with a zero counter per option.
        NOTE(review): option texts are used as field names, so options
        containing "." or starting with "$" will not work with the dotted
        ``$inc`` path used in ``submit_response`` — confirm callers sanitise.
        """
        poll = {
            "course_id": course_id,
            "session_id": session_id,
            "faculty_id": faculty_id,
            "question": question,
            "options": options,
            "status": "active",
            "created_at": datetime.now(),
            "responses": {option: 0 for option in options},
        }
        return self.db.polls.insert_one(poll)

    def get_active_polls(self, session_id):
        """Return a list of all polls of ``session_id`` whose status is "active"."""
        return list(self.db.polls.find({
            "session_id": session_id,
            "status": "active",
        }))

    def submit_response(self, poll_id, student_id, selected_option):
        """Record one student's vote and bump the poll's aggregate counter.

        Returns:
            ``(True, message)`` on success, ``(False, message)`` when the
            student already voted or any other error occurred. Never raises.
        """
        # pymongo is already a dependency of this module (MongoClient).
        from pymongo.errors import DuplicateKeyError

        response = {
            "poll_id": poll_id,
            "student_id": student_id,
            "selected_option": selected_option,
            "submitted_at": datetime.now(),
        }
        try:
            # Insert first: a unique index on (poll_id, student_id), if one
            # has been created, rejects a second vote before the counter is
            # touched.
            self.db.poll_responses.insert_one(response)

            self.db.polls.update_one(
                {"_id": ObjectId(poll_id)},
                {"$inc": {f"responses.{selected_option}": 1}},
            )
            return True, "Vote recorded successfully"
        except DuplicateKeyError:
            # Catch the exception type instead of matching its message text.
            return False, "You have already voted in this poll"
        except Exception as e:
            return False, f"Error recording vote: {str(e)}"

    def close_poll(self, poll_id):
        """Set the poll's status to "closed" and return the update result."""
        return self.db.polls.update_one(
            {"_id": ObjectId(poll_id)},
            {"$set": {"status": "closed"}},
        )

    def get_poll_analytics(self, poll_id):
        """Return ``(poll_document, list_of_individual_responses)`` for a poll.

        ``submit_response`` stores ``poll_id`` exactly as the caller passed it
        (commonly a string), so responses are looked up under both the raw
        value and its ``ObjectId`` form; querying by ``ObjectId`` alone would
        miss every response stored with a string id.
        """
        poll_oid = ObjectId(poll_id)
        poll = self.db.polls.find_one({"_id": poll_oid})
        responses = self.db.poll_responses.find(
            {"poll_id": {"$in": [poll_id, poll_oid]}}
        )
        return poll, list(responses)
|
poll_db_setup.py
CHANGED
@@ -1,35 +1,35 @@
|
|
1 |
-
from pymongo import MongoClient
|
2 |
-
from datetime import datetime
|
3 |
-
from dotenv import load_dotenv
|
4 |
-
import os
|
5 |
-
|
6 |
-
load_dotenv()
|
7 |
-
MONGO_URI = os.getenv('MONGO_URI')
|
8 |
-
def setup_mongodb():
|
9 |
-
"""Initialize MongoDB connection and create collections with indexes"""
|
10 |
-
client = MongoClient(MONGO_URI)
|
11 |
-
db = client["novascholar_db"]
|
12 |
-
|
13 |
-
# Create indexes for polls collection
|
14 |
-
db.polls.create_index([("session_id", 1), ("status", 1)])
|
15 |
-
db.polls.create_index([("course_id", 1)])
|
16 |
-
|
17 |
-
# Create unique index for poll_responses to prevent duplicate votes
|
18 |
-
db.poll_responses.create_index(
|
19 |
-
[("poll_id", 1), ("student_id", 1)],
|
20 |
-
unique=True
|
21 |
-
)
|
22 |
-
|
23 |
-
return "Database setup completed successfully"
|
24 |
-
|
25 |
-
def print_all_polls():
|
26 |
-
"""Print all polls in the database"""
|
27 |
-
client = MongoClient(MONGO_URI)
|
28 |
-
db = client["novascholar_db"]
|
29 |
-
|
30 |
-
polls = db.polls.find()
|
31 |
-
for poll in polls:
|
32 |
-
print(poll)
|
33 |
-
|
34 |
-
if __name__ == "__main__":
|
35 |
print(print_all_polls())
|
|
|
1 |
+
from pymongo import MongoClient
|
2 |
+
from datetime import datetime
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import os
|
5 |
+
|
6 |
+
load_dotenv()
|
7 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
8 |
+
def setup_mongodb():
    """Create the indexes used by the polling feature and return a status string.

    Indexes created in ``novascholar_db``:
      * ``polls``: compound (session_id, status) and single (course_id);
      * ``poll_responses``: unique compound (poll_id, student_id), which
        makes a second vote by the same student on the same poll fail.
    """
    database = MongoClient(MONGO_URI)["novascholar_db"]

    polls = database.polls
    polls.create_index([("session_id", 1), ("status", 1)])
    polls.create_index([("course_id", 1)])

    one_vote_per_student = [("poll_id", 1), ("student_id", 1)]
    database.poll_responses.create_index(one_vote_per_student, unique=True)

    return "Database setup completed successfully"
|
24 |
+
|
25 |
+
def print_all_polls():
    """Print every document of the ``polls`` collection to stdout (returns None)."""
    polls_collection = MongoClient(MONGO_URI)["novascholar_db"].polls
    for document in polls_collection.find():
        print(document)
|
33 |
+
|
34 |
+
if __name__ == "__main__":
    # print_all_polls() prints each poll itself and returns None; wrapping it
    # in print() only appended a stray "None" line to the output.
    print_all_polls()
|
pre_class_analytics2.py
CHANGED
@@ -1,759 +1,759 @@
|
|
1 |
-
import json
|
2 |
-
import typing_extensions as typing
|
3 |
-
import google.generativeai as genai
|
4 |
-
from typing import List, Dict, Any
|
5 |
-
import numpy as np
|
6 |
-
from collections import defaultdict
|
7 |
-
|
8 |
-
from dotenv import load_dotenv
|
9 |
-
import os
|
10 |
-
import pymongo
|
11 |
-
from pymongo import MongoClient
|
12 |
-
|
13 |
-
load_dotenv()
|
14 |
-
GEMINI_API_KEY = os.getenv('GEMINI_KEY')
|
15 |
-
|
16 |
-
class EngagementMetrics(typing.TypedDict):
    """Qualitative engagement ratings for one student."""

    participation_level: str  # "high" | "medium" | "low"
    question_quality: str  # "advanced" | "intermediate" | "basic"
    concept_understanding: str  # "strong" | "moderate" | "needs_improvement"


class StudentInsight(typing.TypedDict):
    """Per-student analysis entry."""

    student_id: str
    performance_level: str  # "high_performer" | "average" | "at_risk"
    struggling_topics: list[str]
    engagement_metrics: EngagementMetrics


class TopicInsight(typing.TypedDict):
    """Per-topic analysis entry."""

    topic: str
    difficulty_level: float  # 0 to 1
    student_count: int  # compared against the class size in _identify_critical_topics
    common_issues: list[str]
    key_misconceptions: list[str]


class RecommendedAction(typing.TypedDict):
    """One suggested follow-up action."""

    action: str
    priority: str  # "high" | "medium" | "low"
    target_group: str  # "all_students" | "specific_students" | "faculty"
    reasoning: str
    expected_impact: str


class ClassDistribution(typing.TypedDict):
    """Share of the class in each performance bucket."""

    high_performers: float
    average_performers: float
    at_risk: float


class CourseHealth(typing.TypedDict):
    """Course-level summary."""

    # 0 to 1. NOTE(review): _enrich_analytics stores the whole dict returned
    # by _calculate_engagement under this key, not a float — confirm which
    # shape consumers expect.
    overall_engagement: float
    critical_topics: list[str]
    class_distribution: ClassDistribution


class InterventionMetrics(typing.TypedDict):
    """Students grouped by how urgently they need follow-up."""

    immediate_attention_needed: list[str]  # student_ids
    monitoring_required: list[str]  # student_ids


class AnalyticsResponse(typing.TypedDict):
    """Top-level shape of the enriched analytics dictionary."""

    topic_insights: list[TopicInsight]
    student_insights: list[StudentInsight]
    recommended_actions: list[RecommendedAction]
    course_health: CourseHealth
    intervention_metrics: InterventionMetrics
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
class NovaScholarAnalytics:
|
65 |
-
def __init__(self, model_name: str = "gemini-1.5-flash"):
    """Configure the Gemini SDK and create the model used for analysis.

    Args:
        model_name: Name of the Gemini model to instantiate.
    """
    # The API key is read from the GEMINI_KEY environment variable at import time.
    genai.configure(api_key=GEMINI_API_KEY)
    self.model = genai.GenerativeModel(model_name)
|
68 |
-
|
69 |
-
def _create_analytics_prompt(self, chat_histories: List[Dict], all_topics: List[str]) -> str:
    """Build the Gemini prompt that asks for analytics over student chats.

    Args:
        chat_histories: Chat records; embedded in the prompt as indented JSON,
            so they must be JSON-serialisable.
        all_topics: Topic names; embedded as a comma-separated list.

    Returns:
        The complete prompt text, including the JSON structure the model is
        told to follow.
    """
    # Two earlier prompt drafts ("Prompt 1" and "Prompt 2") used to be kept
    # here as commented-out code; only the active third version remains.
    #
    # NOTE(review): the JSON keys requested below ("topic_wise_insights",
    # "ai_recommended_actions", "student_analytics") differ from the keys
    # _process_gemini_response checks for ("topic_insights",
    # "student_insights", "recommended_actions"), and participation_level is
    # requested as a number while other methods compare it to "low"/"medium".
    # Confirm which schema downstream code relies on.
    #
    # Literal braces of the JSON template are doubled because this is an f-string.
    return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
Context:
- Chat histories: {json.dumps(chat_histories, indent=2)}
- These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
- Topics covered: {', '.join(all_topics)}.

Your task is to provide detailed analytics that will help faculty address challenges effectively and enhance learning outcomes.

Output Format (strictly follow this JSON structure):
{{
"topic_wise_insights": [
{{
"topic": "<string>",
"struggling_percentage": <number between 0 and 1>,
"key_issues": ["<string>", "<string>", ...],
"key_misconceptions": ["<string>", "<string>", ...],
"recommended_actions": {{
"description": "<string>",
"priority": "high|medium|low",
"expected_outcome": "<string>"
}}
}}
],
"ai_recommended_actions": [
{{
"action": "<string>",
"priority": "high|medium|low",
"reasoning": "<string>",
"expected_outcome": "<string>",
"pedagogy_recommendations": {{
"methods": ["<string>", "<string>", ...],
"resources": ["<string>", "<string>", ...],
"expected_impact": "<string>"
}}
}}
],
"student_analytics": [
{{
"student_id": "<string>",
"engagement_metrics": {{
"participation_level": <number between 0 and 1>,
"concept_understanding": "strong|moderate|needs_improvement",
"question_quality": "advanced|intermediate|basic"
}},
"struggling_topics": ["<string>", "<string>", ...],
"personalized_recommendation": "<string>"
}}
]
}}

Guidelines for Analysis:
- Focus on actionable and concise insights rather than exhaustive details.
- Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
- Prioritize topics with higher difficulty scores or more students struggling.
- Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
- Make sure to include All** students in the analysis, not just a subset.
- for the ai_recommended_actions:
- Prioritize pedagogy recommendations for critical topics with the high difficulty scores or struggling percentages.
- For each action:
- Include specific teaching methods (e.g., interactive discussions or quizzes, problem-based learning, practical examples etc).
- Recommend supporting resources (e.g., videos, handouts, simulations).
- Provide reasoning for the recommendation and the expected outcomes for student learning.
- Example:
- **Action:** Conduct an interactive problem-solving session on "<Topic Name>".
- **Reasoning:** Students showed difficulty in applying concepts to practical problems.
- **Expected Outcome:** Improved practical understanding and application of the topic.
- **Pedagogy Recommendations:**
- **Methods:** Group discussions, real-world case studies.
- **Resources:** Online interactive tools, relevant case studies, video walkthroughs.
- **Expected Impact:** Enhance conceptual clarity by 40% and practical application by 30%.

The response must adhere strictly to the above JSON structure, with all fields populated appropriately."""
|
198 |
-
|
199 |
-
|
200 |
-
def _calculate_class_distribution(self, analytics: Dict) -> Dict:
|
201 |
-
"""Calculate the distribution of students across performance levels."""
|
202 |
-
try:
|
203 |
-
total_students = len(analytics.get("student_insights", []))
|
204 |
-
if total_students == 0:
|
205 |
-
return {
|
206 |
-
"high_performers": 0,
|
207 |
-
"average_performers": 0,
|
208 |
-
"at_risk": 0
|
209 |
-
}
|
210 |
-
|
211 |
-
distribution = defaultdict(int)
|
212 |
-
|
213 |
-
for student in analytics.get("student_insights", []):
|
214 |
-
performance_level = student.get("performance_level", "average")
|
215 |
-
# Map performance levels to our three categories
|
216 |
-
if performance_level in ["excellent", "high", "high_performer"]:
|
217 |
-
distribution["high_performers"] += 1
|
218 |
-
elif performance_level in ["struggling", "low", "at_risk"]:
|
219 |
-
distribution["at_risk"] += 1
|
220 |
-
else:
|
221 |
-
distribution["average_performers"] += 1
|
222 |
-
|
223 |
-
# Convert to percentages
|
224 |
-
return {
|
225 |
-
level: count/total_students
|
226 |
-
for level, count in distribution.items()
|
227 |
-
}
|
228 |
-
except Exception as e:
|
229 |
-
print(f"Error calculating class distribution: {str(e)}")
|
230 |
-
return {
|
231 |
-
"high_performers": 0,
|
232 |
-
"average_performers": 0,
|
233 |
-
"at_risk": 0
|
234 |
-
}
|
235 |
-
|
236 |
-
def _identify_urgent_cases(self, analytics: Dict) -> List[str]:
|
237 |
-
"""Identify students needing immediate attention."""
|
238 |
-
try:
|
239 |
-
urgent_cases = []
|
240 |
-
for student in analytics.get("student_insights", []):
|
241 |
-
student_id = student.get("student_id")
|
242 |
-
if not student_id:
|
243 |
-
continue
|
244 |
-
|
245 |
-
# Check multiple risk factors
|
246 |
-
risk_factors = 0
|
247 |
-
|
248 |
-
# Factor 1: Performance level
|
249 |
-
if student.get("performance_level") in ["struggling", "at_risk", "low"]:
|
250 |
-
risk_factors += 1
|
251 |
-
|
252 |
-
# Factor 2: Number of struggling topics
|
253 |
-
if len(student.get("struggling_topics", [])) >= 2:
|
254 |
-
risk_factors += 1
|
255 |
-
|
256 |
-
# Factor 3: Engagement metrics
|
257 |
-
engagement = student.get("engagement_metrics", {})
|
258 |
-
if (engagement.get("participation_level") == "low" or
|
259 |
-
engagement.get("concept_understanding") == "needs_improvement"):
|
260 |
-
risk_factors += 1
|
261 |
-
|
262 |
-
# If student has multiple risk factors, add to urgent cases
|
263 |
-
if risk_factors >= 2:
|
264 |
-
urgent_cases.append(student_id)
|
265 |
-
|
266 |
-
return urgent_cases
|
267 |
-
except Exception as e:
|
268 |
-
print(f"Error identifying urgent cases: {str(e)}")
|
269 |
-
return []
|
270 |
-
|
271 |
-
def _identify_monitoring_cases(self, analytics: Dict) -> List[str]:
|
272 |
-
"""Identify students who need monitoring but aren't urgent cases."""
|
273 |
-
try:
|
274 |
-
monitoring_cases = []
|
275 |
-
urgent_cases = set(self._identify_urgent_cases(analytics))
|
276 |
-
|
277 |
-
for student in analytics.get("student_insights", []):
|
278 |
-
student_id = student.get("student_id")
|
279 |
-
if not student_id or student_id in urgent_cases:
|
280 |
-
continue
|
281 |
-
|
282 |
-
# Check monitoring criteria
|
283 |
-
monitoring_needed = False
|
284 |
-
|
285 |
-
# Criterion 1: Has some struggling topics but not enough for urgent
|
286 |
-
if len(student.get("struggling_topics", [])) == 1:
|
287 |
-
monitoring_needed = True
|
288 |
-
|
289 |
-
# Criterion 2: Medium-low engagement
|
290 |
-
engagement = student.get("engagement_metrics", {})
|
291 |
-
if engagement.get("participation_level") == "medium":
|
292 |
-
monitoring_needed = True
|
293 |
-
|
294 |
-
# Criterion 3: Recent performance decline
|
295 |
-
if student.get("performance_level") == "average":
|
296 |
-
monitoring_needed = True
|
297 |
-
|
298 |
-
if monitoring_needed:
|
299 |
-
monitoring_cases.append(student_id)
|
300 |
-
|
301 |
-
return monitoring_cases
|
302 |
-
except Exception as e:
|
303 |
-
print(f"Error identifying monitoring cases: {str(e)}")
|
304 |
-
return []
|
305 |
-
|
306 |
-
def _identify_critical_topics(self, analytics: Dict) -> List[str]:
|
307 |
-
"""
|
308 |
-
Identify critical topics that need attention based on multiple factors.
|
309 |
-
Returns a list of topic names that are considered critical.
|
310 |
-
"""
|
311 |
-
try:
|
312 |
-
critical_topics = []
|
313 |
-
topics = analytics.get("topic_insights", [])
|
314 |
-
|
315 |
-
for topic in topics:
|
316 |
-
if not isinstance(topic, dict):
|
317 |
-
continue
|
318 |
-
|
319 |
-
# Initialize score for topic criticality
|
320 |
-
critical_score = 0
|
321 |
-
|
322 |
-
# Factor 1: High difficulty level
|
323 |
-
difficulty_level = topic.get("difficulty_level", 0)
|
324 |
-
if difficulty_level > 0.7:
|
325 |
-
critical_score += 2
|
326 |
-
elif difficulty_level > 0.5:
|
327 |
-
critical_score += 1
|
328 |
-
|
329 |
-
# Factor 2: Number of students struggling
|
330 |
-
student_count = topic.get("student_count", 0)
|
331 |
-
total_students = len(analytics.get("student_insights", []))
|
332 |
-
if total_students > 0:
|
333 |
-
struggle_ratio = student_count / total_students
|
334 |
-
if struggle_ratio > 0.5:
|
335 |
-
critical_score += 2
|
336 |
-
elif struggle_ratio > 0.3:
|
337 |
-
critical_score += 1
|
338 |
-
|
339 |
-
# Factor 3: Number of common issues
|
340 |
-
if len(topic.get("common_issues", [])) > 2:
|
341 |
-
critical_score += 1
|
342 |
-
|
343 |
-
# Factor 4: Number of key misconceptions
|
344 |
-
if len(topic.get("key_misconceptions", [])) > 1:
|
345 |
-
critical_score += 1
|
346 |
-
|
347 |
-
# If topic exceeds threshold, mark as critical
|
348 |
-
if critical_score >= 3:
|
349 |
-
critical_topics.append(topic.get("topic", "Unknown Topic"))
|
350 |
-
|
351 |
-
return critical_topics
|
352 |
-
|
353 |
-
except Exception as e:
|
354 |
-
print(f"Error identifying critical topics: {str(e)}")
|
355 |
-
return []
|
356 |
-
|
357 |
-
def _calculate_engagement(self, analytics: Dict) -> Dict:
|
358 |
-
"""
|
359 |
-
Calculate detailed engagement metrics across all students.
|
360 |
-
Returns a dictionary with engagement statistics.
|
361 |
-
"""
|
362 |
-
try:
|
363 |
-
total_students = len(analytics.get("student_insights", []))
|
364 |
-
if total_students == 0:
|
365 |
-
return {
|
366 |
-
"total_students": 0,
|
367 |
-
"overall_score": 0,
|
368 |
-
"engagement_distribution": {
|
369 |
-
"high": 0,
|
370 |
-
"medium": 0,
|
371 |
-
"low": 0
|
372 |
-
},
|
373 |
-
"participation_metrics": {
|
374 |
-
"average_topics_per_student": 0,
|
375 |
-
"active_participants": 0
|
376 |
-
}
|
377 |
-
}
|
378 |
-
|
379 |
-
engagement_levels = defaultdict(int)
|
380 |
-
total_topics_engaged = 0
|
381 |
-
active_participants = 0
|
382 |
-
|
383 |
-
for student in analytics.get("student_insights", []):
|
384 |
-
# Get engagement metrics
|
385 |
-
metrics = student.get("engagement_metrics", {})
|
386 |
-
|
387 |
-
# Calculate participation level
|
388 |
-
participation = metrics.get("participation_level", "low").lower()
|
389 |
-
engagement_levels[participation] += 1
|
390 |
-
|
391 |
-
# Count topics student is engaged with
|
392 |
-
topics_count = len(student.get("struggling_topics", []))
|
393 |
-
total_topics_engaged += topics_count
|
394 |
-
|
395 |
-
# Count active participants (students engaging with any topics)
|
396 |
-
if topics_count > 0:
|
397 |
-
active_participants += 1
|
398 |
-
|
399 |
-
# Calculate overall engagement score (0-1)
|
400 |
-
weighted_score = (
|
401 |
-
(engagement_levels["high"] * 1.0 +
|
402 |
-
engagement_levels["medium"] * 0.6 +
|
403 |
-
engagement_levels["low"] * 0.2) / total_students
|
404 |
-
)
|
405 |
-
|
406 |
-
return {
|
407 |
-
"total_students": total_students,
|
408 |
-
"overall_score": round(weighted_score, 2),
|
409 |
-
"engagement_distribution": {
|
410 |
-
level: count/total_students
|
411 |
-
for level, count in engagement_levels.items()
|
412 |
-
},
|
413 |
-
"participation_metrics": {
|
414 |
-
"average_topics_per_student": round(total_topics_engaged / total_students, 2),
|
415 |
-
"active_participants_ratio": round(active_participants / total_students, 2)
|
416 |
-
}
|
417 |
-
}
|
418 |
-
|
419 |
-
except Exception as e:
|
420 |
-
print(f"Error calculating engagement: {str(e)}")
|
421 |
-
return {
|
422 |
-
"total_students": 0,
|
423 |
-
"overall_score": 0,
|
424 |
-
"engagement_distribution": {
|
425 |
-
"high": 0,
|
426 |
-
"medium": 0,
|
427 |
-
"low": 0
|
428 |
-
},
|
429 |
-
"participation_metrics": {
|
430 |
-
"average_topics_per_student": 0,
|
431 |
-
"active_participants_ratio": 0
|
432 |
-
}
|
433 |
-
}
|
434 |
-
|
435 |
-
def _process_gemini_response(self, response: str) -> Dict:
|
436 |
-
"""Process and validate Gemini's response."""
|
437 |
-
# try:
|
438 |
-
# analytics = json.loads(response)
|
439 |
-
# return self._enrich_analytics(analytics)
|
440 |
-
# except json.JSONDecodeError as e:
|
441 |
-
# print(f"Error decoding Gemini response: {e}")
|
442 |
-
# return self._fallback_analytics()
|
443 |
-
try:
|
444 |
-
# Parse JSON response
|
445 |
-
analytics = json.loads(response)
|
446 |
-
|
447 |
-
# Validate required fields exist
|
448 |
-
required_fields = {
|
449 |
-
"topic_insights": [],
|
450 |
-
"student_insights": [],
|
451 |
-
"recommended_actions": []
|
452 |
-
}
|
453 |
-
|
454 |
-
# Ensure all required fields exist with default values
|
455 |
-
for field, default_value in required_fields.items():
|
456 |
-
if field not in analytics or not analytics[field]:
|
457 |
-
analytics[field] = default_value
|
458 |
-
|
459 |
-
# Now enrich the validated analytics
|
460 |
-
return self._enrich_analytics(analytics)
|
461 |
-
|
462 |
-
except (json.JSONDecodeError, KeyError, TypeError) as e:
|
463 |
-
print(f"Error processing Gemini response: {str(e)}")
|
464 |
-
print(f"Raw response: {response}")
|
465 |
-
return self._fallback_analytics()
|
466 |
-
|
467 |
-
def _enrich_analytics(self, analytics: Dict) -> Dict:
|
468 |
-
"""Add derived insights and metrics to the analytics."""
|
469 |
-
# Add overall course health metrics
|
470 |
-
analytics["course_health"] = {
|
471 |
-
"overall_engagement": self._calculate_engagement(analytics),
|
472 |
-
"critical_topics": self._identify_critical_topics(analytics),
|
473 |
-
"class_distribution": self._calculate_class_distribution(analytics)
|
474 |
-
}
|
475 |
-
|
476 |
-
# Add intervention urgency scores
|
477 |
-
analytics["intervention_metrics"] = {
|
478 |
-
"immediate_attention_needed": self._identify_urgent_cases(analytics),
|
479 |
-
"monitoring_required": self._identify_monitoring_cases(analytics)
|
480 |
-
}
|
481 |
-
|
482 |
-
return analytics
|
483 |
-
|
484 |
-
def _calculate_engagement(self, analytics: Dict) -> Dict:
|
485 |
-
# """Calculate overall engagement metrics."""
|
486 |
-
# total_students = len(analytics["student_insights"])
|
487 |
-
# engagement_levels = defaultdict(int)
|
488 |
-
|
489 |
-
# for student in analytics["student_insights"]:
|
490 |
-
# engagement_levels[student["engagement_metrics"]["participation_level"]] += 1
|
491 |
-
|
492 |
-
# return {
|
493 |
-
# "total_students": total_students,
|
494 |
-
# "engagement_distribution": {
|
495 |
-
# level: count/total_students
|
496 |
-
# for level, count in engagement_levels.items()
|
497 |
-
# }
|
498 |
-
# }
|
499 |
-
"""Calculate overall engagement metrics with defensive programming."""
|
500 |
-
try:
|
501 |
-
total_students = len(analytics.get("student_insights", []))
|
502 |
-
if total_students == 0:
|
503 |
-
return {
|
504 |
-
"total_students": 0,
|
505 |
-
"engagement_distribution": {
|
506 |
-
"high": 0,
|
507 |
-
"medium": 0,
|
508 |
-
"low": 0
|
509 |
-
}
|
510 |
-
}
|
511 |
-
|
512 |
-
engagement_levels = defaultdict(int)
|
513 |
-
|
514 |
-
for student in analytics.get("student_insights", []):
|
515 |
-
metrics = student.get("engagement_metrics", {})
|
516 |
-
level = metrics.get("participation_level", "low")
|
517 |
-
engagement_levels[level] += 1
|
518 |
-
|
519 |
-
return {
|
520 |
-
"total_students": total_students,
|
521 |
-
"engagement_distribution": {
|
522 |
-
level: count/total_students
|
523 |
-
for level, count in engagement_levels.items()
|
524 |
-
}
|
525 |
-
}
|
526 |
-
except Exception as e:
|
527 |
-
print(f"Error calculating engagement: {str(e)}")
|
528 |
-
return {
|
529 |
-
"total_students": 0,
|
530 |
-
"engagement_distribution": {
|
531 |
-
"high": 0,
|
532 |
-
"medium": 0,
|
533 |
-
"low": 0
|
534 |
-
}
|
535 |
-
}
|
536 |
-
|
537 |
-
def _identify_critical_topics(self, analytics: Dict) -> List[Dict]:
|
538 |
-
# """Identify topics needing immediate attention."""
|
539 |
-
# return [
|
540 |
-
# topic for topic in analytics["topic_insights"]
|
541 |
-
# if topic["difficulty_level"] > 0.7 or
|
542 |
-
# len(topic["common_issues"]) > 2
|
543 |
-
# ]
|
544 |
-
"""Identify topics needing immediate attention with defensive programming."""
|
545 |
-
try:
|
546 |
-
return [
|
547 |
-
topic for topic in analytics.get("topic_insights", [])
|
548 |
-
if topic.get("difficulty_level", 0) > 0.7 or
|
549 |
-
len(topic.get("common_issues", [])) > 2
|
550 |
-
]
|
551 |
-
except Exception as e:
|
552 |
-
print(f"Error identifying critical topics: {str(e)}")
|
553 |
-
return []
|
554 |
-
|
555 |
-
def generate_analytics(self, chat_histories: List[Dict], all_topics: List[str]) -> Dict:
|
556 |
-
# Method 1: (caused key 'student_insights' error):
|
557 |
-
# """Main method to generate analytics from chat histories."""
|
558 |
-
# # Preprocess chat histories
|
559 |
-
# processed_histories = self._preprocess_chat_histories(chat_histories)
|
560 |
-
|
561 |
-
# # Create and send prompt to Gemini
|
562 |
-
# prompt = self._create_analytics_prompt(processed_histories, all_topics)
|
563 |
-
# response = self.model.generate_content(
|
564 |
-
# prompt,
|
565 |
-
# generation_config=genai.GenerationConfig(
|
566 |
-
# response_mime_type="application/json",
|
567 |
-
# response_schema=AnalyticsResponse
|
568 |
-
# )
|
569 |
-
# )
|
570 |
-
|
571 |
-
# # # Process and enrich analytics
|
572 |
-
# # analytics = self._process_gemini_response(response.text)
|
573 |
-
# # return analytics
|
574 |
-
# # Process, validate, and enrich the response
|
575 |
-
# analytics = self._process_gemini_response(response.text)
|
576 |
-
|
577 |
-
# # Then cast it to satisfy the type checker
|
578 |
-
# return typing.cast(AnalyticsResponse, analytics)
|
579 |
-
|
580 |
-
# Method 2 (possible fix):
|
581 |
-
# """Main method to generate analytics with better error handling."""
|
582 |
-
# try:
|
583 |
-
# processed_histories = self._preprocess_chat_histories(chat_histories)
|
584 |
-
# prompt = self._create_analytics_prompt(processed_histories, all_topics)
|
585 |
-
|
586 |
-
# response = self.model.generate_content(
|
587 |
-
# prompt,
|
588 |
-
# generation_config=genai.GenerationConfig(
|
589 |
-
# response_mime_type="application/json",
|
590 |
-
# temperature=0.15
|
591 |
-
# # response_schema=AnalyticsResponse
|
592 |
-
# )
|
593 |
-
# )
|
594 |
-
|
595 |
-
# if not response.text:
|
596 |
-
# print("Empty response from Gemini")
|
597 |
-
# return self._fallback_analytics()
|
598 |
-
|
599 |
-
# # analytics = self._process_gemini_response(response.text)
|
600 |
-
# # return typing.cast(AnalyticsResponse, analytics)
|
601 |
-
# # return response.text;
|
602 |
-
# analytics = json.loads(response.text)
|
603 |
-
# return analytics
|
604 |
-
|
605 |
-
# except Exception as e:
|
606 |
-
# print(f"Error generating analytics: {str(e)}")
|
607 |
-
# return self._fallback_analytics()
|
608 |
-
|
609 |
-
|
610 |
-
# Debugging code:
|
611 |
-
"""Main method to generate analytics with better error handling."""
|
612 |
-
try:
|
613 |
-
# Debug print for input validation
|
614 |
-
print("Input validation:")
|
615 |
-
print(f"Chat histories: {len(chat_histories)} entries")
|
616 |
-
print(f"Topics: {all_topics}")
|
617 |
-
|
618 |
-
if not chat_histories or not all_topics:
|
619 |
-
print("Missing required input data")
|
620 |
-
return self._fallback_analytics()
|
621 |
-
|
622 |
-
# Debug the preprocessing step
|
623 |
-
try:
|
624 |
-
processed_histories = self._preprocess_chat_histories(chat_histories)
|
625 |
-
print("Successfully preprocessed chat histories")
|
626 |
-
except Exception as preprocess_error:
|
627 |
-
print(f"Error in preprocessing: {str(preprocess_error)}")
|
628 |
-
return self._fallback_analytics()
|
629 |
-
|
630 |
-
# Debug the prompt creation
|
631 |
-
try:
|
632 |
-
prompt = self._create_analytics_prompt(processed_histories, all_topics)
|
633 |
-
print("Successfully created prompt")
|
634 |
-
print("Prompt preview:", prompt[:200] + "...") # Print first 200 chars
|
635 |
-
except Exception as prompt_error:
|
636 |
-
print(f"Error in prompt creation: {str(prompt_error)}")
|
637 |
-
return self._fallback_analytics()
|
638 |
-
|
639 |
-
# Rest of the function remains the same
|
640 |
-
response = self.model.generate_content(
|
641 |
-
prompt,
|
642 |
-
generation_config=genai.GenerationConfig(
|
643 |
-
response_mime_type="application/json",
|
644 |
-
temperature=0.15
|
645 |
-
)
|
646 |
-
)
|
647 |
-
|
648 |
-
if not response.text:
|
649 |
-
print("Empty response from Gemini")
|
650 |
-
return self._fallback_analytics()
|
651 |
-
|
652 |
-
analytics = json.loads(response.text)
|
653 |
-
return analytics
|
654 |
-
|
655 |
-
except Exception as e:
|
656 |
-
print(f"Error generating analytics: {str(e)}")
|
657 |
-
print(f"Error type: {type(e)}")
|
658 |
-
import traceback
|
659 |
-
print("Full traceback:", traceback.format_exc())
|
660 |
-
return self._fallback_analytics()
|
661 |
-
|
662 |
-
def _preprocess_chat_histories(self, chat_histories: List[Dict]) -> List[Dict]:
|
663 |
-
# """Preprocess chat histories to focus on relevant information."""
|
664 |
-
# processed = []
|
665 |
-
|
666 |
-
# for chat in chat_histories:
|
667 |
-
# print(str(chat["user_id"]))
|
668 |
-
# processed_chat = {
|
669 |
-
# "user_id": str(chat["user_id"]),
|
670 |
-
# "messages": [
|
671 |
-
# {
|
672 |
-
# "prompt": msg["prompt"],
|
673 |
-
# "response": msg["response"]
|
674 |
-
# }
|
675 |
-
# for msg in chat["messages"]
|
676 |
-
# ]
|
677 |
-
# }
|
678 |
-
# processed.append(processed_chat)
|
679 |
-
|
680 |
-
# return processed
|
681 |
-
|
682 |
-
# Code 2:
|
683 |
-
"""Preprocess chat histories to focus on relevant information."""
|
684 |
-
processed = []
|
685 |
-
|
686 |
-
for chat in chat_histories:
|
687 |
-
# Convert ObjectId to string if it's an ObjectId
|
688 |
-
user_id = str(chat["user_id"]["$oid"]) if isinstance(chat["user_id"], dict) and "$oid" in chat["user_id"] else str(chat["user_id"])
|
689 |
-
|
690 |
-
try:
|
691 |
-
processed_chat = {
|
692 |
-
"user_id": user_id,
|
693 |
-
"messages": [
|
694 |
-
{
|
695 |
-
"prompt": msg["prompt"],
|
696 |
-
"response": msg["response"]
|
697 |
-
}
|
698 |
-
for msg in chat["messages"]
|
699 |
-
]
|
700 |
-
}
|
701 |
-
processed.append(processed_chat)
|
702 |
-
print(f"Successfully processed chat for user: {user_id}")
|
703 |
-
except Exception as e:
|
704 |
-
print(f"Error processing chat for user: {user_id}")
|
705 |
-
print(f"Error details: {str(e)}")
|
706 |
-
continue
|
707 |
-
|
708 |
-
return processed
|
709 |
-
|
710 |
-
def _fallback_analytics(self) -> Dict:
|
711 |
-
# """Provide basic analytics in case of LLM processing failure."""
|
712 |
-
# return {
|
713 |
-
# "topic_insights": [],
|
714 |
-
# "student_insights": [],
|
715 |
-
# "recommended_actions": [
|
716 |
-
# {
|
717 |
-
# "action": "Review analytics generation process",
|
718 |
-
# "priority": "high",
|
719 |
-
# "target_group": "system_administrators",
|
720 |
-
# "reasoning": "Analytics generation failed",
|
721 |
-
# "expected_impact": "Restore analytics functionality"
|
722 |
-
# }
|
723 |
-
# ]
|
724 |
-
# }
|
725 |
-
"""Provide comprehensive fallback analytics that match our schema."""
|
726 |
-
return {
|
727 |
-
"topic_insights": [],
|
728 |
-
"student_insights": [],
|
729 |
-
"recommended_actions": [
|
730 |
-
{
|
731 |
-
"action": "Review analytics generation process",
|
732 |
-
"priority": "high",
|
733 |
-
"target_group": "system_administrators",
|
734 |
-
"reasoning": "Analytics generation failed",
|
735 |
-
"expected_impact": "Restore analytics functionality"
|
736 |
-
}
|
737 |
-
],
|
738 |
-
"course_health": {
|
739 |
-
"overall_engagement": 0,
|
740 |
-
"critical_topics": [],
|
741 |
-
"class_distribution": {
|
742 |
-
"high_performers": 0,
|
743 |
-
"average_performers": 0,
|
744 |
-
"at_risk": 0
|
745 |
-
}
|
746 |
-
},
|
747 |
-
"intervention_metrics": {
|
748 |
-
"immediate_attention_needed": [],
|
749 |
-
"monitoring_required": []
|
750 |
-
}
|
751 |
-
}
|
752 |
-
|
753 |
-
# if __name__ == "__main__":
|
754 |
-
# # Example usage
|
755 |
-
|
756 |
-
|
757 |
-
# analytics_generator = NovaScholarAnalytics()
|
758 |
-
# analytics = analytics_generator.generate_analytics(chat_histories, all_topics)
|
759 |
# print(json.dumps(analytics, indent=2))
|
|
|
1 |
+
import json
|
2 |
+
import typing_extensions as typing
|
3 |
+
import google.generativeai as genai
|
4 |
+
from typing import List, Dict, Any
|
5 |
+
import numpy as np
|
6 |
+
from collections import defaultdict
|
7 |
+
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
import os
|
10 |
+
import pymongo
|
11 |
+
from pymongo import MongoClient
|
12 |
+
|
13 |
+
# Load environment variables through python-dotenv before reading the key below.
load_dotenv()

# Gemini API key taken from the GEMINI_KEY environment variable (None when unset).
GEMINI_API_KEY = os.getenv('GEMINI_KEY')
|
15 |
+
|
16 |
+
class EngagementMetrics(typing.TypedDict):
    """Categorical engagement descriptors for one student."""
    participation_level: str  # "high" | "medium" | "low"
    question_quality: str  # "advanced" | "intermediate" | "basic"
    concept_understanding: str  # "strong" | "moderate" | "needs_improvement"
|
20 |
+
|
21 |
+
class StudentInsight(typing.TypedDict):
    """Per-student analytics entry (one element of ``student_insights``)."""
    student_id: str
    performance_level: str  # "high_performer" | "average" | "at_risk"
    struggling_topics: list[str]  # topic names the student has difficulty with
    engagement_metrics: EngagementMetrics
|
26 |
+
|
27 |
+
class TopicInsight(typing.TypedDict):
    """Per-topic analytics entry (one element of ``topic_insights``)."""
    topic: str
    difficulty_level: float  # 0 to 1
    student_count: int  # NOTE(review): presumably the number of students struggling with the topic — confirm
    common_issues: list[str]
    key_misconceptions: list[str]
|
33 |
+
|
34 |
+
class RecommendedAction(typing.TypedDict):
    """A suggested intervention (one element of ``recommended_actions``)."""
    action: str
    priority: str  # "high" | "medium" | "low"
    target_group: str  # "all_students" | "specific_students" | "faculty"
    reasoning: str
    expected_impact: str
|
40 |
+
|
41 |
+
class ClassDistribution(typing.TypedDict):
    """Share of the class in each performance bucket."""
    high_performers: float
    average_performers: float
    at_risk: float
|
45 |
+
|
46 |
+
class CourseHealth(typing.TypedDict):
    """Course-level summary stored under ``course_health``."""
    # NOTE(review): NovaScholarAnalytics._enrich_analytics stores the dict returned
    # by _calculate_engagement here, not a float — confirm which shape is intended.
    overall_engagement: float  # 0 to 1
    critical_topics: list[str]
    class_distribution: ClassDistribution
|
50 |
+
|
51 |
+
class InterventionMetrics(typing.TypedDict):
    """Student ids grouped by how urgently they need follow-up."""
    immediate_attention_needed: list[str]  # student_ids
    monitoring_required: list[str]  # student_ids
|
54 |
+
|
55 |
+
class AnalyticsResponse(typing.TypedDict):
    """Top-level analytics document combining all sections declared above."""
    topic_insights: list[TopicInsight]
    student_insights: list[StudentInsight]
    recommended_actions: list[RecommendedAction]
    course_health: CourseHealth
    intervention_metrics: InterventionMetrics
|
61 |
+
|
62 |
+
|
63 |
+
|
64 |
+
class NovaScholarAnalytics:
|
65 |
+
def __init__(self, model_name: str = "gemini-1.5-flash") -> None:
    """Configure the Gemini client and create the model handle.

    Args:
        model_name: Name passed to ``genai.GenerativeModel``.
    """
    # Uses the module-level GEMINI_API_KEY.
    genai.configure(api_key=GEMINI_API_KEY)
    self.model = genai.GenerativeModel(model_name)
|
68 |
+
|
69 |
+
def _create_analytics_prompt(self, chat_histories: List[Dict], all_topics: List[str]) -> str:
    """Build the Gemini prompt that asks for JSON analytics over the chats.

    Args:
        chat_histories: Chat records; embedded into the prompt via
            ``json.dumps(..., indent=2)``, so they must be JSON-serialisable.
        all_topics: Topic names; joined with ", " into the prompt.

    Returns:
        The complete prompt text, including the JSON template the model is
        told to follow and the analysis guidelines.
    """
    # Two earlier prompt drafts ("Prompt 1": schema taken from
    # AnalyticsResponse.__annotations__; "Prompt 2": free-form numbered
    # output) were superseded by the explicit JSON template below ("Prompt 3").
    #
    # NOTE(review): the template's top-level keys ("topic_wise_insights",
    # "ai_recommended_actions", "student_analytics") differ from the keys
    # _process_gemini_response backfills ("topic_insights", "student_insights",
    # "recommended_actions"), and "participation_level" is requested as a
    # number while EngagementMetrics documents "high" | "medium" | "low".
    # Confirm which schema the consumers of this prompt's output expect.
    return f"""Analyze the provided student chat histories for a university course and generate concise, actionable analytics.
Context:
- Chat histories: {json.dumps(chat_histories, indent=2)}
- These are pre-class interactions between students and an AI tutor aimed at identifying learning difficulties and improving course delivery.
- Topics covered: {', '.join(all_topics)}.

Your task is to provide detailed analytics that will help faculty address challenges effectively and enhance learning outcomes.

Output Format (strictly follow this JSON structure):
{{
"topic_wise_insights": [
{{
"topic": "<string>",
"struggling_percentage": <number between 0 and 1>,
"key_issues": ["<string>", "<string>", ...],
"key_misconceptions": ["<string>", "<string>", ...],
"recommended_actions": {{
"description": "<string>",
"priority": "high|medium|low",
"expected_outcome": "<string>"
}}
}}
],
"ai_recommended_actions": [
{{
"action": "<string>",
"priority": "high|medium|low",
"reasoning": "<string>",
"expected_outcome": "<string>",
"pedagogy_recommendations": {{
"methods": ["<string>", "<string>", ...],
"resources": ["<string>", "<string>", ...],
"expected_impact": "<string>"
}}
}}
],
"student_analytics": [
{{
"student_id": "<string>",
"engagement_metrics": {{
"participation_level": <number between 0 and 1>,
"concept_understanding": "strong|moderate|needs_improvement",
"question_quality": "advanced|intermediate|basic"
}},
"struggling_topics": ["<string>", "<string>", ...],
"personalized_recommendation": "<string>"
}}
]
}}

Guidelines for Analysis:
- Focus on actionable and concise insights rather than exhaustive details.
- Use both explicit (e.g., direct questions) and implicit (e.g., repeated follow-ups) cues to identify areas of difficulty.
- Prioritize topics with higher difficulty scores or more students struggling.
- Ensure numerical values (e.g., difficulty levels, percentages) are between 0 and 1 where applicable.
- Make sure to include All** students in the analysis, not just a subset.
- for the ai_recommended_actions:
- Prioritize pedagogy recommendations for critical topics with the high difficulty scores or struggling percentages.
- For each action:
- Include specific teaching methods (e.g., interactive discussions or quizzes, problem-based learning, practical examples etc).
- Recommend supporting resources (e.g., videos, handouts, simulations).
- Provide reasoning for the recommendation and the expected outcomes for student learning.
- Example:
- **Action:** Conduct an interactive problem-solving session on "<Topic Name>".
- **Reasoning:** Students showed difficulty in applying concepts to practical problems.
- **Expected Outcome:** Improved practical understanding and application of the topic.
- **Pedagogy Recommendations:**
- **Methods:** Group discussions, real-world case studies.
- **Resources:** Online interactive tools, relevant case studies, video walkthroughs.
- **Expected Impact:** Enhance conceptual clarity by 40% and practical application by 30%.

The response must adhere strictly to the above JSON structure, with all fields populated appropriately."""
|
198 |
+
|
199 |
+
|
200 |
+
def _calculate_class_distribution(self, analytics: Dict) -> Dict:
|
201 |
+
"""Calculate the distribution of students across performance levels."""
|
202 |
+
try:
|
203 |
+
total_students = len(analytics.get("student_insights", []))
|
204 |
+
if total_students == 0:
|
205 |
+
return {
|
206 |
+
"high_performers": 0,
|
207 |
+
"average_performers": 0,
|
208 |
+
"at_risk": 0
|
209 |
+
}
|
210 |
+
|
211 |
+
distribution = defaultdict(int)
|
212 |
+
|
213 |
+
for student in analytics.get("student_insights", []):
|
214 |
+
performance_level = student.get("performance_level", "average")
|
215 |
+
# Map performance levels to our three categories
|
216 |
+
if performance_level in ["excellent", "high", "high_performer"]:
|
217 |
+
distribution["high_performers"] += 1
|
218 |
+
elif performance_level in ["struggling", "low", "at_risk"]:
|
219 |
+
distribution["at_risk"] += 1
|
220 |
+
else:
|
221 |
+
distribution["average_performers"] += 1
|
222 |
+
|
223 |
+
# Convert to percentages
|
224 |
+
return {
|
225 |
+
level: count/total_students
|
226 |
+
for level, count in distribution.items()
|
227 |
+
}
|
228 |
+
except Exception as e:
|
229 |
+
print(f"Error calculating class distribution: {str(e)}")
|
230 |
+
return {
|
231 |
+
"high_performers": 0,
|
232 |
+
"average_performers": 0,
|
233 |
+
"at_risk": 0
|
234 |
+
}
|
235 |
+
|
236 |
+
def _identify_urgent_cases(self, analytics: Dict) -> List[str]:
|
237 |
+
"""Identify students needing immediate attention."""
|
238 |
+
try:
|
239 |
+
urgent_cases = []
|
240 |
+
for student in analytics.get("student_insights", []):
|
241 |
+
student_id = student.get("student_id")
|
242 |
+
if not student_id:
|
243 |
+
continue
|
244 |
+
|
245 |
+
# Check multiple risk factors
|
246 |
+
risk_factors = 0
|
247 |
+
|
248 |
+
# Factor 1: Performance level
|
249 |
+
if student.get("performance_level") in ["struggling", "at_risk", "low"]:
|
250 |
+
risk_factors += 1
|
251 |
+
|
252 |
+
# Factor 2: Number of struggling topics
|
253 |
+
if len(student.get("struggling_topics", [])) >= 2:
|
254 |
+
risk_factors += 1
|
255 |
+
|
256 |
+
# Factor 3: Engagement metrics
|
257 |
+
engagement = student.get("engagement_metrics", {})
|
258 |
+
if (engagement.get("participation_level") == "low" or
|
259 |
+
engagement.get("concept_understanding") == "needs_improvement"):
|
260 |
+
risk_factors += 1
|
261 |
+
|
262 |
+
# If student has multiple risk factors, add to urgent cases
|
263 |
+
if risk_factors >= 2:
|
264 |
+
urgent_cases.append(student_id)
|
265 |
+
|
266 |
+
return urgent_cases
|
267 |
+
except Exception as e:
|
268 |
+
print(f"Error identifying urgent cases: {str(e)}")
|
269 |
+
return []
|
270 |
+
|
271 |
+
def _identify_monitoring_cases(self, analytics: Dict) -> List[str]:
|
272 |
+
"""Identify students who need monitoring but aren't urgent cases."""
|
273 |
+
try:
|
274 |
+
monitoring_cases = []
|
275 |
+
urgent_cases = set(self._identify_urgent_cases(analytics))
|
276 |
+
|
277 |
+
for student in analytics.get("student_insights", []):
|
278 |
+
student_id = student.get("student_id")
|
279 |
+
if not student_id or student_id in urgent_cases:
|
280 |
+
continue
|
281 |
+
|
282 |
+
# Check monitoring criteria
|
283 |
+
monitoring_needed = False
|
284 |
+
|
285 |
+
# Criterion 1: Has some struggling topics but not enough for urgent
|
286 |
+
if len(student.get("struggling_topics", [])) == 1:
|
287 |
+
monitoring_needed = True
|
288 |
+
|
289 |
+
# Criterion 2: Medium-low engagement
|
290 |
+
engagement = student.get("engagement_metrics", {})
|
291 |
+
if engagement.get("participation_level") == "medium":
|
292 |
+
monitoring_needed = True
|
293 |
+
|
294 |
+
# Criterion 3: Recent performance decline
|
295 |
+
if student.get("performance_level") == "average":
|
296 |
+
monitoring_needed = True
|
297 |
+
|
298 |
+
if monitoring_needed:
|
299 |
+
monitoring_cases.append(student_id)
|
300 |
+
|
301 |
+
return monitoring_cases
|
302 |
+
except Exception as e:
|
303 |
+
print(f"Error identifying monitoring cases: {str(e)}")
|
304 |
+
return []
|
305 |
+
|
306 |
+
def _identify_critical_topics(self, analytics: Dict) -> List[str]:
|
307 |
+
"""
|
308 |
+
Identify critical topics that need attention based on multiple factors.
|
309 |
+
Returns a list of topic names that are considered critical.
|
310 |
+
"""
|
311 |
+
try:
|
312 |
+
critical_topics = []
|
313 |
+
topics = analytics.get("topic_insights", [])
|
314 |
+
|
315 |
+
for topic in topics:
|
316 |
+
if not isinstance(topic, dict):
|
317 |
+
continue
|
318 |
+
|
319 |
+
# Initialize score for topic criticality
|
320 |
+
critical_score = 0
|
321 |
+
|
322 |
+
# Factor 1: High difficulty level
|
323 |
+
difficulty_level = topic.get("difficulty_level", 0)
|
324 |
+
if difficulty_level > 0.7:
|
325 |
+
critical_score += 2
|
326 |
+
elif difficulty_level > 0.5:
|
327 |
+
critical_score += 1
|
328 |
+
|
329 |
+
# Factor 2: Number of students struggling
|
330 |
+
student_count = topic.get("student_count", 0)
|
331 |
+
total_students = len(analytics.get("student_insights", []))
|
332 |
+
if total_students > 0:
|
333 |
+
struggle_ratio = student_count / total_students
|
334 |
+
if struggle_ratio > 0.5:
|
335 |
+
critical_score += 2
|
336 |
+
elif struggle_ratio > 0.3:
|
337 |
+
critical_score += 1
|
338 |
+
|
339 |
+
# Factor 3: Number of common issues
|
340 |
+
if len(topic.get("common_issues", [])) > 2:
|
341 |
+
critical_score += 1
|
342 |
+
|
343 |
+
# Factor 4: Number of key misconceptions
|
344 |
+
if len(topic.get("key_misconceptions", [])) > 1:
|
345 |
+
critical_score += 1
|
346 |
+
|
347 |
+
# If topic exceeds threshold, mark as critical
|
348 |
+
if critical_score >= 3:
|
349 |
+
critical_topics.append(topic.get("topic", "Unknown Topic"))
|
350 |
+
|
351 |
+
return critical_topics
|
352 |
+
|
353 |
+
except Exception as e:
|
354 |
+
print(f"Error identifying critical topics: {str(e)}")
|
355 |
+
return []
|
356 |
+
|
357 |
+
def _calculate_engagement(self, analytics: Dict) -> Dict:
|
358 |
+
"""
|
359 |
+
Calculate detailed engagement metrics across all students.
|
360 |
+
Returns a dictionary with engagement statistics.
|
361 |
+
"""
|
362 |
+
try:
|
363 |
+
total_students = len(analytics.get("student_insights", []))
|
364 |
+
if total_students == 0:
|
365 |
+
return {
|
366 |
+
"total_students": 0,
|
367 |
+
"overall_score": 0,
|
368 |
+
"engagement_distribution": {
|
369 |
+
"high": 0,
|
370 |
+
"medium": 0,
|
371 |
+
"low": 0
|
372 |
+
},
|
373 |
+
"participation_metrics": {
|
374 |
+
"average_topics_per_student": 0,
|
375 |
+
"active_participants": 0
|
376 |
+
}
|
377 |
+
}
|
378 |
+
|
379 |
+
engagement_levels = defaultdict(int)
|
380 |
+
total_topics_engaged = 0
|
381 |
+
active_participants = 0
|
382 |
+
|
383 |
+
for student in analytics.get("student_insights", []):
|
384 |
+
# Get engagement metrics
|
385 |
+
metrics = student.get("engagement_metrics", {})
|
386 |
+
|
387 |
+
# Calculate participation level
|
388 |
+
participation = metrics.get("participation_level", "low").lower()
|
389 |
+
engagement_levels[participation] += 1
|
390 |
+
|
391 |
+
# Count topics student is engaged with
|
392 |
+
topics_count = len(student.get("struggling_topics", []))
|
393 |
+
total_topics_engaged += topics_count
|
394 |
+
|
395 |
+
# Count active participants (students engaging with any topics)
|
396 |
+
if topics_count > 0:
|
397 |
+
active_participants += 1
|
398 |
+
|
399 |
+
# Calculate overall engagement score (0-1)
|
400 |
+
weighted_score = (
|
401 |
+
(engagement_levels["high"] * 1.0 +
|
402 |
+
engagement_levels["medium"] * 0.6 +
|
403 |
+
engagement_levels["low"] * 0.2) / total_students
|
404 |
+
)
|
405 |
+
|
406 |
+
return {
|
407 |
+
"total_students": total_students,
|
408 |
+
"overall_score": round(weighted_score, 2),
|
409 |
+
"engagement_distribution": {
|
410 |
+
level: count/total_students
|
411 |
+
for level, count in engagement_levels.items()
|
412 |
+
},
|
413 |
+
"participation_metrics": {
|
414 |
+
"average_topics_per_student": round(total_topics_engaged / total_students, 2),
|
415 |
+
"active_participants_ratio": round(active_participants / total_students, 2)
|
416 |
+
}
|
417 |
+
}
|
418 |
+
|
419 |
+
except Exception as e:
|
420 |
+
print(f"Error calculating engagement: {str(e)}")
|
421 |
+
return {
|
422 |
+
"total_students": 0,
|
423 |
+
"overall_score": 0,
|
424 |
+
"engagement_distribution": {
|
425 |
+
"high": 0,
|
426 |
+
"medium": 0,
|
427 |
+
"low": 0
|
428 |
+
},
|
429 |
+
"participation_metrics": {
|
430 |
+
"average_topics_per_student": 0,
|
431 |
+
"active_participants_ratio": 0
|
432 |
+
}
|
433 |
+
}
|
434 |
+
|
435 |
+
def _process_gemini_response(self, response: str) -> Dict:
|
436 |
+
"""Process and validate Gemini's response."""
|
437 |
+
# try:
|
438 |
+
# analytics = json.loads(response)
|
439 |
+
# return self._enrich_analytics(analytics)
|
440 |
+
# except json.JSONDecodeError as e:
|
441 |
+
# print(f"Error decoding Gemini response: {e}")
|
442 |
+
# return self._fallback_analytics()
|
443 |
+
try:
|
444 |
+
# Parse JSON response
|
445 |
+
analytics = json.loads(response)
|
446 |
+
|
447 |
+
# Validate required fields exist
|
448 |
+
required_fields = {
|
449 |
+
"topic_insights": [],
|
450 |
+
"student_insights": [],
|
451 |
+
"recommended_actions": []
|
452 |
+
}
|
453 |
+
|
454 |
+
# Ensure all required fields exist with default values
|
455 |
+
for field, default_value in required_fields.items():
|
456 |
+
if field not in analytics or not analytics[field]:
|
457 |
+
analytics[field] = default_value
|
458 |
+
|
459 |
+
# Now enrich the validated analytics
|
460 |
+
return self._enrich_analytics(analytics)
|
461 |
+
|
462 |
+
except (json.JSONDecodeError, KeyError, TypeError) as e:
|
463 |
+
print(f"Error processing Gemini response: {str(e)}")
|
464 |
+
print(f"Raw response: {response}")
|
465 |
+
return self._fallback_analytics()
|
466 |
+
|
467 |
+
def _enrich_analytics(self, analytics: Dict) -> Dict:
    """Attach derived course-health and intervention sections to ``analytics``.

    The dict is modified in place and also returned. ``course_health`` is
    stored before the intervention helpers run, so those helpers see it.
    """
    # Course-level health indicators.
    engagement = self._calculate_engagement(analytics)
    critical = self._identify_critical_topics(analytics)
    distribution = self._calculate_class_distribution(analytics)
    analytics["course_health"] = {
        "overall_engagement": engagement,
        "critical_topics": critical,
        "class_distribution": distribution,
    }

    # Students grouped by how urgently they need attention.
    urgent = self._identify_urgent_cases(analytics)
    monitoring = self._identify_monitoring_cases(analytics)
    analytics["intervention_metrics"] = {
        "immediate_attention_needed": urgent,
        "monitoring_required": monitoring,
    }

    return analytics
|
483 |
+
|
484 |
+
def _calculate_engagement(self, analytics: Dict) -> Dict:
    """Summarise how students are spread across participation levels.

    Args:
        analytics: Analytics payload; ``student_insights`` is read as a list
            of dicts, each optionally carrying
            ``engagement_metrics.participation_level``.

    Returns:
        ``{"total_students": int, "engagement_distribution": {...}}``. The
        distribution always contains ``high``, ``medium`` and ``low`` (as a
        share of the class, 0 when nobody is at that level), plus any other
        level name the payload used. All-zero values are returned when there
        are no students or the payload is malformed.
    """
    def _zeroed() -> Dict:
        return {
            "total_students": 0,
            "engagement_distribution": {"high": 0, "medium": 0, "low": 0},
        }

    try:
        students = analytics.get("student_insights") or []
        total_students = len(students)
        if total_students == 0:
            return _zeroed()

        counts: Dict = {}
        for student in students:
            # A missing or null metrics/level entry counts as "low" instead
            # of invalidating the whole class summary.
            metrics = student.get("engagement_metrics") or {}
            level = metrics.get("participation_level") or "low"
            counts[level] = counts.get(level, 0) + 1

        # Seed the standard levels so the populated result has the same keys
        # as the empty/error result.
        distribution = {"high": 0, "medium": 0, "low": 0}
        for level, count in counts.items():
            distribution[level] = count / total_students

        return {
            "total_students": total_students,
            "engagement_distribution": distribution,
        }
    except Exception as e:
        # Best-effort metric: a malformed payload must not break the report.
        print(f"Error calculating engagement: {str(e)}")
        return _zeroed()
|
536 |
+
|
537 |
+
def _identify_critical_topics(self, analytics: Dict) -> List[Dict]:
    """Return the topic entries that need immediate attention.

    A topic qualifies when its ``difficulty_level`` exceeds 0.7 or it lists
    more than two ``common_issues``. Any error while scanning discards the
    partial result and yields an empty list.
    """
    flagged = []
    try:
        for entry in analytics.get("topic_insights", []):
            too_hard = entry.get("difficulty_level", 0) > 0.7
            if too_hard or len(entry.get("common_issues", [])) > 2:
                flagged.append(entry)
    except Exception as e:
        print(f"Error identifying critical topics: {str(e)}")
        return []
    return flagged
|
554 |
+
|
555 |
+
def generate_analytics(self, chat_histories: List[Dict], all_topics: List[str]) -> Dict:
    """Generate analytics from chat histories via the Gemini model.

    Progress is traced with ``print`` at every stage. Whatever goes wrong
    (missing input, preprocessing or prompt failure, empty or undecodable
    model reply) the method returns ``self._fallback_analytics()``.

    Args:
        chat_histories: Raw chat-history records.
        all_topics: Topics the analytics should cover.

    Returns:
        The decoded JSON reply from the model, returned as-is, or the
        fallback analytics on any failure.
    """
    try:
        # Trace what we were given.
        print("Input validation:")
        print(f"Chat histories: {len(chat_histories)} entries")
        print(f"Topics: {all_topics}")

        if not (chat_histories and all_topics):
            print("Missing required input data")
            return self._fallback_analytics()

        # Stage 1: reduce the histories to the fields the prompt needs.
        try:
            processed_histories = self._preprocess_chat_histories(chat_histories)
            print("Successfully preprocessed chat histories")
        except Exception as preprocess_error:
            print(f"Error in preprocessing: {str(preprocess_error)}")
            return self._fallback_analytics()

        # Stage 2: build the prompt and show its first 200 characters.
        try:
            prompt = self._create_analytics_prompt(processed_histories, all_topics)
            print("Successfully created prompt")
            print("Prompt preview:", prompt[:200] + "...")
        except Exception as prompt_error:
            print(f"Error in prompt creation: {str(prompt_error)}")
            return self._fallback_analytics()

        # Stage 3: ask the model for a JSON reply at low temperature.
        reply = self.model.generate_content(
            prompt,
            generation_config=genai.GenerationConfig(
                response_mime_type="application/json",
                temperature=0.15,
            ),
        )

        if not reply.text:
            print("Empty response from Gemini")
            return self._fallback_analytics()

        return json.loads(reply.text)

    except Exception as e:
        print(f"Error generating analytics: {str(e)}")
        print(f"Error type: {type(e)}")
        import traceback
        print("Full traceback:", traceback.format_exc())
        return self._fallback_analytics()
|
661 |
+
|
662 |
+
def _preprocess_chat_histories(self, chat_histories: List[Dict]) -> List[Dict]:
    """Reduce chat histories to the user id and prompt/response pairs.

    Each record becomes ``{"user_id": str, "messages": [{"prompt", "response"}]}``.
    ``user_id`` may be a plain value or an extended-JSON ObjectId of the form
    ``{"$oid": "..."}``. A record that is malformed in any way (including a
    missing ``user_id``) is reported and skipped; the rest are still processed.

    Args:
        chat_histories: Raw chat-history records.

    Returns:
        The list of successfully processed records, in input order.
    """
    processed = []

    for chat in chat_histories:
        # Placeholder so the error message is usable when the id itself is
        # the part that cannot be read.
        user_id = "<unknown>"
        try:
            raw_id = chat["user_id"]
            if isinstance(raw_id, dict) and "$oid" in raw_id:
                raw_id = raw_id["$oid"]
            user_id = str(raw_id)

            processed.append({
                "user_id": user_id,
                "messages": [
                    {"prompt": msg["prompt"], "response": msg["response"]}
                    for msg in chat["messages"]
                ],
            })
            print(f"Successfully processed chat for user: {user_id}")
        except Exception as e:
            # Best effort: one bad record must not discard the whole batch.
            print(f"Error processing chat for user: {user_id}")
            print(f"Error details: {str(e)}")

    return processed
|
709 |
+
|
710 |
+
def _fallback_analytics(self) -> Dict:
    """Return placeholder analytics used when generation fails.

    The payload has empty insight lists, a single high-priority action asking
    administrators to review the analytics pipeline, and zeroed
    ``course_health`` / empty ``intervention_metrics`` sections.
    """
    # NOTE(review): "overall_engagement" is the scalar 0 here, while
    # _calculate_engagement returns a dict - confirm consumers accept both.
    review_action = {
        "action": "Review analytics generation process",
        "priority": "high",
        "target_group": "system_administrators",
        "reasoning": "Analytics generation failed",
        "expected_impact": "Restore analytics functionality",
    }
    course_health = {
        "overall_engagement": 0,
        "critical_topics": [],
        "class_distribution": {
            "high_performers": 0,
            "average_performers": 0,
            "at_risk": 0,
        },
    }
    intervention_metrics = {
        "immediate_attention_needed": [],
        "monitoring_required": [],
    }

    fallback: Dict = {}
    fallback["topic_insights"] = []
    fallback["student_insights"] = []
    fallback["recommended_actions"] = [review_action]
    fallback["course_health"] = course_health
    fallback["intervention_metrics"] = intervention_metrics
    return fallback
|
752 |
+
|
753 |
+
# if __name__ == "__main__":
|
754 |
+
# # Example usage
|
755 |
+
|
756 |
+
|
757 |
+
# analytics_generator = NovaScholarAnalytics()
|
758 |
+
# analytics = analytics_generator.generate_analytics(chat_histories, all_topics)
|
759 |
# print(json.dumps(analytics, indent=2))
|
pre_class_analytics4.py
CHANGED
@@ -1,592 +1,592 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
from datetime import datetime
|
4 |
-
from typing import List, Dict, Any, Tuple
|
5 |
-
import spacy
|
6 |
-
from collections import Counter, defaultdict
|
7 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
-
from textblob import TextBlob
|
10 |
-
import networkx as nx
|
11 |
-
from scipy import stats
|
12 |
-
import logging
|
13 |
-
import json
|
14 |
-
from dataclasses import dataclass
|
15 |
-
from enum import Enum
|
16 |
-
|
17 |
-
# Configure logging
|
18 |
-
logging.basicConfig(level=logging.INFO)
|
19 |
-
logger = logging.getLogger(__name__)
|
20 |
-
|
21 |
-
class TopicDifficulty(Enum):
    """Difficulty bands for a topic, declared from easiest to hardest.

    The member values are the lowercase strings used when a difficulty is
    serialised.
    """

    # Declaration order is easiest -> hardest; the string values themselves
    # do not sort in that order.
    EASY = "easy"
    MODERATE = "moderate"
    DIFFICULT = "difficult"
    VERY_DIFFICULT = "very_difficult"
|
26 |
-
|
27 |
-
|
28 |
-
@dataclass
class QuestionMetrics:
    """Numeric measurements recorded for a single question.

    NOTE(review): units and ranges are not established anywhere in this
    file - e.g. whether ``time_spent`` is seconds or minutes; confirm with
    the producer of these values.
    """

    # Scores are floats, counters are ints.
    complexity_score: float
    follow_up_count: int
    clarification_count: int
    time_spent: float
    sentiment_score: float
|
35 |
-
|
36 |
-
@dataclass
class TopicInsights:
    """Bundle of everything the analysis derived for one topic."""

    difficulty_level: TopicDifficulty
    common_confusion_points: List[str]
    question_patterns: List[str]
    time_distribution: Dict[str, float]
    engagement_metrics: Dict[str, float]
    recommended_focus_areas: List[str]

    def to_dict(self):
        """Return a plain-dict view of the insights.

        The difficulty enum is replaced by its string value and the keys of
        ``time_distribution`` are coerced to ``str``; the remaining fields
        are passed through unchanged (not copied).
        """
        stringified_times = {}
        for key, minutes in self.time_distribution.items():
            stringified_times[str(key)] = minutes

        payload = {}
        payload["difficulty_level"] = self.difficulty_level.value
        payload["common_confusion_points"] = self.common_confusion_points
        payload["question_patterns"] = self.question_patterns
        payload["time_distribution"] = stringified_times
        payload["engagement_metrics"] = self.engagement_metrics
        payload["recommended_focus_areas"] = self.recommended_focus_areas
        return payload
|
54 |
-
|
55 |
-
class PreClassAnalytics:
|
56 |
-
def __init__(self, nlp_model: str = "en_core_web_lg"):
    """Load the spaCy pipeline and define the keyword sets used for tagging.

    Args:
        nlp_model: Name of the spaCy model passed to ``spacy.load``.
    """
    self.nlp = spacy.load(nlp_model)

    # Words that mark a prompt as a question.
    self.question_indicators = set(
        "what why how when where which who whose whom can could would will explain".split()
    )

    # Words and phrases that signal the student is struggling.
    self.confusion_indicators = set([
        "confused",
        "don't understand",
        "unclear",
        "not clear",
        "stuck",
        "difficult",
        "hard",
        "help",
        "explain again",
    ])

    # Words that suggest the prompt continues an earlier thread.
    self.follow_up_indicators = set(
        "also another additionally furthermore moreover besides related similarly again".split()
    )
|
71 |
-
|
72 |
-
def preprocess_chat_history(self, chat_history: List[Dict]) -> pd.DataFrame:
    """Flatten chat histories into one row per message with derived flags.

    Columns: ``user_id``, ``timestamp``, ``prompt``, ``response``,
    ``is_question``, ``shows_confusion``, ``is_followup`` and ``sentiment``
    (TextBlob polarity of the prompt). The three flags are substring tests of
    the lower-cased prompt against the indicator sets defined on the instance.

    Args:
        chat_history: Records with ``user_id`` (plain value or
            ``{"$oid": ...}``) and a ``messages`` list; each message carries
            ``prompt``, ``response`` and optionally ``timestamp`` (an ISO
            string or ``{"$date": ...}``).

    Returns:
        The message DataFrame. A timestamp that is missing or unparseable
        becomes ``NaT``; an empty history yields an empty frame that still
        has every column.
    """
    columns = [
        'user_id', 'timestamp', 'prompt', 'response',
        'is_question', 'shows_confusion', 'is_followup',
    ]
    messages = []

    for chat in chat_history:
        raw_id = chat['user_id']
        # Accept both the extended-JSON ObjectId form and a plain id.
        if isinstance(raw_id, dict) and '$oid' in raw_id:
            user_id = raw_id['$oid']
        else:
            user_id = str(raw_id)

        for msg in chat['messages']:
            # .get() so that a message without a timestamp degrades to NaT
            # instead of raising again inside the error handler.
            raw_ts = msg.get('timestamp')
            try:
                if isinstance(raw_ts, dict) and '$date' in raw_ts:
                    timestamp = pd.to_datetime(raw_ts['$date'])
                elif isinstance(raw_ts, str):
                    timestamp = pd.to_datetime(raw_ts)
                else:
                    raise ValueError("Invalid timestamp format")
            except Exception as e:
                print(f"Error parsing timestamp: {raw_ts}, error: {e}")
                timestamp = pd.NaT  # Not-a-Time marks an unusable timestamp

            prompt_lower = msg['prompt'].lower()
            messages.append({
                'user_id': user_id,
                'timestamp': timestamp,
                'prompt': msg['prompt'],
                'response': msg['response'],
                'is_question': any(q in prompt_lower for q in self.question_indicators),
                'shows_confusion': any(c in prompt_lower for c in self.confusion_indicators),
                'is_followup': any(f in prompt_lower for f in self.follow_up_indicators),
            })

    # Passing the column list keeps the schema intact when there are no rows.
    df = pd.DataFrame(messages, columns=columns)
    df['sentiment'] = df['prompt'].apply(lambda x: TextBlob(x).sentiment.polarity)
    return df
|
103 |
-
|
104 |
-
def extract_topic_hierarchies(self, df: pd.DataFrame) -> Dict[str, List[str]]:
    """Map each main topic to the subtopics mentioned alongside it.

    For every prompt, noun chunks whose root is a subject or direct object
    (``nsubj``/``dobj``) are main topics; all other noun chunks of the same
    prompt are attached to each of those main topics as subtopics. All text
    is lower-cased.

    Returns:
        ``{main_topic: [unique subtopics]}``; subtopic order is unspecified.
    """
    main_deps = ('nsubj', 'dobj')
    hierarchy = defaultdict(list)

    for _index, record in df.iterrows():
        parsed = self.nlp(record['prompt'])

        # Tag every noun chunk once, then split the chunks by role.
        tagged = [(chunk.root.dep_ in main_deps, chunk) for chunk in parsed.noun_chunks]
        heads = [chunk.text.lower() for is_main, chunk in tagged if is_main]
        children = [chunk.text.lower() for is_main, chunk in tagged if not is_main]

        for head in heads:
            hierarchy[head].extend(children)

    # Drop duplicate subtopics.
    deduplicated = {}
    for head, children in hierarchy.items():
        deduplicated[head] = list(set(children))
    return deduplicated
|
127 |
-
|
128 |
-
def analyze_topic_difficulty(self, df: pd.DataFrame, topic: str) -> TopicDifficulty:
    """Classify how difficult a topic appears from the messages mentioning it.

    The score is a weighted blend of the confusion rate (0.4), question rate
    (0.3), follow-up rate (0.2) and inverted normalised sentiment (0.1) over
    the messages whose prompt contains ``topic`` (literal, case-insensitive).

    Args:
        df: Message frame with ``prompt``, ``shows_confusion``,
            ``is_question``, ``is_followup`` and ``sentiment`` columns.
        topic: Text to look for in the prompts.

    Returns:
        ``EASY`` below 0.3, ``MODERATE`` below 0.5, ``DIFFICULT`` below 0.7,
        otherwise ``VERY_DIFFICULT``. A topic with no matching message is
        reported as ``EASY`` because there is no evidence of difficulty.
    """
    # regex=False: topics come from free text and may contain characters
    # such as "+" or "(" that are not valid as a pattern.
    mask = df['prompt'].str.contains(topic, case=False, regex=False, na=False)
    topic_msgs = df[mask]

    # Without this guard every mean is NaN, every comparison below is False
    # and the topic would be labelled VERY_DIFFICULT.
    if topic_msgs.empty:
        return TopicDifficulty.EASY

    confusion_rate = topic_msgs['shows_confusion'].mean()
    question_rate = topic_msgs['is_question'].mean()
    follow_up_rate = topic_msgs['is_followup'].mean()
    avg_sentiment = topic_msgs['sentiment'].mean()

    difficulty_score = (
        confusion_rate * 0.4 +
        question_rate * 0.3 +
        follow_up_rate * 0.2 +
        (1 - (avg_sentiment + 1) / 2) * 0.1  # polarity -1..1 -> 0..1, inverted
    )

    if difficulty_score < 0.3:
        return TopicDifficulty.EASY
    if difficulty_score < 0.5:
        return TopicDifficulty.MODERATE
    if difficulty_score < 0.7:
        return TopicDifficulty.DIFFICULT
    return TopicDifficulty.VERY_DIFFICULT
|
155 |
-
|
156 |
-
def identify_confusion_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
    """Return representative sentences in which students express confusion.

    Sentences are taken from prompts that mention ``topic`` (literal,
    case-insensitive) and are flagged ``shows_confusion``. A sentence is kept
    when it contains a confusion indicator. Kept sentences are clustered by
    TF-IDF cosine similarity (threshold 0.5) and the earliest sentence of
    each cluster is returned, including clusters of a single sentence.

    Returns:
        One sentence per cluster, ordered by first occurrence; ``[]`` when no
        sentence qualifies.
    """
    mask = (
        df['prompt'].str.contains(topic, case=False, regex=False, na=False)
        & df['shows_confusion']
    )
    confused_msgs = df.loc[mask, 'prompt']

    # Multi-word indicators ("not clear", ...) can never equal a single
    # token, so they are matched against the sentence text instead.
    phrase_indicators = [ind for ind in self.confusion_indicators if ' ' in ind]

    patterns = []
    for msg in confused_msgs:
        doc = self.nlp(msg)
        for sent in doc.sents:
            sentence = sent.text
            lowered = sentence.lower()
            has_word = any(
                token.text.lower() in self.confusion_indicators for token in sent
            )
            has_phrase = any(phrase in lowered for phrase in phrase_indicators)
            if has_word or has_phrase:
                # Once per sentence, however many indicators it contains.
                patterns.append(sentence)

    if not patterns:
        return []

    vectorizer = TfidfVectorizer(ngram_range=(1, 3))
    tfidf_matrix = vectorizer.fit_transform(patterns)
    similarity_matrix = cosine_similarity(tfidf_matrix)

    G = nx.Graph()
    # Register every pattern first; otherwise a pattern with no similar
    # neighbour has no node and is silently dropped from the result.
    G.add_nodes_from(range(len(patterns)))
    for i in range(len(patterns)):
        for j in range(i + 1, len(patterns)):
            if similarity_matrix[i][j] > 0.5:  # similarity threshold
                G.add_edge(i, j)

    # The lowest index in a cluster is its earliest sentence.
    return [patterns[min(cluster)] for cluster in nx.connected_components(G)]
|
193 |
-
|
194 |
-
def analyze_question_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
    """Summarise the kinds of questions students ask about a topic.

    Questions (prompts flagged ``is_question`` that mention ``topic``,
    matched literally and case-insensitively) are bucketed as conceptual,
    procedural, reasoning or other according to the words they contain; the
    first matching bucket wins. For each non-empty bucket the three n-grams
    with the highest mean TF-IDF weight are reported.

    Returns:
        Strings of the form ``"<category>: <term> <term> <term>"``, one per
        non-empty category in order of first appearance.
    """
    # regex=False: a topic such as "c++" is not a valid pattern.
    mask = (
        df['prompt'].str.contains(topic, case=False, regex=False, na=False)
        & df['is_question']
    )
    topic_questions = df.loc[mask, 'prompt']

    # Checked in this order; the first category with a keyword hit wins.
    category_keywords = (
        ("conceptual", {"what", "define", "explain"}),
        ("procedural", {"how", "steps", "process"}),
        ("reasoning", {"why", "reason", "because"}),
    )

    question_types = defaultdict(list)
    for question in topic_questions:
        words = {token.text.lower() for token in self.nlp(question)}
        for category, keywords in category_keywords:
            if words & keywords:
                question_types[category].append(question)
                break
        else:
            question_types["other"].append(question)

    patterns = []
    for category, questions in question_types.items():
        vectorizer = TfidfVectorizer(ngram_range=(1, 3))
        tfidf_matrix = vectorizer.fit_transform(questions)

        # Rank n-grams by their average weight across the category.
        feature_array = np.mean(tfidf_matrix.toarray(), axis=0)
        tfidf_sorting = np.argsort(feature_array)[::-1]
        features = vectorizer.get_feature_names_out()

        patterns.append(f"{category}: {' '.join(features[tfidf_sorting[:3]])}")

    return patterns
|
230 |
-
|
231 |
-
def analyze_time_distribution(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
    """Measure the time gaps between consecutive messages about a topic.

    Gaps are the differences between the timestamps of successive rows that
    mention ``topic`` (literal, case-insensitive), in the frame's row order.

    Args:
        df: Message frame with ``prompt``, ``timestamp``, ``is_question`` and
            ``shows_confusion`` columns.
        topic: Text to look for in the prompts.

    Returns:
        Minutes for ``total_time``, ``avg_time_per_message``,
        ``max_time_gap``, ``time_spent_on_questions`` and
        ``time_spent_on_confusion``; ``{}`` when fewer than two messages
        mention the topic.
    """
    # regex=False: topics are literal text and may contain characters such
    # as "+" that would otherwise raise re.error.
    mask = df['prompt'].str.contains(topic, case=False, regex=False, na=False)
    topic_msgs = df[mask].copy()
    if len(topic_msgs) < 2:
        return {}

    # NOTE(review): rows are not sorted or grouped by user here, so a gap can
    # span two different students - confirm this is intended.
    topic_msgs['time_diff'] = topic_msgs['timestamp'].diff()
    gaps = topic_msgs['time_diff']

    def _minutes(delta) -> float:
        return delta.total_seconds() / 60

    return {
        'total_time': _minutes(gaps.sum()),
        'avg_time_per_message': _minutes(gaps.mean()),
        'max_time_gap': _minutes(gaps.max()),
        'time_spent_on_questions': _minutes(gaps[topic_msgs['is_question']].sum()),
        'time_spent_on_confusion': _minutes(gaps[topic_msgs['shows_confusion']].sum()),
    }
|
249 |
-
|
250 |
-
def calculate_engagement_metrics(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
    """Calculate engagement figures for the messages mentioning a topic.

    Args:
        df: Message frame with ``prompt``, ``is_question``,
            ``shows_confusion``, ``is_followup`` and ``sentiment`` columns.
        topic: Text to look for in the prompts (literal, case-insensitive).

    Returns:
        ``message_count``, ``question_ratio``, ``confusion_ratio``,
        ``follow_up_ratio``, ``avg_sentiment`` and ``engagement_score``. The
        score is ``0.3 * count + 0.25 * question_ratio +
        0.25 * follow_up_ratio + 0.2 * normalised sentiment``. Every value is
        0 when no message mentions the topic.
    """
    # regex=False: topics are literal text, not patterns.
    mask = df['prompt'].str.contains(topic, case=False, regex=False, na=False)
    topic_msgs = df[mask]
    message_count = len(topic_msgs)

    if message_count == 0:
        # Means over an empty selection are NaN; report explicit zeros so
        # callers and serialisers never see NaN.
        return {
            'message_count': 0,
            'question_ratio': 0.0,
            'confusion_ratio': 0.0,
            'follow_up_ratio': 0.0,
            'avg_sentiment': 0.0,
            'engagement_score': 0.0,
        }

    metrics = {
        'message_count': message_count,
        'question_ratio': float(topic_msgs['is_question'].mean()),
        'confusion_ratio': float(topic_msgs['shows_confusion'].mean()),
        'follow_up_ratio': float(topic_msgs['is_followup'].mean()),
        'avg_sentiment': float(topic_msgs['sentiment'].mean()),
    }

    # NOTE(review): message_count is an unbounded count while the other terms
    # lie in 0..1, so it dominates the score - confirm this weighting.
    metrics['engagement_score'] = (
        metrics['message_count'] * 0.3 +
        metrics['question_ratio'] * 0.25 +
        metrics['follow_up_ratio'] * 0.25 +
        (metrics['avg_sentiment'] + 1) / 2 * 0.2  # polarity -1..1 -> 0..1
    )

    return metrics
|
272 |
-
|
273 |
-
def generate_topic_insights(self, df: pd.DataFrame, topic: str) -> TopicInsights:
    """Run every per-topic analysis and bundle the results with advice.

    Returns:
        A ``TopicInsights`` holding the difficulty, confusion points,
        question patterns, time distribution, engagement metrics and the
        focus areas derived from them.
    """
    difficulty = self.analyze_topic_difficulty(df, topic)
    confusion_points = self.identify_confusion_patterns(df, topic)
    question_patterns = self.analyze_question_patterns(df, topic)
    time_distribution = self.analyze_time_distribution(df, topic)
    engagement_metrics = self.calculate_engagement_metrics(df, topic)

    focus_areas = []

    # Hard topics need the basics revisited.
    hard_levels = (TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT)
    if difficulty in hard_levels:
        focus_areas.append("Fundamental concept reinforcement needed")

    # Quote at most three confusion points.
    if confusion_points:
        quoted = ', '.join(confusion_points[:3])
        focus_areas.append(f"Address common confusion around: {quoted}")

    # More than 30% of the messages show confusion.
    if engagement_metrics['confusion_ratio'] > 0.3:
        focus_areas.append("Consider alternative teaching approaches")

    # More than half of the measured time went into questions.
    question_time = time_distribution.get('time_spent_on_questions', 0)
    overall_time = time_distribution.get('total_time', 0)
    if question_time > overall_time * 0.5:
        focus_areas.append("More practical examples or demonstrations needed")

    return TopicInsights(
        difficulty_level=difficulty,
        common_confusion_points=confusion_points,
        question_patterns=question_patterns,
        time_distribution=time_distribution,
        engagement_metrics=engagement_metrics,
        recommended_focus_areas=focus_areas,
    )
|
304 |
-
|
305 |
-
def analyze_student_progress(self, df: pd.DataFrame) -> Dict[str, Any]:
    """Analyze individual student progress and learning patterns.

    Args:
        df: Message frame with ``user_id``, ``prompt``, ``is_question``,
            ``shows_confusion`` and ``sentiment`` columns.

    Returns:
        ``{student_id: progress}`` where ``progress`` holds
        ``total_messages``, ``questions_asked``, ``confusion_instances``,
        ``avg_sentiment``, ``learning_pattern`` and ``topic_engagement``
        (per topic: ``message_count``, ``confusion_rate`` and
        ``sentiment_trend``, the regression slope of sentiment over message
        order, 0.0 when fewer than two messages are available).
    """
    def _sentiment_slope(series) -> float:
        # linregress raises ValueError with fewer than two points; a topic
        # mentioned once simply has no trend yet.
        if len(series) < 2:
            return 0.0
        return stats.linregress(range(len(series)), series).slope

    student_progress = {}

    for student_id in df['user_id'].unique():
        student_msgs = df[df['user_id'] == student_id]

        progress = {
            'total_messages': len(student_msgs),
            'questions_asked': student_msgs['is_question'].sum(),
            'confusion_instances': student_msgs['shows_confusion'].sum(),
            'avg_sentiment': student_msgs['sentiment'].mean(),
            'topic_engagement': {},
            'learning_pattern': self._identify_learning_pattern(student_msgs),
        }

        # Only the main topics (the dict keys) are analysed per student.
        for topic in self.extract_topic_hierarchies(student_msgs):
            # regex=False: topics are literal text and may contain "+", "(" ...
            mask = student_msgs['prompt'].str.contains(
                topic, case=False, regex=False, na=False
            )
            topic_msgs = student_msgs[mask]
            progress['topic_engagement'][topic] = {
                'message_count': len(topic_msgs),
                'confusion_rate': topic_msgs['shows_confusion'].mean(),
                'sentiment_trend': _sentiment_slope(topic_msgs['sentiment']),
            }

        student_progress[student_id] = progress

    return student_progress
|
338 |
-
|
339 |
-
def _identify_learning_pattern(self, student_msgs: pd.DataFrame) -> str:
    """Label a student's learning pattern from their interaction style.

    Rules are checked in order; the first match wins:
    more than 60% questions -> "Inquisitive Learner";
    more than 40% confused messages -> "Needs Additional Support";
    more than 50% follow-ups -> "Deep Dive Learner";
    rising sentiment -> "Progressive Learner";
    otherwise "Steady Learner".

    Args:
        student_msgs: One student's messages with ``is_question``,
            ``shows_confusion``, ``is_followup`` and ``sentiment`` columns.
    """
    if student_msgs['is_question'].mean() > 0.6:
        return "Inquisitive Learner"
    if student_msgs['shows_confusion'].mean() > 0.4:
        return "Needs Additional Support"
    if student_msgs['is_followup'].mean() > 0.5:
        return "Deep Dive Learner"

    # A trend needs at least two points; linregress raises ValueError
    # otherwise, which would abort the analysis for a one-message student.
    if len(student_msgs) >= 2:
        sentiment_trend = stats.linregress(
            range(len(student_msgs)),
            student_msgs['sentiment']
        ).slope
    else:
        sentiment_trend = 0.0

    if sentiment_trend > 0:
        return "Progressive Learner"
    return "Steady Learner"
|
361 |
-
|
362 |
-
def generate_comprehensive_report(self, chat_history: List[Dict]) -> Dict[str, Any]:
    """Build the full analytics report for a set of chat histories.

    Returns:
        A dict with ``topics`` (per main topic: insights, per-subtopic
        details and topic relationships), ``student_progress``,
        ``overall_metrics``, ``temporal_analysis`` and ``recommendations``.
    """
    df = self.preprocess_chat_history(chat_history)
    topics = self.extract_topic_hierarchies(df)

    student_progress = self.analyze_student_progress(df)

    # Count how many main topics each subtopic appears under.
    subtopic_counts = Counter()
    for subtopic_list in topics.values():
        subtopic_counts.update(subtopic_list)

    report = {
        'topics': {},
        'student_progress': student_progress,
        'overall_metrics': {
            'total_conversations': len(df),
            'unique_students': df['user_id'].nunique(),
            'avg_sentiment': df['sentiment'].mean(),
            'most_discussed_topics': subtopic_counts.most_common(5),
        },
    }

    student_ids = df['user_id'].unique()

    for main_topic, subtopics in topics.items():
        per_subtopic = {}
        for subtopic in subtopics:
            insights = self.generate_topic_insights(df, subtopic)
            siblings = [other for other in subtopics if other != subtopic]
            engagement = {}
            for student_id in student_ids:
                own_messages = df[df['user_id'] == student_id]
                engagement[student_id] = self.calculate_engagement_metrics(
                    own_messages, subtopic
                )
            per_subtopic[subtopic] = {
                'insights': insights,
                'related_topics': siblings,
                'student_engagement': engagement,
            }

        main_insights = self.generate_topic_insights(df, main_topic)
        connection_strength = self._calculate_topic_connections(df, main_topic, subtopics)
        progression_path = self._identify_topic_progression(df, main_topic, subtopics)
        report['topics'][main_topic] = {
            'insights': main_insights,
            'subtopics': per_subtopic,
            'topic_relationships': {
                'hierarchy_depth': len(subtopics),
                'connection_strength': connection_strength,
                'progression_path': progression_path,
            },
        }

    # Activity per calendar day and the three busiest hours of the day.
    by_day = df.groupby(df['timestamp'].dt.date).agg({
        'user_id': 'count',
        'is_question': 'sum',
        'shows_confusion': 'sum',
        'sentiment': 'mean'
    })
    by_hour = df.groupby(df['timestamp'].dt.hour)['user_id'].count()
    report['temporal_analysis'] = {
        'daily_engagement': by_day.to_dict(),
        'peak_activity_hours': by_hour.nlargest(3).to_dict(),
        'learning_trends': self._analyze_learning_trends(df),
    }

    # Recommendations are derived from everything assembled above.
    report['recommendations'] = self._generate_recommendations(report)

    return report
|
426 |
-
|
427 |
-
def _calculate_topic_connections(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> Dict[str, float]:
    """Calculate how strongly each subtopic co-occurs with the main topic.

    The strength of a subtopic is the share of prompts mentioning
    ``main_topic`` that also mention the subtopic. Matching is literal and
    case-insensitive.

    Returns:
        ``{subtopic: strength}`` with strengths in 0..1; every strength is 0
        when no prompt mentions the main topic.
    """
    prompts = df['prompt']
    # regex=False: topics are literal text and may contain "+", "(" ...
    # The main-topic mask does not depend on the subtopic, so compute it once.
    main_mask = prompts.str.contains(main_topic, case=False, regex=False, na=False)
    main_count = int(main_mask.sum())

    connections = {}
    for subtopic in subtopics:
        if main_count == 0:
            connections[subtopic] = 0
            continue
        sub_mask = prompts.str.contains(subtopic, case=False, regex=False, na=False)
        cooccurrence = int((main_mask & sub_mask).sum())
        connections[subtopic] = cooccurrence / main_count

    return connections
|
442 |
-
|
443 |
-
def _identify_topic_progression(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> List[str]:
    """Order subtopics from easiest to hardest.

    Each subtopic is rated with ``self.analyze_topic_difficulty``; subtopics
    with the same rating keep their input order.

    Args:
        df: Message frame forwarded to the difficulty analysis.
        main_topic: Not used by the ordering itself.
        subtopics: Subtopics to order.

    Returns:
        A new list containing the subtopics in ascending difficulty.
    """
    difficulty_rank = {}
    for subtopic in subtopics:
        level = self.analyze_topic_difficulty(df, subtopic)
        # Rank by the enum's declaration order (easy -> very difficult).
        # Sorting by the string value would be alphabetical and would place
        # "difficult" ahead of "easy".
        difficulty_rank[subtopic] = list(type(level)).index(level)

    return sorted(subtopics, key=lambda name: difficulty_rank[name])
|
453 |
-
|
454 |
-
def _analyze_learning_trends(self, df: pd.DataFrame) -> Dict[str, Any]:
    """Fit linear trends of sentiment and confusion over message order.

    Returns:
        ``sentiment_trend`` and ``confusion_trend`` as dicts of the full
        regression result (slope, intercept, ...), plus
        ``engagement_progression`` from
        ``self._calculate_engagement_progression``.
    """
    # NOTE(review): linregress needs at least two rows; callers are assumed
    # to pass a frame with two or more messages - confirm.
    positions = range(len(df))
    sentiment_fit = stats.linregress(positions, df['sentiment'])
    confusion_fit = stats.linregress(positions, df['shows_confusion'])

    trends = {}
    trends['sentiment_trend'] = sentiment_fit._asdict()
    trends['confusion_trend'] = confusion_fit._asdict()
    trends['engagement_progression'] = self._calculate_engagement_progression(df)
    return trends
|
467 |
-
|
468 |
-
def _calculate_engagement_progression(self, df: pd.DataFrame) -> Dict[str, float]:
    """Calculate how engagement indicators change from week to week.

    Messages are grouped by ISO year and week; the weekly means of
    ``is_question``, ``shows_confusion``, ``is_followup`` and ``sentiment``
    are then regressed against the week's position in the sequence.

    Args:
        df: Message frame with a datetime ``timestamp`` column and the four
            indicator columns. The frame is not modified.

    Returns:
        Slopes under ``question_trend``, ``confusion_trend``,
        ``follow_up_trend`` and ``sentiment_trend``; each is 0.0 when the
        data covers fewer than two weeks.
    """
    # Group on a derived key rather than writing a "week" column into the
    # caller's frame. Including the ISO year keeps week 1 of one year apart
    # from week 1 of the next and orders weeks correctly across New Year.
    iso = df['timestamp'].dt.isocalendar()
    weekly_engagement = df.groupby([iso['year'], iso['week']]).agg({
        'is_question': 'mean',
        'shows_confusion': 'mean',
        'is_followup': 'mean',
        'sentiment': 'mean'
    })

    def _slope(column: str) -> float:
        # linregress raises ValueError with fewer than two points, which is
        # the normal situation when all chats fall within a single week.
        if len(weekly_engagement) < 2:
            return 0.0
        fit = stats.linregress(
            range(len(weekly_engagement)),
            weekly_engagement[column].astype(float)
        )
        return float(fit.slope)

    return {
        'question_trend': _slope('is_question'),
        'confusion_trend': _slope('shows_confusion'),
        'follow_up_trend': _slope('is_followup'),
        'sentiment_trend': _slope('sentiment'),
    }
|
496 |
-
|
497 |
-
def _generate_recommendations(self, report: Dict[str, Any]) -> List[str]:
|
498 |
-
"""Generate actionable recommendations based on the analysis."""
|
499 |
-
recommendations = []
|
500 |
-
|
501 |
-
# Analyze difficulty distribution
|
502 |
-
difficult_topics = [
|
503 |
-
topic for topic, data in report['topics'].items()
|
504 |
-
if data['insights'].difficulty_level in
|
505 |
-
(TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT)
|
506 |
-
]
|
507 |
-
|
508 |
-
if difficult_topics:
|
509 |
-
recommendations.append(
|
510 |
-
f"Consider providing additional resources for challenging topics: {', '.join(difficult_topics)}"
|
511 |
-
)
|
512 |
-
|
513 |
-
# Analyze student engagement
|
514 |
-
avg_engagement = np.mean([
|
515 |
-
progress['questions_asked'] / progress['total_messages']
|
516 |
-
for progress in report['student_progress'].values()
|
517 |
-
])
|
518 |
-
|
519 |
-
if avg_engagement < 0.3:
|
520 |
-
recommendations.append(
|
521 |
-
"Implement more interactive elements to increase student engagement"
|
522 |
-
)
|
523 |
-
|
524 |
-
# Analyze temporal patterns
|
525 |
-
peak_hours = list(report['temporal_analysis']['peak_activity_hours'].keys())
|
526 |
-
recommendations.append(
|
527 |
-
f"Consider scheduling additional support during peak activity hours: {peak_hours}"
|
528 |
-
)
|
529 |
-
|
530 |
-
# Analyze learning trends
|
531 |
-
# sentiment_trend = report['temporal_analysis']['learning_trends']['sentiment_trend']
|
532 |
-
# if sentiment_trend < 0:
|
533 |
-
# recommendations.append(
|
534 |
-
# "Review teaching approach to address declining student satisfaction"
|
535 |
-
# )
|
536 |
-
# Analyze learning trends
|
537 |
-
# Analyze learning trends
|
538 |
-
sentiment_trend = report.get('temporal_analysis', {}).get('learning_trends', {}).get('sentiment_trend', None)
|
539 |
-
if isinstance(sentiment_trend, (int, float)):
|
540 |
-
if sentiment_trend < 0:
|
541 |
-
recommendations.append(
|
542 |
-
"Review teaching approach to address declining student satisfaction"
|
543 |
-
)
|
544 |
-
elif isinstance(sentiment_trend, dict):
|
545 |
-
# Handle the case where sentiment_trend is a dictionary
|
546 |
-
print(f"Unexpected dict format for sentiment_trend: {sentiment_trend}")
|
547 |
-
else:
|
548 |
-
print(f"Unexpected type for sentiment_trend: {type(sentiment_trend)}")
|
549 |
-
|
550 |
-
return recommendations
|
551 |
-
|
552 |
-
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder for report values the stock encoder rejects.

    Handles enum members (their ``value``), NumPy scalars and arrays,
    date/datetime-like objects (via ``isoformat()``), sets, and any object
    exposing a ``to_dict()`` method.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj`` or defer to the base class."""
        if isinstance(obj, Enum):
            return obj.value
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, (set, frozenset)):
            return list(obj)
        # Covers datetime, date and pandas Timestamp without importing them.
        isoformat = getattr(obj, "isoformat", None)
        if callable(isoformat):
            return isoformat()
        to_dict = getattr(obj, "to_dict", None)
        if callable(to_dict):
            return to_dict()
        return super().default(obj)
|
567 |
-
|
568 |
-
def convert_insights_to_dict(report):
    """Replace TopicInsights objects in ``report['topics']`` with dicts, in place.

    Both the topic-level and the subtopic-level ``'insights'`` entries are
    converted via ``to_dict()``. Nothing is returned.
    """
    def _flatten(entry):
        current = entry['insights']
        if isinstance(current, TopicInsights):
            entry['insights'] = current.to_dict()

    for topic_entry in report['topics'].values():
        _flatten(topic_entry)
        for subtopic_entry in topic_entry['subtopics'].values():
            _flatten(subtopic_entry)
|
575 |
-
|
576 |
-
if __name__ == "__main__":
    # Read the sample chat corpus shipped under sample_files/.
    with open('sample_files/chat_history_corpus.json', 'r', encoding="utf-8") as corpus_file:
        chat_history = json.load(corpus_file)

    # Build the analytics engine and run the full report over the corpus.
    analytics = PreClassAnalytics()
    report = analytics.generate_comprehensive_report(chat_history)

    # CustomJSONEncoder serialises the non-native values, so the report is
    # dumped directly without calling convert_insights_to_dict first.
    print(json.dumps(report, indent=4, cls=CustomJSONEncoder))
|
592 |
# print(report)
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import numpy as np
|
3 |
+
from datetime import datetime
|
4 |
+
from typing import List, Dict, Any, Tuple
|
5 |
+
import spacy
|
6 |
+
from collections import Counter, defaultdict
|
7 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
8 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
+
from textblob import TextBlob
|
10 |
+
import networkx as nx
|
11 |
+
from scipy import stats
|
12 |
+
import logging
|
13 |
+
import json
|
14 |
+
from dataclasses import dataclass
|
15 |
+
from enum import Enum
|
16 |
+
|
17 |
+
# Configure logging
|
18 |
+
logging.basicConfig(level=logging.INFO)
|
19 |
+
logger = logging.getLogger(__name__)
|
20 |
+
|
21 |
+
class TopicDifficulty(Enum):
    """Difficulty bucket assigned to a topic; members are listed easiest first."""

    EASY = "easy"
    MODERATE = "moderate"
    DIFFICULT = "difficult"
    VERY_DIFFICULT = "very_difficult"
|
26 |
+
|
27 |
+
|
28 |
+
@dataclass
class QuestionMetrics:
    """Numeric measurements recorded for a question."""

    complexity_score: float
    follow_up_count: int
    clarification_count: int
    time_spent: float  # NOTE(review): unit is not established in this file
    sentiment_score: float
|
35 |
+
|
36 |
+
@dataclass
class TopicInsights:
    """Bundle of analysis results produced for one topic."""

    difficulty_level: TopicDifficulty
    common_confusion_points: List[str]
    question_patterns: List[str]
    time_distribution: Dict[str, float]
    engagement_metrics: Dict[str, float]
    recommended_focus_areas: List[str]

    def to_dict(self):
        """Return the insights as a plain dict with the enum unwrapped to its value."""
        # Keys are forced to str so the mapping is always valid as JSON object keys.
        stringified_times = {}
        for key, value in self.time_distribution.items():
            stringified_times[str(key)] = value

        return dict(
            difficulty_level=self.difficulty_level.value,
            common_confusion_points=self.common_confusion_points,
            question_patterns=self.question_patterns,
            time_distribution=stringified_times,
            engagement_metrics=self.engagement_metrics,
            recommended_focus_areas=self.recommended_focus_areas,
        )
|
54 |
+
|
55 |
+
class PreClassAnalytics:
|
56 |
+
def __init__(self, nlp_model: str = "en_core_web_lg"):
    """Load the spaCy pipeline and define the keyword sets used to tag prompts."""
    self.nlp = spacy.load(nlp_model)

    # Words that mark a prompt as a question.
    self.question_indicators = set((
        "what", "why", "how", "when", "where", "which", "who",
        "whose", "whom", "can", "could", "would", "will", "explain",
    ))

    # Words and phrases that mark a prompt as expressing confusion.
    self.confusion_indicators = set((
        "confused", "don't understand", "unclear", "not clear",
        "stuck", "difficult", "hard", "help", "explain again",
    ))

    # Words that mark a prompt as a follow-up.
    self.follow_up_indicators = set((
        "also", "another", "additionally", "furthermore", "moreover",
        "besides", "related", "similarly", "again",
    ))
|
71 |
+
|
72 |
+
def preprocess_chat_history(self, chat_history: List[Dict]) -> pd.DataFrame:
    """Flatten raw chat documents into one row per message with derived flags.

    Each element of ``chat_history`` is read as a dict with a ``user_id``
    (either ``{"$oid": ...}`` or a plain value) and a ``messages`` list whose
    items carry ``timestamp``, ``prompt`` and ``response``.

    Returns a DataFrame with the columns ``user_id``, ``timestamp``,
    ``prompt``, ``response``, ``is_question``, ``shows_confusion``,
    ``is_followup`` and ``sentiment`` (TextBlob polarity of the prompt).
    Timestamps that are missing or cannot be parsed become ``pd.NaT``.
    An empty history yields an empty frame with the same columns.
    """
    import re  # local import: the module header does not import ``re``

    log = logging.getLogger(__name__)

    def compile_matcher(indicators):
        # Match whole words/phrases only. A plain substring test flags
        # "show" as "how", "scan" as "can" and "hardware" as "hard".
        terms = sorted(indicators, key=len, reverse=True)
        if not terms:
            return lambda text: False
        pattern = re.compile(
            r"(?<!\w)(?:" + "|".join(re.escape(term) for term in terms) + r")(?!\w)"
        )
        return lambda text: pattern.search(text) is not None

    is_question = compile_matcher(self.question_indicators)
    shows_confusion = compile_matcher(self.confusion_indicators)
    is_followup = compile_matcher(self.follow_up_indicators)

    rows = []
    for chat in chat_history:
        raw_user = chat['user_id']
        user_id = raw_user['$oid'] if isinstance(raw_user, dict) else raw_user

        for msg in chat['messages']:
            # .get(): a missing key must end up as NaT, not as a KeyError
            # raised again from inside the error handler.
            raw_timestamp = msg.get('timestamp')
            try:
                if isinstance(raw_timestamp, dict) and '$date' in raw_timestamp:
                    timestamp = pd.to_datetime(raw_timestamp['$date'])
                elif isinstance(raw_timestamp, str):
                    timestamp = pd.to_datetime(raw_timestamp)
                else:
                    raise ValueError("Invalid timestamp format")
            except (ValueError, TypeError, OverflowError) as exc:
                log.warning("Error parsing timestamp: %r, error: %s", raw_timestamp, exc)
                timestamp = pd.NaT

            prompt = msg['prompt']
            lowered = prompt.lower()
            rows.append({
                'user_id': user_id,
                'timestamp': timestamp,
                'prompt': prompt,
                'response': msg['response'],
                'is_question': is_question(lowered),
                'shows_confusion': shows_confusion(lowered),
                'is_followup': is_followup(lowered),
            })

    columns = [
        'user_id', 'timestamp', 'prompt', 'response',
        'is_question', 'shows_confusion', 'is_followup',
    ]
    # Explicit columns keep the schema intact when there are no messages.
    df = pd.DataFrame(rows, columns=columns)
    df['sentiment'] = df['prompt'].apply(lambda text: TextBlob(text).sentiment.polarity)
    return df
|
103 |
+
|
104 |
+
def extract_topic_hierarchies(self, df: pd.DataFrame) -> Dict[str, List[str]]:
    """Map each main topic to the subtopics mentioned alongside it.

    Every prompt is parsed with ``self.nlp``. Noun chunks whose root has the
    dependency label ``nsubj`` or ``dobj`` count as main topics; all other
    noun chunks of the same prompt become subtopics of each of those main
    topics. Chunk text is lower-cased.

    Returns a dict of main topic -> sorted list of unique subtopics. Sorting
    keeps the output identical between runs, which set iteration order does
    not guarantee for strings.
    """
    hierarchy = defaultdict(set)

    # Only the prompt column is needed, so iterate it directly.
    for prompt in df['prompt']:
        main_topics = []
        subtopics = []
        for chunk in self.nlp(prompt).noun_chunks:
            target = main_topics if chunk.root.dep_ in ('nsubj', 'dobj') else subtopics
            target.append(chunk.text.lower())

        for main_topic in main_topics:
            hierarchy[main_topic].update(subtopics)

    return {topic: sorted(found) for topic, found in hierarchy.items()}
|
127 |
+
|
128 |
+
def analyze_topic_difficulty(self, df: pd.DataFrame, topic: str) -> TopicDifficulty:
    """Classify how hard a topic appears to be from the messages mentioning it.

    Messages whose prompt contains ``topic`` (case-insensitive, literal match)
    are scored as 0.4 * confusion rate + 0.3 * question rate + 0.2 * follow-up
    rate + 0.1 * negativity, where negativity is the mean sentiment mapped
    from [-1, 1] onto [1, 0]. Scores below 0.3 / 0.5 / 0.7 map to EASY /
    MODERATE / DIFFICULT, anything else to VERY_DIFFICULT.

    A topic with no matching messages is reported as EASY.
    """
    # regex=False: topics are literal phrases, and text such as "c++" or
    # "f(x" is invalid or misleading when interpreted as a pattern.
    mentions = df['prompt'].str.contains(topic, case=False, regex=False, na=False)
    topic_msgs = df[mentions]

    if topic_msgs.empty:
        # Every mean would be NaN here, all threshold comparisons would be
        # False, and the topic would be labelled VERY_DIFFICULT.
        return TopicDifficulty.EASY

    confusion_rate = topic_msgs['shows_confusion'].mean()
    question_rate = topic_msgs['is_question'].mean()
    follow_up_rate = topic_msgs['is_followup'].mean()
    negativity = 1 - (topic_msgs['sentiment'].mean() + 1) / 2

    difficulty_score = (
        confusion_rate * 0.4
        + question_rate * 0.3
        + follow_up_rate * 0.2
        + negativity * 0.1
    )

    upper_bounds = (
        (0.3, TopicDifficulty.EASY),
        (0.5, TopicDifficulty.MODERATE),
        (0.7, TopicDifficulty.DIFFICULT),
    )
    for bound, level in upper_bounds:
        if difficulty_score < bound:
            return level
    return TopicDifficulty.VERY_DIFFICULT
|
155 |
+
|
156 |
+
def identify_confusion_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
    """Return representative sentences in which students express confusion about a topic.

    Prompts that mention ``topic`` (case-insensitive, literal match) and are
    flagged ``shows_confusion`` are split into sentences with ``self.nlp``.
    Sentences containing one of ``self.confusion_indicators`` are collected,
    grouped by TF-IDF cosine similarity above 0.5, and the earliest sentence
    of each group is returned. Sentences with no similar neighbour form their
    own group.
    """
    import re  # local import: the module header does not import ``re``

    mentions = df['prompt'].str.contains(topic, case=False, regex=False, na=False)
    confused_prompts = df.loc[mentions & df['shows_confusion'], 'prompt']

    # Sentence-level phrase matching: a token-by-token test can never match
    # the multi-word indicators ("don't understand", "not clear", ...).
    terms = sorted(self.confusion_indicators, key=len, reverse=True)
    if not terms:
        return []
    indicator_re = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(term) for term in terms) + r")(?!\w)"
    )

    patterns = []
    for prompt in confused_prompts:
        for sent in self.nlp(prompt).sents:
            if indicator_re.search(sent.text.lower()):
                patterns.append(sent.text)

    if not patterns:
        return []

    try:
        tfidf_matrix = TfidfVectorizer(ngram_range=(1, 3)).fit_transform(patterns)
    except ValueError:
        # Raised when no usable vocabulary can be built from the sentences;
        # fall back to the distinct sentences themselves.
        return list(dict.fromkeys(patterns))
    similarity_matrix = cosine_similarity(tfidf_matrix)

    graph = nx.Graph()
    # Register every sentence as a node first; a graph built from edges alone
    # silently loses sentences that are not similar to any other.
    graph.add_nodes_from(range(len(patterns)))
    rows, cols = np.where(np.triu(similarity_matrix, k=1) > 0.5)
    graph.add_edges_from(zip(rows.tolist(), cols.tolist()))

    return [patterns[min(cluster)] for cluster in nx.connected_components(graph)]
|
193 |
+
|
194 |
+
def analyze_question_patterns(self, df: pd.DataFrame, topic: str) -> List[str]:
    """Summarise the kinds of questions asked about a topic.

    Questions mentioning ``topic`` (case-insensitive, literal match) are
    sorted into ``conceptual``, ``procedural``, ``reasoning`` or ``other``
    by the cue words they contain. For each non-empty category the three
    n-grams with the highest mean TF-IDF weight are returned as
    ``"<category>: <ngram> <ngram> <ngram>"``.
    """
    mentions = df['prompt'].str.contains(topic, case=False, regex=False, na=False)
    topic_questions = df.loc[mentions & df['is_question'], 'prompt']

    # Checked in this order; the first category with a matching cue word wins.
    cue_words = (
        ("conceptual", {"what", "define", "explain"}),
        ("procedural", {"how", "steps", "process"}),
        ("reasoning", {"why", "reason", "because"}),
    )

    question_types = defaultdict(list)
    for question in topic_questions:
        words = {token.text.lower() for token in self.nlp(question)}
        category = next((name for name, cues in cue_words if words & cues), "other")
        question_types[category].append(question)

    patterns = []
    for category, questions in question_types.items():
        vectorizer = TfidfVectorizer(ngram_range=(1, 3))
        try:
            tfidf_matrix = vectorizer.fit_transform(questions)
        except ValueError:
            # No usable vocabulary for this category; skip it rather than
            # aborting the whole analysis.
            continue

        # Mean weight per n-gram across the category's questions.
        mean_weights = np.asarray(tfidf_matrix.mean(axis=0)).ravel()
        top_three = np.argsort(mean_weights)[::-1][:3]
        features = vectorizer.get_feature_names_out()
        patterns.append(f"{category}: {' '.join(features[top_three])}")

    return patterns
|
230 |
+
|
231 |
+
def analyze_time_distribution(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
    """Summarise the time gaps between consecutive messages about a topic.

    Messages mentioning ``topic`` (case-insensitive, literal match) are put in
    chronological order and the gap to the previous message is computed. All
    figures are in minutes. Returns an empty dict when fewer than two messages
    mention the topic.

    NOTE(review): gaps are taken across all users' messages together, not per
    user - confirm that this is the intended meaning of "time spent".
    """
    mentions = df['prompt'].str.contains(topic, case=False, regex=False, na=False)
    # Sort first: diff() on unordered timestamps produces negative gaps.
    topic_msgs = df.loc[mentions].sort_values('timestamp', kind='stable')
    if len(topic_msgs) < 2:
        return {}

    gaps = topic_msgs['timestamp'].diff()

    def minutes(delta):
        # mean()/max() are NaT when no gap could be computed.
        return 0.0 if pd.isna(delta) else delta.total_seconds() / 60

    return {
        'total_time': minutes(gaps.sum()),
        'avg_time_per_message': minutes(gaps.mean()),
        'max_time_gap': minutes(gaps.max()),
        'time_spent_on_questions': minutes(gaps[topic_msgs['is_question']].sum()),
        'time_spent_on_confusion': minutes(gaps[topic_msgs['shows_confusion']].sum()),
    }
|
249 |
+
|
250 |
+
def calculate_engagement_metrics(self, df: pd.DataFrame, topic: str) -> Dict[str, float]:
    """Compute engagement figures for the messages that mention a topic.

    Returns ``message_count``, the mean of the ``is_question`` /
    ``shows_confusion`` / ``is_followup`` flags, ``avg_sentiment`` and an
    ``engagement_score`` of 0.3 * message_count + 0.25 * question_ratio +
    0.25 * follow_up_ratio + 0.2 * sentiment rescaled to [0, 1].

    When no message mentions the topic every figure is 0 rather than NaN, so
    the result stays valid JSON.
    """
    mentions = df['prompt'].str.contains(topic, case=False, regex=False, na=False)
    topic_msgs = df[mentions]
    message_count = len(topic_msgs)

    if message_count == 0:
        return {
            'message_count': 0,
            'question_ratio': 0.0,
            'confusion_ratio': 0.0,
            'follow_up_ratio': 0.0,
            'avg_sentiment': 0.0,
            'engagement_score': 0.0,
        }

    metrics = {
        'message_count': message_count,
        'question_ratio': float(topic_msgs['is_question'].mean()),
        'confusion_ratio': float(topic_msgs['shows_confusion'].mean()),
        'follow_up_ratio': float(topic_msgs['is_followup'].mean()),
        'avg_sentiment': float(topic_msgs['sentiment'].mean()),
    }

    sentiment_unit = (metrics['avg_sentiment'] + 1) / 2  # [-1, 1] -> [0, 1]
    metrics['engagement_score'] = (
        metrics['message_count'] * 0.3
        + metrics['question_ratio'] * 0.25
        + metrics['follow_up_ratio'] * 0.25
        + sentiment_unit * 0.2
    )
    return metrics
|
272 |
+
|
273 |
+
def generate_topic_insights(self, df: pd.DataFrame, topic: str) -> TopicInsights:
    """Run every per-topic analysis and bundle the results into a TopicInsights."""
    level = self.analyze_topic_difficulty(df, topic)
    confusions = self.identify_confusion_patterns(df, topic)
    questions = self.analyze_question_patterns(df, topic)
    timing = self.analyze_time_distribution(df, topic)
    engagement = self.calculate_engagement_metrics(df, topic)

    hard_levels = (TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT)
    question_time = timing.get('time_spent_on_questions', 0)
    half_of_total_time = timing.get('total_time', 0) * 0.5

    # (condition, advice) pairs, evaluated in the order the advice is listed.
    checks = [
        (level in hard_levels,
         "Fundamental concept reinforcement needed"),
        (bool(confusions),
         f"Address common confusion around: {', '.join(confusions[:3])}"),
        (engagement['confusion_ratio'] > 0.3,
         "Consider alternative teaching approaches"),
        (question_time > half_of_total_time,
         "More practical examples or demonstrations needed"),
    ]
    focus_areas = [advice for applies, advice in checks if applies]

    return TopicInsights(
        difficulty_level=level,
        common_confusion_points=confusions,
        question_patterns=questions,
        time_distribution=timing,
        engagement_metrics=engagement,
        recommended_focus_areas=focus_areas,
    )
|
304 |
+
|
305 |
+
def analyze_student_progress(self, df: pd.DataFrame) -> Dict[str, Any]:
    """Summarise each student's activity and per-topic engagement.

    Returns a dict keyed by ``user_id``. Each value holds the message count,
    number of questions, number of confusion instances, mean sentiment, the
    label from ``self._identify_learning_pattern`` and a ``topic_engagement``
    dict (message count, confusion rate and sentiment slope per topic found
    by ``self.extract_topic_hierarchies`` in that student's messages).
    """
    student_progress = {}

    # sort=False keeps students in order of first appearance.
    for student_id, student_msgs in df.groupby('user_id', sort=False):
        progress = {
            'total_messages': len(student_msgs),
            'questions_asked': student_msgs['is_question'].sum(),
            'confusion_instances': student_msgs['shows_confusion'].sum(),
            'avg_sentiment': student_msgs['sentiment'].mean(),
            'topic_engagement': {},
            'learning_pattern': self._identify_learning_pattern(student_msgs),
        }

        for topic in self.extract_topic_hierarchies(student_msgs):
            mentions = student_msgs['prompt'].str.contains(
                topic, case=False, regex=False, na=False
            )
            topic_msgs = student_msgs[mentions]
            count = len(topic_msgs)

            if count >= 2:
                sentiment_slope = stats.linregress(
                    range(count), topic_msgs['sentiment']
                ).slope
            else:
                # A trend needs two points; linregress raises on empty input
                # and returns NaN for a single sample.
                sentiment_slope = 0.0

            progress['topic_engagement'][topic] = {
                'message_count': count,
                'confusion_rate': topic_msgs['shows_confusion'].mean() if count else 0.0,
                'sentiment_trend': sentiment_slope,
            }

        student_progress[student_id] = progress

    return student_progress
|
338 |
+
|
339 |
+
def _identify_learning_pattern(self, student_msgs: pd.DataFrame) -> str:
|
340 |
+
"""Identify student's learning pattern based on their interaction style."""
|
341 |
+
# Calculate key metrics
|
342 |
+
question_ratio = student_msgs['is_question'].mean()
|
343 |
+
confusion_ratio = student_msgs['shows_confusion'].mean()
|
344 |
+
follow_up_ratio = student_msgs['is_followup'].mean()
|
345 |
+
sentiment_trend = stats.linregress(
|
346 |
+
range(len(student_msgs)),
|
347 |
+
student_msgs['sentiment']
|
348 |
+
).slope
|
349 |
+
|
350 |
+
# Identify pattern
|
351 |
+
if question_ratio > 0.6:
|
352 |
+
return "Inquisitive Learner"
|
353 |
+
elif confusion_ratio > 0.4:
|
354 |
+
return "Needs Additional Support"
|
355 |
+
elif follow_up_ratio > 0.5:
|
356 |
+
return "Deep Dive Learner"
|
357 |
+
elif sentiment_trend > 0:
|
358 |
+
return "Progressive Learner"
|
359 |
+
else:
|
360 |
+
return "Steady Learner"
|
361 |
+
|
362 |
+
def generate_comprehensive_report(self, chat_history: List[Dict]) -> Dict[str, Any]:
    """Build the full analytics report for a chat history.

    The report contains ``topics`` (insights per main topic and subtopic),
    ``student_progress``, ``overall_metrics``, ``temporal_analysis`` and
    ``recommendations``. Keys of ``daily_engagement`` are ISO date strings and
    keys of ``peak_activity_hours`` are plain ints, so the temporal section
    can be passed to ``json.dumps``.
    """
    df = self.preprocess_chat_history(chat_history)
    topics = self.extract_topic_hierarchies(df)

    report = {
        'topics': {},
        'student_progress': self.analyze_student_progress(df),
        'overall_metrics': {
            'total_conversations': len(df),
            'unique_students': df['user_id'].nunique(),
            'avg_sentiment': df['sentiment'].mean(),
            'most_discussed_topics': Counter(
                subtopic
                for subtopic_list in topics.values()
                for subtopic in subtopic_list
            ).most_common(5),
        },
    }

    # Slice the frame per student once instead of once per subtopic.
    frames_by_student = [
        (student_id, df[df['user_id'] == student_id])
        for student_id in df['user_id'].unique()
    ]

    for main_topic, subtopics in topics.items():
        subtopic_insights = {}
        for subtopic in subtopics:
            subtopic_insights[subtopic] = {
                'insights': self.generate_topic_insights(df, subtopic),
                'related_topics': [other for other in subtopics if other != subtopic],
                'student_engagement': {
                    student_id: self.calculate_engagement_metrics(frame, subtopic)
                    for student_id, frame in frames_by_student
                },
            }

        report['topics'][main_topic] = {
            'insights': self.generate_topic_insights(df, main_topic),
            'subtopics': subtopic_insights,
            'topic_relationships': {
                'hierarchy_depth': len(subtopics),
                'connection_strength': self._calculate_topic_connections(df, main_topic, subtopics),
                'progression_path': self._identify_topic_progression(df, main_topic, subtopics),
            },
        }

    daily_engagement = df.groupby(df['timestamp'].dt.date).agg({
        'user_id': 'count',
        'is_question': 'sum',
        'shows_confusion': 'sum',
        'sentiment': 'mean',
    })
    # json.dumps rejects datetime.date dictionary keys (an encoder's default()
    # hook is never consulted for keys), so store the dates as strings.
    daily_engagement.index = daily_engagement.index.map(str)

    messages_per_hour = df.groupby(df['timestamp'].dt.hour)['user_id'].count()
    peak_activity_hours = {
        # Hours come back as floats when any timestamp is NaT.
        int(hour): int(count)
        for hour, count in messages_per_hour.nlargest(3).items()
    }

    report['temporal_analysis'] = {
        'daily_engagement': daily_engagement.to_dict(),
        'peak_activity_hours': peak_activity_hours,
        'learning_trends': self._analyze_learning_trends(df),
    }

    report['recommendations'] = self._generate_recommendations(report)
    return report
|
426 |
+
|
427 |
+
def _calculate_topic_connections(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> Dict[str, float]:
|
428 |
+
"""Calculate connection strength between topics based on co-occurrence."""
|
429 |
+
connections = {}
|
430 |
+
main_topic_msgs = df[df['prompt'].str.contains(main_topic, case=False)]
|
431 |
+
|
432 |
+
for subtopic in subtopics:
|
433 |
+
cooccurrence = df[
|
434 |
+
df['prompt'].str.contains(main_topic, case=False) &
|
435 |
+
df['prompt'].str.contains(subtopic, case=False)
|
436 |
+
].shape[0]
|
437 |
+
|
438 |
+
connection_strength = cooccurrence / len(main_topic_msgs) if len(main_topic_msgs) > 0 else 0
|
439 |
+
connections[subtopic] = connection_strength
|
440 |
+
|
441 |
+
return connections
|
442 |
+
|
443 |
+
def _identify_topic_progression(self, df: pd.DataFrame, main_topic: str, subtopics: List[str]) -> List[str]:
|
444 |
+
"""Identify optimal topic progression path based on student interactions."""
|
445 |
+
topic_difficulties = {}
|
446 |
+
|
447 |
+
for subtopic in subtopics:
|
448 |
+
difficulty = self.analyze_topic_difficulty(df, subtopic)
|
449 |
+
topic_difficulties[subtopic] = difficulty.value
|
450 |
+
|
451 |
+
# Sort subtopics by difficulty
|
452 |
+
return sorted(subtopics, key=lambda x: topic_difficulties[x])
|
453 |
+
|
454 |
+
def _analyze_learning_trends(self, df: pd.DataFrame) -> Dict[str, Any]:
|
455 |
+
"""Analyze overall learning trends across the dataset."""
|
456 |
+
return {
|
457 |
+
'sentiment_trend': stats.linregress(
|
458 |
+
range(len(df)),
|
459 |
+
df['sentiment']
|
460 |
+
)._asdict(),
|
461 |
+
'confusion_trend': stats.linregress(
|
462 |
+
range(len(df)),
|
463 |
+
df['shows_confusion']
|
464 |
+
)._asdict(),
|
465 |
+
'engagement_progression': self._calculate_engagement_progression(df)
|
466 |
+
}
|
467 |
+
|
468 |
+
def _calculate_engagement_progression(self, df: pd.DataFrame) -> Dict[str, float]:
|
469 |
+
"""Calculate how student engagement changes over time."""
|
470 |
+
df['week'] = df['timestamp'].dt.isocalendar().week
|
471 |
+
weekly_engagement = df.groupby('week').agg({
|
472 |
+
'is_question': 'mean',
|
473 |
+
'shows_confusion': 'mean',
|
474 |
+
'is_followup': 'mean',
|
475 |
+
'sentiment': 'mean'
|
476 |
+
})
|
477 |
+
|
478 |
+
return {
|
479 |
+
'question_trend': stats.linregress(
|
480 |
+
range(len(weekly_engagement)),
|
481 |
+
weekly_engagement['is_question']
|
482 |
+
).slope,
|
483 |
+
'confusion_trend': stats.linregress(
|
484 |
+
range(len(weekly_engagement)),
|
485 |
+
weekly_engagement['shows_confusion']
|
486 |
+
).slope,
|
487 |
+
'follow_up_trend': stats.linregress(
|
488 |
+
range(len(weekly_engagement)),
|
489 |
+
weekly_engagement['is_followup']
|
490 |
+
).slope,
|
491 |
+
'sentiment_trend': stats.linregress(
|
492 |
+
range(len(weekly_engagement)),
|
493 |
+
weekly_engagement['sentiment']
|
494 |
+
).slope
|
495 |
+
}
|
496 |
+
|
497 |
+
def _generate_recommendations(self, report: Dict[str, Any]) -> List[str]:
|
498 |
+
"""Generate actionable recommendations based on the analysis."""
|
499 |
+
recommendations = []
|
500 |
+
|
501 |
+
# Analyze difficulty distribution
|
502 |
+
difficult_topics = [
|
503 |
+
topic for topic, data in report['topics'].items()
|
504 |
+
if data['insights'].difficulty_level in
|
505 |
+
(TopicDifficulty.DIFFICULT, TopicDifficulty.VERY_DIFFICULT)
|
506 |
+
]
|
507 |
+
|
508 |
+
if difficult_topics:
|
509 |
+
recommendations.append(
|
510 |
+
f"Consider providing additional resources for challenging topics: {', '.join(difficult_topics)}"
|
511 |
+
)
|
512 |
+
|
513 |
+
# Analyze student engagement
|
514 |
+
avg_engagement = np.mean([
|
515 |
+
progress['questions_asked'] / progress['total_messages']
|
516 |
+
for progress in report['student_progress'].values()
|
517 |
+
])
|
518 |
+
|
519 |
+
if avg_engagement < 0.3:
|
520 |
+
recommendations.append(
|
521 |
+
"Implement more interactive elements to increase student engagement"
|
522 |
+
)
|
523 |
+
|
524 |
+
# Analyze temporal patterns
|
525 |
+
peak_hours = list(report['temporal_analysis']['peak_activity_hours'].keys())
|
526 |
+
recommendations.append(
|
527 |
+
f"Consider scheduling additional support during peak activity hours: {peak_hours}"
|
528 |
+
)
|
529 |
+
|
530 |
+
# Analyze learning trends
|
531 |
+
# sentiment_trend = report['temporal_analysis']['learning_trends']['sentiment_trend']
|
532 |
+
# if sentiment_trend < 0:
|
533 |
+
# recommendations.append(
|
534 |
+
# "Review teaching approach to address declining student satisfaction"
|
535 |
+
# )
|
536 |
+
# Analyze learning trends
|
537 |
+
# Analyze learning trends
|
538 |
+
sentiment_trend = report.get('temporal_analysis', {}).get('learning_trends', {}).get('sentiment_trend', None)
|
539 |
+
if isinstance(sentiment_trend, (int, float)):
|
540 |
+
if sentiment_trend < 0:
|
541 |
+
recommendations.append(
|
542 |
+
"Review teaching approach to address declining student satisfaction"
|
543 |
+
)
|
544 |
+
elif isinstance(sentiment_trend, dict):
|
545 |
+
# Handle the case where sentiment_trend is a dictionary
|
546 |
+
print(f"Unexpected dict format for sentiment_trend: {sentiment_trend}")
|
547 |
+
else:
|
548 |
+
print(f"Unexpected type for sentiment_trend: {type(sentiment_trend)}")
|
549 |
+
|
550 |
+
return recommendations
|
551 |
+
|
552 |
+
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder for report values the stock encoder rejects.

    Handles enum members (their ``value``), NumPy scalars and arrays,
    date/datetime-like objects (via ``isoformat()``), sets, and any object
    exposing a ``to_dict()`` method.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj`` or defer to the base class."""
        if isinstance(obj, Enum):
            return obj.value
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, (set, frozenset)):
            return list(obj)
        # Covers datetime, date and pandas Timestamp without importing them.
        isoformat = getattr(obj, "isoformat", None)
        if callable(isoformat):
            return isoformat()
        to_dict = getattr(obj, "to_dict", None)
        if callable(to_dict):
            return to_dict()
        return super().default(obj)
|
567 |
+
|
568 |
+
def convert_insights_to_dict(report):
    """Replace TopicInsights objects in ``report['topics']`` with dicts, in place.

    Both the topic-level and the subtopic-level ``'insights'`` entries are
    converted via ``to_dict()``. Nothing is returned.
    """
    def _flatten(entry):
        current = entry['insights']
        if isinstance(current, TopicInsights):
            entry['insights'] = current.to_dict()

    for topic_entry in report['topics'].values():
        _flatten(topic_entry)
        for subtopic_entry in topic_entry['subtopics'].values():
            _flatten(subtopic_entry)
|
575 |
+
|
576 |
+
if __name__ == "__main__":
    # Read the sample chat corpus shipped under sample_files/.
    with open('sample_files/chat_history_corpus.json', 'r', encoding="utf-8") as corpus_file:
        chat_history = json.load(corpus_file)

    # Build the analytics engine and run the full report over the corpus.
    analytics = PreClassAnalytics()
    report = analytics.generate_comprehensive_report(chat_history)

    # CustomJSONEncoder serialises the non-native values, so the report is
    # dumped directly without calling convert_insights_to_dict first.
    print(json.dumps(report, indent=4, cls=CustomJSONEncoder))
|
592 |
# print(report)
|
requirements.txt
CHANGED
@@ -1,31 +1,36 @@
|
|
1 |
-
streamlit
|
2 |
-
pymongo
|
3 |
-
PyPDF2
|
4 |
-
python-docx
|
5 |
-
openai
|
6 |
-
google-generativeai
|
7 |
-
llama-index
|
8 |
-
werkzeug
|
9 |
-
numpy
|
10 |
-
pandas
|
11 |
-
plotly
|
12 |
-
scikit-learn
|
13 |
-
networkx
|
14 |
-
community
|
15 |
-
umap-learn
|
16 |
-
seaborn
|
17 |
-
matplotlib
|
18 |
-
scipy
|
19 |
-
Pillow
|
20 |
-
python-dotenv
|
21 |
-
zoomus
|
22 |
-
asyncio
|
23 |
-
google-auth-oauthlib
|
24 |
-
google-auth
|
25 |
-
transformers
|
26 |
-
textstat
|
27 |
-
spacy
|
28 |
-
streamlit_option_menu
|
29 |
-
beautifulsoup4
|
30 |
-
youtube-transcript-api
|
31 |
-
requests
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pymongo
|
3 |
+
PyPDF2
|
4 |
+
python-docx
|
5 |
+
openai
|
6 |
+
google-generativeai
|
7 |
+
llama-index
|
8 |
+
werkzeug
|
9 |
+
numpy
|
10 |
+
pandas
|
11 |
+
plotly
|
12 |
+
scikit-learn
|
13 |
+
networkx
|
14 |
+
community
|
15 |
+
umap-learn
|
16 |
+
seaborn
|
17 |
+
matplotlib
|
18 |
+
scipy
|
19 |
+
Pillow
|
20 |
+
python-dotenv
|
21 |
+
zoomus
|
22 |
+
asyncio
|
23 |
+
google-auth-oauthlib
|
24 |
+
google-auth
|
25 |
+
transformers
|
26 |
+
textstat
|
27 |
+
spacy
|
28 |
+
streamlit_option_menu
|
29 |
+
beautifulsoup4
|
30 |
+
youtube-transcript-api
|
31 |
+
requests
|
32 |
+
xml==0.0.1
|
33 |
+
networkx==3.1
|
34 |
+
bokeh==3.2.1
|
35 |
+
scikit-learn==1.2.2
|
36 |
+
langchain==0.0.208
|
research22.py
ADDED
@@ -0,0 +1,517 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# if __name__ == "__main__":
|
2 |
+
# main()
|
3 |
+
import streamlit as st
|
4 |
+
import google.generativeai as genai
|
5 |
+
from typing import Dict, Any
|
6 |
+
import PyPDF2
|
7 |
+
import io
|
8 |
+
from pymongo import MongoClient
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
import os
|
11 |
+
import json
|
12 |
+
import re
|
13 |
+
|
14 |
+
# --------------------------------------------------------------------------------
|
15 |
+
# 1. Environment Setup
|
16 |
+
# --------------------------------------------------------------------------------
|
17 |
+
load_dotenv()

# MongoDB
# The connection string is read from the environment. "MONGODB_URI" is checked
# first; the historical (apparently misspelled) name "MONGODB_UR" is still
# honoured so existing deployments keep working.
# NOTE(security): the fallback below embeds credentials in source control.
# Rotate them and drop the literal once every deployment sets the variable.
MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv(
    "MONGODB_UR",
    "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
)

# Gemini
# NOTE(security): the default below is a hard-coded API key; prefer setting
# GEMINI_KEY in the environment / .env file and rotating this value.
GEMINI_KEY = os.getenv("GEMINI_KEY", "AIzaSyCFIvntck54HOCS5pxxiy9wpr5HJN3r02I")

# Configure Gemini
genai.configure(api_key=GEMINI_KEY)
|
28 |
+
|
29 |
+
|
30 |
+
# --------------------------------------------------------------------------------
|
31 |
+
# 2. Database Connection
|
32 |
+
# --------------------------------------------------------------------------------
|
33 |
+
def create_db_connection():
    """
    Connect to MongoDB and return the ``novascholar_db`` database handle.

    Returns:
        The ``novascholar_db`` database object on success, or ``None`` when the
        client cannot be created or the server does not answer the ping. The
        failure is reported to the user through ``st.error``.

    Note:
        Callers must test the result with ``is None`` / ``is not None``;
        PyMongo database objects do not support truth-value testing.
    """
    try:
        client = MongoClient(MONGODB_URI)
        # Ping to confirm the server is actually reachable before handing the
        # database back to the caller.
        client.admin.command("ping")
        return client["novascholar_db"]  # Database name
    except Exception as e:
        st.error(f"Database connection error: {str(e)}")
        return None
|
47 |
+
|
48 |
+
|
49 |
+
# --------------------------------------------------------------------------------
|
50 |
+
# 3. PDF Text Extraction
|
51 |
+
# --------------------------------------------------------------------------------
|
52 |
+
def extract_text_from_pdf(pdf_file) -> str:
    """
    Return the text of every page of a PDF, one page per line-terminated chunk.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts (path or binary stream).

    Returns:
        The page texts concatenated in order, each followed by a newline.
        On any error the problem is shown via ``st.error`` and an empty
        string is returned.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        # Each page contributes its text plus a trailing newline.
        return "".join(page.extract_text() + "\n" for page in reader.pages)
    except Exception as exc:
        st.error(f"Error processing PDF: {exc}")
        return ""
|
65 |
+
|
66 |
+
|
67 |
+
# --------------------------------------------------------------------------------
|
68 |
+
# 4. Gemini Response Helper
|
69 |
+
# --------------------------------------------------------------------------------
|
70 |
+
def get_gemini_response(prompt: str) -> str:
    """
    Submit ``prompt`` to the "gemini-pro" model and return the reply text.

    Args:
        prompt: The full prompt to send.

    Returns:
        The text of the model's response, or an empty string if the call
        raised (the error is surfaced with ``st.error``).
    """
    try:
        reply = genai.GenerativeModel("gemini-pro").generate_content(prompt)
        return reply.text
    except Exception as exc:
        st.error(f"Gemini API Error: {exc}")
        return ""
|
82 |
+
|
83 |
+
|
84 |
+
# --------------------------------------------------------------------------------
|
85 |
+
# 5. Basic Info Extraction
|
86 |
+
# --------------------------------------------------------------------------------
|
87 |
+
def extract_basic_info(text: str) -> Dict[str, str]:
    """
    Ask Gemini for the bibliographic fields of a paper and parse the reply.

    The model is asked to answer with one ``Label: value`` pair per line for
    Title, Publication, Journal_Conference, Abstract, Keywords, Author and
    Date_of_Publication.

    Args:
        text: Full text of the paper.

    Returns:
        A dict mapping every label found before the first colon of a reply
        line to the (stripped) remainder of that line. Empty dict when the
        model returned nothing.
    """
    prompt = f"""
    Extract the following fields from the research paper text below:

    Title
    Publication
    Journal_Conference
    Abstract
    Keywords
    Author
    Date_of_Publication

    Paper text:
    {text}

    Return them in this format:
    Title: ...
    Publication: ...
    Journal_Conference: ...
    Abstract: ...
    Keywords: ...
    Author: ...
    Date_of_Publication: ...
    """
    reply = get_gemini_response(prompt)
    if not reply:
        return {}

    parsed = {}
    for raw_line in reply.split("\n"):
        # Split on the first colon only; lines without one are ignored.
        label, colon, remainder = raw_line.partition(":")
        if colon:
            parsed[label.strip()] = remainder.strip()
    return parsed
|
125 |
+
|
126 |
+
|
127 |
+
# --------------------------------------------------------------------------------
|
128 |
+
# 6. Content Sections Extraction
|
129 |
+
# --------------------------------------------------------------------------------
|
130 |
+
def extract_content_sections(text: str) -> Dict[str, str]:
    """
    Ask Gemini for the main narrative sections of a paper and parse the reply.

    Requested labels: Intro, Literature_Review, Research_Models_Used,
    Methodology, Discussion, Future_Scope and Theory, one ``Label: value``
    pair per line.

    Args:
        text: Full text of the paper.

    Returns:
        A dict mapping each label found before the first colon of a reply
        line to the stripped remainder of that line. Empty dict when the
        model returned nothing.
    """
    prompt = f"""Please extract these sections from the research paper:
    1. Introduction
    2. Literature Review
    3. Research Models Used
    4. Methodology
    5. Discussion
    6. Future Scope
    7. Theory

    Paper text: {text}

    Return in this exact format without any additional text or explanations also make sure
    no data should be empty (at least 10-15 words) and it should be meaningful:
    Intro: <text>
    Literature_Review: <text>
    Research_Models_Used: <text>
    Methodology: <text>
    Discussion: <text>
    Future_Scope: <text>
    Theory: <text>
    """
    reply = get_gemini_response(prompt)
    if not reply:
        return {}

    # Split each reply line on its first colon; lines without a colon are skipped.
    pairs = (raw_line.partition(":") for raw_line in reply.split("\n"))
    return {
        label.strip(): remainder.strip()
        for label, colon, remainder in pairs
        if colon
    }
|
166 |
+
|
167 |
+
|
168 |
+
# --------------------------------------------------------------------------------
|
169 |
+
# 7. Variables Extraction
|
170 |
+
# --------------------------------------------------------------------------------
|
171 |
+
def extract_variables(text: str) -> Dict[str, Any]:
    """
    Ask Gemini for the study variables of a paper and parse the reply.

    Requested labels: Independent_Variables, Dependent_Variables,
    Control_Variables, Extraneous_Variables and the matching ``nof_*``
    counters, one ``Label: value`` pair per line.

    Args:
        text: Full text of the paper.

    Returns:
        A dict of the parsed pairs. Values of labels starting with ``nof_``
        are converted to ``int`` (``0`` when the text is not a plain
        integer); all other values are stripped strings. Empty dict when the
        model returned nothing.
    """
    prompt = f"""From the paper text, extract the following fields:
    1. Independent_Variables
    2. nof_Independent_Variables
    3. Dependent_Variables
    4. nof_Dependent_Variables
    5. Control_Variables
    6. Extraneous_Variables
    7. nof_Control_Variables
    8. nof_Extraneous_Variables

    Return them in this format:
    Independent_Variables: <list>
    nof_Independent_Variables: <integer>
    Dependent_Variables: <list>
    nof_Dependent_Variables: <integer>
    Control_Variables: <list>
    Extraneous_Variables: <list>
    nof_Control_Variables: <integer>
    nof_Extraneous_Variables: <integer>

    Paper text: {text}
    """
    reply = get_gemini_response(prompt)
    if not reply:
        return {}

    parsed = {}
    for raw_line in reply.split("\n"):
        label, colon, remainder = raw_line.partition(":")
        if not colon:
            continue
        name = label.strip()
        content = remainder.strip()
        if not name.startswith("nof_"):
            parsed[name] = content
            continue
        # Counter fields: coerce to int, defaulting to 0 on unparsable text.
        try:
            parsed[name] = int(content)
        except ValueError:
            parsed[name] = 0
    return parsed
|
219 |
+
|
220 |
+
|
221 |
+
# --------------------------------------------------------------------------------
|
222 |
+
# 8. Utility to ensure no empty fields (example logic)
|
223 |
+
# --------------------------------------------------------------------------------
|
224 |
+
def ensure_non_empty_values(data: Dict[str, Any], fallback_text: str) -> Dict[str, Any]:
    """
    Replace empty or very short text fields with a placeholder message.

    A value counts as insufficient when it is falsy or contains fewer than
    three whitespace-separated words. Numeric values (such as the integer
    ``nof_*`` counters) are left untouched: a count is a single token by
    nature and must not be overwritten with placeholder text.

    Args:
        data: Extracted fields; modified in place.
        fallback_text: Currently unused; kept so existing callers keep working.

    Returns:
        The same ``data`` dict, for convenience.
    """
    for k, v in data.items():
        # Leave real numbers alone (bool is excluded: it is not a count).
        if isinstance(v, (int, float)) and not isinstance(v, bool):
            continue
        if not v or len(str(v).split()) < 3:  # example check for minimal words
            # Only existing keys are reassigned, so iterating items() is safe.
            data[k] = f"No sufficient data found for {k}. Could not parse."
    return data
|
232 |
+
|
233 |
+
|
234 |
+
# --------------------------------------------------------------------------------
|
235 |
+
# 9. Processing the Paper
|
236 |
+
# --------------------------------------------------------------------------------
|
237 |
+
# def process_paper(text: str) -> Dict[str, Any]:
|
238 |
+
# """
|
239 |
+
# Orchestrate calls to extract basic info, content sections, and variables.
|
240 |
+
# Return a dictionary containing all the fields with consistent naming.
|
241 |
+
# """
|
242 |
+
# with st.spinner("Extracting basic information..."):
|
243 |
+
# basic_info = extract_basic_info(text)
|
244 |
+
# basic_info = ensure_non_empty_values(basic_info, text)
|
245 |
+
|
246 |
+
# with st.spinner("Extracting content sections..."):
|
247 |
+
# content_sections = extract_content_sections(text)
|
248 |
+
# content_sections = ensure_non_empty_values(content_sections, text)
|
249 |
+
|
250 |
+
# with st.spinner("Extracting variables..."):
|
251 |
+
# variables_info = extract_variables(text)
|
252 |
+
# variables_info = ensure_non_empty_values(variables_info, text)
|
253 |
+
|
254 |
+
# # Create a single dictionary with all fields
|
255 |
+
# paper_doc = {
|
256 |
+
# "Title": basic_info.get("Title", ""),
|
257 |
+
# "Publication": basic_info.get("Publication", ""),
|
258 |
+
# "Journal_Conference": basic_info.get("Journal_Conference", ""),
|
259 |
+
# "Abstract": basic_info.get("Abstract", ""),
|
260 |
+
# "Keywords": basic_info.get("Keywords", ""),
|
261 |
+
# "Author": basic_info.get("Author", ""),
|
262 |
+
# "Date_of_Publication": basic_info.get("Date_of_Publication", ""),
|
263 |
+
# "Intro": content_sections.get("Intro", ""),
|
264 |
+
# "Literature_Review": content_sections.get("Literature_Review", ""),
|
265 |
+
# "Research_Models_Used": content_sections.get("Research_Models_Used", ""),
|
266 |
+
# "Methodology": content_sections.get("Methodology", ""),
|
267 |
+
# "Discussion": content_sections.get("Discussion", ""),
|
268 |
+
# "Future_Scope": content_sections.get("Future_Scope", ""),
|
269 |
+
# "Theory": content_sections.get("Theory", ""),
|
270 |
+
# "Independent_Variables": variables_info.get("Independent_Variables", ""),
|
271 |
+
# "nof_Independent_Variables": variables_info.get("nof_Independent_Variables", 0),
|
272 |
+
# "Dependent_Variables": variables_info.get("Dependent_Variables", ""),
|
273 |
+
# "nof_Dependent_Variables": variables_info.get("nof_Dependent_Variables", 0),
|
274 |
+
# "Control_Variables": variables_info.get("Control_Variables", ""),
|
275 |
+
# "Extraneous_Variables": variables_info.get("Extraneous_Variables", ""),
|
276 |
+
# "nof_Control_Variables": variables_info.get("nof_Control_Variables", 0),
|
277 |
+
# "nof_Extraneous_Variables": variables_info.get("nof_Extraneous_Variables", 0),
|
278 |
+
# }
|
279 |
+
|
280 |
+
# return paper_doc
|
281 |
+
|
282 |
+
# filepath: /c:/Users/acer/OneDrive/Documents/GitHub/res-cor/research22.py
|
283 |
+
# ...existing code continues...
|
284 |
+
|
285 |
+
# --------------------------------------------------------------------------------
|
286 |
+
# 3. Paper Type Attributes
|
287 |
+
# --------------------------------------------------------------------------------
|
288 |
+
PAPER_TYPE_ATTRIBUTES = {
|
289 |
+
"Review Based Paper": [
|
290 |
+
"Title",
|
291 |
+
"Publication",
|
292 |
+
"Journal_Conference",
|
293 |
+
"Abstract",
|
294 |
+
"Keywords",
|
295 |
+
"Author",
|
296 |
+
"Date_of_Publication",
|
297 |
+
"Intro",
|
298 |
+
"Literature_Review",
|
299 |
+
"Body",
|
300 |
+
"Protocol",
|
301 |
+
"Search String",
|
302 |
+
"Included Studies",
|
303 |
+
"Data Collection and Analysis Methods",
|
304 |
+
"Data Extraction Table",
|
305 |
+
"Synthesis and Analysis",
|
306 |
+
"Conclusion",
|
307 |
+
"Limitations",
|
308 |
+
"Results",
|
309 |
+
"References",
|
310 |
+
"Risk of Bias Assessment",
|
311 |
+
],
|
312 |
+
"Opinion/Perspective Based Paper": [
|
313 |
+
"Title",
|
314 |
+
"Publication",
|
315 |
+
"Journal_Conference",
|
316 |
+
"Abstract",
|
317 |
+
"Keywords",
|
318 |
+
"Author",
|
319 |
+
"Date_of_Publication",
|
320 |
+
"Intro",
|
321 |
+
"Literature_Review",
|
322 |
+
"Introduction",
|
323 |
+
"Body",
|
324 |
+
"Results and Discussion",
|
325 |
+
"Conclusion",
|
326 |
+
"References",
|
327 |
+
],
|
328 |
+
"Empirical Research Paper": [
|
329 |
+
"Title",
|
330 |
+
"Publication",
|
331 |
+
"Journal_Conference",
|
332 |
+
"Abstract",
|
333 |
+
"Keywords",
|
334 |
+
"Author",
|
335 |
+
"Date_of_Publication",
|
336 |
+
"Intro",
|
337 |
+
"Literature_Review",
|
338 |
+
"Introduction",
|
339 |
+
"Body",
|
340 |
+
"Methodology",
|
341 |
+
"Participants",
|
342 |
+
"Survey Instrument",
|
343 |
+
"Data Collection",
|
344 |
+
"Data Analysis",
|
345 |
+
"Results and Discussion",
|
346 |
+
"Conclusion",
|
347 |
+
"References",
|
348 |
+
],
|
349 |
+
"Research Paper (Other)": [
|
350 |
+
"Title",
|
351 |
+
"Publication",
|
352 |
+
"Journal_Conference",
|
353 |
+
"Abstract",
|
354 |
+
"Keywords",
|
355 |
+
"Author",
|
356 |
+
"Date_of_Publication",
|
357 |
+
"Intro",
|
358 |
+
"Literature_Review",
|
359 |
+
"Research_Models_Used",
|
360 |
+
"Methodology",
|
361 |
+
"Discussion",
|
362 |
+
"Future_Scope",
|
363 |
+
"Theory",
|
364 |
+
"Independent_Variables",
|
365 |
+
"nof_Independent_Variables",
|
366 |
+
"Dependent_Variables",
|
367 |
+
"nof_Dependent_Variables",
|
368 |
+
"Control_Variables",
|
369 |
+
"Extraneous_Variables",
|
370 |
+
"nof_Control_Variables",
|
371 |
+
"nof_Extraneous_Variables",
|
372 |
+
],
|
373 |
+
}
|
374 |
+
|
375 |
+
|
376 |
+
# --------------------------------------------------------------------------------
|
377 |
+
# 4. Extract Paper Fields
|
378 |
+
# --------------------------------------------------------------------------------
|
379 |
+
def extract_paper_fields(text: str, paper_type: str) -> Dict[str, Any]:
    """
    Use Gemini to extract the fields configured for ``paper_type``.

    The model is asked to answer with a JSON array holding a single object
    whose keys are the attribute names from ``PAPER_TYPE_ATTRIBUTES``. The
    reply is cleaned up leniently: surrounding prose is dropped, trailing
    commas are removed, a bare object (no enclosing array) is accepted, and a
    reply with junk after the last ``}`` is truncated there and re-closed.

    Args:
        text: Full text of the paper.
        paper_type: One of the keys of ``PAPER_TYPE_ATTRIBUTES``.

    Returns:
        The extracted field dict, or ``{}`` when the paper type is unknown,
        the model gave no answer, or no JSON object could be recovered. All
        failures are reported through Streamlit messages rather than raised.
    """
    if paper_type not in PAPER_TYPE_ATTRIBUTES:
        st.error("Invalid paper type selected.")
        return {}

    selected_attrs = PAPER_TYPE_ATTRIBUTES[paper_type]
    prompt = f"""
    Extract the following fields from the research paper text below:

    {", ".join(selected_attrs)}

    Paper text:
    {text}

    Return them in this JSON format strictly, with no extra text:
    [
        {{
            {", ".join([f'"{attr}": "value"' for attr in selected_attrs])}
        }}
    ]
    """

    try:
        response = get_gemini_response(prompt)
        if not response:
            st.error("No response from Gemini.")
            return {}

        # Clean up any text around JSON
        raw_text = response.strip()

        # The payload starts at whichever of "[" / "{" appears first, so a
        # bare object is handled as well as the requested array.
        starts = [pos for pos in (raw_text.find("["), raw_text.find("{")) if pos != -1]
        if not starts:
            st.error("Gemini did not return a valid JSON array.")
            return {}
        json_start = min(starts)
        is_array = raw_text[json_start] == "["
        json_end = raw_text.rfind("]" if is_array else "}") + 1
        if json_end <= json_start:
            # No closing bracket at all (e.g. truncated reply): keep the rest
            # and let the repair step below try to salvage it.
            json_end = len(raw_text)
        json_str = raw_text[json_start:json_end]

        # Try removing trailing commas, extra quotes, etc.
        json_str = re.sub(r",\s*}", "}", json_str)
        json_str = re.sub(r",\s*\]", "]", json_str)

        try:
            data = json.loads(json_str)
        except json.JSONDecodeError as e:
            st.warning(f"Fixing JSON errors: {str(e)}")
            # As a last-resort attempt, remove anything after the last curly
            # bracket, then restore the array terminator that the cut removed.
            bracket_pos = json_str.rfind("}")
            if bracket_pos != -1:
                json_str = json_str[: bracket_pos + 1]
                if is_array:
                    json_str += "]"
            # Try again; a second failure is reported by the outer handler.
            data = json.loads(json_str)

        if isinstance(data, list) and data and isinstance(data[0], dict):
            return data[0]
        if isinstance(data, dict) and data:
            return data
        st.error("Gemini did not return a valid JSON array.")
        return {}
    except Exception as e:
        st.error(f"Error in Gemini extraction: {str(e)}")
        return {}
|
443 |
+
|
444 |
+
|
445 |
+
# --------------------------------------------------------------------------------
|
446 |
+
# 5. Process Paper and Save
|
447 |
+
# --------------------------------------------------------------------------------
|
448 |
+
def process_paper(text: str, paper_type: str):
    """
    Extract the fields for ``paper_type`` from ``text`` and store them.

    The document is inserted into the collection named after the paper type
    (spaces replaced by underscores, lower-cased) of the database returned by
    ``create_db_connection``.

    Args:
        text: Full text of the paper.
        paper_type: One of the keys of ``PAPER_TYPE_ATTRIBUTES``.

    Returns:
        The extracted (and saved) field dict, or an empty dict when the
        database is unavailable or nothing could be extracted.
    """
    db = create_db_connection()
    # PyMongo database objects raise NotImplementedError on truth-value
    # testing, so the "no connection" case must be an explicit None check.
    if db is None:
        return {}

    # Extract fields
    extracted_data = extract_paper_fields(text, paper_type)
    if not extracted_data:
        return {}

    # Determine collection name
    collection_name = paper_type.replace(" ", "_").lower()

    # Insert into MongoDB. NOTE(review): insert_one is expected to add an
    # "_id" key to extracted_data in place — confirm callers are fine with it.
    db[collection_name].insert_one(extracted_data)
    return extracted_data
|
468 |
+
|
469 |
+
|
470 |
+
# --------------------------------------------------------------------------------
|
471 |
+
# 6. Streamlit UI for Paper Extraction
|
472 |
+
# --------------------------------------------------------------------------------
|
473 |
+
def main():
    """
    Streamlit page: upload a paper, extract its fields and save them.

    The user picks a paper type, uploads a PDF or text file and presses
    "Extract & Save". The file's text is passed to ``process_paper`` and the
    extracted fields are shown as JSON. Problems are reported on the page
    instead of being raised.
    """
    # st.set_page_config(page_title="Extract Research Paper", layout="wide")
    st.title("Extract Research Paper")

    # Offer exactly the types the extractor knows about, so the two cannot drift.
    paper_type = st.selectbox(
        "Select type of research paper:",
        list(PAPER_TYPE_ATTRIBUTES),
    )

    uploaded_file = st.file_uploader("Upload a PDF or text file", type=["pdf", "txt"])

    if not st.button("Extract & Save"):
        return
    if not uploaded_file:
        # Previously the click was silently ignored.
        st.warning("Please upload a PDF or text file first.")
        return

    try:
        # Read file content
        if uploaded_file.type == "application/pdf":
            pdf_reader = PyPDF2.PdfReader(uploaded_file)
            # Join pages with a newline so words at page boundaries do not
            # fuse; tolerate pages that yield no text.
            text_content = "\n".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
        else:
            text_content = uploaded_file.read().decode("utf-8", errors="replace")

        if not text_content.strip():
            # Nothing to analyse (e.g. a scanned, image-only PDF).
            st.warning("No readable text was found in the uploaded file.")
            return

        with st.spinner("Extracting fields..."):
            data = process_paper(text_content, paper_type)

        if data:
            st.success(
                f"Paper extracted and saved to MongoDB in '{paper_type}' collection!"
            )
            st.write("Extracted fields:")
            st.json(data)

    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
|
512 |
+
|
513 |
+
|
514 |
+
# ...existing code (if any)...
|
515 |
+
|
516 |
+
# Run the extraction page only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
research3.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import requests
|
4 |
+
import json
|
5 |
+
import os
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
# Load environment variables
|
9 |
+
load_dotenv()
# API key for Perplexity; None when PERPLEXITY_API_KEY is not set in the environment.
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
# Chat-completions endpoint that call_perplexity_api posts to.
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
12 |
+
|
13 |
+
|
14 |
+
def call_perplexity_api(prompt: str, timeout: float = 60.0) -> str:
    """
    Call Perplexity AI with a prompt, return the text response if successful.

    Args:
        prompt: User message to send as a single-turn chat.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the app indefinitely.

    Returns:
        The content of the first choice in the response, or an empty string
        when the key is missing or the request fails (the error is shown via
        ``st.error``).
    """
    if not PERPLEXITY_API_KEY:
        # Fail early with a clear message instead of sending "Bearer None".
        st.error("API Error: PERPLEXITY_API_KEY is not set.")
        return ""

    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }

    try:
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        st.error(f"API Error: {str(e)}")
        return ""
|
33 |
+
|
34 |
+
|
35 |
+
def generate_research_paper(df: pd.DataFrame, topic: str) -> dict:
    """
    Generate one research-paper section per DataFrame column.

    For every column, up to the first 50 non-null values are joined into a
    sample string and sent to Perplexity together with the topic; the prompt
    asks for a section of 100 to 150 words.

    Args:
        df: Corpus data; each column becomes one section.
        topic: Subject of the paper.

    Returns:
        A dict mapping column name to the stripped section text (empty
        string when the API returned nothing for that column).
    """
    sections = {}
    for col in df.columns:
        # Stringify the non-null cells and keep only the first 50 for brevity.
        samples = df[col].dropna().astype(str).tolist()[:50]
        sample_text = " | ".join(samples)

        prompt = f"""
        Topic: {topic}
        Column: {col}
        Data Samples: {sample_text}

        Generate a well-structured research paper section that addresses the topic above,
        referencing relevant information from the column data.
        The section should be at least 100 words and at most 150 words.
        Provide insights, examples, and possible research directions integrating the corpus data.
        """
        generated = call_perplexity_api(prompt)
        if generated:
            sections[col] = generated.strip()
        else:
            sections[col] = ""
    return sections
|
60 |
+
|
61 |
+
|
62 |
+
def format_paper(paper_dict: dict, topic: str) -> str:
    """
    Render the generated sections as a single Markdown document.

    Args:
        paper_dict: Mapping of section heading to section text.
        topic: Used in the level-1 title.

    Returns:
        Markdown with the topic as the main title followed by one level-2
        heading and paragraph per entry, in the mapping's order.
    """
    title_block = f"# Research Paper on: {topic}\n\n"
    section_blocks = (
        f"## {heading}\n{body}\n\n" for heading, body in paper_dict.items()
    )
    return title_block + "".join(section_blocks)
|
72 |
+
|
73 |
+
|
74 |
+
def main():
    """
    Streamlit page: generate a research paper from a topic and a CSV corpus.

    After a CSV is uploaded its first rows are previewed. Pressing
    "Generate Research Paper" with a non-blank topic produces one section per
    column, shows the result as Markdown and offers it for download.
    """
    st.title("Topic + Corpus-Based Research Paper Generator")

    topic_input = st.text_input("Enter the topic for the research paper:")
    uploaded_file = st.file_uploader("Upload CSV corpus file", type="csv")

    if not uploaded_file:
        return

    df = pd.read_csv(uploaded_file)
    st.write("### Preview of Uploaded Data")
    st.dataframe(df.head())

    if not st.button("Generate Research Paper"):
        return
    if not topic_input.strip():
        st.warning("Please enter a valid topic.")
        return

    st.info("Generating paper based on the topic and the corpus columns...")
    with st.spinner("Calling Perplexity AI..."):
        paper = generate_research_paper(df, topic_input)

    # The result has one entry per column even when every API call failed, so
    # success means at least one section actually contains text.
    if not any(paper.values()):
        st.error(
            "Paper generation failed. Please check Perplexity API key."
        )
        return

    formatted_paper = format_paper(paper, topic_input)
    st.success("Research Paper Generated Successfully!")
    st.write(formatted_paper)

    st.download_button(
        label="Download Paper as Markdown",
        data=formatted_paper,
        file_name="research_paper.md",
        mime="text/markdown",
    )
|
107 |
+
|
108 |
+
|
109 |
+
# Run the generator page only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
research_assistant_dashboard.py
CHANGED
@@ -1,342 +1,349 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
from openai import OpenAI
|
3 |
-
import os
|
4 |
-
from dotenv import load_dotenv
|
5 |
-
from llama_index.core import (
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
)
|
11 |
-
from bson import ObjectId
|
12 |
-
import requests
|
13 |
-
import openai
|
14 |
-
import numpy as np
|
15 |
-
from pymongo import MongoClient
|
16 |
-
from bson import ObjectId
|
17 |
-
from datetime import datetime
|
18 |
-
from llama_index.embeddings.openai import OpenAIEmbedding
|
19 |
-
from typing import List, Dict
|
20 |
-
|
21 |
-
# Initialize Perplexity API and OpenAI API
|
22 |
-
load_dotenv()
|
23 |
-
perplexity_api_key = os.getenv("PERPLEXITY_KEY")
|
24 |
-
openai.api_key = os.getenv("OPENAI_KEY")
|
25 |
-
|
26 |
-
# MongoDB setup
|
27 |
-
MONGO_URI = os.getenv("MONGO_URI")
|
28 |
-
client = MongoClient(MONGO_URI)
|
29 |
-
db = client["novascholar_db"]
|
30 |
-
research_papers_collection = db["research_papers"]
|
31 |
-
|
32 |
-
|
33 |
-
def fetch_perplexity_data(api_key, topic):
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
def split_and_vectorize_papers(content: str) -> List[Dict]:
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
def store_papers_in_mongodb(papers):
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
def get_research_papers(query):
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
def analyze_research_gaps(papers):
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
def create_research_paper(gaps, topic, papers):
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
def cosine_similarity(vec1, vec2):
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
def calculate_cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
def display_research_assistant_dashboard():
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
# Run the dashboard
|
339 |
-
if __name__ == "__main__":
|
340 |
-
|
341 |
-
|
342 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import streamlit as st
|
2 |
+
# from openai import OpenAI
|
3 |
+
# import os
|
4 |
+
# from dotenv import load_dotenv
|
5 |
+
# from llama_index.core import (
|
6 |
+
# VectorStoreIndex,
|
7 |
+
# SimpleDirectoryReader,
|
8 |
+
# Document,
|
9 |
+
# GPTVectorStoreIndex,
|
10 |
+
# )
|
11 |
+
# from bson import ObjectId
|
12 |
+
# import requests
|
13 |
+
# import openai
|
14 |
+
# import numpy as np
|
15 |
+
# from pymongo import MongoClient
|
16 |
+
# from bson import ObjectId
|
17 |
+
# from datetime import datetime
|
18 |
+
# from llama_index.embeddings.openai import OpenAIEmbedding
|
19 |
+
# from typing import List, Dict
|
20 |
+
|
21 |
+
# # Initialize Perplexity API and OpenAI API
|
22 |
+
# load_dotenv()
|
23 |
+
# perplexity_api_key = os.getenv("PERPLEXITY_KEY")
|
24 |
+
# openai.api_key = os.getenv("OPENAI_KEY")
|
25 |
+
|
26 |
+
# # MongoDB setup
|
27 |
+
# MONGO_URI = os.getenv("MONGO_URI")
|
28 |
+
# client = MongoClient(MONGO_URI)
|
29 |
+
# db = client["novascholar_db"]
|
30 |
+
# research_papers_collection = db["research_papers"]
|
31 |
+
|
32 |
+
|
33 |
+
# def fetch_perplexity_data(api_key, topic):
|
34 |
+
# """
|
35 |
+
# Fetch research papers data from Perplexity API with proper formatting
|
36 |
+
# """
|
37 |
+
# headers = {
|
38 |
+
# "accept": "application/json",
|
39 |
+
# "content-type": "application/json",
|
40 |
+
# "authorization": f"Bearer {api_key}",
|
41 |
+
# }
|
42 |
+
|
43 |
+
# # Structured prompt to get properly formatted response
|
44 |
+
# messages = [
|
45 |
+
# {
|
46 |
+
# "role": "system",
|
47 |
+
# "content": """You are a research paper retrieval expert. For the given topic, return exactly 10 research papers in the following format:
|
48 |
+
# Title: Paper Title
|
49 |
+
# Authors: Author 1, Author 2
|
50 |
+
# Year: YYYY
|
51 |
+
# Content: Detailed paper content with abstract and key findings
|
52 |
+
# URL: DOI or paper URL
|
53 |
+
# """,
|
54 |
+
# },
|
55 |
+
# {"role": "user", "content": f"Find 10 research papers about: {topic}"},
|
56 |
+
# ]
|
57 |
+
|
58 |
+
# try:
|
59 |
+
# client = OpenAI(api_key=api_key, base_url="https://api.perplexity.ai")
|
60 |
+
# response = client.chat.completions.create(
|
61 |
+
# model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
|
62 |
+
# messages=messages,
|
63 |
+
# )
|
64 |
+
|
65 |
+
# # Extract and validate response
|
66 |
+
# content = response.choices[0].message.content
|
67 |
+
# st.write("Fetched Data:", content) # Debugging line to check the fetched data
|
68 |
+
|
69 |
+
# return content
|
70 |
+
|
71 |
+
# except Exception as e:
|
72 |
+
# st.error(f"Failed to fetch data from Perplexity API: {str(e)}")
|
73 |
+
# return ""
|
74 |
+
|
75 |
+
|
76 |
+
# def split_and_vectorize_papers(content: str) -> List[Dict]:
|
77 |
+
# """Split and vectorize papers using OpenAI embeddings"""
|
78 |
+
# papers = content.split("\n\n")
|
79 |
+
|
80 |
+
# # Initialize OpenAI client
|
81 |
+
# # client = OpenAI() # Uses api_key from environment variable
|
82 |
+
# vectors = []
|
83 |
+
|
84 |
+
# for paper in papers:
|
85 |
+
# try:
|
86 |
+
# # Get embedding using OpenAI's API directly
|
87 |
+
# response = openai.embeddings.create(
|
88 |
+
# model="text-embedding-ada-002", input=paper, encoding_format="float"
|
89 |
+
# )
|
90 |
+
|
91 |
+
# # Extract embedding from response
|
92 |
+
# embedding = response.data[0].embedding
|
93 |
+
|
94 |
+
# vectors.append(
|
95 |
+
# {"content": paper, "vector": embedding, "timestamp": datetime.utcnow()}
|
96 |
+
# )
|
97 |
+
|
98 |
+
# except Exception as e:
|
99 |
+
# st.error(f"Error vectorizing paper: {str(e)}")
|
100 |
+
# continue
|
101 |
+
|
102 |
+
# return vectors
|
103 |
+
|
104 |
+
|
105 |
+
# def store_papers_in_mongodb(papers):
|
106 |
+
# """Store papers with vectors in MongoDB"""
|
107 |
+
# try:
|
108 |
+
# for paper in papers:
|
109 |
+
# # Prepare MongoDB document
|
110 |
+
# mongo_doc = {
|
111 |
+
# "content": paper["content"],
|
112 |
+
# "vector": paper["vector"],
|
113 |
+
# "created_at": datetime.utcnow(),
|
114 |
+
# }
|
115 |
+
|
116 |
+
# # Insert into MongoDB
|
117 |
+
# db.papers.update_one(
|
118 |
+
# {"content": paper["content"]}, {"$set": mongo_doc}, upsert=True
|
119 |
+
# )
|
120 |
+
|
121 |
+
# st.success(f"Stored {len(papers)} papers in database")
|
122 |
+
# return True
|
123 |
+
# except Exception as e:
|
124 |
+
# st.error(f"Error storing papers: {str(e)}")
|
125 |
+
|
126 |
+
|
127 |
+
# def get_research_papers(query):
|
128 |
+
# """
|
129 |
+
# Get and store research papers with improved error handling
|
130 |
+
# """
|
131 |
+
# # Fetch papers from Perplexity
|
132 |
+
# content = fetch_perplexity_data(perplexity_api_key, query)
|
133 |
+
|
134 |
+
# if not content:
|
135 |
+
# return []
|
136 |
+
|
137 |
+
# # Split and vectorize papers
|
138 |
+
# papers = split_and_vectorize_papers(content)
|
139 |
+
|
140 |
+
# # Store papers in MongoDB
|
141 |
+
# if store_papers_in_mongodb(papers):
|
142 |
+
# return papers
|
143 |
+
# else:
|
144 |
+
# st.warning("Failed to store papers in database, but returning fetched results")
|
145 |
+
# return papers
|
146 |
+
|
147 |
+
|
148 |
+
# def analyze_research_gaps(papers):
|
149 |
+
# """
|
150 |
+
# Analyze research gaps with improved prompt and error handling
|
151 |
+
# """
|
152 |
+
# if not papers:
|
153 |
+
# return "No papers provided for analysis"
|
154 |
+
|
155 |
+
# # Prepare paper summaries for analysis
|
156 |
+
# paper_summaries = "\n\n".join(
|
157 |
+
# [
|
158 |
+
# f"Key Findings: {paper['content'][:500]}..."
|
159 |
+
# # f"Title: {paper['title']}\nYear: {paper['year']}\nKey Findings: {paper['content'][:500]}..."
|
160 |
+
# for paper in papers
|
161 |
+
# ]
|
162 |
+
# )
|
163 |
+
|
164 |
+
# headers = {
|
165 |
+
# "Authorization": f"Bearer {perplexity_api_key}",
|
166 |
+
# "Content-Type": "application/json",
|
167 |
+
# }
|
168 |
+
|
169 |
+
# data = {
|
170 |
+
# "messages": [
|
171 |
+
# {
|
172 |
+
# "role": "system",
|
173 |
+
# "content": "You are a research analysis expert. Identify specific research gaps and future research directions based on the provided papers. Format your response with clear sections: Current State, Identified Gaps, and Future Directions.",
|
174 |
+
# },
|
175 |
+
# {
|
176 |
+
# "role": "user",
|
177 |
+
# "content": f"Analyze these papers and identify research gaps:\n\n{paper_summaries}",
|
178 |
+
# },
|
179 |
+
# ]
|
180 |
+
# }
|
181 |
+
|
182 |
+
# try:
|
183 |
+
# client = OpenAI(
|
184 |
+
# api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
|
185 |
+
# )
|
186 |
+
# response = client.chat.completions.create(
|
187 |
+
# model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
|
188 |
+
# messages=data["messages"],
|
189 |
+
# )
|
190 |
+
# return response.choices[0].message.content
|
191 |
+
|
192 |
+
# except Exception as e:
|
193 |
+
# st.error(f"Failed to analyze research gaps: {str(e)}")
|
194 |
+
# return "Error analyzing research gaps"
|
195 |
+
|
196 |
+
|
197 |
+
# def create_research_paper(gaps, topic, papers):
|
198 |
+
# """
|
199 |
+
# Create a research paper that addresses the identified gaps using Perplexity API
|
200 |
+
# """
|
201 |
+
# full_texts = "\n\n".join([paper["content"] for paper in papers])
|
202 |
+
# headers = {
|
203 |
+
# "Authorization": f"Bearer {perplexity_api_key}",
|
204 |
+
# "Content-Type": "application/json",
|
205 |
+
# }
|
206 |
+
# data = {
|
207 |
+
# "messages": [
|
208 |
+
# {
|
209 |
+
# "role": "system",
|
210 |
+
# "content": "You are a research paper generation expert. Create a comprehensive research paper that addresses the identified gaps based on the provided papers. Format your response with clear sections: Introduction, Literature Review, Methodology, Results, Discussion, Conclusion, and References.",
|
211 |
+
# },
|
212 |
+
# {
|
213 |
+
# "role": "user",
|
214 |
+
# "content": f"Create a research paper on the topic '{topic}' that addresses the following research gaps:\n\n{gaps}\n\nBased on the following papers:\n\n{full_texts}",
|
215 |
+
# },
|
216 |
+
# ]
|
217 |
+
# }
|
218 |
+
# try:
|
219 |
+
# client = OpenAI(
|
220 |
+
# api_key=perplexity_api_key, base_url="https://api.perplexity.ai"
|
221 |
+
# )
|
222 |
+
# response = client.chat.completions.create(
|
223 |
+
# model="llama-3.1-sonar-small-128k-chat", # Use the best Perplexity model
|
224 |
+
# messages=data["messages"],
|
225 |
+
# )
|
226 |
+
# return response.choices[0].message.content
|
227 |
+
|
228 |
+
# except Exception as e:
|
229 |
+
# st.error(f"Failed to create research paper: {str(e)}")
|
230 |
+
# return "Error creating research paper"
|
231 |
+
|
232 |
+
|
233 |
+
# def cosine_similarity(vec1, vec2):
|
234 |
+
# """Calculate the cosine similarity between two vectors"""
|
235 |
+
# vec1 = np.array(vec1)
|
236 |
+
# vec2 = np.array(vec2)
|
237 |
+
# return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
|
238 |
+
|
239 |
+
|
240 |
+
# def calculate_cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
|
241 |
+
# """Calculate cosine similarity between two vectors"""
|
242 |
+
# return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))
|
243 |
+
|
244 |
+
|
245 |
+
# def display_research_assistant_dashboard():
|
246 |
+
# """Display research assistant dashboard"""
|
247 |
+
# # Initialize session state for recommendations
|
248 |
+
# if "recommendations" not in st.session_state:
|
249 |
+
# st.session_state.recommendations = None
|
250 |
+
# if "vectors" not in st.session_state:
|
251 |
+
# st.session_state.vectors = None
|
252 |
+
# if "generated_paper" not in st.session_state:
|
253 |
+
# st.session_state.generated_paper = None
|
254 |
+
|
255 |
+
# # Sidebar
|
256 |
+
# with st.sidebar:
|
257 |
+
# st.title(f"Welcome, {st.session_state.username}")
|
258 |
+
# if st.button("Logout", use_container_width=True):
|
259 |
+
# for key in st.session_state.keys():
|
260 |
+
# del st.session_state[key]
|
261 |
+
# st.rerun()
|
262 |
+
|
263 |
+
# # Main content
|
264 |
+
# st.title("Research Paper Recommendations")
|
265 |
+
# search_query = st.text_input("Enter research topic:")
|
266 |
+
# col1, col2 = st.columns(2)
|
267 |
+
# with col1:
|
268 |
+
# if st.button("Get Research Papers"):
|
269 |
+
# if search_query:
|
270 |
+
# with st.spinner("Fetching recommendations..."):
|
271 |
+
# st.session_state.recommendations = get_research_papers(search_query)
|
272 |
+
# st.session_state.vectors = [
|
273 |
+
# paper["vector"] for paper in st.session_state.recommendations
|
274 |
+
# ]
|
275 |
+
# st.markdown(
|
276 |
+
# "\n\n".join(
|
277 |
+
# [
|
278 |
+
# f"**{i+1}.**\n{paper['content']}"
|
279 |
+
# # f"**{i+1}. {paper['title']}**\n{paper['content']}"
|
280 |
+
# for i, paper in enumerate(
|
281 |
+
# st.session_state.recommendations
|
282 |
+
# )
|
283 |
+
# ]
|
284 |
+
# )
|
285 |
+
# )
|
286 |
+
# else:
|
287 |
+
# st.warning("Please enter a search query")
|
288 |
+
# with col2:
|
289 |
+
# if st.button("Analyze Research Gaps"):
|
290 |
+
# if st.session_state.recommendations:
|
291 |
+
# with st.spinner("Analyzing research gaps..."):
|
292 |
+
# gaps = analyze_research_gaps(st.session_state.recommendations)
|
293 |
+
# st.session_state.generated_paper = create_research_paper(
|
294 |
+
# gaps, search_query, st.session_state.recommendations
|
295 |
+
# )
|
296 |
+
# st.markdown("### Potential Research Gaps")
|
297 |
+
# st.markdown(gaps)
|
298 |
+
# else:
|
299 |
+
# st.warning("Please get research papers first")
|
300 |
+
|
301 |
+
# if st.button("Save and Vectorize"):
|
302 |
+
# if st.session_state.generated_paper:
|
303 |
+
# try:
|
304 |
+
# # Initialize OpenAI client
|
305 |
+
|
306 |
+
# # Get embedding for generated paper
|
307 |
+
# response = openai.embeddings.create(
|
308 |
+
# model="text-embedding-ada-002",
|
309 |
+
# input=st.session_state.generated_paper,
|
310 |
+
# encoding_format="float",
|
311 |
+
# )
|
312 |
+
# generated_vector = response.data[0].embedding
|
313 |
+
|
314 |
+
# # Calculate similarities with stored vectors
|
315 |
+
# similarities = [
|
316 |
+
# calculate_cosine_similarity(generated_vector, paper_vector)
|
317 |
+
# for paper_vector in st.session_state.vectors
|
318 |
+
# ]
|
319 |
+
|
320 |
+
# # Display results
|
321 |
+
# st.markdown("### Generated Research Paper")
|
322 |
+
# st.markdown(st.session_state.generated_paper)
|
323 |
+
|
324 |
+
# st.markdown("### Cosine Similarities with Original Papers")
|
325 |
+
# for i, similarity in enumerate(similarities):
|
326 |
+
# st.metric(
|
327 |
+
# f"Paper {i+1}",
|
328 |
+
# value=f"{similarity:.3f}",
|
329 |
+
# help="Cosine similarity (1.0 = identical, 0.0 = completely different)",
|
330 |
+
# )
|
331 |
+
|
332 |
+
# except Exception as e:
|
333 |
+
# st.error(f"Error during vectorization: {str(e)}")
|
334 |
+
# else:
|
335 |
+
# st.warning("Please analyze research gaps first")
|
336 |
+
|
337 |
+
|
338 |
+
# # Run the dashboard
|
339 |
+
# if __name__ == "__main__":
|
340 |
+
# display_research_assistant_dashboard()
|
341 |
+
|
342 |
+
import research_combine2
|
343 |
+
# if __name__ == "__main__":
|
344 |
+
# display_research_assistant_dashboard()
|
345 |
+
def display_research_assistant_dashboard():
    """Show the research assistant dashboard implemented in research_combine2."""
    render_dashboard = research_combine2.display_research_assistant_dashboard
    render_dashboard()
|
347 |
+
|
348 |
+
|
349 |
+
|
research_combine.py
ADDED
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
import json
|
4 |
+
import requests
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
from pymongo import MongoClient
|
7 |
+
from typing import Dict, Any
|
8 |
+
|
9 |
+
# Load environment variables
load_dotenv()
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"

# The connection string used to be read only from the misspelled variable
# "MONGODB_UR", so a correctly named MONGODB_URI was silently ignored. Prefer
# the correct name and keep the old spelling as a fallback for existing setups.
# SECURITY(review): the literal fallback below appears to embed database
# credentials in source control; rotate them and drop the default once every
# deployment provides MONGODB_URI.
MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv(
    "MONGODB_UR",
    "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
)

# MongoDB setup
client = MongoClient(MONGODB_URI)
db = client["novascholar_db"]
collection = db["research_papers"]
|
22 |
+
|
23 |
+
|
24 |
+
def search_papers(topic: str, num_papers: int, *, timeout: float = 120.0) -> str:
    """Fetch papers about *topic* from the Perplexity API and store them in MongoDB.

    The model is asked to answer with a bare JSON array of paper objects. Any
    text surrounding the outermost ``[...]`` is discarded, the array is parsed,
    and a non-empty result is inserted into the module-level ``collection``.

    Args:
        topic: Research topic interpolated into the prompt.
        num_papers: Number of papers requested from the model.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The JSON array text when at least one paper was stored, ``"[]"`` when
        the array was empty, or ``None`` after an error has been reported
        through ``st.error``.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    # The prompt lines are flush-left on purpose: everything inside the
    # literal, including leading whitespace, is sent to the model verbatim.
    prompt = f"""Find {num_papers} recent research papers about {topic}.
Return ONLY a valid JSON array with the following structure for each paper, no additional text:
[
{{
"Title": "paper title",
"Publication": "publication name",
"Journal_Conference": "venue name",
"Abstract": "abstract text",
"Keywords": "key terms",
"Author": "author names",
"Date_of_Publication": "publication date",
"Intro": "introduction summary",
"Literature_Review": "literature review summary",
"Research_Models_Used": "models description",
"Methodology": "methodology description",
"Discussion": "discussion summary",
"Future_Scope": "future work",
"Theory": "theoretical framework",
"Independent_Variables": "list of variables",
"nof_Independent_Variables": 0,
"Dependent_Variables": "list of variables",
"nof_Dependent_Variables": 0,
"Control_Variables": "list of variables",
"nof_Control_Variables": 0,
"Extraneous_Variables": "list of variables",
"nof_Extraneous_Variables": 0
}}
]"""

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [
            {
                "role": "system",
                "content": "You are a research paper analyzer that returns only valid JSON arrays.",
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.1,
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"].strip()

        # Keep only the outermost [...] span; models often wrap the array in
        # prose or code fences despite the instructions.
        start = content.find("[")
        end = content.rfind("]")
        if start == -1 or end < start:
            raise ValueError("Response does not contain a JSON array")
        content = content[start : end + 1]

        # Validate JSON
        papers = json.loads(content)
        if not isinstance(papers, list):
            raise ValueError("Response is not a JSON array")
        if not all(isinstance(paper, dict) for paper in papers):
            raise ValueError("Response array must contain only JSON objects")

        # Insert into MongoDB
        if papers:
            collection.insert_many(papers)
            return content
        return "[]"

    except json.JSONDecodeError as e:
        st.error(f"Invalid JSON response: {str(e)}")
        return None
    except Exception as e:
        # Broad on purpose: network, HTTP, schema and database failures are
        # all surfaced to the user instead of crashing the page.
        st.error(f"Error: {str(e)}")
        return None
|
100 |
+
|
101 |
+
|
102 |
+
import research22
|
103 |
+
import keywords_database_download
|
104 |
+
import new_keywords
|
105 |
+
import infranew
|
106 |
+
import loldude
|
107 |
+
import new_research_paper
|
108 |
+
import research3
|
109 |
+
import entire_download
|
110 |
+
|
111 |
+
|
112 |
+
def main():
    """Render the standalone "Research Papers" Streamlit app.

    A sidebar radio selects the page. "Search Papers" is implemented here
    (search via ``search_papers``, then list the stored papers); the other
    pages delegate to the ``main()`` of their own module. Any option without
    an explicit handler (currently only "Upload Paper") is served by
    ``research22.main()``.
    """
    st.set_page_config(page_title="Research Papers", layout="wide")

    st.title("Research Papers")

    # Sidebar radio
    option = st.sidebar.radio(
        "Select an option",
        [
            "Search Papers",
            "Upload Paper",
            "Single Keyword Search",
            "Multiple Keywords Search",
            "Knowledge Graph",
            "Cosine Similarity",
            "Paper Generator",
            "Paper from Topic",
            "Download Entire Corpus",
        ],
    )

    if option == "Search Papers":
        st.subheader("Search and Store Papers")

        topic = st.text_input("Enter research topic")
        num_papers = st.number_input(
            "Number of papers", min_value=1, max_value=10, value=5
        )

        if st.button("Search and Store"):
            if topic:
                with st.spinner(f"Searching and storing papers about {topic}..."):
                    results = search_papers(topic, num_papers)
                    if results:
                        papers = json.loads(results)
                        if papers:
                            # Report what was actually stored; the model may
                            # return fewer papers than requested.
                            st.success(
                                f"Successfully stored {len(papers)} papers in MongoDB"
                            )
                        else:
                            st.info("No papers were returned for this topic")
                        # Display results
                        for paper in papers:
                            # The model may omit "Title"; fall back instead of
                            # raising KeyError halfway through rendering.
                            title = str(paper.get("Title", "Untitled paper"))
                            with st.expander(title):
                                for key, value in paper.items():
                                    if key != "Title":
                                        st.write(f"**{key}:** {value}")
            else:
                st.warning("Please enter a research topic")

        # Add MongoDB connection status
        if st.sidebar.button("Check Database Connection"):
            try:
                # The connection string is intentionally not printed: it can
                # contain credentials and stdout usually ends up in logs.
                client.admin.command("ping")
                st.sidebar.success("Connected to MongoDB")
            except Exception as e:
                st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
        return

    # Pages implemented in other modules, keyed by their sidebar label.
    pages = {
        "Single Keyword Search": keywords_database_download.main,
        "Multiple Keywords Search": new_keywords.main,
        "Knowledge Graph": infranew.main,
        "Cosine Similarity": loldude.main,
        "Paper Generator": new_research_paper.main,
        "Paper from Topic": research3.main,
        "Download Entire Corpus": entire_download.main,
    }
    pages.get(option, research22.main)()


if __name__ == "__main__":
    main()
|
research_combine2.py
ADDED
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import new_research_paper
|
2 |
+
import research3
|
3 |
+
import entire_download
|
4 |
+
import streamlit as st
|
5 |
+
import os
|
6 |
+
import json
|
7 |
+
import requests
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
from pymongo import MongoClient
|
10 |
+
from typing import Dict, Any
|
11 |
+
import research22
|
12 |
+
import keywords_database_download
|
13 |
+
import new_keywords
|
14 |
+
import infranew
|
15 |
+
import loldude
|
16 |
+
import new_research_paper
|
17 |
+
import research3
|
18 |
+
import entire_download
|
19 |
+
import sciclone
|
20 |
+
import extract
|
21 |
+
|
22 |
+
# Load environment variables
load_dotenv()
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"

# The connection string used to be read only from the misspelled variable
# "MONGODB_UR", so a correctly named MONGODB_URI was silently ignored. Prefer
# the correct name and keep the old spelling as a fallback for existing setups.
# SECURITY(review): the literal fallback below appears to embed database
# credentials in source control; rotate them and drop the default once every
# deployment provides MONGODB_URI.
MONGODB_URI = os.getenv("MONGODB_URI") or os.getenv(
    "MONGODB_UR",
    "mongodb+srv://milind:[email protected]/?retryWrites=true&w=majority&appName=Cluster0",
)

# MongoDB setup
client = MongoClient(MONGODB_URI)
db = client["novascholar_db"]
|
34 |
+
|
35 |
+
|
36 |
+
def search_papers(
    topic: str, num_papers: int, paper_type: str, *, timeout: float = 120.0
) -> str:
    """Fetch papers of one type from the Perplexity API and store them in MongoDB.

    The set of JSON fields requested from the model depends on *paper_type*.
    Any text surrounding the outermost ``[...]`` in the answer is discarded,
    the array is parsed, and a non-empty result is inserted into the
    collection named after the paper type (spaces replaced by underscores,
    lower-cased).

    Args:
        topic: Research topic interpolated into the prompt.
        num_papers: Number of papers requested from the model.
        paper_type: One of the keys of the attribute table below.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The JSON array text when at least one paper was stored, ``"[]"`` when
        the array was empty, or ``None`` after an error has been reported
        through ``st.error``.

    Raises:
        KeyError: If *paper_type* is not a known paper type.
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    # Fields shared by every paper type, followed by the type-specific ones.
    common = [
        "Title",
        "Publication",
        "Journal_Conference",
        "Abstract",
        "Keywords",
        "Author",
        "Date_of_Publication",
        "Intro",
        "Literature_Review",
    ]
    attributes = {
        "Review Based Paper": common
        + [
            "Body",
            "Protocol",
            "Search String",
            "Included Studies",
            "Data Collection and Analysis Methods",
            "Data Extraction Table",
            "Synthesis and Analysis",
            "Conclusion",
            "Limitations",
            "Results",
            "References",
            "Risk of Bias Assessment",
        ],
        "Opinion/Perspective Based Paper": common
        + [
            "Introduction",
            "Body",
            "Results and Discussion",
            "Conclusion",
            "References",
        ],
        "Empirical Research Paper": common
        + [
            "Introduction",
            "Body",
            "Methodology",
            "Participants",
            "Survey Instrument",
            "Data Collection",
            "Data Analysis",
            "Results and Discussion",
            "Conclusion",
            "References",
        ],
        "Research Paper (Other)": common
        + [
            "Research_Models_Used",
            "Methodology",
            "Discussion",
            "Future_Scope",
            "Theory",
            "Independent_Variables",
            "nof_Independent_Variables",
            "Dependent_Variables",
            "nof_Dependent_Variables",
            "Control_Variables",
            "Extraneous_Variables",
            "nof_Control_Variables",
            "nof_Extraneous_Variables",
        ],
    }

    # Looked up outside the try block so an unknown type still raises KeyError.
    selected_attributes = attributes[paper_type]
    structure = ", ".join(f'"{attr}": "value"' for attr in selected_attributes)
    # The prompt lines are flush-left on purpose: everything inside the
    # literal, including leading whitespace, is sent to the model verbatim.
    prompt = f"""Find {num_papers} recent research papers about {topic}.
Return ONLY a valid JSON array with the following structure for each paper, no additional text:
[{{
{structure}
}}]"""

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [
            {
                "role": "system",
                "content": "You are a research paper analyzer that returns only valid JSON arrays.",
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.1,
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        content = response.json()["choices"][0]["message"]["content"].strip()

        # Keep only the outermost [...] span; models often wrap the array in
        # prose or code fences despite the instructions.
        start = content.find("[")
        end = content.rfind("]")
        if start == -1 or end < start:
            raise ValueError("Response does not contain a JSON array")
        content = content[start : end + 1]

        # Validate JSON
        papers = json.loads(content)
        if not isinstance(papers, list):
            raise ValueError("Response is not a JSON array")
        if not all(isinstance(paper, dict) for paper in papers):
            raise ValueError("Response array must contain only JSON objects")

        # Insert into MongoDB
        collection = db[paper_type.replace(" ", "_").lower()]
        if papers:
            collection.insert_many(papers)
            return content
        return "[]"

    except json.JSONDecodeError as e:
        st.error(f"Invalid JSON response: {str(e)}")
        return None
    except Exception as e:
        # Broad on purpose: network, HTTP, schema and database failures are
        # all surfaced to the user instead of crashing the page.
        st.error(f"Error: {str(e)}")
        return None
|
178 |
+
|
179 |
+
|
180 |
+
def display_research_assistant_dashboard():
    """Render the research dashboard page chosen in the sidebar.

    "Search Papers" is implemented here (search via ``search_papers`` for a
    selected paper type, then list the stored papers); the other pages
    delegate to the ``main()`` of their own module. Any option without an
    explicit handler (currently only "Upload Paper") is served by
    ``research22.main()``.
    """
    # NOTE(review): the st.set_page_config(...) and st.title(...) calls were
    # commented out in this function, presumably because the hosting app
    # already configures the page - confirm before re-enabling.

    # Sidebar radio
    option = st.sidebar.radio(
        "Select an option",
        [
            "Search Papers",
            "Upload Paper",
            "Single Keyword Search",
            "Multiple Keywords Search",
            "Knowledge Graph",
            "Cosine Similarity",
            "Paper Generator",
            "Paper from Topic",
            "Download Entire Corpus",
            "Research Copilot",
            "Research Paper Analysis Tool",
        ],
    )

    if option == "Search Papers":
        st.subheader("Search and Store Papers")

        topic = st.text_input("Enter research topic")
        num_papers = st.number_input(
            "Number of papers", min_value=1, max_value=10, value=5
        )
        paper_type = st.selectbox(
            "Select type of research paper",
            [
                "Review Based Paper",
                "Opinion/Perspective Based Paper",
                "Empirical Research Paper",
                "Research Paper (Other)",
            ],
        )

        if st.button("Search and Store"):
            if topic:
                with st.spinner(f"Searching and storing papers about {topic}..."):
                    results = search_papers(topic, num_papers, paper_type)
                    if results:
                        papers = json.loads(results)
                        if papers:
                            # Report what was actually stored; the model may
                            # return fewer papers than requested.
                            st.success(
                                f"Successfully stored {len(papers)} papers in MongoDB"
                            )
                        else:
                            st.info("No papers were returned for this topic")
                        # Display results
                        for paper in papers:
                            # The model may omit "Title"; fall back instead of
                            # raising KeyError halfway through rendering.
                            title = str(paper.get("Title", "Untitled paper"))
                            with st.expander(title):
                                for key, value in paper.items():
                                    if key != "Title":
                                        st.write(f"**{key}:** {value}")
            else:
                st.warning("Please enter a research topic")

        # Add MongoDB connection status
        if st.sidebar.button("Check Database Connection"):
            try:
                # The connection string is intentionally not printed: it can
                # contain credentials and stdout usually ends up in logs.
                client.admin.command("ping")
                st.sidebar.success("Connected to MongoDB")
            except Exception as e:
                st.sidebar.error(f"MongoDB Connection Error: {str(e)}")
        return

    # Pages implemented in other modules, keyed by their sidebar label.
    pages = {
        "Single Keyword Search": keywords_database_download.main,
        "Multiple Keywords Search": new_keywords.main,
        "Knowledge Graph": infranew.main,
        "Cosine Similarity": loldude.main,
        "Paper Generator": new_research_paper.main,
        "Paper from Topic": research3.main,
        "Download Entire Corpus": entire_download.main,
        "Research Copilot": sciclone.main,
        "Research Paper Analysis Tool": extract.main,
    }
    pages.get(option, research22.main)()


if __name__ == "__main__":
    display_research_assistant_dashboard()
|
rubrics.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from pymongo import MongoClient
|
3 |
+
from openai import OpenAI
|
4 |
+
from bson import ObjectId
|
5 |
+
import json
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import os
|
8 |
+
|
9 |
+
load_dotenv()
|
10 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
11 |
+
OPENAI_API_KEY = os.getenv('OPENAI_KEY')
|
12 |
+
|
13 |
+
client = MongoClient(MONGO_URI)
|
14 |
+
db = client['novascholar_db']
|
15 |
+
# db.create_collection("rubrics")
|
16 |
+
rubrics_collection = db['rubrics']
|
17 |
+
resources_collection = db['resources']
|
18 |
+
courses_collection = db['courses']
|
19 |
+
|
20 |
+
def generate_rubrics(api_key, session_title, outcome_description, taxonomy, pre_class_material):
    """Request a numerically scored rubric for a session from the OpenAI chat API.

    Args:
        api_key: OpenAI API key used to construct the client.
        session_title: Title of the session, interpolated into the prompt.
        outcome_description: Learning-outcome text, interpolated into the prompt.
        taxonomy: Bloom's Taxonomy level, interpolated into the prompt.
        pre_class_material: Text describing pre-class material, interpolated
            into the prompt.

    Returns:
        The content of the first completion choice (the prompt asks the model
        for a bare JSON object), or None if the request raised; in that case
        the error is shown with ``st.error``.
    """
    system_message = {
        "role": "system",
        "content": "You are an expert educational AI assistant specializing in instructional design.",
    }
    user_message = {
        "role": "user",
        "content": f"""
You are an expert educational AI assistant specializing in instructional design. Generate a detailed rubric for the session titled "{session_title}". The rubric should be aligned with Bloom's Taxonomy level "{taxonomy}" and use numerical scoring levels (4,3,2,1) instead of descriptive levels. Use the following context:

Session Outcome Description:
{outcome_description}

Pre-class Material:
{pre_class_material}

Please generate the rubric in JSON format with these specifications:
1. Use numerical levels (4=Highest, 1=Lowest) instead of descriptive levels
2. Include 4-5 relevant criteria based on the session outcome
3. Each criterion should have clear descriptors for each numerical level
4. Focus on objectively measurable aspects for evaluation
5. Structure should be suitable for evaluating assignments and test answers

***IMPORTANT: DO NOT INCLUDE THE WORD JSON IN THE OUTPUT STRING, DO NOT INCLUDE BACKTICKS (```) IN THE OUTPUT, AND DO NOT INCLUDE ANY OTHER TEXT, OTHER THAN THE ACTUAL JSON RESPONSE. START THE RESPONSE STRING WITH AN OPEN CURLY BRACE {{ AND END WITH A CLOSING CURLY BRACE }}.***
""",
    }

    try:
        completion = OpenAI(api_key=api_key).chat.completions.create(
            model="gpt-4-0125-preview",
            messages=[system_message, user_message],
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Any client/API failure is surfaced in the UI instead of crashing the page.
        st.error(f"Failed to generate rubrics: {e}")
        return None
|
61 |
+
|
62 |
+
def display_rubrics_tab(session, course_id):
    """Render the rubric tab: show session info, generate a rubric, and save it.

    Args:
        session: Mapping for the current session; ``session['session_id']``
            and ``session['title']`` are read.
        course_id: Identifier of the course that owns the session.

    The generated rubric is kept in ``st.session_state`` because a Streamlit
    button only returns True on the rerun triggered by its own click. With
    "Save Rubric" nested inside the "Generate Rubric" branch, clicking Save
    reran the script with "Generate Rubric" False, so the save code was never
    reached.
    """
    st.subheader("Generated Rubrics")

    # Fetch only the matching session from the courses collection.
    course_data = courses_collection.find_one(
        {"course_id": course_id, "sessions.session_id": session['session_id']},
        {"sessions.$": 1}
    )

    if not (course_data and course_data.get('sessions')):
        st.error("Session data not found")
        return

    session_data = course_data['sessions'][0]

    # Only the first learning outcome of the session is used.
    outcomes = session_data.get('session_learning_outcomes')
    if not outcomes:
        st.error("No learning outcomes found for this session")
        return

    outcome = outcomes[0]
    outcome_description = outcome.get('outcome_description', '')
    taxonomy_level = outcome.get('bloom_taxonomy_level', '')

    # Display fetched information
    st.markdown("### Session Information")
    st.markdown(f"**Session Title:** {session['title']}")
    st.markdown(f"**Learning Outcome:** {outcome_description}")
    st.markdown(f"**Taxonomy Level:** {taxonomy_level}")

    # Pre-class material is summarised as "title: url" lines for the prompt.
    pre_class_material_docs = resources_collection.find({"session_id": session['session_id']})
    pre_class_material = "\n".join(
        f"{doc.get('title', 'No Title')}: {doc.get('url', 'No URL')}"
        for doc in pre_class_material_docs
    )

    # One stored rubric per course/session so tabs for other sessions do not collide.
    rubric_state_key = f"generated_rubric_{course_id}_{session['session_id']}"

    if st.button("Generate Rubric"):
        generated = generate_rubrics(
            OPENAI_API_KEY,
            session['title'],
            outcome_description,
            taxonomy_level,
            pre_class_material
        )
        if generated:
            st.session_state[rubric_state_key] = generated

    rubric = st.session_state.get(rubric_state_key)
    if not rubric:
        return

    st.json(rubric)
    if st.button("Save Rubric"):
        try:
            parsed_rubric = json.loads(rubric)
        except json.JSONDecodeError as exc:
            # The model is asked for bare JSON but may not comply.
            st.error(f"Generated rubric is not valid JSON and was not saved: {exc}")
            return

        rubrics_collection.insert_one({
            "course_id": course_id,
            "session_id": session['session_id'],
            "rubric": parsed_rubric
        })
        st.success("Rubric saved successfully!")
|
sciclone.py
ADDED
@@ -0,0 +1,466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import requests
|
3 |
+
import PyPDF2
|
4 |
+
from typing import Optional, Dict, List
|
5 |
+
import json
|
6 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
7 |
+
from concurrent.futures import ThreadPoolExecutor
|
8 |
+
import xml.etree.ElementTree as ET
|
9 |
+
import re
|
10 |
+
from datetime import datetime
|
11 |
+
import time
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
import os
|
14 |
+
import pandas as pd
|
15 |
+
|
16 |
+
# Load environment variables
|
17 |
+
load_dotenv()
|
18 |
+
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
19 |
+
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
20 |
+
SAPLING_API_KEY = os.getenv("SAPLING_API_KEY")
|
21 |
+
|
22 |
+
|
23 |
+
def call_perplexity_api(prompt: str, timeout: float = 120) -> str:
    """Send *prompt* to the Perplexity chat-completions endpoint.

    Args:
        prompt: User message to send as the only message of the conversation.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` waits indefinitely, which would
            freeze the Streamlit run on a stalled connection.

    Returns:
        The content of the first choice's message, or an empty string if the
        request or response parsing failed (the error is shown via ``st.error``).
    """
    headers = {
        "Authorization": f"Bearer {PERPLEXITY_API_KEY}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "llama-3.1-sonar-small-128k-chat",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }

    try:
        response = requests.post(
            PERPLEXITY_API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # UI boundary: callers treat "" as "no answer", so report and continue.
        st.error(f"API Error: {str(e)}")
        return ""
|
43 |
+
|
44 |
+
|
45 |
+
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of *pdf_file*, each page followed by a newline."""
    pages = PyPDF2.PdfReader(pdf_file).pages
    return "".join(page.extract_text() + "\n" for page in pages)
|
52 |
+
|
53 |
+
|
54 |
+
def analyze_paper(text: str, category: str) -> str:
    """Ask the Perplexity API one category-specific question about a paper.

    Args:
        text: Full paper text; only the first 5000 characters are sent.
        category: One of the keys of the instruction table below. An unknown
            category raises ``KeyError``.

    Returns:
        Whatever ``call_perplexity_api`` returns for the assembled prompt.
    """
    instructions_by_category = {
        "Summarized Abstract": "Extract and summarize the abstract from this research paper:",
        "Results": "What are the main results and findings from this research paper:",
        "Summarized Introduction": "Summarize the introduction section of this research paper:",
        "Methods Used": "What are the main methods and methodologies used in this research:",
        "Literature Survey": "Summarize the literature review or related work from this paper:",
        "Limitations": "What are the limitations mentioned in this research:",
        "Contributions": "What are the main contributions of this research:",
        "Practical Implications": "What are the practical implications of this research:",
        "Objectives": "What are the main objectives of this research:",
        "Findings": "What are the key findings from this research:",
        "Future Research": "What future research directions are suggested in this paper:",
        "Dependent Variables": "What are the dependent variables studied in this research:",
        "Independent Variables": "What are the independent variables studied in this research:",
        "Dataset": "What dataset(s) were used in this research:",
        "Problem Statement": "What is the main problem statement or research question:",
        "Challenges": "What challenges were faced or addressed in this research:",
        "Applications": "What are the potential applications of this research:",
    }

    instruction = instructions_by_category[category]
    # Limit text to avoid token limits
    excerpt = text[:5000]
    return call_perplexity_api(f"{instruction}\n\nPaper text: {excerpt}")
|
78 |
+
|
79 |
+
|
80 |
+
class ResearchAssistant:
    """Prompt-building helpers behind the Research Copilot tabs.

    Most public methods return either a chat-completion-shaped dict
    (``{"choices": [{"message": {"content": ...}}]}``) or ``{"error": "..."}``,
    which is the contract the UI code checks with ``"error" in result``.
    """

    def __init__(self, perplexity_key: str):
        # Stored for reference; requests go through call_perplexity_api.
        self.perplexity_key = perplexity_key

    @staticmethod
    def _as_chat_response(content) -> Dict:
        """Wrap *content* in the chat-completion-shaped dict the UI expects."""
        return {"choices": [{"message": {"content": content}}]}

    def chat_with_pdf(self, pdf_text: str, query: str) -> Dict:
        """Answer *query* using the chunks of *pdf_text* that best match it."""
        chunks = self._split_text(pdf_text)
        relevant_chunks = self._get_relevant_chunks(chunks, query)

        prompt = f"Context from PDF:\n\n{relevant_chunks}\n\nQuestion: {query}"
        return self._as_chat_response(call_perplexity_api(prompt))

    def generate_literature_review(self, topic: str) -> Dict:
        """Build a literature review on *topic* from arXiv search results.

        Returns an ``{"error": ...}`` dict when no papers are found or when
        anything raises while building the review.
        """
        try:
            # Search arXiv for papers
            papers = self._search_arxiv(topic)
            if not papers:
                return {"error": "No papers found on the topic"}

            # Format paper information
            papers_summary = "\n\n".join(
                f"Paper: {p['title']}\nAuthors: {', '.join(p['authors'])}\nSummary: {p['summary']}"
                for p in papers
            )

            prompt = f"""Generate a comprehensive literature review on '{topic}'. Based on these papers:

{papers_summary}

Structure the review as follows:
1. Introduction and Background
2. Current Research Trends
3. Key Findings and Themes
4. Research Gaps
5. Future Directions"""

            return self._as_chat_response(call_perplexity_api(prompt))
        except Exception as e:
            return {"error": f"Literature review generation failed: {str(e)}"}

    def ai_writer(self, outline: str, references: List[str]) -> Dict:
        """Draft a paper from *outline*, passing *references* as a JSON list."""
        prompt = f"""Write a research paper following this structure:

Outline:
{outline}

References to incorporate:
{json.dumps(references)}

Instructions:
- Follow academic writing style
- Include appropriate citations
- Maintain logical flow
- Include introduction and conclusion"""

        return self._as_chat_response(call_perplexity_api(prompt))

    def refine_response(self, response: str, column: str) -> str:
        """Ask the model to tighten *response* for the CSV column *column*."""
        prompt = f"""Refine the following response to fit the '{column}' column in a research paper CSV format:

Response: {response}

Ensure the response is clear, concise, and fits the context of the column."""

        return call_perplexity_api(prompt)

    def paraphrase(self, text: str) -> Dict:
        """Paraphrase *text* in an academic tone."""
        prompt = f"""Paraphrase the following text while:
- Maintaining academic tone
- Preserving key meaning
- Improving clarity

Text: {text}"""

        return self._as_chat_response(call_perplexity_api(prompt))

    def generate_citation(self, paper_info: Dict, style: str = "APA") -> Dict:
        """Generate a citation in *style*.

        *paper_info* must provide ``title``, ``authors`` (list of str) and
        ``year``. Returns ``{"citation": <model text>}``.
        """
        prompt = f"""Generate a {style} citation for:
Title: {paper_info['title']}
Authors: {', '.join(paper_info['authors'])}
Year: {paper_info['year']}

Follow exact {style} format guidelines."""

        return {"citation": call_perplexity_api(prompt)}

    def detect_ai_content(self, text: str) -> Dict:
        """Score *text* with the Sapling AI-detection endpoint.

        Returns the decoded JSON body wrapped in the chat-response shape on
        HTTP 200, otherwise ``{"error": ...}``. Network failures are reported
        through the same error dict instead of propagating, because the UI
        only checks for the ``"error"`` key.
        """
        try:
            response = requests.post(
                "https://api.sapling.ai/api/v1/aidetect",
                json={"key": SAPLING_API_KEY, "text": text},
                timeout=30,  # never block the Streamlit run on a stalled connection
            )
        except requests.RequestException as e:
            return {"error": f"Sapling API Error: {str(e)}"}

        st.info(
            "A score from 0 to 1 will be returned, with 0 indicating the maximum confidence that the text is human-written, and 1 indicating the maximum confidence that the text is AI-generated."
        )

        if response.status_code == 200:
            return self._as_chat_response(response.json())
        return {
            "error": f"Sapling API Error: {response.status_code} - {response.text}"
        }

    def _split_text(self, text: str) -> List[str]:
        """Split *text* into overlapping chunks of roughly 1000 characters."""
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " ", ""]
        )
        return splitter.split_text(text)

    def _get_relevant_chunks(self, chunks: List[str], query: str, top_k: int = 3) -> str:
        """Return the *top_k* chunks sharing the most words with *query*.

        Relevance is the number of distinct lower-cased, whitespace-separated
        words a chunk has in common with the query. The sort is stable, so
        chunks with equal scores keep their document order rather than being
        ordered by their text.
        """
        query_words = set(query.lower().split())

        def overlap(chunk: str) -> int:
            return len(query_words.intersection(chunk.lower().split()))

        ranked = sorted(chunks, key=overlap, reverse=True)
        return "\n\n".join(ranked[:top_k])

    def _search_arxiv(self, topic: str, max_results: int = 5) -> List[Dict]:
        """Query the arXiv export API for *topic*; return parsed papers or []."""
        terms = topic.split()
        if not terms:
            return []
        try:
            # Pass the query through ``params`` so characters such as '&' or
            # '#' in the topic are percent-encoded instead of corrupting the URL.
            response = requests.get(
                "http://export.arxiv.org/api/query",
                params={
                    "search_query": "all:" + " AND ".join(terms),
                    "start": 0,
                    "max_results": max_results,
                },
                timeout=10,
            )
            response.raise_for_status()
            return self._parse_arxiv_response(response.text)
        except Exception as e:
            print(f"arXiv search failed: {str(e)}")
            return []

    def _parse_arxiv_response(self, response_text: str) -> List[Dict]:
        """Parse an arXiv Atom feed into a list of paper dicts.

        Each dict has ``id``, ``title``, ``summary``, ``authors`` and
        ``published`` (first 10 characters of the timestamp). A field missing
        from an entry becomes an empty string instead of discarding every
        paper in the feed. Unparseable XML yields an empty list.
        """
        ns = {"atom": "http://www.w3.org/2005/Atom"}
        try:
            root = ET.fromstring(response_text)
        except ET.ParseError as e:
            print(f"arXiv response parsing failed: {str(e)}")
            return []

        papers = []
        for entry in root.findall("atom:entry", ns):
            papers.append(
                {
                    "id": entry.findtext("atom:id", default="", namespaces=ns),
                    "title": entry.findtext("atom:title", default="", namespaces=ns).strip(),
                    "summary": entry.findtext("atom:summary", default="", namespaces=ns).strip(),
                    "authors": [
                        author.findtext("atom:name", default="", namespaces=ns).strip()
                        for author in entry.findall("atom:author", ns)
                    ],
                    "published": entry.findtext("atom:published", default="", namespaces=ns)[:10],
                }
            )
        return papers
|
256 |
+
|
257 |
+
|
258 |
+
def main():
    """Render the "Research Copilot" Streamlit page with its seven tool tabs.

    Tabs: Chat with PDF, Literature Review, AI Writer, Extract Data,
    Paraphraser, Citation Generator and AI Detector. The page stops early
    with a warning when no Perplexity API key is configured.
    """
    # st.set_page_config(page_title="Research Assistant", layout="wide")
    st.title("Research Copilot")

    if not PERPLEXITY_API_KEY:
        st.warning("Perplexity API key not found in environment variables.")
        return

    assistant = ResearchAssistant(PERPLEXITY_API_KEY)

    tabs = st.tabs(
        [
            "Chat with PDF",
            "Literature Review",
            "AI Writer",
            "Extract Data",
            "Paraphraser",
            "Citation Generator",
            "AI Detector",
        ]
    )

    with tabs[0]:  # Chat with PDF
        st.header("Chat with PDF")

        # The uploader's widget key carries a counter: bumping it on
        # "Clear PDF" creates a fresh, empty uploader. Popping the cached
        # text alone left the file selected, so it was re-processed at once.
        uploader_nonce = st.session_state.get("pdf_chat_nonce", 0)

        # File uploader with clear button
        col1, col2 = st.columns([3, 1])
        with col1:
            uploaded_file = st.file_uploader(
                "Upload PDF", type="pdf", key=f"pdf_chat_{uploader_nonce}"
            )
        with col2:
            if st.button("Clear PDF"):
                st.session_state.pop("pdf_text", None)
                st.session_state.pop("pdf_source", None)
                st.session_state["pdf_chat_nonce"] = uploader_nonce + 1
                st.rerun()

        if uploaded_file:
            # Re-extract whenever a different file is uploaded; previously the
            # first PDF's text was reused for every later upload.
            source = (uploaded_file.name, uploaded_file.size)
            if (
                "pdf_text" not in st.session_state
                or st.session_state.get("pdf_source") != source
            ):
                with st.spinner("Processing PDF..."):
                    st.session_state.pdf_text = extract_text_from_pdf(uploaded_file)
                    st.session_state.pdf_source = source
                st.success("PDF processed successfully!")

            query = st.text_input("Ask a question about the PDF")
            if query:
                with st.spinner("Analyzing..."):
                    response = assistant.chat_with_pdf(st.session_state.pdf_text, query)
                    if "error" in response:
                        st.error(response["error"])
                    else:
                        st.write(response["choices"][0]["message"]["content"])

    with tabs[1]:  # Literature Review
        st.header("Literature Review")
        topic = st.text_input("Enter research topic")
        if st.button("Generate Review") and topic:
            with st.spinner("Generating literature review..."):
                review = assistant.generate_literature_review(topic)
                if "error" in review:
                    st.error(review["error"])
                else:
                    st.write(review["choices"][0]["message"]["content"])

    with tabs[2]:  # AI Writer
        st.header("AI Writer")
        outline = st.text_area("Enter paper outline")
        references = st.text_area("Enter references (one per line)")
        if st.button("Generate Paper") and outline:
            with st.spinner("Writing paper..."):
                # Drop blank lines so empty "references" are not sent to the model.
                reference_list = [
                    line.strip() for line in references.splitlines() if line.strip()
                ]
                paper = assistant.ai_writer(outline, reference_list)
                if "error" in paper:
                    st.error(paper["error"])
                else:
                    st.write(paper["choices"][0]["message"]["content"])

    with tabs[3]:  # Extract Data
        st.header("Extract Data")

        uploaded_files = st.file_uploader(
            "Upload multiple PDF files", type="pdf", accept_multiple_files=True
        )

        if uploaded_files:
            if st.button("Process Papers"):
                # Initialize progress bar
                progress_bar = st.progress(0)
                status_text = st.empty()

                # One result row (dict) per uploaded paper
                results = []

                # Define categories
                categories = [
                    "Summarized Abstract",
                    "Results",
                    "Summarized Introduction",
                    "Methods Used",
                    "Literature Survey",
                    "Limitations",
                    "Contributions",
                    "Practical Implications",
                    "Objectives",
                    "Findings",
                    "Future Research",
                    "Dependent Variables",
                    "Independent Variables",
                    "Dataset",
                    "Problem Statement",
                    "Challenges",
                    "Applications",
                ]
                total_steps = len(uploaded_files) * len(categories)

                # Process each file
                for i, file in enumerate(uploaded_files):
                    status_text.text(f"Processing {file.name}...")

                    # Extract text from PDF
                    text = extract_text_from_pdf(file)

                    # Initialize paper results
                    paper_results = {"Filename": file.name}

                    # Analyze each category
                    for j, category in enumerate(categories):
                        status_text.text(f"Processing {file.name} - {category}")
                        paper_results[category] = analyze_paper(text, category)

                        # Update progress
                        progress_bar.progress((i * len(categories) + j + 1) / total_steps)

                        # Add small delay to avoid API rate limits
                        time.sleep(1)

                    results.append(paper_results)

                # Create DataFrame
                df = pd.DataFrame(results)

                # Convert DataFrame to CSV
                csv = df.to_csv(index=False)

                # Create download button
                st.download_button(
                    label="Download Results as CSV",
                    data=csv,
                    file_name="research_papers_analysis.csv",
                    mime="text/csv",
                )

                # Display results in the app
                st.subheader("Analysis Results")
                st.dataframe(df)

                status_text.text("Processing complete!")
                progress_bar.progress(1.0)

    with tabs[4]:  # Paraphraser
        st.header("Paraphraser")
        text = st.text_area("Enter text to paraphrase")
        if st.button("Paraphrase") and text:
            with st.spinner("Paraphrasing..."):
                result = assistant.paraphrase(text)
                if "error" in result:
                    st.error(result["error"])
                else:
                    st.write(result["choices"][0]["message"]["content"])

    with tabs[5]:  # Citation Generator
        st.header("Citation Generator")
        col1, col2 = st.columns(2)
        with col1:
            title = st.text_input("Paper Title")
            authors = st.text_input("Authors (comma-separated)")
        with col2:
            year = st.text_input("Year")
            style = st.selectbox("Citation Style", ["APA", "MLA", "Chicago"])

        if st.button("Generate Citation") and title:
            with st.spinner("Generating citation..."):
                citation = assistant.generate_citation(
                    {
                        "title": title,
                        # Skip empty names left by a blank field or stray commas.
                        "authors": [a.strip() for a in authors.split(",") if a.strip()],
                        "year": year,
                    },
                    style,
                )
                if "error" in citation:
                    st.error(citation["error"])
                else:
                    st.code(citation["citation"], language="text")

    with tabs[6]:  # AI Detector
        st.header("AI Detector")
        text = st.text_area("Enter text to analyze")
        if st.button("Detect AI Content") and text:
            with st.spinner("Analyzing..."):
                result = assistant.detect_ai_content(text)
                if "error" in result:
                    st.error(result["error"])
                else:
                    st.write(result["choices"][0]["message"]["content"])
|
463 |
+
|
464 |
+
|
465 |
+
if __name__ == "__main__":
|
466 |
+
main()
|
session_page.py
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
subjective_test_evaluation.py
ADDED
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from datetime import datetime
|
3 |
+
from pymongo import MongoClient
|
4 |
+
import os
|
5 |
+
from openai import OpenAI
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
from bson import ObjectId
|
8 |
+
|
9 |
+
load_dotenv()
|
10 |
+
|
11 |
+
# MongoDB setup
|
12 |
+
MONGO_URI = os.getenv('MONGO_URI')
|
13 |
+
client = MongoClient(MONGO_URI)
|
14 |
+
db = client["novascholar_db"]
|
15 |
+
subjective_tests_collection = db["subjective_tests"]
|
16 |
+
subjective_test_evaluation_collection = db["subjective_test_evaluation"]
|
17 |
+
resources_collection = db["resources"]
|
18 |
+
students_collection = db["students"]
|
19 |
+
|
20 |
+
def evaluate_subjective_answers(session_id, student_id, test_id):
    """Generate and store an LLM evaluation of one student's subjective test.

    Args:
        session_id: Session whose resources supply the pre-class material.
        student_id: Student identifier; compared as ``str(student_id)``
            against each submission's ``student_id``.
        test_id: ``_id`` of the document in the subjective tests collection.

    Returns:
        The evaluation document that was inserted into the
        ``subjective_test_evaluation`` collection, or None when the test or
        the student's submission is missing or any step raised (the error is
        printed).
    """
    try:
        # Fetch test and student submission
        test = subjective_tests_collection.find_one({"_id": test_id})
        if not test:
            return None

        # Find student's submission
        student_key = str(student_id)
        submission = next(
            (sub for sub in test.get('submissions', [])
             if sub['student_id'] == student_key),
            None
        )
        if not submission:
            return None

        # Concatenate the text of all pre-class materials for the session.
        pre_class_content = "".join(
            material['text_content'] + "\n"
            for material in resources_collection.find({"session_id": session_id})
            if 'text_content' in material
        )
        # Truncate to avoid token limits. This note used to sit inside the
        # f-string below, so it was sent to the model as part of the prompt.
        pre_class_excerpt = pre_class_content[:1000]

        # Default rubric (can be customized later)
        default_rubric = """
1. Content Understanding (1-4):
- Demonstrates comprehensive understanding of core concepts
- Accurately applies relevant theories and principles
- Provides specific examples and evidence

2. Critical Analysis (1-4):
- Shows depth of analysis
- Makes meaningful connections
- Demonstrates original thinking

3. Organization & Clarity (1-4):
- Clear structure and flow
- Well-developed arguments
- Effective use of examples
"""

        # Named distinctly so it does not shadow the module-level Mongo client.
        openai_client = OpenAI(api_key=os.getenv('OPENAI_KEY'))

        evaluations = []
        # zip() pairs questions with answers positionally and stops at the shorter list.
        for question_number, (question, answer) in enumerate(
            zip(test['questions'], submission['answers']), start=1
        ):
            analysis_content = f"""
Question: {question['question']}
Student Answer: {answer}
"""

            prompt_template = f"""As an educational assessor, evaluate this student's answer based on the provided rubric criteria and pre-class materials. Follow these assessment guidelines:

1. Evaluation Process:
- Use each rubric criterion (scored 1-4) for internal assessment
- Compare response with pre-class materials
- Check alignment with all rubric requirements
- Calculate final score: sum of criteria scores converted to 10-point scale

Pre-class Materials:
{pre_class_excerpt}

Rubric Criteria:
{default_rubric}

Question and Answer:
{analysis_content}

Provide your assessment in the following format:

**Score and Evidence**
- Score: [X]/10
- Evidence for deduction: [One-line reference to most significant gap or inaccuracy]

**Key Areas for Improvement**
- [Concise improvement point 1]
- [Concise improvement point 2]
- [Concise improvement point 3]
"""

            # Generate evaluation using OpenAI
            response = openai_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": prompt_template}],
                max_tokens=500,
                temperature=0.4
            )

            evaluations.append({
                "question_number": question_number,
                "question": question['question'],
                "answer": answer,
                "evaluation": response.choices[0].message.content
            })

        # Store evaluation in MongoDB
        evaluation_doc = {
            "test_id": test_id,
            "student_id": student_id,
            "session_id": session_id,
            "evaluations": evaluations,
            "evaluated_at": datetime.utcnow()
        }

        subjective_test_evaluation_collection.insert_one(evaluation_doc)
        return evaluation_doc

    except Exception as e:
        print(f"Error in evaluate_subjective_answers: {str(e)}")
        return None
|
133 |
+
|
134 |
+
def display_evaluation_to_faculty(session_id, student_id, course_id):
    """
    Display interface for faculty to generate and view evaluations.

    Renders a Streamlit page on which faculty pick one of the session's
    active subjective tests, pick one student submission, read the submitted
    answers and then either view the stored evaluation or trigger a
    (re)generation through ``evaluate_subjective_answers``.

    Args:
        session_id: Session identifier; always queried as ``str(session_id)``.
        student_id: Not read on entry -- the student is taken from the
            submission chosen in the dropdown. Kept so existing callers
            keep working.
        course_id: Not used by this function; kept for the same reason.

    Returns:
        None. Everything is written to the Streamlit page. Any exception is
        reported with ``st.error`` and printed instead of propagating.
    """
    st.header("Evaluate Subjective Tests")

    try:
        # Fetch available tests
        tests = list(subjective_tests_collection.find({
            "session_id": str(session_id),
            "status": "active"
        }))

        if not tests:
            st.info("No subjective tests found for this session.")
            return

        # Select test
        test_options = {
            f"{test['title']} (Created: {test['created_at'].strftime('%Y-%m-%d %H:%M')})" if 'created_at' in test else test['title']: test['_id']
            for test in tests
        }
        selected_test = st.selectbox(
            "Select Test to Evaluate",
            options=list(test_options.keys())
        )
        if not selected_test:
            return

        test_id = test_options[selected_test]
        test = subjective_tests_collection.find_one({"_id": test_id})
        if not test:
            return

        submissions = test.get('submissions', [])
        if not submissions:
            st.warning("No submissions found for this test.")
            return

        # Map each label straight to its submission so the submission that is
        # displayed is exactly the one named in the dropdown, even when the
        # same student appears more than once in the list.
        submission_options = {
            _submission_label(sub): sub
            for sub in submissions
        }
        selected_student = st.selectbox(
            "Select Student Submission",
            options=list(submission_options.keys())
        )
        if not selected_student:
            return

        submission = submission_options[selected_student]
        selected_student_id = submission['student_id']

        st.markdown(f"**Submission Date:** {submission.get('submitted_at', 'No submission date')}")
        st.markdown("---")

        # Display questions and answers
        st.subheader("Submission Details")
        for i, (question, answer) in enumerate(zip(test['questions'], submission['answers'])):
            st.markdown(f"**Question {i+1}:** {question['question']}")
            st.markdown(f"**Answer:** {answer}")
            st.markdown("---")

        # Check for existing evaluation. Every (re)generation is stored as a
        # new document, so ask for the most recent one; without the sort a
        # regenerated evaluation would never replace the one on screen.
        existing_eval = subjective_test_evaluation_collection.find_one(
            {
                "test_id": test_id,
                "student_id": selected_student_id,
                "session_id": str(session_id)
            },
            sort=[("evaluated_at", -1)]
        )

        if existing_eval:
            st.subheader("Evaluation Results")
            for eval_item in existing_eval['evaluations']:
                st.markdown(f"### Evaluation for Question {eval_item['question_number']}")
                st.markdown(eval_item['evaluation'])
                st.markdown("---")

            st.success("✓ Evaluation completed")
            if st.button("Regenerate Evaluation", key=f"regenerate_{selected_student_id}_{test_id}"):
                _evaluate_then_rerun(
                    session_id,
                    selected_student_id,
                    test_id,
                    "Regenerating evaluation...",
                    "Evaluation regenerated successfully!",
                    "Error regenerating evaluation."
                )
        else:
            st.subheader("Generate Evaluation")
            if st.button("Generate Evaluation", key=f"evaluate_{selected_student_id}_{test_id}"):
                _evaluate_then_rerun(
                    session_id,
                    selected_student_id,
                    test_id,
                    "Generating evaluation...",
                    "Evaluation generated successfully!",
                    "Error generating evaluation."
                )

    except Exception as e:
        st.error(f"An error occurred while loading the evaluations: {str(e)}")
        print(f"Error in display_evaluation_to_faculty: {str(e)}")


def _submission_label(submission):
    """Build the dropdown label for one submission.

    Falls back to placeholder text when the student record cannot be found,
    has no ``full_name``, or the submission carries no usable timestamp, so
    one incomplete record does not break the whole page.
    """
    raw_student_id = submission['student_id']
    student = students_collection.find_one({'_id': ObjectId(raw_student_id)})
    full_name = student.get('full_name') if student else None
    if not full_name:
        full_name = f"Unknown student ({raw_student_id})"

    submitted_at = submission.get('submitted_at')
    if hasattr(submitted_at, 'strftime'):
        submitted_text = submitted_at.strftime('%Y-%m-%d %H:%M')
    else:
        submitted_text = "unknown"

    return f"{full_name} (Submitted: {submitted_text})"


def _evaluate_then_rerun(session_id, student_id, test_id, spinner_text, success_text, failure_text):
    """Run the evaluation and, on success, rerun the page to show the stored result."""
    with st.spinner(spinner_text):
        evaluation = evaluate_subjective_answers(
            str(session_id),
            student_id,
            test_id
        )
        if evaluation:
            st.success(success_text)
            # The rerun re-enters display_evaluation_to_faculty, which then
            # loads and renders the evaluation that was just stored, so
            # nothing is rendered here first.
            st.rerun()
        else:
            st.error(failure_text)
|
ui.py
CHANGED
@@ -1,111 +1,111 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
from streamlit_option_menu import option_menu
|
3 |
-
|
4 |
-
|
5 |
-
# Page setup: must run before any other Streamlit call renders output.
st.set_page_config(page_title="Enhanced Navigation Demo", layout="wide")

# Horizontal top navigation; the styling lives in a named dict so the
# option_menu call itself stays short.
_top_menu_styles = {
    "container": {"padding": "0!important", "background-color": "#fafafa"},
    "icon": {"color": "orange", "font-size": "25px"},
    "nav-link": {
        "font-size": "15px",
        "text-align": "center",
        "margin": "0px",
        "--hover-color": "#eee",
    },
    "nav-link-selected": {"background-color": "#0083B8"},
}
selected = option_menu(
    menu_title=None,
    options=["Home", "Documentation", "Examples", "Community", "About"],
    icons=["house", "book", "code", "people", "info-circle"],
    menu_icon="cast",
    default_index=0,
    orientation="horizontal",
    styles=_top_menu_styles,
)

# Sidebar navigation.
# NOTE(review): indentation was lost in the source view; the Reports expander
# is assumed to live inside the sidebar -- confirm against the real file.
with st.sidebar:
    st.header("Navigation Menu")

    selected_side = option_menu(
        menu_title="Go to",
        options=["Dashboard", "Analytics", "Reports", "Settings"],
        icons=["speedometer2", "graph-up", "file-text", "gear"],
        menu_icon="list",
        default_index=0,
    )

    # The report shortcuts only appear while "Reports" is selected.
    if selected_side == "Reports":
        with st.expander("Reports", expanded=True):
            for _report_name in ("Weekly Report", "Monthly Report", "Annual Report"):
                st.button(_report_name)

# Pages of the top navigation that only show a title and one line of text.
_placeholder_pages = {
    "Documentation": "Documentation content goes here.",
    "Examples": "Example content goes here.",
    "Community": "Community content goes here.",
    "About": "About content goes here.",
}

# Main content area, driven by the top navigation.
if selected == "Home":
    st.title("Welcome to Home")
    st.write("This is the home page content.")

    # Dashboard metrics, one per column.
    st.header("Dashboard")
    _dashboard_metrics = [
        ("Sales", "$12,345", "+2.5%"),
        ("Users", "1,234", "-8%"),
        ("Conversion", "3.2%", "+1.2%"),
    ]
    for _column, (_label, _value, _delta) in zip(st.columns(3), _dashboard_metrics):
        with _column:
            st.metric(_label, _value, _delta)
elif selected in _placeholder_pages:
    st.title(selected)
    st.write(_placeholder_pages[selected])

# Extra content driven by the sidebar selection.
if selected_side == "Analytics":
    st.header("Analytics")
    st.line_chart({"data": [1, 5, 2, 6, 2, 1]})
elif selected_side == "Settings":
    st.header("Settings")
    for _toggle_label in ("Dark Mode", "Notifications"):
        st.toggle(_toggle_label)
    st.slider("Volume", 0, 100, 50)

# Fixed footer injected as raw HTML/CSS.
st.markdown(
"""
<style>
.footer {
position: fixed;
left: 0;
bottom: 0;
width: 100%;
background-color: #0E1117;
color: white;
text-align: center;
padding: 10px;
font-size: 14px;
}
</style>
<div class='footer'>
© 2024 Your App Name • Privacy Policy • Terms of Service
</div>
""",
    unsafe_allow_html=True
)
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from streamlit_option_menu import option_menu
|
3 |
+
|
4 |
+
|
5 |
+
# Page setup: must run before any other Streamlit call renders output.
st.set_page_config(page_title="Enhanced Navigation Demo", layout="wide")

# Horizontal top navigation; the styling lives in a named dict so the
# option_menu call itself stays short.
_top_menu_styles = {
    "container": {"padding": "0!important", "background-color": "#fafafa"},
    "icon": {"color": "orange", "font-size": "25px"},
    "nav-link": {
        "font-size": "15px",
        "text-align": "center",
        "margin": "0px",
        "--hover-color": "#eee",
    },
    "nav-link-selected": {"background-color": "#0083B8"},
}
selected = option_menu(
    menu_title=None,
    options=["Home", "Documentation", "Examples", "Community", "About"],
    icons=["house", "book", "code", "people", "info-circle"],
    menu_icon="cast",
    default_index=0,
    orientation="horizontal",
    styles=_top_menu_styles,
)

# Sidebar navigation.
# NOTE(review): indentation was lost in the source view; the Reports expander
# is assumed to live inside the sidebar -- confirm against the real file.
with st.sidebar:
    st.header("Navigation Menu")

    selected_side = option_menu(
        menu_title="Go to",
        options=["Dashboard", "Analytics", "Reports", "Settings"],
        icons=["speedometer2", "graph-up", "file-text", "gear"],
        menu_icon="list",
        default_index=0,
    )

    # The report shortcuts only appear while "Reports" is selected.
    if selected_side == "Reports":
        with st.expander("Reports", expanded=True):
            for _report_name in ("Weekly Report", "Monthly Report", "Annual Report"):
                st.button(_report_name)

# Pages of the top navigation that only show a title and one line of text.
_placeholder_pages = {
    "Documentation": "Documentation content goes here.",
    "Examples": "Example content goes here.",
    "Community": "Community content goes here.",
    "About": "About content goes here.",
}

# Main content area, driven by the top navigation.
if selected == "Home":
    st.title("Welcome to Home")
    st.write("This is the home page content.")

    # Dashboard metrics, one per column.
    st.header("Dashboard")
    _dashboard_metrics = [
        ("Sales", "$12,345", "+2.5%"),
        ("Users", "1,234", "-8%"),
        ("Conversion", "3.2%", "+1.2%"),
    ]
    for _column, (_label, _value, _delta) in zip(st.columns(3), _dashboard_metrics):
        with _column:
            st.metric(_label, _value, _delta)
elif selected in _placeholder_pages:
    st.title(selected)
    st.write(_placeholder_pages[selected])

# Extra content driven by the sidebar selection.
if selected_side == "Analytics":
    st.header("Analytics")
    st.line_chart({"data": [1, 5, 2, 6, 2, 1]})
elif selected_side == "Settings":
    st.header("Settings")
    for _toggle_label in ("Dark Mode", "Notifications"):
        st.toggle(_toggle_label)
    st.slider("Volume", 0, 100, 50)

# Fixed footer injected as raw HTML/CSS.
st.markdown(
"""
<style>
.footer {
position: fixed;
left: 0;
bottom: 0;
width: 100%;
background-color: #0E1117;
color: white;
text-align: center;
padding: 10px;
font-size: 14px;
}
</style>
<div class='footer'>
© 2024 Your App Name • Privacy Policy • Terms of Service
</div>
""",
    unsafe_allow_html=True
)
|