svaze committed on
Commit
b572b15
·
1 Parent(s): 6d34e3c

Goals wala integrate

Browse files
Files changed (3) hide show
  1. db.py +59 -1
  2. goals2.py +175 -0
  3. main.py +116 -7
db.py CHANGED
@@ -80,6 +80,64 @@ def insert_sample_research_assistants():
80
 
81
  ###########
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  # Define the course schema
85
  course_schema = {
@@ -635,4 +693,4 @@ research_assistants_collection.create_index("email", unique=True)
635
  # print(f"Error inserting sample research assistants: {e}")
636
 
637
  # if __name__ == "__main__":
638
- # insert_sample_research_assistants()
 
80
 
81
  ###########
82
 
83
+ ###############
84
+ # Add after research assistant schema
85
+
86
# Analyst Schema
def _analyst_string_field(description):
    """Return the schema fragment for a string-typed field with this description."""
    return {"bsonType": "string", "description": description}


# Shape of a single entry inside an analyst's "courses_analyzed" array.
_analyst_course_entry = {
    "bsonType": "object",
    "required": ["course_id"],
    "properties": {"course_id": _analyst_string_field("ID of the course")},
}

# Document shape for the "analysts" collection; all four fields are listed as
# required.
analyst_schema = {
    "bsonType": "object",
    "required": ["full_name", "password", "email", "courses_analyzed"],
    "properties": {
        "full_name": _analyst_string_field("Full name of the analyst"),
        "password": _analyst_string_field("Hashed password of the analyst"),
        "email": _analyst_string_field("Email address of the analyst"),
        "courses_analyzed": {
            "bsonType": "array",
            "description": "List of courses the analyst is analyzing",
            "items": _analyst_course_entry,
        },
    },
}

# Create analysts collection
analysts_collection = db["analysts"]

# Create indexes for analysts: both the name and the email must be unique.
# NOTE(review): a unique, non-sparse index stores a missing field as null, so
# only one analyst document without an "email" can exist. Confirm that every
# code path inserting analysts supplies an email, or make this index sparse.
analysts_collection.create_index("full_name", unique=True)
analysts_collection.create_index("email", unique=True)
120
+
121
+
122
def insert_sample_analysts():
    """Insert one demo analyst ("jane") into the analysts collection.

    Prints a success message when the insert goes through. Any exception raised
    by the insert is reported on stdout instead of being propagated.
    """
    jane = {
        "full_name": "jane",
        "password": generate_password_hash("jane"),
        "email": "[email protected]",
        "courses_analyzed": [
            {"course_id": course_code} for course_code in ("CS101", "CS102")
        ],
    }
    seed_documents = [jane]

    try:
        analysts_collection.insert_many(seed_documents)
        print("Sample analysts inserted successfully!")
    except Exception as error:
        print(f"Error inserting sample analysts: {error}")
137
+
138
+
139
+ ###############
140
+
141
 
142
  # Define the course schema
143
  course_schema = {
 
693
  # print(f"Error inserting sample research assistants: {e}")
694
 
695
  # if __name__ == "__main__":
696
+ # insert_sample_analysts()
goals2.py CHANGED
@@ -309,6 +309,181 @@ def display_analysis_results(analysis: Dict):
309
  st.metric("Relevance Score", f"{score}%")
310
 
311
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  def main():
313
  st.title("Multi-Goal Document Analysis")
314
 
 
309
  st.metric("Relevance Score", f"{score}%")
310
 
311
 
312
def _render_similarity_matrices(vectorizer, doc_vectors, goal_vectors):
    """Show the document-document and goal-document similarity tables."""
    st.markdown("### Document Similarity Matrix")
    files = list(doc_vectors.keys())
    similarity_matrix = [
        [
            vectorizer.calculate_similarity(doc_vectors[file1], doc_vectors[file2])
            for file2 in files
        ]
        for file1 in files
    ]
    df = pd.DataFrame(similarity_matrix, columns=files, index=files)
    st.dataframe(df.style.background_gradient(cmap="RdYlGn"))

    # Add goal-document similarity matrix
    st.markdown("### Goal-Document Similarity Matrix")
    goal_names = list(goal_vectors.keys())
    goal_doc_matrix = [
        [
            vectorizer.calculate_similarity(doc_vectors[file], goal_vectors[goal])
            for goal in goal_names
        ]
        for file in files
    ]
    df_goals = pd.DataFrame(goal_doc_matrix, columns=goal_names, index=files)
    st.dataframe(df_goals.style.background_gradient(cmap="RdYlGn"))


def _run_goal_analysis(goals, uploaded_files):
    """Embed the goals and uploaded documents, then render scores and analysis."""
    analyzer = GoalAnalyzer()
    vectorizer = DocumentVectorizer()

    # Store vectors
    doc_vectors = {}
    goal_vectors = {}

    # Process goals first
    with st.spinner("Processing goals..."):
        for i, goal in enumerate(goals):
            vector = vectorizer.get_embedding(goal)
            if vector:
                goal_vectors[f"Goal {i+1}"] = vector
                vectorizer.store_vector(f"Goal {i+1}", vector, goal, goal)

    # Process documents
    with st.spinner("Processing documents..."):
        for uploaded in uploaded_files:
            st.markdown(f"### Analysis for {uploaded.name}")

            if vectorizer.vector_exists(uploaded.name):
                st.info(f"Vector already exists for {uploaded.name}")
                existing_doc = vectorizer.vectors_collection.find_one(
                    {"name": uploaded.name}
                )
                vector = existing_doc["vector"]
                # The analysis below needs the document text even when the
                # vector is reused; without this, `text` would be unbound (or
                # left over from the previous file).
                # NOTE(review): presumably store_vector persists the text under
                # "text" (search results read doc["text"]); confirm. Falls back
                # to re-extracting from the upload when it is absent.
                text = existing_doc.get("text") or analyzer.extract_text_from_file(
                    uploaded
                )
            else:
                text = analyzer.extract_text_from_file(uploaded)
                if not text:
                    st.warning(f"Could not extract text from {uploaded.name}")
                    continue

                vector = vectorizer.get_embedding(text)
                if vector:
                    vectorizer.store_vector(uploaded.name, vector, text)

            if not vector:
                # Without an embedding there is nothing to score; skip the file
                # instead of failing on a missing doc_vectors entry.
                st.warning(f"Could not create an embedding for {uploaded.name}")
                continue
            doc_vectors[uploaded.name] = vector

            # Display goal similarities
            st.subheader("Goal Relevance Scores")
            col1, col2 = st.columns([1, 2])

            with col1:
                for goal_name, goal_vector in goal_vectors.items():
                    similarity = (
                        vectorizer.calculate_similarity(vector, goal_vector) * 100
                    )
                    st.metric(f"{goal_name}", f"{similarity:.1f}%")

            with col2:
                if text:
                    # Get analysis for all goals combined
                    analysis = asyncio.run(
                        analyzer.get_perplexity_analysis(text, " | ".join(goals))
                    )
                    display_analysis_results(analysis)
                else:
                    st.warning(f"Could not extract text from {uploaded.name}")

            st.divider()

    # The matrices are only informative with at least two documents.
    if len(doc_vectors) > 1:
        _render_similarity_matrices(vectorizer, doc_vectors, goal_vectors)


def _run_similarity_search(search_text, search_limit):
    """Embed the query text and list the most similar stored documents."""
    vectorizer = DocumentVectorizer()
    with st.spinner("Searching similar documents..."):
        query_vector = vectorizer.get_embedding(search_text)
        if not query_vector:
            st.error("Could not process search query")
            return
        similar_docs = vectorizer.vector_search(query_vector, search_limit)

    if not similar_docs:
        st.info("No similar documents found")
        return

    st.markdown("### Similar Documents Found")

    # Create DataFrame with numeric similarities
    df = pd.DataFrame(similar_docs)

    # Apply gradient to numeric column
    styled_df = df[["name", "similarity"]].style.background_gradient(
        cmap="RdYlGn", subset=["similarity"]
    )

    # Format display after styling
    styled_df = styled_df.format({"similarity": "{:.1%}"})

    st.dataframe(styled_df)

    # Show document contents (first 20 characters as a preview)
    for doc in similar_docs:
        with st.expander(
            f"📄 {doc['name']} (Similarity: {doc['similarity_display']})"
        ):
            st.text(
                doc["text"][:20] + "..."
                if len(doc["text"]) > 20
                else doc["text"]
            )


def display_analyst_dashboard():
    """Render the analyst dashboard: goal/document analysis and similarity search.

    The sidebar collects the inputs (a list of goals plus uploaded documents, or
    a free-text similarity query) and offers a logout button. Depending on which
    action button was pressed, the main area shows either per-document goal
    relevance scores with a combined analysis, or the stored documents most
    similar to the query text.
    """
    st.title("Multi-Goal Document Analysis")

    with st.sidebar:
        st.markdown("### Input Section")
        tab1, tab2 = st.tabs(["Document Analysis", "Similarity Search"])

        with tab1:
            # Multiple goals input; empty text areas are ignored.
            num_goals = st.number_input("Number of goals:", min_value=1, value=1)
            goals = []
            for i in range(num_goals):
                goal = st.text_area(f"Goal {i+1}:", key=f"goal_{i}", height=100)
                if goal:
                    goals.append(goal)

            uploaded_files = st.file_uploader(
                "Upload documents",
                accept_multiple_files=True,
                type=["txt", "pdf", "docx"],
            )
            # The button is only offered once there is something to analyze.
            analyze_button = (
                st.button("Analyze Documents") if goals and uploaded_files else None
            )

        with tab2:
            # Keep existing similarity search tab
            search_text = st.text_area("Enter text to find similar documents:")
            search_limit = st.slider("Number of results", 1, 10, 5)
            search_button = st.button("Search Similar") if search_text else None

        if st.button("Logout", use_container_width=True):
            # Snapshot the keys first so deletion never mutates the mapping
            # that is being iterated.
            for key in list(st.session_state.keys()):
                del st.session_state[key]
            st.rerun()

    if analyze_button:
        _run_goal_analysis(goals, uploaded_files)
    # Keep existing similarity search functionality
    elif search_button:
        _run_similarity_search(search_text, search_limit)
485
+
486
+
487
  def main():
488
  st.title("Multi-Goal Document Analysis")
489
 
main.py CHANGED
@@ -8,6 +8,7 @@ from db import (
8
  faculty_collection,
9
  students_collection,
10
  research_assistants_collection,
 
11
  )
12
  from werkzeug.security import generate_password_hash, check_password_hash
13
  import os
@@ -83,6 +84,8 @@ def login_user(username, password, user_type):
83
  user = faculty_collection.find_one({"full_name": username})
84
  elif user_type == "research_assistant":
85
  user = research_assistants_collection.find_one({"full_name": username})
 
 
86
 
87
  if user and check_password_hash(user["password"], password):
88
  st.session_state.user_id = user["_id"]
@@ -99,7 +102,7 @@ def login_form():
99
 
100
  with st.form("login_form"):
101
  user_type = st.selectbox(
102
- "Select User Type", ["student", "faculty", "research_assistant"]
103
  )
104
  username = st.text_input("Username")
105
  password = st.text_input("Password", type="password")
@@ -309,6 +312,97 @@ def get_new_course_id():
309
  return new_course_id
310
 
311
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  def register_page():
313
  st.title("Register")
314
  if "user_type" not in st.session_state:
@@ -316,14 +410,12 @@ def register_page():
316
 
317
  # Select user type
318
  st.session_state.user_type = st.selectbox(
319
- "Select User Type", ["student", "faculty", "research_assistant"]
320
  )
321
  user_type = st.session_state.user_type
322
  print(user_type)
323
 
324
  with st.form("register_form"):
325
- # user_type = st.selectbox("Select User Type", ["student", "faculty", "research_assistant"])
326
- # print(user_type)
327
  full_name = st.text_input("Full Name")
328
  password = st.text_input("Password", type="password")
329
  confirm_password = st.text_input("Confirm Password", type="password")
@@ -385,6 +477,17 @@ def register_page():
385
  }
386
  )
387
  st.success("Research Assistant registered successfully!")
 
 
 
 
 
 
 
 
 
 
 
388
  else:
389
  st.error("Passwords do not match")
390
 
@@ -452,11 +555,14 @@ def create_course_form(faculty_name, faculty_id):
452
 
453
  from research_assistant_dashboard import display_research_assistant_dashboard
454
 
 
 
455
 
456
  def main_dashboard():
457
  if st.session_state.user_type == "research_assistant":
458
  display_research_assistant_dashboard()
459
-
 
460
  else:
461
  selected_course_id = None
462
  create_session = False
@@ -473,8 +579,10 @@ def main_dashboard():
473
  )
474
 
475
  if st.session_state.user_type == "faculty":
476
- if st.button("Create New Course", key="create_course", use_container_width=True):
477
- st.session_state.show_create_course_form = True
 
 
478
 
479
  if not enrolled_courses:
480
  st.warning("No courses found")
@@ -538,6 +646,7 @@ def main_dashboard():
538
  # if create_session:
539
  # create_session_form(selected_course_id)
540
 
 
541
  def main():
542
  st.set_page_config(page_title="NOVAScholar", page_icon="📚", layout="wide")
543
  init_session_state()
 
8
  faculty_collection,
9
  students_collection,
10
  research_assistants_collection,
11
+ analysts_collection,
12
  )
13
  from werkzeug.security import generate_password_hash, check_password_hash
14
  import os
 
84
  user = faculty_collection.find_one({"full_name": username})
85
  elif user_type == "research_assistant":
86
  user = research_assistants_collection.find_one({"full_name": username})
87
+ elif user_type == "analyst":
88
+ user = analysts_collection.find_one({"full_name": username})
89
 
90
  if user and check_password_hash(user["password"], password):
91
  st.session_state.user_id = user["_id"]
 
102
 
103
  with st.form("login_form"):
104
  user_type = st.selectbox(
105
+ "Select User Type", ["student", "faculty", "research_assistant", "analyst"]
106
  )
107
  username = st.text_input("Username")
108
  password = st.text_input("Password", type="password")
 
312
  return new_course_id
313
 
314
 
315
+ # def register_page():
316
+ # st.title("Register")
317
+ # if "user_type" not in st.session_state:
318
+ # st.session_state.user_type = "student"
319
+
320
+ # # Select user type
321
+ # st.session_state.user_type = st.selectbox(
322
+ # "Select User Type", ["student", "faculty", "research_assistant"]
323
+ # )
324
+ # user_type = st.session_state.user_type
325
+ # print(user_type)
326
+
327
+ # with st.form("register_form"):
328
+ # # user_type = st.selectbox("Select User Type", ["student", "faculty", "research_assistant"])
329
+ # # print(user_type)
330
+ # full_name = st.text_input("Full Name")
331
+ # password = st.text_input("Password", type="password")
332
+ # confirm_password = st.text_input("Confirm Password", type="password")
333
+
334
+ # if user_type == "student":
335
+ # # Fetch courses for students to select from
336
+ # courses = list(courses_collection2.find({}, {"course_id": 1, "title": 1}))
337
+ # course_options = [
338
+ # f"{course['title']} ({course['course_id']})" for course in courses
339
+ # ]
340
+ # selected_courses = st.multiselect("Available Courses", course_options)
341
+
342
+ # submit = st.form_submit_button("Register")
343
+
344
+ # if submit:
345
+ # if password == confirm_password:
346
+ # hashed_password = generate_password_hash(password)
347
+ # if user_type == "student":
348
+ # new_student_id = get_new_student_id()
349
+ # enrolled_courses = [
350
+ # {
351
+ # "course_id": course.split("(")[-1][:-1],
352
+ # "title": course.split(" (")[0],
353
+ # }
354
+ # for course in selected_courses
355
+ # ]
356
+ # students_collection.insert_one(
357
+ # {
358
+ # "SID": new_student_id,
359
+ # "full_name": full_name,
360
+ # "password": hashed_password,
361
+ # "enrolled_courses": enrolled_courses,
362
+ # "created_at": datetime.utcnow(),
363
+ # }
364
+ # )
365
+ # st.success(
366
+ # f"Student registered successfully with ID: {new_student_id}"
367
+ # )
368
+ # elif user_type == "faculty":
369
+ # new_faculty_id = get_new_faculty_id()
370
+ # faculty_collection.insert_one(
371
+ # {
372
+ # "TID": new_faculty_id,
373
+ # "full_name": full_name,
374
+ # "password": hashed_password,
375
+ # "courses_taught": [],
376
+ # "created_at": datetime.utcnow(),
377
+ # }
378
+ # )
379
+ # st.success(
380
+ # f"Faculty registered successfully with ID: {new_faculty_id}"
381
+ # )
382
+ # elif user_type == "research_assistant":
383
+ # research_assistants_collection.insert_one(
384
+ # {
385
+ # "full_name": full_name,
386
+ # "password": hashed_password,
387
+ # "created_at": datetime.utcnow(),
388
+ # }
389
+ # )
390
+ # st.success("Research Assistant registered successfully!")
391
+ # else:
392
+ # st.error("Passwords do not match")
393
+
394
+
395
def get_new_analyst_id():
    """Return the next unused analyst ID of the form ``A<number>``.

    The numeric suffix of every stored ``AID`` is compared as an integer.
    Sorting the string field in the database would order "A9" after "A10" and
    hand out duplicate IDs. Documents without an ``AID``, or with one whose
    suffix is not a number, are ignored instead of raising.

    Returns:
        str: "A1" when no analyst has a usable ``AID``, otherwise "A<max + 1>".
    """
    highest = 0
    # Fetch only the AID field of the documents that actually have one.
    for record in analysts_collection.find({"AID": {"$exists": True}}, {"AID": 1}):
        aid = record.get("AID")
        if not isinstance(aid, str):
            continue
        try:
            number = int(aid[1:])  # drop the leading "A"
        except ValueError:
            continue
        highest = max(highest, number)
    return f"A{highest + 1}"
404
+
405
+
406
  def register_page():
407
  st.title("Register")
408
  if "user_type" not in st.session_state:
 
410
 
411
  # Select user type
412
  st.session_state.user_type = st.selectbox(
413
+ "Select User Type", ["student", "faculty", "research_assistant", "analyst"]
414
  )
415
  user_type = st.session_state.user_type
416
  print(user_type)
417
 
418
  with st.form("register_form"):
 
 
419
  full_name = st.text_input("Full Name")
420
  password = st.text_input("Password", type="password")
421
  confirm_password = st.text_input("Confirm Password", type="password")
 
477
  }
478
  )
479
  st.success("Research Assistant registered successfully!")
480
+ elif user_type == "analyst":
481
+ # new_analyst_id = get_new_analyst_id()
482
+ analysts_collection.insert_one(
483
+ {
484
+ # "AID": new_analyst_id,
485
+ "full_name": full_name,
486
+ "password": hashed_password,
487
+ "created_at": datetime.utcnow(),
488
+ }
489
+ )
490
+ st.success("Analyst registered successfully!")
491
  else:
492
  st.error("Passwords do not match")
493
 
 
555
 
556
  from research_assistant_dashboard import display_research_assistant_dashboard
557
 
558
+ from goals2 import display_analyst_dashboard
559
+
560
 
561
  def main_dashboard():
562
  if st.session_state.user_type == "research_assistant":
563
  display_research_assistant_dashboard()
564
+ elif st.session_state.user_type == "analyst":
565
+ display_analyst_dashboard()
566
  else:
567
  selected_course_id = None
568
  create_session = False
 
579
  )
580
 
581
  if st.session_state.user_type == "faculty":
582
+ if st.button(
583
+ "Create New Course", key="create_course", use_container_width=True
584
+ ):
585
+ st.session_state.show_create_course_form = True
586
 
587
  if not enrolled_courses:
588
  st.warning("No courses found")
 
646
  # if create_session:
647
  # create_session_form(selected_course_id)
648
 
649
+
650
  def main():
651
  st.set_page_config(page_title="NOVAScholar", page_icon="📚", layout="wide")
652
  init_session_state()