File size: 4,294 Bytes
d562319
 
 
 
 
64809af
d562319
 
72f2133
 
d562319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889921b
d562319
81c9cfc
a906503
81c9cfc
 
 
1296882
d562319
d9a6e1a
bd698d1
a906503
d562319
 
 
 
 
4ad2785
64809af
 
d562319
 
 
 
 
 
72f2133
d9a6e1a
 
 
d562319
 
4ad2785
64809af
 
d562319
72f2133
 
d562319
 
 
 
72f2133
d9a6e1a
 
1296882
d562319
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import pickle
from urllib.parse import quote, quote_plus

import numpy as np
import pandas as pd
import requests
import streamlit as st
import torch

# Precomputed similarity scores, read as cosine_scores[i][j] where i and j are
# row positions in coursedf.
# SECURITY NOTE: unpickling runs arbitrary code, so both .pkl files must only
# ever be artifacts produced by this project, never user-supplied data.
with open('cosine_scores.pkl', 'rb') as scores_file:
    cosine_scores = pickle.load(scores_file)

# Course table. It has 'ref' and 'title' columns and is also read positionally
# elsewhere in this script: column 0 = course code, 1 = title, 2 = list of
# recommended titles used by "Discover by description", 3 = description text
# (presumably 'ref' and 'title' are columns 0 and 1 -- confirm against the pickle).
coursedf = pd.read_pickle('course_df_new.pkl')

# Dropdown labels in row order, formatted as "<ref>: <title>".
course_title_list = [
    ref + ": " + title
    for ref, title in zip(coursedf['ref'].to_list(), coursedf['title'].to_list())
]

def get_random_course():
    """Draw one row at random from ``coursedf``.

    Returns:
        A ``(ref, title)`` tuple. Each element is the single-row pandas
        Series taken from the sampled row, not a bare string.
    """
    picked = coursedf.sample(n=1)
    return picked['ref'], picked['title']

def recommend(index):
    """Return the titles that score highest against the course at ``index``.

    Args:
        index: Row position of the chosen course; selects the row of
            ``cosine_scores`` to rank by.

    Returns:
        A list of at most 30 course titles, highest score first. The
        top-ranked title is skipped (presumably the chosen course itself,
        which should score highest against itself -- confirm).
    """
    # Keyed by title: rows that share a title collapse into a single entry
    # carrying the score of the last such row.
    score_by_title = {
        coursedf.iloc[pos, 1]: cosine_scores[index][pos]
        for pos in range(len(coursedf))
    }

    # Stable descending sort of the titles by their score.
    ranked_titles = sorted(score_by_title, key=score_by_title.get, reverse=True)

    return ranked_titles[1:31]

# --- Page header -----------------------------------------------------------
# Browser tab title/icon, followed by the on-page heading and tagline.
st.set_page_config(page_title='DiscoverCourses', page_icon=':bird:')
st.header('DiscoverCourses')
st.subheader('Course recommendations based on cosine similarity between vector embeddings of lemmatized text')

# --- Intro copy ------------------------------------------------------------
# The empty st.write('') calls are only vertical spacers between paragraphs.
st.write('')
st.write("Do you like the tech + social impact focus of CS51? Excited by film-centered courses like FILMEDIA245B? Saw a cool study-abroad course (OSPISTAN76) and want to study that topic on campus?")
st.write('')
# NOTE(review): the contact address below reads "[email protected]", which looks
# like an obfuscation placeholder rather than a real address -- confirm and
# restore the intended e-mail.
st.write("Enter DiscoverCourses. Just pick a course and get dozens of recommendations for similar courses based on titles or descriptions. Give it a go! If you have any thoughts on DiscoverCourses (or project ideas or a book recommendation or really anything), shoot me an email at [email protected].")
st.write('')

# Inject CSS so hyperlinks render white and underlined.
st.markdown('<style> a:link {color: white;background-color: transparent;text-decoration: underline;}</style>',unsafe_allow_html=True)

# --- Course picker ---------------------------------------------------------
# Options are the "<ref>: <title>" labels from course_title_list; the chosen
# label is what both button handlers below use to find the course row.
selected_course = st.selectbox('Pick a course from the dropdown (or click on it and start typing to search).',course_title_list)
#st.write("Description: "+coursedf.iloc[np.where((coursedf['ref']+": "+coursedf['title'])==selected_course)[0][0],3])
#st.write('')

# Two side-by-side columns that hold the "Discover by ..." buttons.
container = st.container()
maincol1, maincol2 = container.columns(2)
st.write('')

# Wolfram Data Drop endpoint that receives one entry per button click; the
# picked course label is appended as the value of the ``data`` parameter.
_USAGE_LOG_URL = 'https://datadrop.wolframcloud.com/api/v1.0/Add?bin=1fYEdJizg&data='


def _log_selection(course_label):
    """Send the picked course to the usage log without blocking the page.

    Args:
        course_label: The "<ref>: <title>" label chosen in the dropdown.
            Colons are stripped before sending, as before.
    """
    # Percent-encode the payload so characters such as '&', '#' or '+' in a
    # title stay inside the ``data`` parameter instead of splitting the query.
    url = _USAGE_LOG_URL + quote(course_label.replace(":", ""), safe='')
    try:
        # A timeout keeps a slow or unreachable endpoint from hanging the app.
        requests.get(url, timeout=5)
    except requests.RequestException:
        # Logging is best-effort only: recommendations must still be shown
        # when the logging service is down.
        pass


def _show_course(title):
    """Render one recommended course: heading, description and outbound links.

    Args:
        title: Course title to display. It is matched against the 'title'
            column and the first matching row is used, so courses that share
            a title all resolve to the same row.
    """
    hits = np.where(coursedf['title'] == title)[0]
    if len(hits) == 0:
        # The title is not in the course table; skip it rather than crash
        # the whole results list with an IndexError.
        return
    row = hits[0]
    course_id = coursedf.iloc[row, 0]

    st.subheader(course_id + ": " + title)
    with st.expander("See description"):
        st.write(coursedf.iloc[row, 3])

    # URL-encode the dynamic parts so punctuation in a title (e.g. '&', '#',
    # ')') cannot corrupt the query string or end the markdown link early.
    explore_url = "https://explorecourses.stanford.edu/search?q=" + quote_plus(course_id + " " + title)
    carta_url = "https://carta-beta.stanford.edu/results/" + quote(course_id, safe='')
    link1 = "[ExploreCourses ↗](" + explore_url + ")"
    link2 = "[Carta ↗](" + carta_url + ")"
    st.markdown(link1 + "   " + link2, unsafe_allow_html=True)
    st.divider()


if maincol1.button('Discover by title',use_container_width=True):
    _log_selection(selected_course)
    # Dropdown labels are built in row order, so the label's position in
    # course_title_list is the course's row position in coursedf.
    for title in recommend(course_title_list.index(selected_course)):
        _show_course(title)

if maincol2.button('Discover by description',use_container_width=True):
    _log_selection(selected_course)
    selected_row = course_title_list.index(selected_course)
    # Column 2 holds this course's list of recommended titles; the first entry
    # is skipped (presumably the selected course itself -- confirm).
    for title in coursedf.iloc[selected_row, 2][1:]:
        _show_course(title)

# Footer: copyright notice and citation for the all-MiniLM-L6-v2 model.
st.write('© 2023 Rushank Goyal. All rights reserved. Source for the all-MiniLM-L6-v2 model: Wang, Wenhui, et al. "MiniLM: Deep Self-Attention Distillation for Task-Agnostic Compression of Pre-Trained Transformers." arXiv, 25 Feb. 2020, doi:10.48550/arXiv.2002.10957.')