# DiscoverCourses — Streamlit app serving course recommendations
# (hosted as a Hugging Face Space).
import streamlit as st
import pickle
import pandas as pd
import torch
import numpy as np
import requests

# Precomputed pairwise cosine-similarity matrix between course embeddings.
# NOTE(review): pickle.load on a bundled artifact — safe only because the
# file ships with the app; never point this at untrusted input.
cosine_scores = pickle.load(open('cosine_scores.pkl', 'rb'))

# Course table; also carries per-course description-based recommendations.
coursedf = pd.read_pickle('course_df_new.pkl')

# Dropdown labels of the form "REF: Title", one per course.
course_title_list = [
    f"{ref}: {title}"
    for ref, title in zip(coursedf['ref'].to_list(), coursedf['title'].to_list())
]
def get_random_course():
    """Pick one course uniformly at random.

    Returns:
        A pair ``(ref, title)`` taken from a single sampled row; each is a
        one-element pandas Series (unchanged from the original contract).
    """
    sampled_row = coursedf.sample(1)
    return sampled_row['ref'], sampled_row['title']
def recommend(index, df=None, scores=None):
    """Return up to 30 course titles most similar to the course at ``index``.

    Every course is ranked by its similarity score against the selected
    course; the titles of ranks 1-30 are returned. Rank 0 is skipped because
    a course is always most similar to itself.

    Args:
        index: Row position of the selected course.
        df: Course DataFrame whose column 1 holds titles. Defaults to the
            module-level ``coursedf`` (backward compatible).
        scores: 2-D similarity matrix with ``scores[i][j]`` = similarity of
            course i to course j. Defaults to the module-level
            ``cosine_scores`` (backward compatible).

    Returns:
        list: Up to 30 recommended course titles, best match first.
    """
    if df is None:
        df = coursedf
    if scores is None:
        scores = cosine_scores
    # Map each title (column 1) to its similarity with the selected course.
    # Duplicate titles overwrite earlier entries, matching the original.
    pairs = {df.iloc[i, 1]: scores[index][i] for i in range(len(df))}
    ranked = sorted(pairs.items(), key=lambda item: item[1], reverse=True)
    # Drop rank 0 (the selected course itself), keep the next 30 titles.
    return [title for title, _score in ranked[1:31]]
# ---------------------------------------------------------------------------
# Page chrome: title, intro copy, course picker, and the two action buttons.
# ---------------------------------------------------------------------------
st.set_page_config(page_title='DiscoverCourses', page_icon=':bird:')
st.header('DiscoverCourses')
st.subheader('Course recommendations based on cosine similarity between vector embeddings of lemmatized text')

st.write('')
st.write("Do you like the tech + social impact focus of CS51? Excited by film-centered courses like FILMEDIA245B? Saw a cool study-abroad course (OSPISTAN76) and want to study that topic on campus?")
st.write('')
st.write("Enter DiscoverCourses. Just pick a course and get dozens of recommendations for similar courses based on titles or descriptions. Give it a go! If you have any thoughts on DiscoverCourses (or project ideas or a book recommendation or really anything), shoot me an email at [email protected].")
st.write('')

# Keep hyperlinks white and underlined so they stay readable on the theme.
st.markdown('<style> a:link {color: white;background-color: transparent;text-decoration: underline;}</style>', unsafe_allow_html=True)

selected_course = st.selectbox('Pick a course from the dropdown (or click on it and start typing to search).', course_title_list)

# Two side-by-side buttons: recommend by title vs. by description.
container = st.container()
maincol1, maincol2 = container.columns(2)
st.write('')
if maincol1.button('Discover by title', use_container_width=True):
    # Best-effort usage ping to a Wolfram Data Drop bin. Fixes vs. original:
    # the selection is sent via params= (properly percent-encoded instead of
    # raw string concatenation), a timeout prevents the script run from
    # hanging, and a network failure can no longer crash the app.
    try:
        requests.get(
            'https://datadrop.wolframcloud.com/api/v1.0/Add',
            params={'bin': '1fYEdJizg', 'data': selected_course.replace(":", "")},
            timeout=5,
        )
    except requests.RequestException:
        pass  # analytics only — never block recommendations
    # Locate the selected course by its "REF: Title" label, then rank by
    # title-embedding similarity.
    selected_index = np.where((coursedf['ref'] + ": " + coursedf['title']) == selected_course)[0][0]
    for result in recommend(selected_index):
        index = np.where(coursedf['title'] == result)[0][0]
        course_id = coursedf.iloc[index, 0]
        st.subheader(course_id + ": " + result)
        with st.expander("See description"):
            st.write(coursedf.iloc[index, 3])  # column 3 holds the full description
        link1 = "[ExploreCourses ↗](https://explorecourses.stanford.edu/search?q=" + course_id + "+" + result.replace(" ", "+") + ")"
        link2 = "[Carta ↗](https://carta-beta.stanford.edu/results/" + course_id + ")"
        st.markdown(link1 + " " + link2, unsafe_allow_html=True)
        st.divider()
if maincol2.button('Discover by description', use_container_width=True):
    # Best-effort usage ping (same fixes as the title handler: encoded
    # params, timeout, and failure tolerance).
    try:
        requests.get(
            'https://datadrop.wolframcloud.com/api/v1.0/Add',
            params={'bin': '1fYEdJizg', 'data': selected_course.replace(":", "")},
            timeout=5,
        )
    except requests.RequestException:
        pass  # analytics only — never block recommendations
    # Column 2 holds the precomputed description-based recommendation list;
    # entry 0 is the course itself, so it is skipped.
    index_new = np.where((coursedf['ref'] + ": " + coursedf['title']) == selected_course)[0][0]
    rec_list = coursedf.iloc[index_new, 2]
    for result in rec_list[1:]:
        index = np.where(coursedf['title'] == result)[0][0]
        course_id = coursedf.iloc[index, 0]
        st.subheader(course_id + ": " + result)
        with st.expander("See description"):
            st.write(coursedf.iloc[index, 3])  # column 3 holds the full description
        link1 = "[ExploreCourses ↗](https://explorecourses.stanford.edu/search?q=" + course_id + "+" + result.replace(" ", "+") + ")"
        link2 = "[Carta ↗](https://carta-beta.stanford.edu/results/" + course_id + ")"
        st.markdown(link1 + " " + link2, unsafe_allow_html=True)
        st.divider()

st.write('© 2023 Rushank Goyal. All rights reserved. Source for the all-MiniLM-L6-v2 model: Wang, Wenhui, et al. "MiniLM: Deep Self-Attention Distillation for Task-Agnostic Compression of Pre-Trained Transformers." arXiv, 25 Feb. 2020, doi:10.48550/arXiv.2002.10957.')