Spaces:

GoTogether
/

App_Prototype

Sleeping

App Files Files Community

NursNurs commited on Oct 14, 2023

Commit

1ad084f

1 Parent(s): 26e2925

Added sorting by price, relevancy, rating

Browse files

Files changed (1) hide show

app.py +255 -64

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from transformers import BertTokenizer, BertModel
 from collections import defaultdict, Counter
 from tqdm.auto import tqdm
 from sklearn.metrics.pairwise import cosine_similarity
 #Loading the model
 @st.cache_resource
@@ -31,13 +32,26 @@ def load_data():
     vectors_df = pd.read_csv('restaurants_dataframe_with_embeddings.csv')
     embeds = dict(enumerate(vectors_df['Embeddings']))
     rest_names = list(vectors_df['Names'])
     return embeds, rest_names, vectors_df
 #type: dict; keys: 0-n
-restaurants_embeds, rest_names, df = load_data()
 model, tokenizer = get_models()
 #a function that takes a sentence and converts it into embeddings
 def get_bert_embeddings(sentence, model, tokenizer):
     inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
@@ -47,19 +61,70 @@ def get_bert_embeddings(sentence, model, tokenizer):
     return embeddings
 # a function that return top-K best restaurants
-def return_top_k(query, k=10):
     embedded_query = get_bert_embeddings(query, model, tokenizer)
     embedded_query = embedded_query.numpy()
-    top_similar = dict()
     for i in range(len(restaurants_embeds)):
         name = rest_names[i]
-        top_similar[i] = cosine_similarity(embedded_query, str_to_numpy(restaurants_embeds[i]))[0][0]
-    top_similar = dict(sorted(top_similar.items(), key=lambda item: item[1], reverse=True))
-    top_similar = dict([(key, value) for key, value in top_similar.items()][:k])
-    names = [rest_names[i] for i in top_similar.keys()]
-    result = dict(zip(names, top_similar.values()))
     return result
 #combines 2 users preferences into 1 string and fetches best options
@@ -67,14 +132,69 @@ def get_combined_preferences(user1, user2):
     #TODO: optimize for more users
     shared_pref = ''
     for pref in user1:
-        shared_pref += pref
         shared_pref += " "
     shared_pref += " "
     for pref in user2:
-        shared_pref += pref
         shared_pref += " "
-    return shared_pref
 if 'preferences_1' not in st.session_state:
   st.session_state.preferences_1 = []
@@ -87,81 +207,152 @@ if 'food' not in st.session_state:
 if 'ambiance' not in st.session_state:
   st.session_state.ambiance = ['Romantic date', 'Friends catching up', 'Family gathering', 'Big group', 'Business-meeting', 'Other']
 if 'price' not in st.session_state:
-  st.session_state.price = dict(enumerate(['$', '$$', '$$$', '$$$$']))
 # Configure Streamlit page and state
 st.title("GoTogether!")
-st.markdown(
-    "Tell us about your preferences!")
 st.caption("In section 'Others', you can describe any wishes.")
-st.write('User 1')
 food_1 = st.selectbox('Select the food type you prefer', st.session_state.food, key=1)
 if food_1 == 'Other':
     food_1 = st.text_input(label="Your description", placeholder="What kind of food would you like to eat?", key=10)
-st.session_state.preferences_1.append(food_1)
 ambiance_1 = st.selectbox('What describes your occasion the best?', st.session_state.ambiance, key=2)
 if ambiance_1 == 'Other':
-    ambiance_1 = st.text_input(label="Your description", placeholder="How would you describe your meeting?", key=11)
-price_1 = st.select_slider("Your preferred price range", options=('$', '$$', '$$$', '$$$$'), key=3)
-st.session_state.preferences_1.append(ambiance_1)
-st.write('User 2')
-food_2 = st.selectbox('Select the food type you prefer', st.session_state.food, key=4)
 if food_2 == 'Other':
-    food_2 = st.text_input(label="Your description", placeholder="What kind of food would you like to eat?", key=13)
-st.session_state.preferences_2.append(food_2)
 ambiance_2 = st.selectbox('What describes your occasion the best?', st.session_state.ambiance, key=5)
 if ambiance_2 == 'Other':
-    ambiance_2 = st.text_input(label="Your description", placeholder="How would you describe your meeting?", key=12)
-price_2 = st.select_slider("Your preferred price range", options=('$', '$$', '$$$', '$$$$'), key=6)
-st.session_state.preferences_2.append(ambiance_2)
-submit = st.button("Submit")
-if submit:
-    with st.spinner("Please wait while we are finding the best solution..."):
-        query = get_combined_preferences(st.session_state.preferences_1, st.session_state.preferences_2)
-        st.write("Your query is:", query)
-        results = return_top_k(query, k=10)
-    st.write("Here are the best matches to your preferences:")
-    i = 1
-    for name, score in results.items():
-        st.write("Top", i, ':', name, score)
-        condition = df['Names'] == name
-        # Use the condition to extract the value(s)
-        description = df.loc[condition, 'Strings']
-        st.write(description)
-        i+=1
-st.session_state.preferences_1, st.session_state.preferences_2 = [], []
-#TODO: include rating and price as variables
-# if input:
-#     input_embed = model.encode(input)
-#     sim_score = similarity_top(input_embed, icd_embeddings)
-#     i = 1
-#     for dis, value in sim_score:
-#         st.write(f":green[Prediction number] {i}:")
-#         st.write(f"{dis} (similarity score:", value, ")")
-#         i+= 1
-#     text_spinner_placeholder = st.empty()
-#     with st.spinner("Please wait while your visualizations are being generated..."):
-#         time.sleep(5)
-#     vis_results_2d(input_embed)
-#     vis_results_3d(input_embed)
 # #TODO: implement price range as a sliding bar

 from collections import defaultdict, Counter
 from tqdm.auto import tqdm
 from sklearn.metrics.pairwise import cosine_similarity
+import time
 #Loading the model
 @st.cache_resource
     vectors_df = pd.read_csv('restaurants_dataframe_with_embeddings.csv')
     embeds = dict(enumerate(vectors_df['Embeddings']))
     rest_names = list(vectors_df['Names'])
+    vectors_df['Weights'] = [1]*len(vectors_df)
     return embeds, rest_names, vectors_df
 #type: dict; keys: 0-n
+restaurants_embeds, rest_names, init_df = load_data()
 model, tokenizer = get_models()
+# query_params = st.experimental_get_query_params()
+# st.write("query_params")
+# st.write(query_params)
+# def update_params():
+#     st.experimental_set_query_params(
+#         sorting=st.session_state.sort_by)
+# if query_params:
+#     sort_by = query_params["sorting"][0]
+#     st.session_state.sort_by = sort_by
 #a function that takes a sentence and converts it into embeddings
 def get_bert_embeddings(sentence, model, tokenizer):
     inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
     return embeddings
 # a function that return top-K best restaurants
+def compute_cos_sim(query):
     embedded_query = get_bert_embeddings(query, model, tokenizer)
     embedded_query = embedded_query.numpy()
+    top_similar = np.array([])
     for i in range(len(restaurants_embeds)):
         name = rest_names[i]
+        top_similar = np.append(top_similar, cosine_similarity(embedded_query, str_to_numpy(restaurants_embeds[i]))[0][0])
+    st.session_state.df['cos_sim'] = top_similar.tolist()
+    weights = np.array(st.session_state.df['Weights'])
+    #multiply weights by the cosine similarity
+    top_similar_weighted = dict(enumerate(np.multiply(top_similar, weights)))
+    st.session_state.df['Relevancy'] = top_similar_weighted.values()
+    return st.session_state.df
+def sort_by_relevancy(k):
+    '''
+    k - int - how many top-matching places to show
+    '''
+    top_similar_weighted = dict(enumerate(st.session_state.precalculated_df['Relevancy']))
+    #sort in the descending order
+    top_similar_weighted = dict(sorted(top_similar_weighted.items(), key=lambda item: item[1], reverse=True))
+    #leave only K recommendations
+    top_k_similar = dict([(key, value) for key, value in top_similar_weighted.items()][:k])
+    #get restaurant names
+    names = [rest_names[i] for i in top_k_similar.keys()]
+    result = dict(zip(names, top_k_similar.values()))
+    return result
+def sort_by_price(k):
+    '''
+    k - int - how many top-matching places to show
+    '''
+    relevance = np.array(st.session_state.precalculated_df['Relevancy'])
+    prices = np.array([st.session_state.price[str(val)] for val in st.session_state.precalculated_df['Price']])
+    top_similar_by_price = dict(enumerate(np.multiply(relevance, prices)))
+    st.session_state.precalculated_df['Sort_price'] = top_similar_by_price.values()
+    #sort in the descending order
+    top_similar_by_price = dict(sorted(top_similar_by_price.items(), key=lambda item: item[1], reverse=True))
+    #leave only K recommendations
+    top_k_similar = dict([(key, value) for key, value in top_similar_by_price.items()][:k])
+    #get restaurant names
+    names = [rest_names[i] for i in top_k_similar.keys()]
+    result = dict(zip(names, top_k_similar.values()))
+    return result
+def sort_by_rating(k):
+    '''
+    k - int - how many top-matching places to show
+    '''
+    relevance = np.array(st.session_state.precalculated_df['Relevancy'])
+    rating = np.array(list(st.session_state.precalculated_df['Rating']))
+    top_similar_by_rating = dict(enumerate(np.multiply(relevance, rating)))
+    st.session_state.precalculated_df['Sort_rating'] = top_similar_by_rating.values()
+    #sort in the descending order
+    top_similar_by_rating = dict(sorted(top_similar_by_rating.items(), key=lambda item: item[1], reverse=True))
+    #leave only K recommendations
+    top_k_similar = dict([(key, value) for key, value in top_similar_by_rating.items()][:k])
+    #get restaurant names
+    names = [rest_names[i] for i in top_k_similar.keys()]
+    result = dict(zip(names, top_k_similar.values()))
     return result
 #combines 2 users preferences into 1 string and fetches best options
     #TODO: optimize for more users
     shared_pref = ''
     for pref in user1:
+        shared_pref += pref.lower()
         shared_pref += " "
     shared_pref += " "
     for pref in user2:
+        shared_pref += pref.lower()
         shared_pref += " "
+    freq_words = Counter(shared_pref.split())
+    return shared_pref, freq_words
+def filter_places(restrictions):
+    #punish the weight of places that don't fit restrictions
+    # st.write("Here are the restrictions you provided:")
+    # st.write(restrictions)
+    taboo = set([word.lower() for word in restrictions])
+    for i in range(len(st.session_state.df)):
+        descr = [word.lower() for word in st.session_state.df['Strings'][i].split()]
+        name = st.session_state.df['Names'][i]
+        for criteria in taboo:
+            if criteria not in descr:
+                st.session_state.df['Weights'][i] = 0.1 * st.session_state.df['Weights'][i]
+    return st.session_state.df
+def promote_places(preferences):
+    '''
+    input type: dict()
+    a function that takes most common words, checks if descriptions fit them, increases their weight if they do
+    '''
+    #punish the weight of places that don't fit restrictions
+    # st.write("Here are the most common preferences you provided:")
+    # st.write(preferences)
+    for i in range(len(st.session_state.df)):
+        descr = [word.lower() for word in st.session_state.df['Strings'][i].split()]
+        name = st.session_state.df['Names'][i]
+        for pref in preferences:
+            if pref in descr:
+                st.session_state.df['Weights'][i] = 2 * st.session_state.df['Weights'][i]
+    return st.session_state.df
+def generate_results(sort_by):
+    if sort_by == 'Price':
+        with st.spinner("Sorting your results by price..."):
+            st.write("Sorting your results by price...")
+            results = sort_by_price(10)
+    elif sort_by == 'Rating':
+        with st.spinner("Sorting your results by rating..."):
+            st.write("Sorting your results by rating...")
+            results = sort_by_rating(10)
+    elif sort_by == 'Relevancy (default)':
+        with st.spinner("Sorting your results by relevancy..."):
+            st.write("Sorting your results by relevancy...")
+            results = sort_by_relevancy(10)
+    else:
+        st.write("Sorry, we are still working on this option. For now, the results are sorted by relevance")
+        with st.spinner("Sorting your results by relevancy..."):
+            results = sort_by_relevancy(10)
+    return results
 if 'preferences_1' not in st.session_state:
   st.session_state.preferences_1 = []
 if 'ambiance' not in st.session_state:
   st.session_state.ambiance = ['Romantic date', 'Friends catching up', 'Family gathering', 'Big group', 'Business-meeting', 'Other']
+if 'restrictions' not in st.session_state:
+    st.session_state.restrictions = []
 if 'price' not in st.session_state:
+  st.session_state.price = {'$': 2, '₩': 2, '$$': 1, '₩₩': 1, '$$$': 0.5, '$$$$': 0.1, "nan": 1}
+if 'sort_by' not in st.session_state:
+    st.session_state.sort_by = ''
+if 'options' not in st.session_state:
+    st.session_state.options = ['Relevancy (default)', 'Price', 'Rating', 'Distance']
+if 'df' not in st.session_state:
+    st.session_state.df = init_df
+if 'precalculated_df' not in st.session_state:
+    st.session_state.precalculated_df = pd.DataFrame()
+if 'stop_search' not in st.session_state:
+    st.session_state.stop_search = False
 # Configure Streamlit page and state
 st.title("GoTogether!")
+st.markdown("Tell us about your preferences!")
 st.caption("In section 'Others', you can describe any wishes.")
+# options_disability_1 = st.multiselect(
+#      'Do you need a wheelchair?',
+#      ['Yes', 'No'], ['No'], key=101)
+# if options_disability_1 == 'Yes':
+#     st.session_state.restrictions.append('Wheelchair')
+# price_1 = st.select_slider("Your preferred price range", options=('$', '$$', '$$$', '$$$$'), key=3)
+# st.session_state.preferences_1.append(ambiance_1)
+# Komplettes Beispiel für die Verwendung der 'with'-Notation
+# with st.form('my_form_1'):
+#     st.subheader('**User 1**')
+st.write("User 1")
+# Eingabe-Widgets
 food_1 = st.selectbox('Select the food type you prefer', st.session_state.food, key=1)
 if food_1 == 'Other':
     food_1 = st.text_input(label="Your description", placeholder="What kind of food would you like to eat?", key=10)
 ambiance_1 = st.selectbox('What describes your occasion the best?', st.session_state.ambiance, key=2)
 if ambiance_1 == 'Other':
+    ambiance_1 = st.text_input(label="Your description", placeholder="How would you describe your meeting?", key=11)
+options_food_1 = st.multiselect(
+    'Do you have any dietary restrictions?',
+    ['Vegan', 'Vegetarian', 'Halal'], key=100)
+additional_1 = st.text_input(label="Your description", placeholder="Anything else you wanna share?", key=102)
+with_kids = st.checkbox('I will come with kids', key=200)
+    # st.subheader('**User 2**')
+st.write("User 2")
+# Eingabe-Widgets
+food_2 = st.selectbox('Select the food type you prefer', st.session_state.food, key=3)
 if food_2 == 'Other':
+    food_2 = st.text_input(label="Your description", placeholder="What kind of food would you like to eat?", key=4)
 ambiance_2 = st.selectbox('What describes your occasion the best?', st.session_state.ambiance, key=5)
 if ambiance_2 == 'Other':
+    ambiance_2 = st.text_input(label="Your description", placeholder="How would you describe your meeting?", key=6)
+options_food_2 = st.multiselect(
+    'Do you have any dietary restrictions?',
+    ['Vegan', 'Vegetarian', 'Halal', 'Other'], key=7)
+additional_2 = st.text_input(label="Your description", placeholder="Anything else you wanna share?", key=8)
+with_kids_2 = st.checkbox('I will come with kids', key=201)
+if len(st.session_state.preferences_1) == 0:
+    st.session_state.preferences_1.append(food_1)
+    st.session_state.preferences_1.append(ambiance_1)
+    st.session_state.restrictions.extend(options_food_1)
+    if additional_1:
+        st.session_state.preferences_1.append(additional_1)
+    if with_kids:
+        st.session_state.restrictions.append('kids')
+if len(st.session_state.preferences_2) == 0:
+    st.session_state.preferences_2.append(food_2)
+    st.session_state.preferences_2.append(ambiance_2)
+    st.session_state.restrictions.extend(options_food_2)
+    if additional_2:
+        st.session_state.preferences_2.append(additional_2)
+    if with_kids_2:
+        st.session_state.restrictions.append('kids')
+submitted = st.button('Submit!')
+if submitted:
+    st.markdown("Thanks, we received your preferences!")
+    st.session_state.stop_search = False
+else:
+    st.write('☝️ Describe your preferences!')
+submit = st.button("Find best matches!", type='primary')
+if submit or (not st.session_state.precalculated_df.empty):
+    with st.spinner("Please wait while we are finding the best solution..."):
+        if st.session_state.precalculated_df.empty:
+            query = get_combined_preferences(st.session_state.preferences_1, st.session_state.preferences_2)
+            st.write("Your query is:", query[0])
+            #sort places based on restrictions
+            st.session_state.precalculated_df = filter_places(st.session_state.restrictions)
+            #sort places by elevating preferrences
+            st.session_state.precalculated_df = promote_places(query[1])
+            st.session_state.precalculated_df = compute_cos_sim(query[0])
+        sort_by = st.selectbox(('Sort by:'), st.session_state.options, key=400,
+                               index=st.session_state.options.index('Relevancy (default)'))
+        if sort_by:
+            st.session_state.sort_by = sort_by
+            results = generate_results(st.session_state.sort_by)
+            k = 10
+            st.write(f"Here are the best {k} matches to your preferences:")
+            i = 1
+            for name, score in results.items():
+                st.write("Top", i, ':', name, score)
+                condition = st.session_state.precalculated_df['Names'] == name
+                # Use the condition to extract the value(s)
+                description = st.session_state.precalculated_df.loc[condition, 'Strings']
+                st.write(description)
+                i+=1
+stop = st.button("New search!", type='primary', key=500)
+if stop:
+    st.session_state.preferences_1, st.session_state.preferences_2 = [], []
+    st.session_state.restrictions = []
+    st.session_state.sort_by = ""
+    st.session_state.df = init_df
+    st.session_state.precalculated_df = pd.DataFrame()
 # #TODO: implement price range as a sliding bar
+# When the user presses "New search", erase everything
+# Propose URLs
+# Show keywords instead of whole strings