Spaces:

GoTogether
/

App_Prototype

Sleeping

App Files Files Community

NursNurs committed on Oct 6, 2023

Commit

696b1e3

1 Parent(s): 4cbe5e0

Upload 4 files

Browse files

Files changed (3) hide show

.gitattributes +1 -0
app.py +167 -0
restaurants_dataframe_with_embeddings.csv +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+restaurants_dataframe_with_embeddings.csv filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,167 @@

+import streamlit as st
+import torch
+import numpy as np
+import time
+import string
+import pandas as pd
+import numpy as np
+from transformers import BertTokenizer, BertModel
+from collections import defaultdict, Counter
+from tqdm.auto import tqdm
+from sklearn.metrics.pairwise import cosine_similarity
+#Loading the model
+@st.cache_resource
+def get_models():
+  st.write('Loading the model...')
+  tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+  model = BertModel.from_pretrained("bert-base-uncased")
+  st.write("_The model is loaded and ready to use! :tada:_")
+  return model, tokenizer
+#convert numpy arrays from strings back to arrays
+def str_to_numpy(array_string):
+    array_string = array_string.replace('\n', '').replace('[','').replace(']','')
+    numpy_array = np.fromstring(array_string, sep=' ')
+    numpy_array = numpy_array.reshape((1, -1))
+    return numpy_array
+@st.cache_data  # 👈 Add the caching decorator
+def load_data():
+    vectors_df = pd.read_csv('restaurants_dataframe_with_embeddings.csv')
+    embeds = dict(enumerate(vectors_df['Embeddings']))
+    rest_names = list(vectors_df['Names'])
+    return embeds, rest_names, vectors_df
+# restaurants_embeds: dict keyed by row position (0..n-1) holding each row's
+# 'Embeddings' cell; rest_names: list of the 'Names' column in the same row
+# order; df: the full DataFrame returned by load_data().
+restaurants_embeds, rest_names, df = load_data()
+# Module-level model/tokenizer pair used by return_top_k().
+model, tokenizer = get_models()
+#a function that takes a sentence and converts it into embeddings
+def get_bert_embeddings(sentence, model, tokenizer):
+    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
+    with torch.no_grad():
+        outputs = model(**inputs)
+        embeddings = outputs.last_hidden_state.mean(dim=1)  # Average pool over tokens
+    return embeddings
+# a function that returns the top-K best restaurants
+def return_top_k(query, k=10):
+    embedded_query = get_bert_embeddings(query, model, tokenizer)
+    embedded_query = embedded_query.numpy()
+    top_similar = dict()
+    for i in range(len(restaurants_embeds)):
+        name = rest_names[i]
+        top_similar[i] = cosine_similarity(embedded_query, str_to_numpy(restaurants_embeds[i]))[0][0]
+    top_similar = dict(sorted(top_similar.items(), key=lambda item: item[1], reverse=True))
+    top_similar = dict([(key, value) for key, value in top_similar.items()][:k])
+    names = [rest_names[i] for i in top_similar.keys()]
+    result = dict(zip(names, top_similar.values()))
+    return result
+# combines two users' preferences into one query string
+def get_combined_preferences(user1, user2):
+    #TODO: optimize for more users
+    shared_pref = ''
+    for pref in user1:
+        shared_pref += pref
+        shared_pref += " "
+    shared_pref += " "
+    for pref in user2:
+        shared_pref += pref
+        shared_pref += " "
+    return shared_pref
+if 'preferences_1' not in st.session_state:
+  st.session_state.preferences_1 = []
+if 'preferences_2' not in st.session_state:
+  st.session_state.preferences_2 = []
+if 'food' not in st.session_state:
+  st.session_state.food = ['Coffee', 'Italian', 'Mexican', 'Chinese', 'Indian', 'Asian', 'Fast food', 'Other']
+if 'ambiance' not in st.session_state:
+  st.session_state.ambiance = ['Romantic date', 'Friends catching up', 'Family gathering', 'Big group', 'Business-meeting', 'Other']
+if 'price' not in st.session_state:
+  st.session_state.price = dict(enumerate(['$', '$$', '$$$', '$$$$']))
+# Configure Streamlit page and state
+st.title("GoTogether!")
+st.markdown(
+    "Tell us about your preferences!")
+st.caption("In section 'Others', you can describe any wishes.")
+st.write('User 1')
+food_1 = st.selectbox('Select the food type you prefer', st.session_state.food, key=1)
+if food_1 == 'Other':
+    food_1 = st.text_input(label="Your description", placeholder="What kind of food would you like to eat?", key=10)
+st.session_state.preferences_1.append(food_1)
+ambiance_1 = st.selectbox('What describes your occasion the best?', st.session_state.ambiance, key=2)
+if ambiance_1 == 'Other':
+    ambiance_1 = st.text_input(label="Your description", placeholder="How would you describe your meeting?", key=11)
+price_1 = st.select_slider("Your preferred price range", options=('$', '$$', '$$$', '$$$$'), key=3)
+st.session_state.preferences_1.append(ambiance_1)
+st.write('User 2')
+food_2 = st.selectbox('Select the food type you prefer', st.session_state.food, key=4)
+if food_2 == 'Other':
+    food_2 = st.text_input(label="Your description", placeholder="What kind of food would you like to eat?", key=13)
+st.session_state.preferences_2.append(food_2)
+ambiance_2 = st.selectbox('What describes your occasion the best?', st.session_state.ambiance, key=5)
+if ambiance_2 == 'Other':
+    ambiance_2 = st.text_input(label="Your description", placeholder="How would you describe your meeting?", key=12)
+price_2 = st.select_slider("Your preferred price range", options=('$', '$$', '$$$', '$$$$'), key=6)
+st.session_state.preferences_2.append(ambiance_2)
+submit = st.button("Submit")
+if submit:
+    with st.spinner("Please wait while we are finding the best solution..."):
+        query = get_combined_preferences(st.session_state.preferences_1, st.session_state.preferences_2)
+        st.write("Your query is:", query)
+        results = return_top_k(query, k=10)
+    st.write("Here are the best matches to your preferences:")
+    i = 1
+    for name, score in results.items():
+        st.write("Top", i, ':', name, score)
+        condition = df['Names'] == name
+        # Use the condition to extract the value(s)
+        description = df.loc[condition, 'Strings']
+        st.write(description)
+        i+=1
+st.session_state.preferences_1, st.session_state.preferences_2 = [], []
+#TODO: include rating and price as variables
+# if input:
+#     input_embed = model.encode(input)
+#     sim_score = similarity_top(input_embed, icd_embeddings)
+#     i = 1
+#     for dis, value in sim_score:
+#         st.write(f":green[Prediction number] {i}:")
+#         st.write(f"{dis} (similarity score:", value, ")")
+#         i+= 1
+#     text_spinner_placeholder = st.empty()
+#     with st.spinner("Please wait while your visualizations are being generated..."):
+#         time.sleep(5)
+#     vis_results_2d(input_embed)
+#     vis_results_3d(input_embed)
+# #TODO: implement price range as a sliding bar

restaurants_dataframe_with_embeddings.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31926e0abc4ff33c12761b0cc1d2b2f855bb097463e72a2dccbe9f2f2df3cf70
+size 19014982