Spaces:
Sleeping
Sleeping
Commit
·
3fe0603
1
Parent(s):
07d5c67
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# streamlit_app.py
|
2 |
+
import streamlit as st
|
3 |
+
import pandas as pd
|
4 |
+
import torch
|
5 |
+
from sentence_transformers import SentenceTransformer, util
|
6 |
+
import pickle
|
7 |
+
|
8 |
+
|
9 |
+
# NOTE: pickle.load can execute arbitrary code embedded in the file, so these
# .pkl files must come from a trusted source.
# NOTE(review): Streamlit is documented to re-run the script on interaction, so
# these loads presumably repeat each time - confirm whether caching is wanted.
def _read_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# First set: ids, product entries and embeddings stored under
# 'pro_master_id', 'products' and 'embeddings'.
stored_data_1 = _read_pickle('clinical_inno_embeddings_masterid_paraphrase-multilingual-mpnet-base-v2.pkl')
stored_masterid_1 = stored_data_1['pro_master_id']
stored_products_1 = stored_data_1['products']
stored_embeddings_1 = stored_data_1['embeddings']

# Second set: same layout, but the product entries and embeddings are stored
# under the 'mean_products' and 'mean_embeddings' keys.
stored_data_2 = _read_pickle('mean_clinical_inno_embeddings_masterid_paraphrase-multilingual-mpnet-base-v2.pkl')
stored_masterid_2 = stored_data_2['pro_master_id']
stored_products_2 = stored_data_2['mean_products']
stored_embeddings_2 = stored_data_2['mean_embeddings']

# Sentence-embedding model used to encode search queries.
embedder = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
|
26 |
+
|
27 |
+
def get_similar_products(query, products, embeddings, top_k=10):
    """Return the products whose embeddings are most similar to ``query``.

    The query is encoded with the module-level ``embedder`` and compared to
    ``embeddings`` by cosine similarity.

    Args:
        query: Text to search for.
        products: Product entries, indexed by the position of the matching
            embedding (assumes one entry per embedding - TODO confirm).
        embeddings: Stored embeddings to compare the query against.
        top_k: Maximum number of results to return.

    Returns:
        A list of ``(product, score)`` tuples, best match first. It holds
        fewer than ``top_k`` items when fewer embeddings are available and is
        empty when there are none.
    """
    # Nothing to rank: skip the model call and avoid failing on empty input.
    if len(embeddings) == 0:
        return []

    query_embedding = embedder.encode(query, convert_to_tensor=True)
    cos_scores = util.cos_sim(query_embedding, embeddings)[0]

    # torch.topk raises when k exceeds the number of scores, so clamp it.
    k = min(top_k, cos_scores.shape[0])
    top_results = torch.topk(cos_scores, k=k)

    return [
        (products[idx.item()], score.item())
        for score, idx in zip(top_results.values, top_results.indices)
    ]
|
34 |
+
|
35 |
+
# Page layout: title, search-approach selector, query box and search button.
st.title("Product Similarity Finder")

# Two-position slider choosing which stored set is searched.
embedding_option = st.select_slider(
    'Select Search Approach',
    options=['All Products', 'Master Products'],
)

# 'All Products' maps to the first stored set; any other choice to the second.
use_first_set = embedding_option == 'All Products'
stored_products = stored_products_1 if use_first_set else stored_products_2
stored_embeddings = stored_embeddings_1 if use_first_set else stored_embeddings_2
# Show how many product entries the selected set contains.
st.write(len(stored_products))

# Free-text query from the user.
user_input = st.text_input("Enter a product name or description:")

# Run the search only when the button is pressed.
if st.button('Search'):
    if not user_input:
        st.write("Please enter a product name or description to search.")
    else:
        # Rank the selected set against the query.
        results = get_similar_products(user_input, stored_products, stored_embeddings)

        # Tabulate the (product, score) pairs and show scores to four decimals.
        results_df = pd.DataFrame(results, columns=['Product', 'Score'])
        styled_results = results_df.style.format({'Score': '{:.4f}'})
        st.dataframe(styled_results)
|