Spaces:
Runtime error
Runtime error
Commit
·
fd7fdc2
1
Parent(s):
4d85c55
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import json
|
4 |
+
import numpy as np
|
5 |
+
import faiss
|
6 |
+
from sentence_transformers import SentenceTransformer
|
7 |
+
import time
|
8 |
+
from concurrent.futures import ThreadPoolExecutor
|
9 |
+
|
10 |
+
def process_string(s):
    """Normalise an ingredient name for embedding.

    The text is lower-cased and every ``&`` is replaced by the word
    ``and``; nothing else (including surrounding whitespace) is touched.
    """
    lowered = s.lower()
    return lowered.replace('&', 'and')
|
12 |
+
|
13 |
+
@st.cache_resource
def load_model():
    """Load the fine-tuned SentenceTransformer once and share it.

    ``st.cache_resource`` is used instead of ``st.cache_data`` because the
    return value is a model object, not plain data: ``cache_data``
    serialises the return value and hands back a copy on each call, while
    ``cache_resource`` keeps one shared instance for all reruns/sessions.

    Returns:
        The ``SentenceTransformer`` loaded from the local model directory.
    """
    # NOTE(review): the directory name is spelled "finetiuned_model"; it is
    # left untouched here -- confirm it matches the folder shipped with the app.
    return SentenceTransformer(r"finetiuned_model")
|
17 |
+
|
18 |
+
def process_embedding(ingredient, model):
    """Embed a single ingredient name.

    The name is normalised with ``process_string`` and passed to
    ``model.encode`` as a one-element batch; the encoder output is
    converted with ``tolist()`` before being returned.
    """
    batch = [process_string(ingredient)]
    encoded = model.encode(batch)
    return encoded.tolist()
|
21 |
+
|
22 |
+
def faiss_query(xq, index, top_k=1):
    """Search ``index`` for the ``top_k`` nearest entries to ``xq``.

    ``xq`` is converted to a float32 NumPy array before being handed to
    ``index.search``. Only the first row of the search output is returned,
    as a ``(distances, indices)`` pair.
    """
    query = np.array(xq)
    query = query.astype('float32')
    all_distances, all_indices = index.search(query, top_k)
    first_row = (all_distances[0], all_indices[0])
    return first_row
|
25 |
+
|
26 |
+
def get_top_matches(ingredients_flat, ingredients, loaded_model, index):
    """Find the closest known ingredient for every input ingredient.

    Each name is embedded with ``process_embedding`` and looked up in
    ``index`` with ``faiss_query`` (top-1). Both steps are fanned out over
    a thread pool; ``executor.map`` preserves input order.

    Args:
        ingredients_flat: Sequence of known ingredient names, addressed by
            the ids that ``index`` returns.
        ingredients: Iterable of ingredient names to match. ``None`` or an
            empty iterable yields two empty lists.
        loaded_model: Embedding model passed through to
            ``process_embedding``.
        index: Search index passed through to ``faiss_query``.

    Returns:
        ``(matches, scores)`` -- two lists with one entry per input
        ingredient, in input order. When the index yields no neighbour for
        an ingredient, both entries are ``None`` so the lists stay aligned
        with the input. Scores are built-in ``float`` values rounded to two
        decimals.
    """
    names = [] if ingredients is None else list(ingredients)
    if not names:
        return [], []

    with ThreadPoolExecutor() as executor:
        # Generate embeddings in parallel.
        embeddings = list(
            executor.map(lambda name: process_embedding(name, loaded_model), names)
        )
        # Query Faiss in parallel.
        results = list(executor.map(lambda xq: faiss_query(xq, index), embeddings))

    matches = []
    scores = []
    for distances, indices in results:
        # Faiss pads missing neighbours with label -1; using it as a list
        # index would silently pick the *last* known ingredient.
        if indices.size == 0 or indices[0] < 0:
            matches.append(None)
            scores.append(None)
            continue
        matches.append(ingredients_flat[indices[0]])
        # score = 1 - distance / 2 -- presumably maps a squared-L2 distance
        # between unit vectors onto cosine similarity; confirm against the
        # metric the index was built with. Cast to a built-in float so the
        # value is JSON-serialisable (NumPy float32 is not).
        scores.append(round(float(1 - distances[0] / 2), 2))

    return matches, scores
|
47 |
+
|
48 |
+
# Streamlit re-executes this script on every user interaction, so the
# on-disk artefacts are loaded through cached helpers instead of being
# re-read on each rerun.


@st.cache_resource
def _load_faiss_index(path):
    """Read the Faiss index stored at ``path`` (cached as a shared resource)."""
    return faiss.read_index(path)


@st.cache_data
def _load_metadata(path):
    """Parse the JSON metadata file at ``path`` and return its content."""
    # JSON is exchanged as UTF-8; don't depend on the platform default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


# Load the Faiss index from disk
index = _load_faiss_index('faiss_index.bin')

# Load the metadata from the JSON file
metadata = _load_metadata('metadata_faiss.json')

# Ingredient names taken from each metadata entry's "Ingredient" field;
# presumably ordered so that position i corresponds to id i in the Faiss
# index -- verify against the script that built both files.
ingredients_flat = [item["Ingredient"] for item in metadata]
loaded_model = load_model()
|
57 |
+
|
58 |
+
def _as_menu_items(parsed):
    """Return ``parsed`` as a list of menu-item dicts, or ``None`` if it has another shape."""
    if isinstance(parsed, dict):
        # Accept a single menu item without the surrounding array.
        return [parsed]
    if isinstance(parsed, list) and all(isinstance(item, dict) for item in parsed):
        return parsed
    return None


def main():
    """Render the page and, on request, match the ingredients of each menu item.

    The text area is expected to hold a JSON array of objects (a single
    object is also accepted), each optionally carrying an ``"ingredients"``
    list. When "Process" is pressed, every item is annotated in place with
    ``"Ingradients_matched"`` and ``"scores"`` from ``get_top_matches`` and
    the result is shown as a DataFrame, followed by the elapsed time.
    Malformed JSON or JSON of another shape is reported with ``st.error``.
    """
    st.title("Ingredients name matching App :smiley:")

    st.header("Matches using embeddings (semantic search)")
    st.write("Enter the JSON input:")
    # Streamlit warns about empty widget labels; supply a real label and
    # hide it, since the prompt is already written just above.
    json_input = st.text_area("JSON input", label_visibility="collapsed")

    if not st.button("Process"):
        return

    start_time = time.perf_counter()
    with st.spinner("Processing..."):
        try:
            parsed = json.loads(json_input)
        except json.JSONDecodeError:
            st.error("Invalid JSON input. Please check and try again.")
        else:
            input_data = _as_menu_items(parsed)
            if input_data is None:
                # Valid JSON but not iterable as menu items (e.g. a number
                # or a list of strings); previously this crashed on `.get`.
                st.error("Expected a JSON array of menu item objects.")
            else:
                for menu_item in input_data:
                    # `or []` also covers an explicit `"ingredients": null`.
                    ing_list = menu_item.get("ingredients") or []
                    if isinstance(ing_list, str):
                        # A bare string would otherwise be matched
                        # character by character.
                        ing_list = [ing_list]
                    matches, scores = get_top_matches(
                        ingredients_flat, ing_list, loaded_model, index
                    )
                    # NOTE(review): the key is spelled "Ingradients_matched";
                    # kept as-is because it is the visible output column name.
                    menu_item["Ingradients_matched"] = matches
                    menu_item["scores"] = scores

                output_df = pd.DataFrame(input_data)
                st.write("Processed Data:")
                st.write(output_df)

    end_time = time.perf_counter()
    st.write(f"Processing time: {end_time - start_time:.2f} seconds")


if __name__ == "__main__":
    main()
|