File size: 1,583 Bytes
08eb2b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import streamlit as st
import pandas as pd
import json
import numpy as np
#from fuzzywuzzy import fuzz

import pinecone
from sentence_transformers import SentenceTransformer

# Initialise the Pinecone client as a module-level side effect (runs on import).
# NOTE(review): the API key is hard-coded in source. It should be rotated and
# loaded from a secret store or environment variable instead of being committed.
pinecone.init(api_key='f5112f8c-f27d-4af1-b427-0c0953c113b5', environment='asia-southeast1-gcp')

# Previously used stock model, kept for reference:
#model = SentenceTransformer('all-mpnet-base-v2',device='cpu')

# Sentence-embedding model loaded from the local "finetiuned_model" path
# (presumably a fine-tuned checkpoint directory — TODO confirm it ships with the app).
loaded_model = SentenceTransformer(r"finetiuned_model")

def process_string(s):
    """Normalise a name for matching: lowercase it and spell out '&' as 'and'."""
    lowered = s.lower()
    # Splitting on '&' and re-joining with 'and' substitutes every ampersand.
    return 'and'.join(lowered.split('&'))



# Module-level handle to the 'ingradientsearch' Pinecone index; main() queries it.
index = pinecone.Index('ingradientsearch')


# Create a Streamlit app
def main():
    """Render the Streamlit page and run a semantic ingredient search.

    Reads an ingredient name from a text box, normalises it with
    ``process_string``, and, once the "Enter" button is pressed, encodes it
    with the module-level ``loaded_model`` and queries the module-level
    Pinecone ``index`` for the top 5 matches. The matched ingredient names and
    their similarity scores are shown as a two-column table.

    Returns:
        None. All output is rendered through Streamlit.
    """
    st.set_page_config(page_title="Ingradients Matching App", page_icon=":smiley:", layout="wide")
    st.title("Ingradients name matching App :smiley:")

    st.header("Matches using embeddings (semantic search)")
    st.write("Enter a ingradient name:")
    st.write("e.g. Chicken")
    input_string = process_string(st.text_input(""))

    # Nothing to do until the user submits the query.
    if not st.button("Enter"):
        return

    # Do not spend a model/index round trip on a blank query.
    if not input_string.strip():
        st.warning("Please enter an ingredient name.")
        return

    st.write("Top 5 matches using semantic search:")

    # Use the model that is actually loaded at module level; the previous
    # reference to `model` was undefined and raised NameError.
    xq = loaded_model.encode([input_string]).tolist()
    result = index.query(xq, top_k=5, includeMetadata=True)

    # Build (ingredient, score) rows directly. The earlier code zipped in an
    # always-empty group list, which made zip() yield nothing and the table
    # come out empty.
    rows = [
        (match['metadata']['Ingredient'], match['score'])
        for match in result['matches']
    ]
    final_result = pd.DataFrame(rows, columns=['Ingredient', 'score'])

    st.dataframe(final_result)

# Run the app only when this file is executed as the main module, not on import.
if __name__ == "__main__":
    main()