"""Streamlit app that matches ingredient names against a Pinecone index.

The user pastes a JSON list of menu items, each carrying an ``ingredients``
list. Every ingredient is embedded with a SentenceTransformer model and the
single nearest entry of the Pinecone index is reported with its score.
"""
import json
import os
import time
from concurrent.futures import ThreadPoolExecutor

import numpy as np
import pandas as pd
import pinecone
import streamlit as st
from sentence_transformers import SentenceTransformer

# NOTE(review): credentials should not live in source control. The environment
# variables take precedence; the literal fallbacks only keep existing
# deployments working and should be removed once the key has been rotated.
pinecone.init(
    api_key=os.environ.get(
        "PINECONE_API_KEY", "5c5b5687-b73d-47e9-9cc8-e184ff72cc45"
    ),
    environment=os.environ.get("PINECONE_ENVIRONMENT", "us-central1-gcp"),
)


def process_string(s: str) -> str:
    """Return *s* lower-cased with every ``&`` replaced by ``and``."""
    return s.lower().replace('&', 'and')


index = pinecone.Index('ingradientsearch')


@st.cache_resource
def load_model():
    """Load the SentenceTransformer model once and share it across reruns.

    ``st.cache_resource`` is used rather than ``st.cache_data`` because the
    model is a shared resource: ``cache_data`` would serialize the return
    value and hand back a copy on every call.
    """
    return SentenceTransformer(r"finetiuned_model")


def process_embedding(ingredient, model):
    """Normalize *ingredient* and return its embedding as nested lists.

    The ingredient is encoded as a one-element batch, so the result is the
    ``tolist()`` form of whatever ``model.encode`` returns for that batch
    (presumably a list holding a single vector -- confirm against the model).
    """
    processed_ingredient = process_string(ingredient)
    return model.encode([processed_ingredient]).tolist()


def pinecone_query(xq, index, top_k=1, includeMetadata=True):
    """Run ``index.query`` for the query embedding *xq* and return the response."""
    return index.query(xq, top_k=top_k, includeMetadata=includeMetadata)


def get_top_matches(ingredients):
    """Look up the best index match for every ingredient.

    Args:
        ingredients: iterable of ingredient name strings.

    Returns:
        A ``(matches, scores)`` pair of lists, positionally aligned with
        *ingredients*. ``matches[i]`` is the ``Ingredient`` metadata value of
        the top hit and ``scores[i]`` its score rounded to two decimals; both
        are ``None`` when the query for ingredient *i* returned no hits, so
        the lists never drift out of step with the input.
    """
    loaded_model = load_model()

    # Generate embeddings in parallel.
    with ThreadPoolExecutor() as executor:
        embeddings = list(
            executor.map(
                lambda ing: process_embedding(ing, loaded_model), ingredients
            )
        )

    # Query Pinecone in parallel; executor.map preserves input order.
    with ThreadPoolExecutor() as executor:
        results = list(
            executor.map(lambda xq: pinecone_query(xq, index), embeddings)
        )

    matches = []
    scores = []
    for result in results:
        hits = result['matches']
        if hits:
            best = hits[0]
            matches.append(best['metadata']['Ingredient'])
            scores.append(round(best['score'], 2))
        else:
            # Keep a placeholder so index i still refers to ingredient i.
            matches.append(None)
            scores.append(None)

    return matches, scores


def _is_menu_list(data) -> bool:
    """Return True when *data* is a list whose items are all dicts."""
    return isinstance(data, list) and all(
        isinstance(item, dict) for item in data
    )


def main():
    """Render the page and process the pasted JSON when the button is pressed."""
    st.set_page_config(
        page_title="Ingredients Matching App",
        page_icon=":smiley:",
        layout="wide",
    )
    st.title("Ingredients name matching App :smiley:")
    st.header("Matches using embeddings (semantic search)")

    st.write("Enter the JSON input:")
    json_input = st.text_area("")

    if st.button("Process"):
        start_time = time.time()
        with st.spinner("Processing..."):
            try:
                input_data = json.loads(json_input)
            except json.JSONDecodeError:
                st.error("Invalid JSON input. Please check and try again.")
            else:
                if not _is_menu_list(input_data):
                    # Valid JSON of the wrong shape used to crash on `.get`.
                    st.error(
                        "Invalid JSON input. Expected a list of menu item "
                        "objects."
                    )
                else:
                    for menu_item in input_data:
                        # `or []` also covers an explicit `"ingredients": null`.
                        ingredients = menu_item.get("ingredients") or []
                        matches, scores = get_top_matches(ingredients)
                        menu_item["Ingradients_matched"] = matches
                        menu_item["scores"] = scores

                    st.write("Processed JSON:")
                    # NOTE(review): the wrapper strings around the JSON look
                    # truncated (possibly lost <pre> tags) -- confirm the
                    # intended markup. The JSON echoes user input while raw
                    # HTML rendering is enabled.
                    st.write(
                        "\n" + json.dumps(input_data, indent=4),
                        unsafe_allow_html=True,
                    )

        end_time = time.time()
        st.write(f"Processing time: {end_time - start_time:.2f} seconds")


if __name__ == "__main__":
    main()