import pickle

import faiss
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer

from vector_engine.utils import vector_search


@st.cache_data
def read_data(pibdata="pib2022_23_cleaned_abs.csv"):
    """Read the PIB press-release dataset from CSV into a DataFrame.

    Expected columns (used below): ``rid`` (release id), ``language``,
    ``abstract``.
    """
    return pd.read_csv(pibdata)


@st.cache_resource
def load_bert_model(name="pushpdeep/sbertmsmarco-en_to_indic_ur-murilv1"):
    """Instantiate the sentence-embedding model used for query encoding.

    Cached as a resource so the model is loaded once per session, not on
    every Streamlit rerun.
    """
    return SentenceTransformer(name)


@st.cache_data
def load_faiss_index(path_to_faiss="models/faiss_index_ip.pickle"):
    """Load and deserialize the Faiss index.

    NOTE(review): unpickling is only safe on trusted, locally produced
    files — never point this at untrusted input.
    """
    with open(path_to_faiss, "rb") as h:
        data = pickle.load(h)
    return faiss.deserialize_index(data)


def main():
    """Streamlit entry point: semantic search UI over the PIB dataset."""
    # Load data and models (all cached across reruns).
    data = read_data()
    model = load_bert_model()
    faiss_index = load_faiss_index()

    st.title("Vector-based search with Sentence Transformers and Faiss")

    # User search
    user_input = st.text_area("Search box", "हिंद महासागर")

    # Filters
    st.sidebar.markdown("**Filters**")
    num_results = st.sidebar.slider("Number of search results", 10, 50, 10)

    if not user_input:
        return

    # Encode the query and fetch nearest-neighbour ids from the index.
    D, I = vector_search([user_input], model, faiss_index, num_results)

    # Build the membership set ONCE — the original rebuilt set(frame.rid)
    # inside the loop, an O(n) scan per result.
    known_ids = set(data.rid)

    for id_ in I.flatten().tolist():
        # Skip index hits that have no matching row in the dataset.
        if id_ not in known_ids:
            continue
        row = data[data.rid == id_].iloc[0]
        st.write(
            f"""
            **Language**: {row.language}
            **Article**: {row.abstract}
            https://pib.gov.in/PressReleasePage.aspx?PRID={row.rid}
            """
        )


if __name__ == "__main__":
    main()