"""LegisQA: Streamlit app for semantic search over congressional bills.

Embeds user queries with a BGE model and retrieves matching passages
from a Pinecone vector index.
"""

from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import Pinecone
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_openai import ChatOpenAI
from pinecone import Pinecone as PineconeClient
import streamlit as st

st.set_page_config(layout="wide", page_title="LegisQA")

# Maps internal bill-type slugs to the path segments used in congress.gov URLs.
CONGRESS_GOV_TYPE_MAP = {
    "hconres": "house-concurrent-resolution",
    "hjres": "house-joint-resolution",
    "hr": "house-bill",
    "hres": "house-resolution",
    "s": "senate-bill",
    "sconres": "senate-concurrent-resolution",
    "sjres": "senate-joint-resolution",
    "sres": "senate-resolution",
}

# Chat models offered for answer generation.
OPENAI_CHAT_MODELS = [
    "gpt-3.5-turbo-0125",
    "gpt-4-0125-preview",
]


@st.cache_resource
def load_bge_embeddings():
    """Load the BGE embedding model once per server process.

    Cached with ``st.cache_resource`` so the model is not re-instantiated
    on every Streamlit rerun (each widget interaction re-executes the script).

    Returns:
        HuggingFaceBgeEmbeddings: embedding function with normalized outputs,
        using the BGE query instruction for retrieval-style queries.
    """
    model_name = "BAAI/bge-small-en-v1.5"
    model_kwargs = {"device": "cpu"}
    # normalize_embeddings=True so cosine similarity is meaningful.
    encode_kwargs = {"normalize_embeddings": True}
    emb_fn = HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
        query_instruction="Represent this question for searching relevant passages: ",
    )
    return emb_fn


@st.cache_resource
def load_pinecone_vectorstore():
    """Connect to the Pinecone index and wrap it as a LangChain vectorstore.

    Cached with ``st.cache_resource`` so the Pinecone client connection is
    reused across Streamlit reruns. Credentials and the index name come from
    ``st.secrets``.

    Returns:
        Pinecone: LangChain vectorstore backed by the configured index,
        using cosine distance over BGE embeddings.
    """
    emb_fn = load_bge_embeddings()
    pc = PineconeClient(api_key=st.secrets["pinecone_api_key"])
    index = pc.Index(st.secrets["pinecone_index_name"])
    vectorstore = Pinecone(
        index=index,
        embedding=emb_fn,
        text_key="text",
        distance_strategy=DistanceStrategy.COSINE,
    )
    return vectorstore


vectorstore = load_pinecone_vectorstore()
query = st.text_area("Enter query")
# Only hit the index once the user has entered something; on first render
# (and after clearing the box) `query` is empty and a search would be wasted.
if query:
    docs = vectorstore.similarity_search_with_score(query)
    st.write(docs)