import sys
import tempfile

import streamlit as st
from streamlit_chat import message
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import ConversationalRetrievalChain

DB_FAISS_PATH = "vectorstore/db_faiss"

def load_llm():
    # Load a locally downloaded quantized model through CTransformers.
    # The GGUF file name is an assumption: point it at whichever quantized
    # build of Mistral-7B-v0.1 (the model this app targets) you have on disk,
    # and set model_type to match.
    llm = CTransformers(
        model="mistral-7b-v0.1.Q4_K_M.gguf",  # hypothetical local file name
        model_type="mistral",
        config={'max_new_tokens': 512, 'temperature': 0.1},
    )
    return llm

st.title("Chat with CSV using open source LLM Inference Point 🦙🦜")
st.markdown("<h3 style='text-align: center;'>Built by AI Anytime with ❤️</h3>",
            unsafe_allow_html=True)
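# Assumed environment (the source pins nothing): streamlit, streamlit-chat,
# langchain with the pre-0.1 import paths used above, faiss-cpu,
# sentence-transformers, and ctransformers. Launch the app with
# `streamlit run <this_file>.py`.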

", unsafe_allow_html=True) uploaded_file = st.sidebar.file_uploader("Upload your Data", type="csv") if uploaded_file : #use tempfile because CSVLoader only accepts a file_path with tempfile.NamedTemporaryFile(delete=False) as tmp_file: tmp_file.write(uploaded_file.getvalue()) tmp_file_path = tmp_file.name db = DB_FAISS_PATH = "vectorstore/db_faiss" loader = CSVLoader(file_path="data/2019.csv", encoding="utf-8", csv_args={'delimiter': ','}) data = loader.load() db.save_local(DB_FAISS_PATH) llm = load_llm() chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=db.as_retriever()) def conversational_chat(query): result = chain({"question": query, "chat_history": st.session_state['history']}) st.session_state['history'].append((query, result["answer"])) return result["answer"] if 'history' not in st.session_state: st.session_state['history'] = [] if 'generated' not in st.session_state: st.session_state['generated'] = ["Hello ! Ask me anything about " + uploaded_file.name + " 🤗"] if 'past' not in st.session_state: st.session_state['past'] = ["Hey ! 👋"] #container for the chat history response_container = st.container() #container for the user's text input container = st.container() with container: with st.form(key='my_form', clear_on_submit=True): user_input = st.text_input("Query:", placeholder="Talk to your csv data here (:", key='input') submit_button = st.form_submit_button(label='Send') if submit_button and user_input: output = conversational_chat(user_input) st.session_state['past'].append(user_input) st.session_state['generated'].append(output) if st.session_state['generated']: with response_container: for i in range(len(st.session_state['generated'])): message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="big-smile") message(st.session_state["generated"][i], key=str(i), avatar_style="thumbs") # Split the text into Chunks text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20) text_chunks = text_splitter.split_documents(data) print(len(text_chunks)) # Download Sentence Transformers Embedding From Hugging Face embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2', model_kwargs={'device': 'cpu'}) # COnverting the text Chunks into embeddings and saving the embeddings into FAISS Knowledge Base docsearch = FAISS.from_documents(text_chunks, embeddings) docsearch.save_local(DB_FAISS_PATH) #query = "What is the value of GDP per capita of Finland provided in the data?" #docs = docsearch.similarity_search(query, k=3) #print("Result", docs) from transformers import pipeline pipe = pipeline("text-generation",model="mistralai/Mistral-7B-v0.1",model_type="llama",max_new_tokens=512,temperature=0.1 ) qa = ConversationalRetrievalChain.from_llm(llm, retriever=docsearch.as_retriever()) # Insert a chat message container. with st.chat_message("user"): st.write("Hello 👋") st.line_chart(np.random.randn(30, 3)) # Display a chat input widget. st.chat_input("Say something") while True: chat_history = [] #query = "What is the value of GDP per capita of Finland provided in the data?" query = input(f"Input Prompt: ") if query == 'exit': print('Exiting') sys.exit() if query == '': continue result = qa({"question":query, "chat_history":chat_history}) print("Response: ", result['answer'])