File size: 5,308 Bytes
07ff0bc
793769c
1ad8ba9
0766a90
 
 
 
 
 
 
 
 
1ad8ba9
42039d6
1ad8ba9
42039d6
793769c
1ad8ba9
42039d6
793769c
42039d6
1ad8ba9
42039d6
 
 
 
0766a90
42039d6
1ad8ba9
42039d6
 
 
 
 
 
 
 
1ad8ba9
42039d6
1ad8ba9
42039d6
 
 
1ad8ba9
42039d6
1ad8ba9
42039d6
 
1ad8ba9
42039d6
1ad8ba9
42039d6
 
1ad8ba9
42039d6
1ad8ba9
42039d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ad8ba9
42039d6
1ad8ba9
42039d6
 
1ad8ba9
42039d6
 
1ad8ba9
42039d6
1ad8ba9
42039d6
1ad8ba9
42039d6
 
0766a90
 
 
 
 
 
 
42039d6
 
0766a90
 
 
 
 
 
 
 
 
 
 
 
 
e4c99ee
 
bf98f0a
2e29973
0766a90
 
793769c
f211f5b
1ad8ba9
 
793769c
 
f211f5b
793769c
e4c99ee
0766a90
 
e4c99ee
0766a90
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import os
import sys
import tempfile

import numpy as np
import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from streamlit_chat import message

# Chat with a CSV file through a retrieval-augmented conversational chain.
#
# The script has two front ends that share the same indexing and LLM code:
#   * `streamlit run <file>` -> web chat over a CSV uploaded in the sidebar
#   * `python <file>`        -> terminal prompt loop over DEFAULT_CSV_PATH

# Directory where the FAISS index is written after every build.
DB_FAISS_PATH = "vectorstore/db_faiss"
# CSV indexed by the terminal front end, which has no upload widget.
DEFAULT_CSV_PATH = "data/2019.csv"
# Sentence-transformers model used to embed the CSV chunks.
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
# Hugging Face model id used for answer generation.
LLM_MODEL_NAME = "mistralai/Mistral-7B-v0.1"


@st.cache_resource
def load_llm():
    """Return the text-generation model wrapped as a LangChain LLM.

    Cached with ``st.cache_resource`` because Streamlit re-executes the whole
    script on every interaction and the model must not be reloaded each time.
    """
    # Imported lazily so the heavy transformers import is only paid when an
    # LLM is actually needed.
    from langchain.llms import HuggingFacePipeline
    from transformers import pipeline

    # `model_type` is intentionally not passed: it is not an option of
    # transformers.pipeline and would be forwarded as a generation argument.
    generator = pipeline(
        "text-generation",
        model=LLM_MODEL_NAME,
        max_new_tokens=512,
        temperature=0.1,
    )
    # ConversationalRetrievalChain needs a LangChain LLM, not a raw pipeline.
    return HuggingFacePipeline(pipeline=generator)


@st.cache_resource
def load_embeddings():
    """Return the sentence-transformers embedding model (CPU), cached."""
    return HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL_NAME,
        model_kwargs={'device': 'cpu'},
    )


def build_vectorstore(csv_path):
    """Load *csv_path*, chunk it, embed it and return a FAISS vector store.

    The index is also saved to ``DB_FAISS_PATH``.

    Raises:
        ValueError: if the CSV produces no documents to index.
    """
    loader = CSVLoader(file_path=csv_path, encoding="utf-8", csv_args={'delimiter': ','})
    documents = loader.load()

    # Split the text into chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = splitter.split_documents(documents)
    print(len(text_chunks))
    if not text_chunks:
        # Fail with a clear message instead of an error from deep inside FAISS.
        raise ValueError("The CSV file contains no rows to index.")

    # Convert the chunks into embeddings and store them in a FAISS index
    docsearch = FAISS.from_documents(text_chunks, load_embeddings())
    docsearch.save_local(DB_FAISS_PATH)
    return docsearch


@st.cache_resource
def build_vectorstore_from_upload(file_bytes):
    """Index the raw bytes of an uploaded CSV; cached per distinct content.

    CSVLoader only accepts a file path, so the bytes are first written to a
    temporary file, which is removed again once the index has been built.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_file:
        tmp_file.write(file_bytes)
        tmp_file_path = tmp_file.name
    try:
        return build_vectorstore(tmp_file_path)
    finally:
        os.remove(tmp_file_path)


def render_chat(chain, file_name):
    """Render the chat form and the running transcript for one uploaded file."""

    def conversational_chat(query):
        # Ask the chain, then record the turn so follow-up questions have context.
        result = chain({"question": query, "chat_history": st.session_state['history']})
        st.session_state['history'].append((query, result["answer"]))
        return result["answer"]

    # Session state survives reruns; seed it only on the first run.
    if 'history' not in st.session_state:
        st.session_state['history'] = []
    if 'generated' not in st.session_state:
        st.session_state['generated'] = ["Hello ! Ask me anything about " + file_name + " 🤗"]
    if 'past' not in st.session_state:
        st.session_state['past'] = ["Hey ! 👋"]

    # Container for the chat history (created first so it is drawn above the form)
    response_container = st.container()
    # Container for the user's text input
    container = st.container()

    with container:
        with st.form(key='my_form', clear_on_submit=True):
            user_input = st.text_input("Query:", placeholder="Talk to your csv data here (:", key='input')
            submit_button = st.form_submit_button(label='Send')

        if submit_button and user_input:
            output = conversational_chat(user_input)
            st.session_state['past'].append(user_input)
            st.session_state['generated'].append(output)

    if st.session_state['generated']:
        with response_container:
            turns = zip(st.session_state['past'], st.session_state['generated'])
            for i, (user_text, bot_text) in enumerate(turns):
                message(user_text, is_user=True, key=str(i) + '_user', avatar_style="big-smile")
                message(bot_text, key=str(i), avatar_style="thumbs")


def run_streamlit_app():
    """Draw the web page: title, CSV upload, and the chat once a file is loaded."""
    # Display the title of the web page
    st.title("Chat with CSV using open source LLM Inference Point 🦙🦜")
    # Display a markdown message with additional information
    st.markdown("<h3 style='text-align: center; color: white;'>Built by <a href='https://github.com/AIAnytime'>AI Anytime with ❤️ </a></h3>", unsafe_allow_html=True)

    # Allow users to upload a CSV file
    uploaded_file = st.sidebar.file_uploader("Upload your Data", type="csv")

    if uploaded_file:
        try:
            db = build_vectorstore_from_upload(uploaded_file.getvalue())
        except ValueError as err:
            st.error(str(err))
        else:
            chain = ConversationalRetrievalChain.from_llm(llm=load_llm(), retriever=db.as_retriever())
            render_chat(chain, uploaded_file.name)

    # NOTE(review): the widgets below look like the st.chat_message /
    # st.chat_input documentation sample (random chart, unused input value).
    # They are kept so the page content is unchanged -- confirm whether they
    # are still wanted.
    with st.chat_message("user"):
        st.write("Hello 👋")
        st.line_chart(np.random.randn(30, 3))

    # Display a chat input widget.
    st.chat_input("Say something")


def run_cli(csv_path=DEFAULT_CSV_PATH):
    """Answer questions about *csv_path* in a terminal loop until 'exit'."""
    docsearch = build_vectorstore(csv_path)
    qa = ConversationalRetrievalChain.from_llm(load_llm(), retriever=docsearch.as_retriever())

    # Kept outside the loop so earlier turns remain available as context.
    chat_history = []
    while True:
        try:
            query = input("Input Prompt: ")
        except EOFError:
            # Closed stdin (e.g. piped input exhausted) is treated as 'exit'.
            query = 'exit'
        if query == 'exit':
            print('Exiting')
            sys.exit()
        if query == '':
            continue
        result = qa({"question": query, "chat_history": chat_history})
        print("Response: ", result['answer'])
        chat_history.append((query, result['answer']))


def main():
    """Pick the front end: web UI under `streamlit run`, terminal loop otherwise."""
    from streamlit import runtime

    # A blocking input() loop must never run inside the Streamlit server.
    if runtime.exists():
        run_streamlit_app()
    else:
        run_cli()


main()