File size: 3,944 Bytes
31df2d1
 
 
 
 
 
 
 
 
 
 
 
9306c69
 
 
31df2d1
9306c69
6aaa040
 
 
 
 
31df2d1
 
 
 
b110556
6aaa040
008e424
31df2d1
 
6aaa040
 
 
 
31df2d1
 
 
b110556
31df2d1
 
 
 
 
 
6aaa040
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a23cb40
6aaa040
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31df2d1
 
 
6aaa040
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import streamlit as st
# from dotenv import load_dotenv
import pickle
from PyPDF2 import PdfReader
from streamlit_extras.add_vertical_space import add_vertical_space
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.callbacks import get_openai_callback
import os
from langchain.vectorstores import Chroma
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.embeddings import HuggingFaceHubEmbeddings
# Sidebar contents
from langchain.llms import HuggingFaceHub


# --- Session state and sidebar ---------------------------------------------
# Make sure the API-key slot exists before anything below reads it.
if 'HuggingFace_API_Key' not in st.session_state:
    st.session_state['HuggingFace_API_Key'] = ''

# Static "about" panel shown in the sidebar.
with st.sidebar:
    # The emoji were previously stored as mis-decoded bytes (mojibake).
    st.title('🤗💬 LLM Chat App')
    st.markdown('''
   ## About
   This app is an LLM-powered chatbot
   PDF:Chatbot AI-powered chat assistant for PDFs

   ''')
    add_vertical_space(5)


# The password field's current value is written straight into session state,
# so an empty field stores '' and main() treats that as "no key supplied".
st.session_state['HuggingFace_API_Key'] = st.sidebar.text_input(
    "What's your HuggingFace API key?", type="password")

# NOTE(review): the button's return value is not read anywhere in this file;
# the key above is stored whether or not the button is pressed.
load_button = st.sidebar.button("Submit API Key", key="load_button")


def _extract_pdf_text(pdf):
    """Return the text of every page of the uploaded PDF, concatenated."""
    reader = PdfReader(pdf)
    # A page may yield no text (e.g. scanned images); treat that as empty
    # instead of failing on the concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _split_into_chunks(text):
    """Split *text* into overlapping chunks of at most 1000 characters."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text=text)


def _vector_store_cache_path(pdf):
    """Build the on-disk cache file name for the uploaded PDF.

    The name combines the upload's base name (without extension) and a hash
    of its bytes, so two different documents that share a file name never
    reuse each other's embeddings.
    """
    import hashlib  # local import: only needed for the cache key

    stem = os.path.splitext(os.path.basename(pdf.name))[0]
    digest = hashlib.sha256(pdf.getvalue()).hexdigest()[:16]
    return f"{stem}-{digest}.pkl"


def _load_or_build_vector_store(pdf, chunks, api_key):
    """Return a FAISS store for *chunks*, reusing the pickled cache if present."""
    cache_path = _vector_store_cache_path(pdf)

    if os.path.exists(cache_path):
        # SECURITY: pickle.load can execute arbitrary code. This is only
        # acceptable while the working directory is writable solely by the
        # app itself; do not point it at files from untrusted sources.
        with open(cache_path, "rb") as f:
            return pickle.load(f)

    embeddings = HuggingFaceHubEmbeddings(huggingfacehub_api_token=api_key)
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    with open(cache_path, "wb") as f:
        pickle.dump(vector_store, f)
    return vector_store


def main():
    """Render the PDF chat page.

    Flow: show the uploader, and once both an API key (from session state)
    and a PDF are available, extract the PDF text, split it into chunks,
    load or build the FAISS vector store, then answer the user's question
    with a "stuff" QA chain over the 3 most similar chunks.
    """
    st.header("Chat with PDF 💬")
    pdf = st.file_uploader("Upload your PDF", type='pdf')

    api_key = st.session_state['HuggingFace_API_Key']
    # Nothing to do until the user has supplied both a key and a document.
    if not api_key or pdf is None:
        return

    text = _extract_pdf_text(pdf)
    chunks = _split_into_chunks(text) if text.strip() else []
    if not chunks:
        # Building a vector store from zero chunks would fail, so stop here
        # with a message the user can act on.
        st.warning("No extractable text was found in this PDF.")
        return

    vector_store = _load_or_build_vector_store(pdf, chunks, api_key)

    # Accept user questions/query
    query = st.text_input("Ask questions about your PDF file:")
    if not query:
        return

    docs = vector_store.similarity_search(query=query, k=3)
    llm = HuggingFaceHub(
        repo_id='google/flan-ul2',
        huggingfacehub_api_token=api_key,
        model_kwargs={"temperature": 0.1, "max_new_tokens": 500},
    )
    chain = load_qa_chain(llm=llm, chain_type="stuff")
    response = chain.run(input_documents=docs, question=query)
    st.write(response)


# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()