edjdhug3 committed on
Commit
1a71f87
·
1 Parent(s): 03d1beb

Upload 3 files

Browse files
Files changed (3) hide show
  1. .env +1 -0
  2. app.py +86 -0
  3. requirement.txt +94 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ HUGGINGFACEHUB_API_TOKEN = <your-hugging-face-api-token>
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ from streamlit_extras.add_vertical_space import add_vertical_space
4
+ from PyPDF2 import PdfReader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ import pickle
9
+ # from sentence_transformers import SentenceTransformer
10
+ from langchain import HuggingFaceHub
11
+ from langchain.chains.question_answering import load_qa_chain
12
+
13
+ import os
14
+
15
+ # model = SentenceTransformer('all-MiniLM-L6-v2')
16
+
17
# Static sidebar: app title, a short "about" section and author credit.
with st.sidebar:
    st.title('LLM PDF Chats')
    # The link target must NOT be wrapped in quotes: in Markdown the quotes
    # would become part of the URL and the link would not resolve.
    st.markdown('''
    ## about
    - This is LLM power chatbot
    - By [Prathamesh Shete](https://www.linkedin.com/in/prathameshshete)


    ''')
    add_vertical_space(5)
    st.write('Made By Prathamesh')

# Load variables from a local .env file into the process environment before
# main() runs (presumably where the Hugging Face Hub API token comes from --
# confirm against the deployment's secret configuration).
load_dotenv()
30
def main():
    """Render the "Chat With PDF" page.

    Flow:
      1. Let the user upload a PDF and extract the text of every page.
      2. Split the text into overlapping chunks and build a FAISS vector
         store from them, or load a previously pickled store for the same
         file from the working directory.
      3. Accept a question, retrieve the 3 most similar chunks and ask the
         ``google/flan-t5-xl`` model (via ``HuggingFaceHub``) to answer
         from them; the answer is written to the page.

    Returns early (rendering nothing further) when no file is uploaded,
    when the PDF yields no extractable text, or when no question has been
    entered yet.
    """
    # Local import so this function stays self-contained: hashlib is not
    # among the module-level imports.
    import hashlib

    st.header('Chat With PDF')

    pdf = st.file_uploader('Upload Your PDF', type='pdf')
    if pdf is None:
        return

    pdf_reader = PdfReader(pdf)

    # Collect the text of *every* page. Assigning inside the loop (as the
    # previous version did) kept only the last page's text.
    text = '\n'.join(page.extract_text() or '' for page in pdf_reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)

    # Scanned / image-only PDFs produce no text; building an index from an
    # empty list would fail, so tell the user instead.
    if not chunks:
        st.warning('No extractable text was found in this PDF.')
        return

    # Cache key = file stem + content digest. Keying on the name alone would
    # serve a stale index when a different PDF with the same name is
    # uploaded. basename() keeps the cache file inside the working directory.
    stem = os.path.splitext(os.path.basename(pdf.name))[0]
    digest = hashlib.sha256(pdf.getvalue()).hexdigest()[:16]
    store_path = f'{stem}-{digest}.pkl'

    if os.path.exists(store_path):
        # SECURITY NOTE: pickle.load executes arbitrary code from the file.
        # This is only acceptable because the file is written by this app;
        # never point it at a pickle obtained from an untrusted source.
        with open(store_path, 'rb') as f:
            vector_store = pickle.load(f)
    else:
        embeddings = HuggingFaceEmbeddings()
        vector_store = FAISS.from_texts(chunks, embedding=embeddings)
        with open(store_path, 'wb') as f:
            pickle.dump(vector_store, f)

    # Accept the user's query.
    ask_query = st.text_input('Ask question about PDF : ')
    if not ask_query:
        return

    # Retrieve the most relevant chunks and let the LLM answer from them.
    docs = vector_store.similarity_search(query=ask_query, k=3)
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-xl",
        model_kwargs={"temperature": 0, "max_length": 64},
    )
    chain = load_qa_chain(llm=llm, chain_type='stuff')
    response = chain.run(input_documents=docs, question=ask_query)
    st.write(response)


if __name__ == "__main__":
    main()
requirement.txt ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.8.4
2
+ aiosignal==1.3.1
3
+ altair==5.0.1
4
+ async-timeout==4.0.2
5
+ attrs==23.1.0
6
+ beautifulsoup4==4.12.2
7
+ blinker==1.6.2
8
+ cachetools==5.3.1
9
+ certifi==2023.5.7
10
+ charset-normalizer==3.1.0
11
+ click==8.1.3
12
+ contourpy==1.1.0
13
+ cycler==0.11.0
14
+ dataclasses-json==0.5.9
15
+ decorator==5.1.1
16
+ faiss-cpu==1.7.4
17
+ Faker==18.11.2
18
+ favicon==0.7.0
19
+ fonttools==4.40.0
20
+ frozenlist==1.3.3
21
+ gitdb==4.0.10
22
+ GitPython==3.1.31
23
+ greenlet==2.0.2
24
+ htbuilder==0.6.1
25
+ idna==3.4
26
+ importlib-metadata==6.7.0
27
+ Jinja2==3.1.2
28
+ jsonschema==4.17.3
29
+ kiwisolver==1.4.4
30
+ langchain==0.0.224
31
+ langchainplus-sdk==0.0.20
32
+ lxml==4.9.3
33
+ Markdown==3.4.3
34
+ markdown-it-py==3.0.0
35
+ markdownlit==0.0.7
36
+ MarkupSafe==2.1.3
37
+ marshmallow==3.19.0
38
+ marshmallow-enum==1.5.1
39
+ matplotlib==3.7.1
40
+ mdurl==0.1.2
41
+ more-itertools==9.1.0
42
+ multidict==6.0.4
43
+ mypy-extensions==1.0.0
44
+ numexpr==2.8.4
45
+ numpy==1.25.0
46
+ openapi-schema-pydantic==1.2.4
47
+ packaging==23.1
48
+ pandas==2.0.3
49
+ Pillow==9.5.0
50
+ protobuf==4.23.3
51
+ pyarrow==12.0.1
52
+ pydantic==1.10.11
53
+ pydeck==0.8.1b0
54
+ Pygments==2.15.1
55
+ pymdown-extensions==10.0.1
56
+ Pympler==1.0.1
57
+ pyparsing==3.1.0
58
+ PyPDF2==3.0.1
59
+ pyrsistent==0.19.3
60
+ python-dateutil==2.8.2
61
+ python-dotenv==1.0.0
62
+ pytz==2023.3
63
+ pytz-deprecation-shim==0.1.0.post0
64
+ PyYAML==6.0
65
+ requests==2.31.0
66
+ rich==13.4.2
67
+ six==1.16.0
68
+ smmap==5.0.0
69
+ soupsieve==2.4.1
70
+ SQLAlchemy==2.0.17
71
+ st-annotated-text==4.0.0
72
+ streamlit==1.24.0
73
+ streamlit-camera-input-live==0.2.0
74
+ streamlit-card==0.0.61
75
+ streamlit-embedcode==0.1.2
76
+ streamlit-extras==0.2.7
77
+ streamlit-faker==0.0.2
78
+ streamlit-image-coordinates==0.1.5
79
+ streamlit-keyup==0.2.0
80
+ streamlit-toggle-switch==1.0.2
81
+ streamlit-vertical-slider==1.0.2
82
+ tenacity==8.2.2
83
+ toml==0.10.2
84
+ toolz==0.12.0
85
+ tornado==6.3.2
86
+ typing-inspect==0.9.0
87
+ typing_extensions==4.7.1
88
+ tzdata==2023.3
89
+ tzlocal==4.3.1
90
+ urllib3==2.0.3
91
+ validators==0.20.0
92
+ watchdog==3.0.0
93
+ yarl==1.9.2
94
+ zipp==3.15.0