edjdhug3 committed on
Commit
bb3bf4b
·
1 Parent(s): 150163e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -60
app.py CHANGED
@@ -6,71 +6,59 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.embeddings import HuggingFaceEmbeddings
7
  from langchain.vectorstores import FAISS
8
  import pickle
9
- # from sentence_transformers import SentenceTransformer
10
  from langchain import HuggingFaceHub
11
  from langchain.chains.question_answering import load_qa_chain
12
-
13
  import os
14
 
15
- # model = SentenceTransformer('all-MiniLM-L6-v2')
16
-
17
- with st.sidebar:
18
- st.title('LLM PDF Chats')
19
- st.markdown('''
20
- ## about
21
- - This is LLM power chatbot
22
- - By [Prathamesh Shete]('https://www.linkedin.com/in/prathameshshete')
23
-
24
-
25
- ''')
26
- add_vertical_space(5)
27
- st.write('Made By Prathamesh')
28
-
29
- load_dotenv()
30
- def main():
31
  st.header('Chat With PDF')
32
-
33
- pdf = st.file_uploader('Upload Your PDF', type='pdf')
34
 
35
- if pdf is not None:
36
- pdf_reader = PdfReader(pdf)
37
-
38
- text = ''
39
- for page in pdf_reader.pages:
40
- text += page.extract_text()
41
-
42
- text_splitter = RecursiveCharacterTextSplitter(
43
- chunk_size=1000,
44
- chunk_overlap=200,
45
- length_function=len
46
- )
47
-
48
- chunks = text_splitter.split_text(text=text)
49
-
50
- store_name = pdf.name[:-4]
51
-
52
- if os.path.exists(f'{store_name}.pkl'):
53
- with open(f'{store_name}.pkl', 'rb') as f:
54
- VectorStore = pickle.load(f)
55
- else:
56
- embeddings = HuggingFaceEmbeddings()
57
- VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
58
- with open(f'{store_name}.pkl', 'wb') as f:
59
- pickle.dump(VectorStore, f)
60
-
61
- ask_query = st.text_input('Ask question about PDF: ')
62
-
63
- if ask_query:
64
- docs = VectorStore.similarity_search(query=ask_query, k=3)
65
- llm = HuggingFaceHub(repo_id="google/flan-t5-xl", model_kwargs={"temperature": 0, "max_length": 64})
66
- chain = load_qa_chain(llm=llm, chain_type='stuff')
67
- response = chain.run(input_documents=docs, question=ask_query)
68
- st.write(response)
69
 
70
-
71
-
72
-
73
-
74
-
75
  if __name__ == "__main__":
76
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from langchain.embeddings import HuggingFaceEmbeddings
7
  from langchain.vectorstores import FAISS
8
  import pickle
 
9
  from langchain import HuggingFaceHub
10
  from langchain.chains.question_answering import load_qa_chain
 
11
  import os
12
 
13
+ def main(pdf):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  st.header('Chat With PDF')
 
 
15
 
16
+ if pdf is not None:
17
+ pdf_reader = PdfReader(pdf)
18
+
19
+ text = ''
20
+ for page in pdf_reader.pages:
21
+ text += page.extract_text()
22
+
23
+ text_splitter = RecursiveCharacterTextSplitter(
24
+ chunk_size=1000,
25
+ chunk_overlap=200,
26
+ length_function=len
27
+ )
28
+
29
+ chunks = text_splitter.split_text(text=text)
30
+
31
+ store_name = pdf.name[:-4]
32
+
33
+ if os.path.exists(f'{store_name}.pkl'):
34
+ with open(f'{store_name}.pkl', 'rb') as f:
35
+ VectorStore = pickle.load(f)
36
+ else:
37
+ embeddings = HuggingFaceEmbeddings()
38
+ VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
39
+ with open(f'{store_name}.pkl', 'wb') as f:
40
+ pickle.dump(VectorStore, f)
41
+
42
+ ask_query = st.text_input('Ask question about PDF: ')
43
+
44
+ if ask_query:
45
+ docs = VectorStore.similarity_search(query=ask_query, k=3)
46
+ llm = HuggingFaceHub(repo_id="google/flan-t5-xl", model_kwargs={"temperature": 0, "max_length": 64})
47
+ chain = load_qa_chain(llm=llm, chain_type='stuff')
48
+ response = chain.run(input_documents=docs, question=ask_query)
49
+ st.write(response)
50
 
 
 
 
 
 
51
  if __name__ == "__main__":
52
+ load_dotenv()
53
+
54
+ st.sidebar.title('LLM PDF Chats')
55
+ st.sidebar.markdown('''
56
+ ## About
57
+ - This is LLM power chatbot
58
+ - By [Prathamesh Shete]('https://www.linkedin.com/in/prathameshshete')
59
+ ''')
60
+ add_vertical_space(5)
61
+ st.sidebar.write('Made By Prathamesh')
62
+
63
+ pdf = st.file_uploader('Upload Your PDF', type='pdf')
64
+ main(pdf)