edjdhug3 committed on
Commit
c6cdefa
·
1 Parent(s): 42aaff3

Upload 2 files

Browse files
Files changed (2) hide show
  1. llm5.py +103 -0
  2. requirements.txt +3 -0
llm5.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfFileReader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings import GooglePalmEmbeddings
5
+ from langchain.llms import GooglePalm
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.chains import ConversationalRetrievalChain
8
+ from langchain.memory import ConversationBufferMemory
9
+ import os
10
+ from langchain.document_loaders import UnstructuredURLLoader
11
+ import tempfile
12
+
13
# Set your Google API Key here (replace the placeholder), or export
# GOOGLE_API_KEY before launching the app. setdefault is used so that a key
# already present in the environment is never overwritten by the placeholder.
os.environ.setdefault('GOOGLE_API_KEY', 'YOUR_API_KEY')
15
+
16
# URLs (web pages and hosted PDFs) that are loaded and indexed together with
# the uploaded documents. Every entry ends with a comma: adjacent string
# literals without one are silently concatenated into a single bogus URL.
direct_links = [
    "https://zollege.in/exams/comedk-uget",
    "https://zollege.in/exams/comedk-uget/cutoff",
    "https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16881265522765.pdf",
    "https://www.iimrohtak.ac.in/panel/assets/images/lor/16884755042121.pdf",
    "https://www.iimrohtak.ac.in/dpm.php",
    "https://www.iimrohtak.ac.in/dpm-admission.php",
    "https://www.iimrohtak.ac.in/areas-of-specialisation.php",
    "https://www.iimrohtak.ac.in/financial-assistance.php",
    "https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16903487969776.pdf",
    "https://www.iimrohtak.ac.in/faqs-for-dpm.php",
    "https://www.iimrohtak.ac.in/dpm-student.php",
    "https://www.iimrohtak.ac.in/publication.php",
    "https://www.iimrohtak.ac.in/dpm-contact.php",
    "https://www.iimrohtak.ac.in/ipm.php",
]
32
+
33
# Sample PDF paths; swap in the paths of your own files.
pdf_files = [
    "sample.pdf",
    "sample2.pdf",
]
35
+
36
def get_data(direct_links, pdf_files):
    """Collect the text of the given URLs and PDF files as one string.

    Args:
        direct_links: URLs handed to ``UnstructuredURLLoader``.
        pdf_files: Paths of PDF files handed to ``get_pdf_text``.

    Returns:
        The text of all loaded URL documents followed by the PDF text,
        separated by newlines. Empty parts are omitted.
    """
    direct_link_loader = UnstructuredURLLoader(urls=direct_links)
    documents = direct_link_loader.load()
    # The loader yields document objects, not text; pull out their
    # page_content so the result can be joined with the PDF text (a str)
    # and later passed to a text splitter.
    url_text = "\n".join(document.page_content for document in documents)
    pdf_text = get_pdf_text(pdf_files)
    return "\n".join(part for part in (url_text, pdf_text) if part)
41
+
42
def get_pdf_text(pdf_files):
    """Extract and concatenate the text of every page of the given PDFs.

    Args:
        pdf_files: Iterable of filesystem paths to PDF files.

    Returns:
        One string holding the text of all pages, in file order and then
        page order. Empty when ``pdf_files`` is empty.
    """
    # PdfFileReader / numPages / getPage / extractText were removed in
    # PyPDF2 3.x; PdfReader with .pages and extract_text() is the
    # supported API. Imported locally so this function is self-contained.
    from PyPDF2 import PdfReader

    page_texts = []
    for pdf_file in pdf_files:
        with open(pdf_file, "rb") as file:
            pdf_reader = PdfReader(file)
            # extend() consumes the generator while the file is still open.
            page_texts.extend(
                page.extract_text() or "" for page in pdf_reader.pages
            )
    return "".join(page_texts)
50
+
51
def get_text_chunks(text):
    """Split ``text`` into chunks using a recursive character splitter.

    The splitter is configured with a chunk size of 1000 and an overlap
    of 20; the resulting chunks are returned as produced by ``split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=20,
    )
    return splitter.split_text(text)
55
+
56
def get_vector_store(text_chunks):
    """Build a FAISS vector store from ``text_chunks``.

    The chunks are embedded with a ``GooglePalmEmbeddings`` instance.
    """
    palm_embeddings = GooglePalmEmbeddings()
    return FAISS.from_texts(text_chunks, embedding=palm_embeddings)
60
+
61
def get_conversational_chain(vector_store):
    """Create a conversational retrieval chain over ``vector_store``.

    The chain combines a ``GooglePalm`` LLM, the store's retriever, and a
    buffer memory that keeps the chat under the ``chat_history`` key.
    """
    palm_llm = GooglePalm()
    chat_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=palm_llm,
        retriever=vector_store.as_retriever(),
        memory=chat_memory,
    )
66
+
67
def user_input(user_question):
    """Send a question to the conversation chain and render the chat.

    Args:
        user_question: The question text entered by the user.

    The chain stored in ``st.session_state.conversation`` is called with
    the question; its ``chat_history`` is saved to
    ``st.session_state.chatHistory`` and written to the page, with
    messages at even positions labelled "Human" and odd ones "Bot".
    If no chain has been built yet, a warning is shown instead.
    """
    conversation = st.session_state.get("conversation")
    if conversation is None:
        # The chain only exists after the Process button has been used;
        # calling None here would raise a TypeError.
        st.warning("Please click the Process button in the sidebar before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chatHistory = response['chat_history']
    for i, message in enumerate(st.session_state.chatHistory):
        if i % 2 == 0:
            st.write("Human: ", message.content)
        else:
            st.write("Bot: ", message.content)
75
+
76
def main():
    """Render the Streamlit page and handle questions and document uploads.

    The main area has a question box; a submitted question is forwarded to
    ``user_input``. The sidebar has a file uploader and a Process button.
    On Process, the uploads are written to temporary files, combined with
    the text from ``direct_links``, chunked, embedded into a vector store,
    and the resulting conversation chain is stored in
    ``st.session_state.conversation``.
    """
    st.set_page_config("Chat with Multiple PDFs")
    st.header("Chat with Multiple PDF 💬")
    user_question = st.text_input("Ask a Question from the PDF Files")
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chatHistory" not in st.session_state:
        st.session_state.chatHistory = None
    if user_question:
        user_input(user_question)
    with st.sidebar:
        st.title("Settings")
        st.subheader("Upload your Documents")
        pdf_uploads = st.file_uploader("Upload your PDF Files and Click on the Process Button", accept_multiple_files=True)
        if st.button("Process"):
            with st.spinner("Processing"):
                pdf_files = []
                try:
                    for uploaded_file in pdf_uploads or []:
                        # The context manager closes the handle before the
                        # file is reopened for reading; delete=False keeps
                        # it on disk until the cleanup below.
                        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
                            pdf_files.append(tmp.name)
                            # getvalue() returns the full content regardless
                            # of the read position, so pressing Process a
                            # second time does not produce empty files.
                            tmp.write(uploaded_file.getvalue())
                    combined_text = get_data(direct_links, pdf_files)
                    text_chunks = get_text_chunks(combined_text)
                    vector_store = get_vector_store(text_chunks)
                    st.session_state.conversation = get_conversational_chain(vector_store)
                finally:
                    # Best-effort removal of the temporary copies; a failed
                    # delete must not mask the outcome of processing.
                    for path in pdf_files:
                        try:
                            os.remove(path)
                        except OSError:
                            pass
                st.success("Done")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
langchain
streamlit
PyPDF2
# Backends for the langchain integrations imported by llm5.py:
# FAISS vector store, Google PaLM LLM/embeddings, UnstructuredURLLoader.
faiss-cpu
google-generativeai
unstructured