Update app.py
app.py CHANGED
```diff
@@ -4,10 +4,9 @@ from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.vectorstores import Chroma
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chat_models import ChatOpenAI
-from langchain.chains import ConversationalRetrievalChain
+from langchain.chains import ConversationalRetrievalChain
 from langchain.memory import ConversationBufferMemory
 from langchain.document_loaders import PyPDFLoader
-import time
 
 # Initialize session state variables
 if "messages" not in st.session_state:
```
```diff
@@ -16,16 +15,12 @@ if "chain" not in st.session_state:
     st.session_state.chain = None
 if "processed_pdfs" not in st.session_state:
     st.session_state.processed_pdfs = False
-if "waiting_for_answer" not in st.session_state:
-    st.session_state.waiting_for_answer = False
 
 def create_sidebar():
     with st.sidebar:
         st.title("PDF Chat")
         st.markdown("### Quick Demo of RAG")
-
         api_key = st.text_input("OpenAI API Key:", type="password")
-
         st.markdown("""
         ### Tools Used
         - OpenAI
```
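The guard pattern trimmed here is standard Streamlit: each widget interaction reruns the whole script, and `st.session_state` is the only store that survives the rerun, so every key is initialized exactly once. A minimal self-contained sketch of the pattern (the counter line is illustrative, not from app.py):

```python
import streamlit as st

# Streamlit reruns this entire script on every interaction, so guard
# initialization to avoid wiping state accumulated by earlier runs.
if "messages" not in st.session_state:
    st.session_state.messages = []  # persists across reruns for this session

st.write(f"{len(st.session_state.messages)} messages so far")
```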
```diff
@@ -37,48 +32,37 @@ def create_sidebar():
         2. Upload PDF
         3. Chat!
         """)
-
         return api_key
 
-def save_uploaded_file(uploaded_file, path='./uploads/'):
-    os.makedirs(path, exist_ok=True)
-    file_path = os.path.join(path, uploaded_file.name)
-    with open(file_path, "wb") as f:
-        f.write(uploaded_file.getbuffer())
-    return file_path
-
-def load_texts_from_papers(papers):
-    all_texts = []
-    for paper in papers:
-        try:
-            file_path = save_uploaded_file(paper)
-            loader = PyPDFLoader(file_path)
-            documents = loader.load()
-            text_splitter = RecursiveCharacterTextSplitter(
-                chunk_size=1000,
-                chunk_overlap=200,
-                length_function=len,
-                is_separator_regex=False,
-            )
-            texts = text_splitter.split_documents(documents)
-            all_texts.extend(texts)
-            os.remove(file_path)
-        except Exception as e:
-            st.error(f"Error processing {paper.name}: {str(e)}")
-    return all_texts
-
-def initialize_vectorstore(api_key):
-    embedding = OpenAIEmbeddings(openai_api_key=api_key)
-    vectorstore = Chroma(embedding_function=embedding, persist_directory="db")
-    return vectorstore
-
 def process_pdfs(papers, api_key):
     if papers and not st.session_state.processed_pdfs:
         with st.spinner("Processing PDFs..."):
-            texts = load_texts_from_papers(papers)
+            texts = []
+            for paper in papers:
+                try:
+                    file_path = os.path.join('./uploads', paper.name)
+                    os.makedirs('./uploads', exist_ok=True)
+                    with open(file_path, "wb") as f:
+                        f.write(paper.getbuffer())
+
+                    loader = PyPDFLoader(file_path)
+                    documents = loader.load()
+                    text_splitter = RecursiveCharacterTextSplitter(
+                        chunk_size=1000,
+                        chunk_overlap=200,
+                        length_function=len,
+                        is_separator_regex=False,
+                    )
+                    texts.extend(text_splitter.split_documents(documents))
+                    os.remove(file_path)
+                except Exception as e:
+                    st.error(f"Error processing {paper.name}: {str(e)}")
+
             if texts:
-                vectorstore = initialize_vectorstore(api_key)
+                embedding = OpenAIEmbeddings(openai_api_key=api_key)
+                vectorstore = Chroma(embedding_function=embedding, persist_directory="db")
                 vectorstore.add_documents(texts)
+
                 st.session_state.chain = ConversationalRetrievalChain.from_llm(
                     ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=api_key),
                     vectorstore.as_retriever(),
```
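The load-and-split pipeline this hunk inlines into `process_pdfs()` can be exercised on its own. A minimal sketch using the same pre-0.1 LangChain imports as app.py; `sample.pdf` is a placeholder path:

```python
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

loader = PyPDFLoader("sample.pdf")  # placeholder path; load() yields one Document per page
documents = loader.load()

# Same splitter settings as app.py: ~1000-character chunks with 200 characters
# of overlap so context is preserved across chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
    is_separator_regex=False,
)
texts = text_splitter.split_documents(documents)
print(f"{len(documents)} pages -> {len(texts)} chunks")
```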
```diff
@@ -92,19 +76,10 @@ def process_pdfs(papers, api_key):
         return texts
     return []
 
-def get_assistant_response(prompt, texts):
-    try:
-        if texts or st.session_state.processed_pdfs:
-            result = st.session_state.chain({"question": prompt})
-            return result["answer"]
-        else:
-            return "Please upload a PDF first."
-    except Exception as e:
-        return f"Error: {str(e)}"
-
 def main():
-    st.set_page_config(page_title="PDF Chat"
+    st.set_page_config(page_title="PDF Chat")
 
+    # Sidebar with API key input
     api_key = create_sidebar()
 
     if not api_key:
```
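With `get_assistant_response()` gone, its chain call (`st.session_state.chain({"question": prompt})`) moves into `main()` in the next hunk. For reference, a minimal sketch of the full retrieval-chain wiring; the placeholder API key, the sample question, and the `memory_key="chat_history"` argument (the conventional pairing for `ConversationalRetrievalChain`, implied by app.py's `ConversationBufferMemory` import) are assumptions:

```python
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory

api_key = "sk-..."  # placeholder, not a real key

# Vector store built the same way as in process_pdfs() above.
embedding = OpenAIEmbeddings(openai_api_key=api_key)
vectorstore = Chroma(embedding_function=embedding, persist_directory="db")

# "chat_history" is the conventional memory_key for this chain type.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

chain = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=api_key),
    vectorstore.as_retriever(),
    memory=memory,
)

result = chain({"question": "What is this PDF about?"})  # sample question
print(result["answer"])
```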
```diff
@@ -116,31 +91,33 @@ def main():
     # File uploader
     papers = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
 
-    # Process PDFs
+    # Process PDFs if needed
    texts = process_pdfs(papers, api_key)
 
-    #
+    # Display chat messages from history
+    for message in st.session_state.messages:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
 
-            st.markdown(message["content"])
+    # Accept user input
+    if prompt := st.chat_input("Ask about your PDFs"):
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": prompt})
 
-    #
-        st.session_state.messages.append({"role": "user", "content": prompt})
-        st.chat_message("user").markdown(prompt)
+        # Display user message
+        with st.chat_message("user"):
+            st.markdown(prompt)
 
+        # Generate and display assistant response
+        with st.chat_message("assistant"):
+            if not st.session_state.processed_pdfs:
+                response = "Please upload a PDF first."
+            else:
                 with st.spinner("Thinking..."):
+                    result = st.session_state.chain({"question": prompt})
+                    response = result["answer"]
 
+            st.markdown(response)
         st.session_state.messages.append({"role": "assistant", "content": response})
 
 if __name__ == "__main__":
```
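The rewritten chat loop follows the stock `st.chat_message` / `st.chat_input` pattern: replay the history on every rerun, append the user turn, then render the assistant turn. A stripped-down sketch with the retrieval chain stubbed out (the echo response is a stand-in, not app.py's logic):

```python
import streamlit as st

if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay chat history; the script reruns on every interaction.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# chat_input returns None until the user submits, so the walrus guard
# skips the block on ordinary reruns.
if prompt := st.chat_input("Ask about your PDFs"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    response = f"Echo: {prompt}"  # stand-in for st.session_state.chain(...)
    with st.chat_message("assistant"):
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})
```

Either version runs locally with `streamlit run app.py`.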
|