rajesh1729 committed on
Commit
226a55c
·
verified ·
1 Parent(s): 11c9bc2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -37
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import streamlit as st
3
  from langchain.embeddings.openai import OpenAIEmbeddings
4
- from langchain.vectorstores import FAISS # Changed to FAISS for in-memory storage
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.chat_models import ChatOpenAI
7
  from langchain.chains import ConversationalRetrievalChain
@@ -39,19 +39,15 @@ def process_pdfs(papers, api_key):
39
 
40
  with st.spinner("Processing PDFs..."):
41
  try:
42
- # Create embeddings instance
43
  embeddings = OpenAIEmbeddings(openai_api_key=api_key)
44
-
45
- # Process all PDFs
46
  all_texts = []
 
47
  for paper in papers:
48
- # Save and load PDF
49
  file_path = os.path.join('./uploads', paper.name)
50
  os.makedirs('./uploads', exist_ok=True)
51
  with open(file_path, "wb") as f:
52
  f.write(paper.getbuffer())
53
 
54
- # Load and split the PDF
55
  loader = PyPDFLoader(file_path)
56
  documents = loader.load()
57
  text_splitter = RecursiveCharacterTextSplitter(
@@ -60,24 +56,22 @@ def process_pdfs(papers, api_key):
60
  )
61
  texts = text_splitter.split_documents(documents)
62
  all_texts.extend(texts)
63
-
64
- # Cleanup
65
  os.remove(file_path)
66
 
67
- # Create vectorstore
68
  vectorstore = FAISS.from_documents(all_texts, embeddings)
69
 
70
- # Create chain
 
 
 
 
 
71
  st.session_state.chain = ConversationalRetrievalChain.from_llm(
72
  llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=api_key),
73
- retriever=vectorstore.as_retriever(
74
- search_kwargs={"k": 3}
75
- ),
76
- memory=ConversationBufferMemory(
77
- memory_key="chat_history",
78
- return_messages=True,
79
- ),
80
- return_source_documents=True,
81
  )
82
 
83
  st.success(f"Processed {len(papers)} PDF(s) successfully!")
@@ -90,7 +84,6 @@ def process_pdfs(papers, api_key):
90
  def main():
91
  st.set_page_config(page_title="PDF Chat")
92
 
93
- # Sidebar with API key input
94
  api_key = create_sidebar()
95
 
96
  if not api_key:
@@ -99,47 +92,29 @@ def main():
99
 
100
  st.title("Chat with PDF")
101
 
102
- # File uploader
103
  papers = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
104
 
105
- # Process PDFs button
106
  if papers:
107
  if st.button("Process PDFs"):
108
  process_pdfs(papers, api_key)
109
 
110
- # Display chat messages from history
111
  for message in st.session_state.messages:
112
  with st.chat_message(message["role"]):
113
  st.markdown(message["content"])
114
 
115
- # Accept user input
116
  if prompt := st.chat_input("Ask about your PDFs"):
117
- # Add user message to chat history
118
  st.session_state.messages.append({"role": "user", "content": prompt})
119
 
120
- # Display user message
121
  with st.chat_message("user"):
122
  st.markdown(prompt)
123
 
124
- # Generate and display assistant response
125
  with st.chat_message("assistant"):
126
  if st.session_state.chain is None:
127
  response = "Please upload and process a PDF first."
128
  else:
129
  with st.spinner("Thinking..."):
130
- # Get response with source documents
131
  result = st.session_state.chain({"question": prompt})
132
  response = result["answer"]
133
-
134
- # Optionally show sources
135
- if "source_documents" in result:
136
- sources = result["source_documents"]
137
- if sources:
138
- response += "\n\nSources:"
139
- for i, doc in enumerate(sources, 1):
140
- # Add page numbers if available
141
- page_info = f" (Page {doc.metadata['page'] + 1})" if 'page' in doc.metadata else ""
142
- response += f"\n{i}.{page_info} {doc.page_content[:200]}..."
143
 
144
  st.markdown(response)
145
  st.session_state.messages.append({"role": "assistant", "content": response})
 
1
  import os
2
  import streamlit as st
3
  from langchain.embeddings.openai import OpenAIEmbeddings
4
+ from langchain.vectorstores import FAISS
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain.chat_models import ChatOpenAI
7
  from langchain.chains import ConversationalRetrievalChain
 
39
 
40
  with st.spinner("Processing PDFs..."):
41
  try:
 
42
  embeddings = OpenAIEmbeddings(openai_api_key=api_key)
 
 
43
  all_texts = []
44
+
45
  for paper in papers:
 
46
  file_path = os.path.join('./uploads', paper.name)
47
  os.makedirs('./uploads', exist_ok=True)
48
  with open(file_path, "wb") as f:
49
  f.write(paper.getbuffer())
50
 
 
51
  loader = PyPDFLoader(file_path)
52
  documents = loader.load()
53
  text_splitter = RecursiveCharacterTextSplitter(
 
56
  )
57
  texts = text_splitter.split_documents(documents)
58
  all_texts.extend(texts)
 
 
59
  os.remove(file_path)
60
 
 
61
  vectorstore = FAISS.from_documents(all_texts, embeddings)
62
 
63
+ memory = ConversationBufferMemory(
64
+ memory_key="chat_history",
65
+ return_messages=True,
66
+ output_key="answer"
67
+ )
68
+
69
  st.session_state.chain = ConversationalRetrievalChain.from_llm(
70
  llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=api_key),
71
+ retriever=vectorstore.as_retriever(),
72
+ memory=memory,
73
+ return_source_documents=False,
74
+ chain_type="stuff"
 
 
 
 
75
  )
76
 
77
  st.success(f"Processed {len(papers)} PDF(s) successfully!")
 
84
  def main():
85
  st.set_page_config(page_title="PDF Chat")
86
 
 
87
  api_key = create_sidebar()
88
 
89
  if not api_key:
 
92
 
93
  st.title("Chat with PDF")
94
 
 
95
  papers = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
96
 
 
97
  if papers:
98
  if st.button("Process PDFs"):
99
  process_pdfs(papers, api_key)
100
 
 
101
  for message in st.session_state.messages:
102
  with st.chat_message(message["role"]):
103
  st.markdown(message["content"])
104
 
 
105
  if prompt := st.chat_input("Ask about your PDFs"):
 
106
  st.session_state.messages.append({"role": "user", "content": prompt})
107
 
 
108
  with st.chat_message("user"):
109
  st.markdown(prompt)
110
 
 
111
  with st.chat_message("assistant"):
112
  if st.session_state.chain is None:
113
  response = "Please upload and process a PDF first."
114
  else:
115
  with st.spinner("Thinking..."):
 
116
  result = st.session_state.chain({"question": prompt})
117
  response = result["answer"]
 
 
 
 
 
 
 
 
 
 
118
 
119
  st.markdown(response)
120
  st.session_state.messages.append({"role": "assistant", "content": response})