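"""Streamlit app for chatting with uploaded PDFs.

Uploaded files are chunked, embedded with OpenAI embeddings, stored in a
Chroma vector store, and queried through a LangChain
ConversationalRetrievalChain with conversation memory.
"""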
import os
import streamlit as st
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import PyPDFLoader
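# NOTE: these are the legacy monolithic `langchain` import paths; newer
# releases (0.1+) moved these classes into `langchain-community` and
# `langchain-openai`.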
# Initialize session state variables
if "messages" not in st.session_state:
st.session_state.messages = []
if "chain" not in st.session_state:
st.session_state.chain = None
if "processed_pdfs" not in st.session_state:
st.session_state.processed_pdfs = False
if "waiting_for_answer" not in st.session_state:
st.session_state.waiting_for_answer = False
def create_sidebar():
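    """Render the sidebar and collect the user's OpenAI API key."""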
    with st.sidebar:
        st.title("PDF Chat")
        st.markdown("### Quick Demo of RAG")
        api_key = st.text_input("OpenAI API Key:", type="password")
        st.markdown("""
        ### Tools Used
        - OpenAI
        - LangChain
        - ChromaDB

        ### Steps
        1. Add API key
        2. Upload PDF
        3. Chat!
        """)
        return api_key
def save_uploaded_file(uploaded_file, path='./uploads/'):
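    """Write an uploaded file to disk and return its path."""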
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, uploaded_file.name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
def load_texts_from_papers(papers):
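    """Load each uploaded PDF, split it into overlapping chunks, and
    return the combined list of chunk documents."""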
    all_texts = []
    # One splitter is enough for all files; creating it per-file adds nothing.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
        is_separator_regex=False,
    )
    for paper in papers:
        try:
            file_path = save_uploaded_file(paper)
            loader = PyPDFLoader(file_path)
            documents = loader.load()
            texts = text_splitter.split_documents(documents)
            all_texts.extend(texts)
            os.remove(file_path)
        except Exception as e:
            st.error(f"Error processing {paper.name}: {str(e)}")
    return all_texts
def initialize_vectorstore(api_key):
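    """Create a Chroma vector store backed by OpenAI embeddings."""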
    embedding = OpenAIEmbeddings(openai_api_key=api_key)
    vectorstore = Chroma(embedding_function=embedding, persist_directory="db")
    return vectorstore
def process_pdfs(papers, api_key):
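    """Index newly uploaded PDFs and build the conversational retrieval chain.

    Returns the chunk documents on the run that processes the files, and
    an empty list on subsequent reruns."""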
    if papers and not st.session_state.processed_pdfs:
        with st.spinner("Processing PDFs..."):
            texts = load_texts_from_papers(papers)
            if texts:
                vectorstore = initialize_vectorstore(api_key)
                vectorstore.add_documents(texts)
                st.session_state.chain = ConversationalRetrievalChain.from_llm(
                    ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=api_key),
                    vectorstore.as_retriever(),
                    memory=ConversationBufferMemory(
                        memory_key="chat_history",
                        return_messages=True
                    )
                )
                st.session_state.processed_pdfs = True
                st.success("PDFs processed successfully!")
            return texts
    return []
def get_assistant_response(prompt, texts):
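    """Run the user's question through the retrieval chain, or prompt
    for a PDF upload if nothing has been indexed yet."""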
    try:
        if texts or st.session_state.processed_pdfs:
            result = st.session_state.chain({"question": prompt})
            return result["answer"]
        else:
            return "Please upload a PDF first."
    except Exception as e:
        return f"Error: {str(e)}"
def main():
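    """Wire together the sidebar, PDF upload, and chat interface."""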
    st.set_page_config(page_title="PDF Chat", layout="wide")
    api_key = create_sidebar()
    if not api_key:
        st.warning("Please enter your OpenAI API key")
        return
    st.title("Chat with PDF")
    # File uploader
    papers = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
    # Process PDFs
    texts = process_pdfs(papers, api_key)
    # Chat interface
    chat_container = st.container()
    with chat_container:
        # Display existing chat messages
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
    # Get user input
    if prompt := st.chat_input("Ask about your PDFs"):
        # Add user message immediately
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.chat_message("user").markdown(prompt)
        # Get assistant response with a loading indicator
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = get_assistant_response(prompt, texts)
            st.markdown(response)
        # Add assistant response to messages
        st.session_state.messages.append({"role": "assistant", "content": response})
if __name__ == "__main__":
    main()
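# To try this locally (a minimal sketch; the `uploads/` and `db/`
# directories are created by the script itself):
#   pip install streamlit langchain openai chromadb pypdf
#   streamlit run app.py
# Then paste an OpenAI API key in the sidebar, upload one or more PDFs,
# and ask questions in the chat box.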