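"""Streamlit app for chatting with uploaded PDFs.

Uploaded files are chunked, embedded with OpenAI embeddings, stored in a
Chroma vector store, and queried through a LangChain
ConversationalRetrievalChain with conversation memory.
"""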
import os
import streamlit as st
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import PyPDFLoader
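# NOTE: these are the legacy monolithic `langchain` import paths; newer
# releases (0.1+) moved these classes into `langchain-community` and
# `langchain-openai`.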
# Initialize session state variables
if "messages" not in st.session_state:
st.session_state.messages = []
if "chain" not in st.session_state:
st.session_state.chain = None
if "processed_pdfs" not in st.session_state:
st.session_state.processed_pdfs = False
if "waiting_for_answer" not in st.session_state:
st.session_state.waiting_for_answer = False
def create_sidebar():
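    """Render the sidebar and collect the user's OpenAI API key."""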
    with st.sidebar:
        st.title("PDF Chat")
        st.markdown("### Quick Demo of RAG")
        api_key = st.text_input("OpenAI API Key:", type="password")
        st.markdown("""
        ### Tools Used
        - OpenAI
        - LangChain
        - ChromaDB

        ### Steps
        1. Add API key
        2. Upload PDF
        3. Chat!
        """)
        return api_key
def save_uploaded_file(uploaded_file, path='./uploads/'):
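    """Write an uploaded file to disk and return its path."""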
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, uploaded_file.name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
def load_texts_from_papers(papers):
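    """Load each uploaded PDF, split it into overlapping chunks, and
    return the combined list of chunk documents."""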
    all_texts = []
    # One splitter is enough for all files; creating it per-file adds nothing.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
        is_separator_regex=False,
    )
    for paper in papers:
        try:
            file_path = save_uploaded_file(paper)
            loader = PyPDFLoader(file_path)
            documents = loader.load()
            texts = text_splitter.split_documents(documents)
            all_texts.extend(texts)
            os.remove(file_path)
        except Exception as e:
            st.error(f"Error processing {paper.name}: {str(e)}")
    return all_texts
def initialize_vectorstore(api_key):
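    """Create a Chroma vector store backed by OpenAI embeddings."""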
    embedding = OpenAIEmbeddings(openai_api_key=api_key)
    vectorstore = Chroma(embedding_function=embedding, persist_directory="db")
    return vectorstore
def process_pdfs(papers, api_key):
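    """Index newly uploaded PDFs and build the conversational retrieval chain.

    Returns the chunk documents on the run that processes the files, and
    an empty list on subsequent reruns."""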
    if papers and not st.session_state.processed_pdfs:
        with st.spinner("Processing PDFs..."):
            texts = load_texts_from_papers(papers)
            if texts:
                vectorstore = initialize_vectorstore(api_key)
                vectorstore.add_documents(texts)
                st.session_state.chain = ConversationalRetrievalChain.from_llm(
                    ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=api_key),
                    vectorstore.as_retriever(),
                    memory=ConversationBufferMemory(
                        memory_key="chat_history",
                        return_messages=True
                    )
                )
                st.session_state.processed_pdfs = True
                st.success("PDFs processed successfully!")
            return texts
    return []
def get_assistant_response(prompt, texts):
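    """Run the user's question through the retrieval chain, or prompt
    for a PDF upload if nothing has been indexed yet."""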
    try:
        if texts or st.session_state.processed_pdfs:
            result = st.session_state.chain({"question": prompt})
            return result["answer"]
        else:
            return "Please upload a PDF first."
    except Exception as e:
        return f"Error: {str(e)}"
def main():
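    """Wire together the sidebar, PDF upload, and chat interface."""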
    st.set_page_config(page_title="PDF Chat", layout="wide")
    api_key = create_sidebar()
    if not api_key:
        st.warning("Please enter your OpenAI API key")
        return
    st.title("Chat with PDF")
    # File uploader
    papers = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
    # Process PDFs
    texts = process_pdfs(papers, api_key)
    # Chat interface
    chat_container = st.container()
    with chat_container:
        # Display existing chat messages
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
    # Get user input
    if prompt := st.chat_input("Ask about your PDFs"):
        # Add user message immediately
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.chat_message("user").markdown(prompt)
        # Get assistant response with a loading indicator
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                response = get_assistant_response(prompt, texts)
            st.markdown(response)
        # Add assistant response to messages
        st.session_state.messages.append({"role": "assistant", "content": response})
if __name__ == "__main__":
    main()
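# To try this locally (a minimal sketch; the `uploads/` and `db/`
# directories are created by the script itself):
#   pip install streamlit langchain openai chromadb pypdf
#   streamlit run app.py
# Then paste an OpenAI API key in the sidebar, upload one or more PDFs,
# and ask questions in the chat box.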