File size: 5,045 Bytes
53d8e52 efb1b7a 53d8e52 648f1a1 53d8e52 32c2394 648f1a1 32c2394 53d8e52 e5702bf 53d8e52 e5702bf 53d8e52 32c2394 efb1b7a 53d8e52 32c2394 648f1a1 32c2394 648f1a1 32c2394 648f1a1 32c2394 648f1a1 32c2394 53d8e52 32c2394 53d8e52 32c2394 53d8e52 32c2394 648f1a1 32c2394 648f1a1 53d8e52 648f1a1 53d8e52 648f1a1 53d8e52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import os
import streamlit as st
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain, ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.document_loaders import PyPDFLoader
import time
# Initialize session state variables.
# Only missing keys are set, so values already stored in the session are
# left untouched.
for _state_key, _state_default in (
    ("messages", []),
    ("chain", None),
    ("processed_pdfs", False),
    ("waiting_for_answer", False),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
def create_sidebar():
    """Render the sidebar and return the API key typed by the user.

    The sidebar shows the app title, a tagline, a password-style text input
    for the OpenAI API key, and a short markdown summary of the tools used
    and the steps to follow.

    Returns:
        The current value of the "OpenAI API Key:" text input.
    """
    about_markdown = """
### Tools Used
- OpenAI
- LangChain
- ChromaDB
### Steps
1. Add API key
2. Upload PDF
3. Chat!
"""
    with st.sidebar:
        st.title("PDF Chat")
        st.markdown("### Quick Demo of RAG")
        entered_key = st.text_input("OpenAI API Key:", type="password")
        st.markdown(about_markdown)
    return entered_key
def save_uploaded_file(uploaded_file, path='./uploads/'):
    """Write an uploaded file into *path* and return the saved file's path.

    Args:
        uploaded_file: Object exposing a ``name`` attribute and a
            ``getbuffer()`` method returning the file's bytes.
        path: Directory to store the file in; created if it does not exist.

    Returns:
        The path of the written file, always located directly inside *path*.

    Raises:
        ValueError: If the upload's name has no usable file-name component.
    """
    os.makedirs(path, exist_ok=True)
    # The name is supplied by the client: keep only its final component so
    # values such as "../../x.pdf" or "C:\dir\x.pdf" cannot escape *path*.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")
    file_path = os.path.join(path, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
def load_texts_from_papers(papers):
    """Load each uploaded PDF and split it into overlapping text chunks.

    Every upload is saved to disk, loaded with ``PyPDFLoader``, split into
    chunks of 1000 characters with 200 characters of overlap, and the saved
    copy is removed again. A failure on one upload is reported with
    ``st.error`` and does not stop the remaining uploads.

    Args:
        papers: Iterable of uploaded files (may be empty or ``None``).

    Returns:
        A list with the chunks of all uploads that were processed
        successfully; empty when there was nothing to process.
    """
    if not papers:
        return []

    # The splitter configuration is the same for every file: build it once.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
        is_separator_regex=False,
    )

    all_texts = []
    for paper in papers:
        try:
            file_path = save_uploaded_file(paper)
            try:
                documents = PyPDFLoader(file_path).load()
                all_texts.extend(text_splitter.split_documents(documents))
            finally:
                # Remove the saved copy even when loading or splitting
                # failed, so broken uploads do not pile up on disk.
                os.remove(file_path)
        except Exception as e:
            st.error(f"Error processing {paper.name}: {str(e)}")
    return all_texts
def initialize_vectorstore(api_key):
    """Create the Chroma vector store used to index the PDF chunks.

    Args:
        api_key: OpenAI API key handed to the embeddings client.

    Returns:
        A ``Chroma`` instance that embeds with ``OpenAIEmbeddings`` and uses
        ``"db"`` as its persist directory.
    """
    return Chroma(
        embedding_function=OpenAIEmbeddings(openai_api_key=api_key),
        persist_directory="db",
    )
def process_pdfs(papers, api_key):
    """Index the uploaded PDFs and build the retrieval chain for the session.

    Nothing happens when there are no uploads or when
    ``st.session_state.processed_pdfs`` is already set. Otherwise the PDFs
    are split into chunks, added to the vector store, and a
    ``ConversationalRetrievalChain`` is stored in ``st.session_state.chain``.

    Args:
        papers: Uploaded PDF files (may be empty or ``None``).
        api_key: OpenAI API key used for the embeddings and the chat model.

    Returns:
        The list of chunks indexed by this call, or ``[]`` when nothing was
        processed or when indexing failed.
    """
    if not papers or st.session_state.processed_pdfs:
        return []

    with st.spinner("Processing PDFs..."):
        texts = load_texts_from_papers(papers)
        if not texts:
            return texts

        try:
            vectorstore = initialize_vectorstore(api_key)
            vectorstore.add_documents(texts)
            chain = ConversationalRetrievalChain.from_llm(
                ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", openai_api_key=api_key),
                vectorstore.as_retriever(),
                memory=ConversationBufferMemory(
                    memory_key="chat_history",
                    return_messages=True,
                ),
            )
        except Exception as e:
            # These calls can raise (e.g. on a rejected API key). Report it
            # the same way the other handlers in this file do instead of
            # crashing the page, and leave the session state untouched so
            # the user can fix the problem and retry.
            st.error(f"Error processing PDFs: {e}")
            return []

        # Commit to the session only after every step succeeded.
        st.session_state.chain = chain
        st.session_state.processed_pdfs = True
        st.success("PDFs processed successfully!")
        return texts
def get_assistant_response(prompt, texts):
    """Answer *prompt* with the session's retrieval chain.

    Args:
        prompt: The user's question.
        texts: Chunks returned by the current run's PDF processing; a
            non-empty value means documents are available.

    Returns:
        The chain's ``"answer"`` value, the fixed hint
        "Please upload a PDF first." when no documents are available, or
        an "Error: ..." string when anything raises.
    """
    try:
        if not (texts or st.session_state.processed_pdfs):
            return "Please upload a PDF first."
        chain_output = st.session_state.chain({"question": prompt})
        return chain_output["answer"]
    except Exception as err:
        return f"Error: {str(err)}"
def main():
    """Run the Streamlit page: sidebar, PDF upload, and chat loop.

    Stops early with a warning when no API key has been entered. Otherwise
    it shows the uploader, processes the uploaded PDFs, replays the chat
    history stored in ``st.session_state.messages``, and, when the user
    submits a question, appends both the question and the assistant's
    response to that history.
    """
    st.set_page_config(page_title="PDF Chat", layout="wide")
    api_key = create_sidebar()
    if not api_key:
        st.warning("Please enter your OpenAI API key")
        return
    st.title("Chat with PDF")
    # File uploader
    papers = st.file_uploader("Upload PDFs", type=["pdf"], accept_multiple_files=True)
    # Process PDFs
    texts = process_pdfs(papers, api_key)
    # Chat interface
    # NOTE(review): the nesting below was reconstructed from a copy of the
    # file that had lost its indentation -- confirm that the chat input is
    # meant to live inside `chat_container`.
    chat_container = st.container()
    with chat_container:
        # Display existing chat messages
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.markdown(message["content"])
        # Get user input
        if prompt := st.chat_input("Ask about your PDFs"):
            # Add user message immediately
            st.session_state.messages.append({"role": "user", "content": prompt})
            st.chat_message("user").markdown(prompt)
            # Get assistant response with a loading indicator
            with st.chat_message("assistant"):
                with st.spinner("Thinking..."):
                    response = get_assistant_response(prompt, texts)
                    st.markdown(response)
            # Add assistant response to messages
            st.session_state.messages.append({"role": "assistant", "content": response})
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()