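# Streamlit app: conversational question answering over an uploaded PDF,
# using LangChain, HuggingFace embeddings, and a Chroma vector store.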
from langchain_openai import ChatOpenAI
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import PromptTemplate
from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document
import streamlit as st
import os
from io import BytesIO
import pdfplumber
class InMemoryPDFLoader(BaseLoader):
    """Load a PDF from raw bytes without writing it to disk."""

    def __init__(self, file_bytes: bytes):
        self.file_bytes = file_bytes

    def load(self):
        pdf_stream = BytesIO(self.file_bytes)
        text = ""
        with pdfplumber.open(pdf_stream) as pdf:
            for page in pdf.pages:
                # extract_text() returns None for pages with no extractable
                # text (e.g. scanned images), so guard against it.
                text += page.extract_text() or ""
        return [Document(page_content=text)]
# Access the OpenAI API key from the environment
open_ai_key = os.getenv("OPENAI_API_KEY")
llm = ChatOpenAI(api_key=open_ai_key)
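# Note: no model is specified, so ChatOpenAI falls back to the library's
# default (gpt-3.5-turbo at the time of writing); pass model="..." to override.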
template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context: {context}
Question: {question}
Only return the helpful answer below and nothing else.
Helpful answer:
"""
prompt = PromptTemplate(template=template, input_variables=["context", "question"])

pdf_file = st.file_uploader("Upload your PDF", type="pdf")
question = st.chat_input("Ask your question")

if pdf_file is not None:
    try:
        pdf_bytes = pdf_file.read()
        loader = InMemoryPDFLoader(file_bytes=pdf_bytes)
        pdf_data = loader.load()

        # Split the text into chunks
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        docs = text_splitter.split_documents(pdf_data)

        # Create a Chroma vector store
        embeddings = HuggingFaceEmbeddings(model_name="embaas/sentence-transformers-multilingual-e5-base")
        db = Chroma.from_documents(docs, embeddings)

        # Initialize message history for conversation
        message_history = ChatMessageHistory()

        # Memory for conversational context
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key="answer",
            chat_memory=message_history,
            return_messages=True,
        )

        # Create a chain that uses the Chroma vector store
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(),
            memory=memory,
            return_source_documents=False,
            combine_docs_chain_kwargs={"prompt": prompt},
        )
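        # Caveat: Streamlit reruns this script on every interaction, so the
        # embeddings and vector store are rebuilt for each question. Wrapping
        # the index-building step in a function decorated with
        # @st.cache_resource would avoid the repeated work.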
        if question:
            with st.chat_message("user"):
                st.markdown(question)
            with st.chat_message("assistant"):
                # invoke() is the current entry point; calling the chain
                # directly is deprecated in recent LangChain releases.
                res = chain.invoke({"question": question})
                answer = res["answer"]
                st.write(answer)
    except Exception as e:
        st.error(f"An error occurred: {e}")
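# To try it locally (assuming this file is saved as app.py):
#   streamlit run app.py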