File size: 3,236 Bytes
cff6833
 
 
 
 
 
 
 
5c6a202
 
8c7647b
dd5b21e
974dc54
544643b
 
974dc54
 
 
 
 
 
 
 
 
 
 
cff6833
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c6a202
597629c
974dc54
9067974
5c6a202
974dc54
 
9067974
5c6a202
9067974
 
 
5c6a202
9067974
 
 
5c6a202
9067974
 
5c6a202
9067974
 
5c6a202
 
 
 
 
 
9067974
 
5c6a202
 
 
 
 
 
 
 
9067974
5c6a202
b0c936d
221c526
b0c936d
 
 
 
9067974
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from langchain_openai import ChatOpenAI
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import PromptTemplate
from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document
import streamlit as st
import os
from io import BytesIO
import pdfplumber

class InMemoryPDFLoader(BaseLoader):
    """LangChain loader that extracts text from an in-memory PDF byte string.

    Avoids writing the uploaded file to disk: the raw bytes are wrapped in a
    BytesIO stream and parsed directly with pdfplumber.
    """

    def __init__(self, file_bytes: bytes):
        # Raw PDF content, e.g. from st.file_uploader(...).read().
        self.file_bytes = file_bytes

    def load(self) -> list:
        """Return the PDF's concatenated page text as a single Document.

        Returns:
            A one-element list containing a Document whose page_content is the
            text of every page joined together.
        """
        pdf_stream = BytesIO(self.file_bytes)
        with pdfplumber.open(pdf_stream) as pdf:
            # page.extract_text() returns None for pages with no text layer
            # (e.g. scanned images); coalesce to "" instead of crashing with
            # a TypeError on string concatenation.
            text = "".join(page.extract_text() or "" for page in pdf.pages)
        return [Document(page_content=text)]

# Access the OpenAI API key from the environment
open_ai_key = os.getenv("OPENAI_API_KEY")
llm = ChatOpenAI(api_key=open_ai_key)

template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

pdf_file = st.file_uploader("Upload your PDF", type="pdf")
question = st.chat_input("Ask your question")

if pdf_file is not None:
    try:
        pdf_bytes = pdf_file.read()

        # Streamlit re-executes this whole script on every chat message.
        # Rebuilding the vector store and the ConversationBufferMemory on each
        # rerun would (a) re-embed the PDF for every question and (b) wipe the
        # conversation history, so the chain's memory would never accumulate.
        # Cache the built chain in session_state, keyed by the PDF content, and
        # only rebuild when a different file is uploaded.
        pdf_key = hash(pdf_bytes)
        if st.session_state.get("pdf_key") != pdf_key:
            loader = InMemoryPDFLoader(file_bytes=pdf_bytes)
            pdf_data = loader.load()

            # Split the text into overlapping chunks for retrieval
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            docs = text_splitter.split_documents(pdf_data)

            # Create a Chroma vector store over the chunks
            embeddings = HuggingFaceEmbeddings(model_name="embaas/sentence-transformers-multilingual-e5-base")
            db = Chroma.from_documents(docs, embeddings)

            # Initialize message history for conversation
            message_history = ChatMessageHistory()

            # Memory for conversational context (persists across reruns via
            # session_state, so follow-up questions see earlier turns)
            memory = ConversationBufferMemory(
                memory_key="chat_history",
                output_key="answer",
                chat_memory=message_history,
                return_messages=True,
            )

            # Create a chain that uses the Chroma vector store
            st.session_state["chain"] = ConversationalRetrievalChain.from_llm(
                llm=llm,
                chain_type="stuff",
                retriever=db.as_retriever(),
                memory=memory,
                return_source_documents=False,
                combine_docs_chain_kwargs={'prompt': prompt}
            )
            st.session_state["pdf_key"] = pdf_key

        chain = st.session_state["chain"]

        if question:
            with st.chat_message("user"):
                st.markdown(question)

            with st.chat_message("assistant"):
                # invoke() is the current Runnable entry point; __call__ on
                # chains is deprecated in recent LangChain releases.
                res = chain.invoke({"question": question})
                answer = res["answer"]
                st.write(f"{answer}")

    except Exception as e:
        st.error(f"An error occurred: {e}")