File size: 2,177 Bytes
e949dee
a70f40e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a019f2
a70f40e
 
 
e949dee
4a019f2
 
 
 
 
 
 
 
 
 
a70f40e
4a019f2
 
a70f40e
4a019f2
 
 
 
 
 
 
 
 
 
 
 
 
fef8c53
a70f40e
4a019f2
a70f40e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import logging
import os

import streamlit as st
from dotenv import load_dotenv
from langchain.callbacks import get_openai_callback
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader

# Load environment variables from a local .env file at import time, before
# main() builds any OpenAI client.
# NOTE(review): presumably this is where OPENAI_API_KEY comes from — confirm.
load_dotenv()

_LOGGER = logging.getLogger(__name__)

# Key of the st.session_state entry that maps an uploaded file to its FAISS
# index, so script reruns do not re-embed files that were already processed.
_KB_CACHE_KEY = "pdf_chat_knowledge_bases"


def _extract_pdf_text(pdf_file):
    """Return the text of every page of *pdf_file*, concatenated in page order."""
    reader = PdfReader(pdf_file)
    # Defensive: a page that yields no text (empty or None) contributes "".
    return "".join(page.extract_text() or "" for page in reader.pages)


def _build_knowledge_base(text):
    """Split *text* into overlapping chunks and embed them into a FAISS index."""
    splitter = CharacterTextSplitter(
        separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
    )
    chunks = splitter.split_text(text)
    return FAISS.from_texts(chunks, OpenAIEmbeddings())


def _answer_question(knowledge_base, question):
    """Answer *question* from the chunks of *knowledge_base* most similar to it."""
    docs = knowledge_base.similarity_search(question)
    chain = load_qa_chain(OpenAI(), chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=question)
        # Usage summary for this call goes to the server console.
        print(cb)
    return response


def main():
    """Render the PDF chat page.

    For every uploaded PDF the text is extracted, chunked and embedded into a
    FAISS index, and a per-file question box is shown; a submitted question is
    answered by a "stuff" QA chain over the most similar chunks.

    Streamlit re-executes the script on each widget interaction, so the index
    of each file is kept in ``st.session_state`` and built only once per
    session instead of on every rerun.
    """
    st.set_page_config(page_title="PDF Chat")
    st.header("Chat with your PDFs 💬")

    # Upload PDF files
    pdf_files = st.file_uploader(
        "Upload your PDF files", type="pdf", accept_multiple_files=True
    ) or []

    if _KB_CACHE_KEY not in st.session_state:
        st.session_state[_KB_CACHE_KEY] = {}
    cache = st.session_state[_KB_CACHE_KEY]
    active_keys = set()

    for idx, pdf_file in enumerate(pdf_files):
        unreadable_msg = (
            f"An error occurred while processing '{pdf_file.name}'. "
            "This file may be protected by the author, or contain scanned text "
            "which this basic demo is not set up to process."
        )
        backend_msg = (
            f"An error occurred while contacting OpenAI for '{pdf_file.name}'. "
            "Check your API key and network connection; details are in the "
            "application log."
        )

        # Name + size identifies an upload within the session; two different
        # files sharing both would collide, which is accepted for this demo.
        cache_key = f"{pdf_file.name}:{pdf_file.size}"
        active_keys.add(cache_key)

        knowledge_base = cache.get(cache_key)
        if knowledge_base is None:
            try:
                text = _extract_pdf_text(pdf_file)
            except Exception:
                _LOGGER.exception("Failed to read PDF %r", pdf_file.name)
                st.error(unreadable_msg)
                continue

            # Nothing to embed (e.g. image-only pages): report it without
            # spending an embeddings request on empty input.
            if not text.strip():
                st.error(unreadable_msg)
                continue

            try:
                knowledge_base = _build_knowledge_base(text)
            except Exception:
                _LOGGER.exception("Failed to embed PDF %r", pdf_file.name)
                st.error(backend_msg)
                continue
            cache[cache_key] = knowledge_base

        user_question = st.text_input(
            f"Ask a question about '{pdf_file.name}':", key=f"question_{idx}"
        )
        if not user_question:
            continue

        try:
            response = _answer_question(knowledge_base, user_question)
        except Exception:
            _LOGGER.exception("Failed to answer question for %r", pdf_file.name)
            st.error(backend_msg)
            continue
        st.write(response)

    # Drop indexes of files that are no longer uploaded so the session does
    # not hold on to them indefinitely.
    for stale_key in set(cache) - active_keys:
        del cache[stale_key]

# Render the page only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()