# ChatPDF / app.py
# (Hugging Face file-viewer header captured with the source:
#  Benjy's picture | Update app.py | fef8c53 verified | raw | history blame | 2.18 kB)
import os
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from langchain.callbacks import get_openai_callback
from dotenv import load_dotenv
# Load environment variables (python-dotenv) before any client below is built.
# NOTE(review): presumably this is how the OpenAI API key reaches
# OpenAIEmbeddings() / OpenAI() in main() -- confirm against deployment config.
load_dotenv()
def _extract_pdf_text(pdf_file):
    """Return the text of every page of *pdf_file* joined into one string.

    A page whose ``extract_text()`` yields nothing (``None`` or ``""``)
    contributes an empty string instead of raising ``TypeError`` during
    concatenation.
    """
    reader = PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)


def main():
    """Render the Streamlit page that answers questions about uploaded PDFs.

    For each uploaded file the page text is extracted, split into
    overlapping chunks, embedded into a FAISS index, and a question box is
    shown. When a question is entered, the most similar chunks are passed
    to a "stuff" question-answering chain and the answer is written to the
    page. Any failure for a file is reported with ``st.error`` and the
    remaining files are still processed.
    """
    # Local import: keeps this block self-contained without touching the
    # file's top-level import list.
    import logging

    logger = logging.getLogger(__name__)

    st.set_page_config(page_title="PDF Chat")
    st.header("Chat with your PDFs 💬")

    # Upload PDF files
    pdf_files = st.file_uploader(
        "Upload your PDF files", type="pdf", accept_multiple_files=True
    )
    if not pdf_files:
        return

    # The splitter configuration does not depend on the file, so build it once.
    text_splitter = CharacterTextSplitter(
        separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
    )

    for idx, pdf_file in enumerate(pdf_files):
        failure_message = f"An error occurred while processing '{pdf_file.name}'. This file may be protected by the author, or contain scanned text which this basic demo is not set up to process."
        try:
            text = _extract_pdf_text(pdf_file)
            chunks = text_splitter.split_text(text)
            if not chunks:
                # Nothing to index (e.g. image-only pages): report it
                # directly instead of relying on the vector store to fail
                # on an empty input list.
                logger.warning("No extractable text in %r", pdf_file.name)
                st.error(failure_message)
                continue

            embeddings = OpenAIEmbeddings()
            knowledge_base = FAISS.from_texts(chunks, embeddings)

            # The per-file key keeps the question widgets distinct when
            # several PDFs are uploaded at once.
            user_question = st.text_input(
                f"Ask a question about '{pdf_file.name}':", key=f"question_{idx}"
            )
            if user_question:
                docs = knowledge_base.similarity_search(user_question)
                llm = OpenAI()
                chain = load_qa_chain(llm, chain_type="stuff")
                with get_openai_callback() as cb:
                    response = chain.run(
                        input_documents=docs, question=user_question
                    )
                    # Usage summary collected by the callback goes to stdout.
                    print(cb)
                st.write(response)
        except Exception:
            # UI boundary: keep the page alive for the other files, but record
            # the real cause -- the user-facing message only mentions PDF
            # problems even when the failure came from the API calls.
            logger.exception("Failed to process %r", pdf_file.name)
            st.error(failure_message)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()