ChatPDF

Sleeping

App Files Files Community

ChatPDF / app.py

Benjy

Update app.py

4a019f2 verified 6 months ago

raw

history blame

2.19 kB

	import os
	import streamlit as st
	from PyPDF2 import PdfReader
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.embeddings import OpenAIEmbeddings
	from langchain.vectorstores import FAISS
	from langchain.chains.question_answering import load_qa_chain
	from langchain.llms import OpenAI
	from langchain.callbacks import get_openai_callback
	from dotenv import load_dotenv

	# Load environment variables via python-dotenv at import time, before main()
	# constructs any OpenAI client.
	# NOTE(review): presumably this is where the OpenAI API key comes from when
	# running locally — confirm against the deployment configuration.
	load_dotenv()

	def _extract_text(pdf_reader):
	    """Concatenate the extracted text of every page of ``pdf_reader``.

	    Pages for which ``extract_text()`` yields nothing (``None`` or ``""``)
	    contribute an empty string instead of breaking the concatenation.
	    """
	    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


	def _get_knowledge_base(pdf_file):
	    """Return a FAISS index over the text of ``pdf_file``, built at most once.

	    Streamlit re-executes the whole script on every widget interaction, so
	    the index is kept in ``st.session_state`` under a key derived from the
	    file's bytes. Asking (or re-asking) a question therefore does not
	    re-parse the PDF or request its embeddings again. Cached indexes live
	    for the duration of the browser session.

	    Args:
	        pdf_file: An uploaded file object as returned by ``st.file_uploader``.

	    Returns:
	        The FAISS vector store built from the PDF's text chunks.

	    Raises:
	        ValueError: If no text could be extracted from the PDF.
	    """
	    import hashlib  # local import: only this helper needs it

	    digest = hashlib.sha256(pdf_file.getvalue()).hexdigest()
	    cache_key = f"knowledge_base_{digest}"

	    if cache_key not in st.session_state:
	        text = _extract_text(PdfReader(pdf_file))

	        text_splitter = CharacterTextSplitter(
	            separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
	        )
	        chunks = text_splitter.split_text(text)
	        if not chunks:
	            # Fail with a readable reason instead of handing the vector
	            # store an empty list of texts.
	            raise ValueError("no extractable text was found")

	        st.session_state[cache_key] = FAISS.from_texts(chunks, OpenAIEmbeddings())

	    return st.session_state[cache_key]


	def _answer_question(knowledge_base, user_question):
	    """Answer ``user_question`` using the chunks most similar to it.

	    Args:
	        knowledge_base: Vector store exposing ``similarity_search``.
	        user_question: The question text entered by the user.

	    Returns:
	        The response produced by the "stuff" question-answering chain.
	    """
	    docs = knowledge_base.similarity_search(user_question)
	    chain = load_qa_chain(OpenAI(), chain_type="stuff")

	    with get_openai_callback() as cb:
	        response = chain.run(input_documents=docs, question=user_question)
	        # Emit the callback's summary to stdout (the server log), as before.
	        print(cb)

	    return response


	def main() -> None:
	    """Render the page: upload PDFs and ask one question per uploaded file.

	    Each file is handled independently; any failure while processing a file
	    is reported in the UI for that file only, and the remaining files are
	    still processed.
	    """
	    st.set_page_config(page_title="PDF Chat")
	    st.header("Chat with your PDFs 💬")

	    # Upload PDF files
	    pdf_files = st.file_uploader("Upload your PDF files", type="pdf", accept_multiple_files=True)
	    if not pdf_files:
	        return

	    for idx, pdf_file in enumerate(pdf_files):
	        # Broad catch is deliberate: this is the per-file UI boundary, and
	        # the error is surfaced to the user rather than swallowed.
	        try:
	            knowledge_base = _get_knowledge_base(pdf_file)

	            user_question = st.text_input(f"Ask a question about '{pdf_file.name}':", key=f"question_{idx}")
	            if user_question:
	                st.write(_answer_question(knowledge_base, user_question))
	        except Exception as e:
	            st.error(f"An error occurred while processing '{pdf_file.name}': {str(e)}. This file may be protected by the author, or contain scanned text which this basic demo is not set up to process.")

	# Start the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()