Spaces:
Sleeping
Sleeping
File size: 2,177 Bytes
e949dee a70f40e 4a019f2 a70f40e e949dee 4a019f2 a70f40e 4a019f2 a70f40e 4a019f2 fef8c53 a70f40e 4a019f2 a70f40e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import logging
import os

import streamlit as st
from dotenv import load_dotenv
from langchain.callbacks import get_openai_callback
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
# Load environment variables from a .env file at import time, before main() runs.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm
# against the deployment configuration.
load_dotenv()
_LOGGER = logging.getLogger(__name__)

# Key under which per-session FAISS indexes are kept in st.session_state.
_KB_CACHE_KEY = "knowledge_bases"


def _split_pdf_into_chunks(pdf_file):
    """Extract the text of *pdf_file* and split it into overlapping chunks.

    Args:
        pdf_file: A file-like object accepted by ``PdfReader``.

    Returns:
        The list of text chunks produced by the splitter; it may be empty
        when no page yields any text.
    """
    pdf_reader = PdfReader(pdf_file)
    # A page without extractable text contributes nothing; guarding with
    # `or ""` keeps a falsy result from breaking the concatenation.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    text_splitter = CharacterTextSplitter(
        separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
    )
    return text_splitter.split_text(text)


def _get_knowledge_base(pdf_file, embeddings):
    """Return a FAISS index for *pdf_file*, reporting failures in the UI.

    Streamlit re-executes the whole script on every widget interaction, so
    the index is kept in ``st.session_state`` to avoid re-embedding the same
    upload each time the user types a question.

    Args:
        pdf_file: The uploaded file (needs ``name`` and ``size`` attributes).
        embeddings: The embeddings object passed to ``FAISS.from_texts``.

    Returns:
        The FAISS index, or ``None`` when the file could not be indexed (an
        error has already been shown with ``st.error`` in that case).
    """
    if _KB_CACHE_KEY not in st.session_state:
        st.session_state[_KB_CACHE_KEY] = {}
    cache = st.session_state[_KB_CACHE_KEY]

    # Name plus size is a cheap identity for an upload within one session.
    cache_key = (pdf_file.name, pdf_file.size)
    if cache_key in cache:
        return cache[cache_key]

    try:
        chunks = _split_pdf_into_chunks(pdf_file)
    except Exception:
        _LOGGER.exception("Could not read PDF %r", pdf_file.name)
        chunks = []

    if not chunks:
        # Nothing to index: building a vector store from zero texts would
        # only fail later with a less helpful error.
        st.error(f"An error occurred while processing '{pdf_file.name}'. This file may be protected by the author, or contain scanned text which this basic demo is not set up to process.")
        return None

    try:
        knowledge_base = FAISS.from_texts(chunks, embeddings)
    except Exception:
        _LOGGER.exception("Could not build the index for %r", pdf_file.name)
        st.error(f"Could not build the search index for '{pdf_file.name}'. Please check the OpenAI configuration and try again.")
        return None

    cache[cache_key] = knowledge_base
    return knowledge_base


def main():
    """Render the PDF chat page.

    Lets the user upload one or more PDFs, shows one question box per file,
    and answers each question with a "stuff" QA chain run over the chunks of
    that file most similar to the question. Failures are logged with their
    traceback and reported in the page instead of being silently swallowed.
    """
    st.set_page_config(page_title="PDF Chat")
    st.header("Chat with your PDFs 💬")

    # Upload PDF files
    pdf_files = st.file_uploader(
        "Upload your PDF files", type="pdf", accept_multiple_files=True
    )
    if not pdf_files:
        return

    # These objects do not depend on the file, so build them once per run.
    try:
        embeddings = OpenAIEmbeddings()
        chain = load_qa_chain(OpenAI(), chain_type="stuff")
    except Exception:
        _LOGGER.exception("Could not set up the OpenAI embeddings/LLM")
        st.error("Could not set up the OpenAI models. Please check that your API key is configured.")
        return

    for idx, pdf_file in enumerate(pdf_files):
        knowledge_base = _get_knowledge_base(pdf_file, embeddings)
        if knowledge_base is None:
            continue

        user_question = st.text_input(
            f"Ask a question about '{pdf_file.name}':", key=f"question_{idx}"
        )
        if not user_question:
            continue

        try:
            docs = knowledge_base.similarity_search(user_question)
            with get_openai_callback() as cb:
                response = chain.run(input_documents=docs, question=user_question)
                # Print the callback's usage report to the server console.
                print(cb)
        except Exception:
            _LOGGER.exception("Question answering failed for %r", pdf_file.name)
            st.error(f"An error occurred while answering your question about '{pdf_file.name}'. Please try again.")
            continue

        st.write(response)
# Start the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()