"""Gradio app that answers a question about an uploaded PDF via OpenAI."""

import os

import fitz
import gradio as gr
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.llms import OpenAI
from langchain_community.vectorstores.faiss import FAISS

# Read once at import time; None when the variable is not set.
openai_api_key = os.environ.get('OPENAI_API_KEY')


def _extract_pdf_text(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, concatenated."""
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def read_pdf(pdf_file, prompt):
    """Answer *prompt* using the contents of an uploaded PDF.

    The PDF text is split into overlapping chunks, embedded with OpenAI
    embeddings into an in-memory FAISS index, and the chunks returned by a
    similarity search on *prompt* are passed to a "stuff" question-answering
    chain.

    Args:
        pdf_file: The uploaded file as delivered by the Gradio "file" input:
            either an object whose ``name`` attribute is the file path, or
            the path itself as a string.
        prompt: The question to ask about the document.

    Returns:
        The answer text produced by the chain, or a string starting with
        ``"Error: "`` describing what went wrong. Exceptions are never
        propagated to the caller.
    """
    if pdf_file is None:
        return "Error: no PDF file was uploaded."
    if not prompt or not prompt.strip():
        return "Error: no question was provided."

    try:
        # Accept both a file-like wrapper exposing ``.name`` and a bare path.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        text = _extract_pdf_text(pdf_path)

        # Split the text into several overlapping chunks.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        texts = text_splitter.split_text(text)
        if not texts:
            # Nothing to index (e.g. a scanned, image-only PDF); building a
            # FAISS index from an empty list would fail with an unclear error.
            return "Error: no extractable text was found in the PDF."

        # Embed the chunks with OpenAI and index them for similarity search.
        embeddings = OpenAIEmbeddings(api_key=openai_api_key)
        docsearch = FAISS.from_texts(texts, embeddings)

        chain = load_qa_chain(OpenAI(api_key=openai_api_key), chain_type="stuff")
        docs = docsearch.similarity_search(prompt)
        return chain.run(input_documents=docs, question=prompt)
    except Exception as e:
        # UI boundary: report the failure in the output box instead of
        # letting the exception escape into Gradio.
        return f"Error: {str(e)}"


iface = gr.Interface(
    read_pdf,
    inputs=["file", "text"],
    outputs="text",
    title="PDF Reader",
    description="Upload a PDF file!",
)

iface.launch(share=True)