import os

from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain import vectorstores
from langchain import chains
from langchain import llms
from langchain.embeddings import HuggingFaceEmbeddings
import gradio as gr

# Load the AI21 key from a local .env file instead of hard-coding it in source.
# (AI21_API_KEY is an assumed variable name; the original embedded the key as a literal.)
load_dotenv()
llm = llms.AI21(ai21_api_key=os.getenv("AI21_API_KEY"))


def pdf_qa(pdf, query):
    if pdf is not None and query:
        # Extract the raw text from every page of the uploaded PDF.
        pdf_reader = PdfReader(pdf)
        texts = ""
        for page in pdf_reader.pages:
            texts += page.extract_text()

        # Split the text into ~1000-character chunks for embedding.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=1000,
            chunk_overlap=0,
        )
        chunks = text_splitter.split_text(texts)

        # Embed the chunks and index them in an in-memory Chroma store.
        embeddings = HuggingFaceEmbeddings()
        db = vectorstores.Chroma.from_texts(chunks, embeddings)
        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 10})

        # Answer the question with a conversational retrieval chain over the index.
        qa = chains.ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)
        chat_history = []
        result = qa({"question": query, "chat_history": chat_history})
        return result["answer"]
    return "Please upload a PDF and enter a query."


with gr.Blocks() as demo:
    pdf_input = gr.File(label="Upload your PDF", type="filepath")
    query_input = gr.Textbox(label="Ask a question about the PDF")
    output = gr.Textbox(label="Answer")
    submit_button = gr.Button("Submit")
    submit_button.click(fn=pdf_qa, inputs=[pdf_input, query_input], outputs=output)

demo.launch()
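A note on the setup this sketch assumes: the AI21 key is read from an AI21_API_KEY entry in a local .env file next to the script (the variable name is an assumption, since the original hard-coded the key), and the File component uses type="filepath" so PdfReader receives a plain path, which also works on newer Gradio versions where type="file" is no longer accepted. Running it requires the packages used above to be installed, roughly python-dotenv, PyPDF2, langchain, chromadb, sentence-transformers (for HuggingFaceEmbeddings), ai21, and gradio; once launched, Gradio serves the app locally on its default port.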