Spaces:
Sleeping
Sleeping
import os | |
from dotenv import load_dotenv | |
from PyPDF2 import PdfReader | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain import vectorstores | |
from langchain import chains | |
from langchain import llms | |
from langchain.embeddings import HuggingFaceEmbeddings | |
import gradio as gr | |
# Pull variables from a local .env file (when one exists) into the process
# environment, so the credential can live outside the source tree.
load_dotenv()

# Shared LLM client used by pdf_qa. The key is read from the AI21_API_KEY
# environment variable rather than being written into the source: a key that
# has been committed to a repository must be treated as leaked and rotated.
llm = llms.AI21(ai21_api_key=os.getenv("AI21_API_KEY"))
def pdf_qa(pdf, query):
    """Answer ``query`` from the text of an uploaded PDF.

    The PDF text is extracted page by page, split on newlines with
    ``chunk_size=1000`` and no overlap, indexed in a Chroma vector store
    using HuggingFace embeddings, and queried through a
    ``ConversationalRetrievalChain`` driven by the module-level ``llm``.

    Args:
        pdf: The uploaded PDF as delivered by the file input (anything
            ``PdfReader`` accepts), or ``None`` when nothing was uploaded.
        query: The question to ask about the document.

    Returns:
        The chain's ``"answer"`` value, or a short notice when an input is
        missing or no text could be extracted from the PDF.
    """
    # Check both inputs before doing anything else: parsing, embedding and
    # indexing are expensive and pointless without a question to answer.
    if pdf is None or not query:
        return "Please upload a PDF and enter a query."

    pdf_reader = PdfReader(pdf)
    # Join once instead of repeated +=, and tolerate pages for which
    # extract_text() yields nothing (falsy results are treated as "").
    texts = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=0,
    )
    chunks = text_splitter.split_text(texts)
    if not chunks:
        # Nothing to index or retrieve from (e.g. an image-only PDF).
        return "No extractable text was found in the uploaded PDF."

    embeddings = HuggingFaceEmbeddings()
    db = vectorstores.Chroma.from_texts(chunks, embeddings)
    # Retrieve the 10 most similar chunks as context for each question.
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 10})
    qa = chains.ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)

    # Every call is a fresh, single-turn conversation: no history is kept.
    result = qa({"question": query, "chat_history": []})
    return result["answer"]
# --- Gradio UI wiring -------------------------------------------------------
# NOTE(review): gr.inputs / gr.outputs are the legacy component namespaces;
# confirm the pinned gradio version still provides them before upgrading.

# Single-file upload widget whose value is passed to pdf_qa as `pdf`.
pdf_input = gr.inputs.File(
    label="Upload your PDF",
    type="file",
    file_count="single",
)

# Free-text question passed to pdf_qa as `query`.
query_input = gr.inputs.Textbox(label="Ask a question in PDF")

# Text area that displays whatever pdf_qa returns.
output = gr.outputs.Textbox(label="Answer")

# Build the interface, then start serving it as soon as the module runs.
demo = gr.Interface(
    fn=pdf_qa,
    inputs=[pdf_input, query_input],
    outputs=output,
    title="PDF QA",
)
demo.launch()