Spaces:
Sleeping
Sleeping
File size: 3,522 Bytes
cc93217 ffe7e6e cc93217 65d65e8 cc93217 fe87044 65d65e8 cc93217 65d65e8 cc93217 65d65e8 cc93217 179ea90 cc93217 65d65e8 cc93217 65d65e8 cc93217 ffe7e6e cc93217 179ea90 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# --- Dependencies (grouped: third-party UI, then langchain) -----------------
import gradio as gr

from langchain.chains import RetrievalQA
from langchain.document_loaders import OnlinePDFLoader, PyPDFLoader
from langchain.embeddings import HuggingFaceHubEmbeddings, OpenAIEmbeddings
from langchain.llms import HuggingFaceHub, OpenAI
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Split loaded PDF pages into ~350-character chunks (no overlap) before embedding.
text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)

# LLM used to answer questions.  Swap in the HuggingFaceHub line below to use
# an open model instead of OpenAI (requires a HF API token in the environment).
# flan_ul2 = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
flan_ul2 = OpenAI()

# Embedding model backing the Chroma vector store (OpenAI by default).
# embeddings = HuggingFaceHubEmbeddings()
embeddings = OpenAIEmbeddings()

# Retrieval-QA chain; assigned by pdf_changes() once a PDF is loaded and read
# by infer().  Defined here so the name always exists at module level.
qa = None
def loading_pdf():
    """Return the interim status text shown while a PDF is being ingested."""
    status_message = "Loading..."
    return status_message
def pdf_changes(pdf_doc):
    """Ingest the uploaded PDF and build the retrieval-QA chain.

    Loads the file, splits it into chunks, embeds the chunks into a Chroma
    vector store, and stores a ready-to-use RetrievalQA chain in the module
    global ``qa`` (read later by ``infer``).

    Args:
        pdf_doc: the gradio File value; only its ``.name`` (local path) is used.

    Returns:
        The status string ``"Ready"`` for the status textbox.
    """
    # loader = OnlinePDFLoader(pdf_doc.name)
    loader = PyPDFLoader(pdf_doc.name)
    documents = loader.load()
    texts = text_splitter.split_documents(documents)
    db = Chroma.from_documents(texts, embeddings)
    retriever = db.as_retriever()
    # BUGFIX: the placeholder must be `{question}` to match input_variables
    # below; the original `{sample.question}` does not correspond to any
    # declared variable and breaks prompt substitution.
    prompt_template = """You have been given a pdf or pdfs. You must search these pdfs.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Only answer the question.
{context}
Question: {question}
Answer:"""
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    chain_type_kwargs = {"prompt": PROMPT}
    global qa
    qa = RetrievalQA.from_chain_type(
        llm=flan_ul2,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs=chain_type_kwargs,
    )
    return "Ready"
def add_text(history, text):
    """Append the user's message (with no bot reply yet) to the chat history
    and return an empty string to clear the input textbox."""
    updated_history = [*history, (text, None)]
    return updated_history, ""
def bot(history):
    """Answer the most recent user message and write the reply into the
    last history entry (gradio passes history rows as mutable lists)."""
    latest_question = history[-1][0]
    response = infer(latest_question)
    history[-1][1] = response["result"]
    return history
def infer(question):
    """Run the retrieval-QA chain built by pdf_changes() on *question* and
    return the chain's result dict (includes a 'result' key)."""
    return qa({"query": question})
# Narrow, centred column layout for the whole app (referenced by elem_id below).
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""
# HTML header rendered at the top of the page.
title = """
<div style="text-align: center;max-width: 700px;">
<h1>Chat with PDF</h1>
<p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
when everything is ready, you can start asking questions about the pdf ;)</p>
</div>
"""
# ---- UI layout and event wiring --------------------------------------------
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Column():
            pdf_doc = gr.File()
            # pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="filepath") #try filepath for type if binary does not work

            with gr.Row():
                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
                load_pdf = gr.Button("Load pdf to langchain")

            chatbot = gr.Chatbot([], elem_id="chatbot")  # .style(height=350)
            with gr.Row():
                question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")

        # Two handlers on the same click: the first immediately shows
        # "Loading...", the second runs the (slow) ingestion and overwrites
        # the status with "Ready" when it finishes.
        load_pdf.click(loading_pdf, None, langchain_status, queue=False)
        load_pdf.click(pdf_changes, pdf_doc, langchain_status, queue=False)
        # On Enter: append the user message and clear the textbox, then
        # generate the bot's answer into the same chatbot component.
        question.submit(add_text, [chatbot, question], [chatbot, question]).then(
            bot, chatbot, chatbot
        )

demo.launch()
|