File size: 3,522 Bytes
cc93217
 
 
 
 
 
 
ffe7e6e
 
 
cc93217
 
 
65d65e8
 
cc93217
fe87044
 
65d65e8
 
 
cc93217
 
 
 
65d65e8
 
 
cc93217
 
 
65d65e8
 
cc93217
 
 
 
 
 
 
 
 
 
 
179ea90
cc93217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65d65e8
cc93217
 
65d65e8
 
cc93217
 
 
 
ffe7e6e
cc93217
 
 
 
 
 
 
 
179ea90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr

from langchain.document_loaders import OnlinePDFLoader

from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import PromptTemplate

# from langhchain.llms import openai
from langchain.llms import OpenAI

# Shared splitter used by pdf_changes() to cut the loaded PDF documents into
# chunks (chunk_size=350, no overlap between consecutive chunks).
text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)

from langchain.llms import HuggingFaceHub
# Alternative Hugging Face Hub backend, currently disabled:
# flan_ul2 = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={"temperature":0.1, "max_new_tokens":300})
# NOTE: despite its name, `flan_ul2` currently holds an OpenAI LLM. It is the
# model handed to RetrievalQA in pdf_changes().
# NOTE(review): presumably credentials come from the environment - confirm.
flan_ul2 = OpenAI()

# NOTE: a `global` statement at module scope has no effect and does not create
# `qa`. The name only exists once pdf_changes() has assigned it.
global qa

from langchain.embeddings import HuggingFaceHubEmbeddings, OpenAIEmbeddings
# Alternative Hugging Face Hub embeddings, currently disabled:
# embeddings = HuggingFaceHubEmbeddings()
# Embedding model used by pdf_changes() when building the Chroma index.
embeddings = OpenAIEmbeddings()

from langchain.vectorstores import Chroma

from langchain.chains import RetrievalQA

from langchain.document_loaders import PyPDFLoader

def loading_pdf():
    """Return the placeholder text written to the status box on load click."""
    status_message = "Loading..."
    return status_message
def pdf_changes(pdf_doc):
    """Index the uploaded PDF and (re)build the global retrieval-QA chain.

    The PDF is loaded, split into chunks with the module-level
    ``text_splitter``, embedded into a Chroma vector store, and wrapped in a
    ``RetrievalQA`` "stuff" chain that is stored in the module-level ``qa``.

    Args:
        pdf_doc: The value delivered by the ``gr.File`` component. Either an
            object exposing the file path as ``.name`` or a plain path
            string; ``None`` when nothing has been uploaded.

    Returns:
        A short status string for the status textbox: ``"Ready"`` on success,
        or a prompt to upload a file when ``pdf_doc`` is ``None``.
    """
    # Clicking the button without a file would otherwise fail on `.name`.
    if pdf_doc is None:
        return "Please upload a PDF first"

    # Accept both file wrappers (with `.name`) and plain path strings.
    pdf_path = getattr(pdf_doc, "name", pdf_doc)

    # loader = OnlinePDFLoader(pdf_path)
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    texts = text_splitter.split_documents(documents)
    db = Chroma.from_documents(texts, embeddings)
    retriever = db.as_retriever()

    # The placeholders must match `input_variables` below exactly: the chain
    # supplies `context` (retrieved chunks) and `question` (the user query).
    prompt_template = """You have been given a pdf or pdfs. You must search these pdfs. 
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Only answer the question.
    
    {context}
    
    Question: {question}
    Answer:"""
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )
    chain_type_kwargs = {"prompt": PROMPT}

    # Published as a module global so infer() can use the latest chain.
    global qa
    qa = RetrievalQA.from_chain_type(
        llm=flan_ul2,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs=chain_type_kwargs,
    )
    return "Ready"

def add_text(history, text):
    """Append the user's message as a new, unanswered chat turn.

    Args:
        history: Current chat history, a list of (user, bot) pairs.
        text: The message the user just submitted.

    Returns:
        A 2-tuple ``(new_history, "")``: a new list with ``(text, None)``
        appended (the input list is not modified), and an empty string used
        to clear the question textbox.
    """
    pending_turn = (text, None)
    updated_history = history + [pending_turn]
    cleared_textbox = ""
    return updated_history, cleared_textbox

def bot(history):
    """Answer the most recent user message and store the reply in place.

    Args:
        history: Chat history whose last entry holds the pending user
            message at index 0 and an empty answer slot at index 1.
            NOTE(review): the last entry must be mutable (a list) here;
            presumably Gradio delivers rows as lists - confirm.

    Returns:
        The same ``history`` object, with the last entry's answer slot set
        to the ``'result'`` field returned by ``infer``.
    """
    latest_turn = history[-1]
    user_message = latest_turn[0]
    answer = infer(user_message)
    latest_turn[1] = answer['result']
    return history

def infer(question):
    """Run a question through the retrieval-QA chain built by pdf_changes().

    Args:
        question: The user's question text.

    Returns:
        The chain's result mapping (callers read its ``'result'`` key). If no
        chain has been built yet, a mapping of the same shape is returned
        whose ``'result'`` asks the user to load a PDF first, instead of
        failing with ``NameError`` on the missing global.
    """
    # `qa` is only bound after pdf_changes() has run, so look it up
    # defensively rather than referencing the bare name.
    chain = globals().get("qa")
    if chain is None:
        return {
            "query": question,
            "result": "Please load a PDF before asking questions.",
            "source_documents": [],
        }

    return chain({"query": question})

# Custom CSS handed to gr.Blocks below: centers the element with id
# "col-container" horizontally and caps its width at 700px.
css="""
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# HTML header rendered via gr.HTML at the top of the page: the app title plus
# short usage instructions.
title = """
<div style="text-align: center;max-width: 700px;">
    <h1>Chat with PDF</h1>
    <p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
    when everything is ready, you can start asking questions about the pdf ;)</p>
</div>
"""


# Build the Gradio UI: a file upload with a status box and load button, a
# chat window, and a question box, all inside one centered column.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        with gr.Column():
            pdf_doc = gr.File()
            # pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="filepath") #try filepath for type if binary does not work
            with gr.Row():
                langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)
                load_pdf = gr.Button("Load pdf to langchain")

        chatbot = gr.Chatbot([], elem_id="chatbot") #.style(height=350)
        with gr.Row():
            question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ")

    # Chain the two steps instead of registering two independent click
    # listeners: both write to the same status box, and only chaining
    # guarantees "Loading..." is shown before the indexing result.
    load_pdf.click(loading_pdf, None, langchain_status, queue=False).then(
        pdf_changes, pdf_doc, langchain_status, queue=False
    )
    # First echo the question into the chat and clear the textbox, then let
    # the bot fill in the answer for that turn.
    question.submit(add_text, [chatbot, question], [chatbot, question]).then(
        bot, chatbot, chatbot
    )

demo.launch()