isayahc committed on
Commit
cc93217
·
verified ·
1 Parent(s): 093c770

set up for mvp

Browse files
Files changed (2) hide show
  1. app.py +98 -0
  2. ingest.py +5 -5
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Third-party dependencies for the "Chat with PDF" demo.
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.document_loaders import OnlinePDFLoader
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Shared splitter used on every loaded PDF (chunk_size=350, no overlap).
text_splitter = CharacterTextSplitter(chunk_size=350, chunk_overlap=0)

# NOTE: the variable name is historical; the model actually configured here
# is HuggingFaceH4/zephyr-7b-beta served through the Hugging Face Hub.
flan_ul2 = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs={"temperature": 0.1, "max_new_tokens": 300},
)

# Embedding client built with the library defaults (no arguments passed).
embeddings = HuggingFaceHubEmbeddings()
19
def loading_pdf():
    """Return the fixed placeholder status string ``"Loading..."``."""
    status = "Loading..."
    return status
21
def pdf_changes(pdf_doc):
    """Index the uploaded PDF and (re)build the global retrieval-QA chain.

    Args:
        pdf_doc: Upload object whose ``name`` attribute is handed to
            ``OnlinePDFLoader``. ``None`` when no file has been selected.

    Returns:
        ``"Ready"`` once the global ``qa`` chain has been built, or a short
        hint asking for a file when ``pdf_doc`` is ``None``.
    """
    global qa

    # Without this guard, clicking the load button before choosing a file
    # fails with AttributeError on ``pdf_doc.name``.
    if pdf_doc is None:
        return "Please upload a PDF first."

    # Load the PDF, split it into chunks and index the chunks in Chroma.
    documents = OnlinePDFLoader(pdf_doc.name).load()
    texts = text_splitter.split_documents(documents)
    retriever = Chroma.from_documents(texts, embeddings).as_retriever()

    # Prompt with the two placeholders the "stuff" chain fills in:
    # the retrieved context and the user's question.
    prompt_template = (
        "You have been given a pdf or pdfs. You must search these pdfs.\n"
        "If you don't know the answer, just say that you don't know, "
        "don't try to make up an answer.\n"
        "Only answer the question.\n"
        "\n"
        "{context}\n"
        "\n"
        "Question: {question}\n"
        "Answer:"
    )
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    # Stored in a module-level global so that ``infer`` can reach the chain.
    qa = RetrievalQA.from_chain_type(
        llm=flan_ul2,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
    )
    return "Ready"
49
+
50
def add_text(history, text):
    """Append the user's message to the chat history as an unanswered turn.

    Args:
        history: List of ``(user_message, bot_message)`` pairs.
        text: The message just submitted by the user.

    Returns:
        A 2-tuple: a new history list ending with ``(text, None)`` (the
        input list is not mutated) and an empty string.
    """
    pending_turn = (text, None)
    extended = history + [pending_turn]
    return extended, ""
53
+
54
def bot(history):
    """Answer the most recent user message and record the reply in history.

    Args:
        history: List of ``[user_message, bot_message]`` pairs (lists or
            tuples); the last pair holds the question to answer.

    Returns:
        The same ``history`` list, with its last entry replaced by
        ``[user_message, answer]``. An empty history is returned unchanged.
    """
    if not history:
        # Nothing to answer; avoids IndexError on history[-1].
        return history

    user_message = history[-1][0]
    response = infer(user_message)

    # Replace the whole entry instead of assigning history[-1][1]: the
    # pending turn may be a tuple, which does not support item assignment.
    history[-1] = [user_message, response["result"]]
    return history
58
+
59
def infer(question):
    """Run a question through the global retrieval-QA chain ``qa``.

    Args:
        question: The user's question text.

    Returns:
        Whatever the chain returns for ``{"query": question}``; callers read
        its ``"result"`` key. If no chain has been built yet (the global
        ``qa`` is missing or ``None``), a dict of the same shape carrying a
        hint in ``"result"`` and an empty ``"source_documents"`` list.
    """
    # ``qa`` only exists after a PDF has been loaded; look it up defensively
    # so an early question yields a readable message instead of a NameError.
    chain = globals().get("qa")
    if chain is None:
        return {
            "result": "Please load a PDF before asking a question.",
            "source_documents": [],
        }

    return chain({"query": question})
65
+
66
# Page-level CSS: centres the main column and caps its width.
css = """
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# HTML header rendered at the top of the page.
title = """
<div style="text-align: center;max-width: 700px;">
<h1>Chat with PDF</h1>
<p style="text-align: center;">Upload a .PDF from your computer, click the "Load PDF to LangChain" button, <br />
when everything is ready, you can start asking questions about the pdf ;)</p>
</div>
"""


with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

        # Upload area: file picker, status box and the load button.
        with gr.Column():
            pdf_doc = gr.File(label="Load a pdf", file_types=['.pdf'], type="file")
            with gr.Row():
                langchain_status = gr.Textbox(
                    label="Status", placeholder="", interactive=False
                )
                load_pdf = gr.Button("Load pdf to langchain")

        # Chat area: conversation view and the question input.
        chatbot = gr.Chatbot([], elem_id="chatbot").style(height=350)
        with gr.Row():
            question = gr.Textbox(
                label="Question", placeholder="Type your question and hit Enter "
            )

    # Two handlers are registered on the same button and both write to the
    # status box: one posts the placeholder, the other builds the QA chain.
    load_pdf.click(fn=loading_pdf, inputs=None, outputs=langchain_status, queue=False)
    load_pdf.click(fn=pdf_changes, inputs=pdf_doc, outputs=langchain_status, queue=False)

    # On submit: record the question and clear the box, then fill in the answer.
    question.submit(
        fn=add_text, inputs=[chatbot, question], outputs=[chatbot, question]
    ).then(fn=bot, inputs=chatbot, outputs=chatbot)

demo.launch()
ingest.py CHANGED
@@ -24,14 +24,14 @@ query_engine = index.as_query_engine()
24
  # Define the query text
25
  query_text = "How does the weather affect crop growth?"
26
 
27
- data = query_engine.query(query)
28
 
29
  # Preprocess the query text
30
- query_builder = QueryBuilder(service_context)
31
- query = query_builder.build_query(query_text)
32
 
33
- # Search for similar documents or retrieve relevant information
34
- results = index.search(query)
35
 
36
  # Process the search results
37
  for result in results:
 
24
  # Define the query text
25
  query_text = "How does the weather affect crop growth?"
26
 
27
+ data = query_engine.query(query_text)
28
 
29
  # Preprocess the query text
30
+ # query_builder = QueryBuilder(service_context)
31
+ # query = query_builder.build_query(query_text)
32
 
33
+ # # Search for similar documents or retrieve relevant information
34
+ # results = index.search(query)
35
 
36
  # Process the search results
37
  for result in results: