rasyosef committed
Commit: 03e7215
Parent(s): 4c5f0fe

Update app.py

Files changed (1): app.py (+44 −50)
app.py CHANGED
@@ -8,12 +8,14 @@ from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.chains import RetrievalQA
 from langchain.prompts.prompt import PromptTemplate
 from langchain.vectorstores.base import VectorStoreRetriever
-from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 
+from transformers import TextIteratorStreamer
+from threading import Thread
+
 # Prompt template
 template = """Instruction:
 You are an AI assistant for answering questions about the provided context.
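The two new imports are the heart of this commit: TextIteratorStreamer buffers decoded tokens as the model emits them, and Thread lets generation run in the background while those tokens are consumed. A minimal sketch of that pattern, separate from the app (the prompt is illustrative; any causal LM works the same way):

```python
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "microsoft/phi-2"  # same checkpoint the app uses
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, trust_remote_code=True)

inputs = tokenizer("Instruction: say hello.\nOutput:", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

# generate() blocks until decoding finishes, so it runs on a worker thread
# while the main thread iterates the streamer and receives text chunks.
thread = Thread(target=model.generate, kwargs={**inputs, "streamer": streamer, "max_new_tokens": 32})
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()
```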
@@ -22,10 +24,8 @@ If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up
 =======
 {context}
 =======
-Chat History:
-
-{question}
-Output:"""
+Question: {question}
+Output:\n"""
 
 QA_PROMPT = PromptTemplate(
     template=template,
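The rewritten template drops the unused chat-history block and takes exactly the two variables that RetrievalQA fills in, context and question. A quick render of the new prompt, using a trimmed copy of the template and hypothetical values:

```python
from langchain.prompts.prompt import PromptTemplate

template = """Instruction:
You are an AI assistant for answering questions about the provided context.
=======
{context}
=======
Question: {question}
Output:\n"""

QA_PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])
print(QA_PROMPT.format(
    context="Oppenheimer premiered in July 2023.",  # stands in for retrieved chunks
    question="When did Oppenheimer premiere?",
))
```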
@@ -58,10 +58,18 @@ def prepare_vector_store(filename):
 model_id = "microsoft/phi-2"
 
 tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True)
-phi2 = pipeline("text-generation", tokenizer=tokenizer, model=model, max_new_tokens=64, device_map="auto") # GPU
-
-phi2.tokenizer.pad_token_id = phi2.model.config.eos_token_id
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, device_map="auto", trust_remote_code=True)
+
+streamer = TextIteratorStreamer(tokenizer=tokenizer, skip_prompt=True)
+phi2 = pipeline(
+    "text-generation",
+    tokenizer=tokenizer,
+    model=model,
+    max_new_tokens=256,
+    eos_token_id=tokenizer.eos_token_id,
+    device_map="auto",
+    streamer=streamer
+) # GPU
 hf_model = HuggingFacePipeline(pipeline=phi2)
 
 # Retrieval QA chain
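HuggingFacePipeline is a thin adapter that exposes a transformers pipeline through LangChain's LLM interface, which is what lets the RetrievalQA chain drive phi2. A tiny standalone sketch of that wrapper, with a small stand-in checkpoint so it runs quickly on CPU:

```python
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import pipeline

# sshleifer/tiny-gpt2 is a stand-in; the app wraps its phi-2 pipeline the same way
tiny = pipeline("text-generation", model="sshleifer/tiny-gpt2", max_new_tokens=8)
llm = HuggingFacePipeline(pipeline=tiny)
print(llm.invoke("Hello"))  # plain string in, generated string out
```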
@@ -81,27 +89,16 @@ def get_retrieval_qa_chain(filename):
 qa_chain = get_retrieval_qa_chain(filename="Oppenheimer-movie-wiki.txt")
 
 # Generates response using the question answering chain defined earlier
-def generate(question, chat_history):
-    query = ""
-    for req, res in chat_history:
-        query += f"User: {req}\n"
-        query += f"Assistant: {res}\n"
-    query += f"User: {question}"
+def generate(question, answer):
+    query = f"{question}"
 
-    result = qa_chain.invoke({"query": query})
-    response = result["result"].strip()
-    response = response.split("\n\n")[0].strip()
+    thread = Thread(target=qa_chain.invoke, kwargs={"input": {"query": query}})
+    thread.start()
 
-    if "User:" in response:
-        response = response.split("User:")[0].strip()
-    if "INPUT:" in response:
-        response = response.split("INPUT:")[0].strip()
-    if "Assistant:" in response:
-        response = response.split("Assistant:")[1].strip()
-
-    chat_history.append((question, response))
-
-    return "", chat_history
+    response = ""
+    for token in streamer:
+        response += token
+        yield response
 
 # replaces the retriever in the question answering chain whenever a new file is uploaded
 def upload_file(qa_chain):
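The new generate() is a generator: qa_chain.invoke() runs on a worker thread and feeds tokens into the module-level streamer, while the main thread yields an ever-growing prefix that Gradio re-renders on each yield. The producer/consumer handoff looks like this, sketched with a plain Queue standing in for TextIteratorStreamer (tokens are hard-coded):

```python
from queue import Queue
from threading import Thread

def produce(q: Queue) -> None:
    # stand-in for qa_chain.invoke() writing into the streamer
    for token in ["Cillian ", "Murphy ", "portrayed ", "Oppenheimer."]:
        q.put(token)
    q.put(None)  # end-of-stream sentinel

def generate_demo(question: str):
    q: Queue = Queue()
    Thread(target=produce, args=(q,)).start()
    response = ""
    while (token := q.get()) is not None:
        response += token
        yield response  # each yield replaces the text shown in the output box

for partial in generate_demo("Who portrayed Oppenheimer?"):
    print(partial)
```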
@@ -114,34 +111,31 @@ def upload_file(qa_chain):
 
 with gr.Blocks() as demo:
     gr.Markdown("""
-    # RAG-Phi-2 Chatbot demo
-    ### This demo uses the Phi-2 language model and Retrieval Augmented Generation (RAG) to allow you to add custom knowledge to the chatbot by uploading a txt file. Upload a txt file that contains the text data that you would like to augment the chatbot with.
+    # RAG-Phi-2 Question Answering demo
+    ### This demo uses the Phi-2 language model and Retrieval Augmented Generation (RAG) to allow you to upload a txt file and ask the model questions related to the content of that file.
     ### If you don't have one, there is a txt file already loaded: the new Oppenheimer movie's entire Wikipedia page. The movie came out in July 2023, so the Phi-2 model is not aware of it.
-
-    The context size of the Phi-2 model is 2048 tokens, so even this medium size wikipedia page (11.5k tokens) does not fit in the context window.
-    Retrieval Augmented Generation (RAG) enables us to retrieve just the few small chunks of the document that are relevant to the our query and inject it into our prompt.
-    The chatbot is then able to answer questions by incorporating knowledge from the newly provided document. RAG can be used with thousands of documents, but this demo is limited to just one txt file.
+    The context size of the Phi-2 model is 2048 tokens, so even this medium-size Wikipedia page (11.5k tokens) does not fit in the context window.
+    Retrieval Augmented Generation (RAG) enables us to retrieve just the few small chunks of the document that are relevant to our query and inject them into our prompt.
+    The model is then able to answer questions by incorporating knowledge from the newly provided document. RAG can be used with thousands of documents, but this demo is limited to just one txt file.
     """)
 
-    file_output = gr.File(label="txt file")
-    upload_button = gr.UploadButton(
-        label="Click to upload a txt file",
-        file_types=["text"],
-        file_count="single"
-    )
-    upload_button.upload(upload_file(qa_chain), upload_button, file_output)
-
-    chatbot = gr.Chatbot(label="RAG Phi-2 Chatbot")
-    msg = gr.Textbox(label="Message", placeholder="Enter text here")
-
-    clear = gr.ClearButton([msg, chatbot])
-    msg.submit(fn=generate, inputs=[msg, chatbot], outputs=[msg, chatbot])
+    with gr.Row():
+        with gr.Column():
+            ques = gr.Textbox(label="Question", placeholder="Enter text here", lines=3)
+        with gr.Column():
+            ans = gr.Textbox(label="Answer", lines=4)
+    with gr.Row():
+        with gr.Column():
+            btn = gr.Button("Submit")
+        with gr.Column():
+            clear = gr.ClearButton([ques, ans])
+    btn.click(fn=generate, inputs=[ques, ans], outputs=[ans])
 
     examples = gr.Examples(
         examples=[
             "Who portrayed J. Robert Oppenheimer in the new Oppenheimer movie?",
             "In the plot of the movie, why did Lewis Strauss resent Robert Oppenheimer?"
         ],
-        inputs=[msg],
+        inputs=[ques],
     )
 
-demo.launch()
+demo.queue().launch()
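One easy-to-miss detail at the bottom: generator callbacks like generate() only stream to the UI when Gradio's queue is enabled, hence the switch from demo.launch() to demo.queue().launch(). A self-contained toy app showing the same effect (names here are hypothetical):

```python
import time

import gradio as gr

def slow_echo(message: str):
    out = ""
    for ch in message:
        out += ch
        time.sleep(0.05)
        yield out  # streams into the output textbox, character by character

with gr.Blocks() as tiny_demo:
    box_in = gr.Textbox(label="Question")
    box_out = gr.Textbox(label="Answer")
    gr.Button("Submit").click(fn=slow_echo, inputs=box_in, outputs=box_out)

tiny_demo.queue().launch()  # in Gradio 3.x, generator outputs require the queue
```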
 