Update app.py
app.py CHANGED
```diff
@@ -143,50 +143,23 @@ def talk(prompt, history):
         tokenizer.convert_tokens_to_ids("<|eot_id|>")  # converts a token string into a single integer id (or a sequence of ids) using the vocabulary
     ]
     # indicates the end of a sequence
-
-
-
-
-    #
+    import pprint
+    stream = model.create_chat_completion(messages=[{"role": "system", "content": SYS_PROMPT}, {"role": "user", "content": formatted_prompt}], max_tokens=1000, stop=["</s>"], stream=True)
+    # print(output['choices'][0]['message']['content'])
+    pprint.pprint(stream)
+    # for output in stream:
     #     text += output["choices"][0]["text"]
     #     yield text
 
-    # model_input = model.create_chat_completion(messages=messages)
-
-    # input_ids = tokenizer.apply_chat_template(
-    #     messages,
-    #     add_generation_prompt=True,
-    #     return_tensors="pt"
-    # )
     # preparing tokens for model input
     # the add_generation_prompt argument tells the template to add tokens that indicate the start of a bot response
-    # print(input_ids)
-    # print("check7")
-    # print(input_ids.dtype)
 
     # calling the model to generate a response based on the message/input
     # do_sample, if set to True, uses strategies to select the next token from the probability distribution over the entire vocabulary
     # temperature controls randomness: more randomness with higher temperature
     # only the tokens comprising the top_p probability mass are considered for responses
     # the output is a data structure containing all the information returned by generate(), but it can also be used as a tuple or dictionary
-
-
-    # print("check10")
-    # t = Thread(target=model.generate, kwargs=generate_kwargs)
-    # to process multiple instances
-    # t.start()
-    # print("check11")
-    # start a thread
-    # outputs = []
-    # outputs = model_input
-    # return outputs
-    # print(model.tokenize(messages))
-    # tokens = model.tokenize(messages)
-    # for token in model.generate(tokens):
-    #     print(model.detokenize([token]))
-    # input_ids = tokenizer(*messages)
-
-    # print(model.generate(tensor([[1, 529, 29989, 5205, 29989]])))
+
     # start = time.time()
     # NUM_TOKENS = 0
     # print('-'*4 + 'Start Generation' + '-'*4)
@@ -200,10 +173,7 @@ def talk(prompt, history):
     # print(f'Time for complete generation: {time_generate}s')
     # print(f'Tokens per second: {NUM_TOKENS/time_generate}')
     # print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
-
-
-
-
+
 
 TITLE = "AI Copilot for Diabetes Patients"
 
```
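The live change builds the stream but then only `pprint`s the generator object, so no tokens are actually consumed; the commented `for output in stream:` loop is the missing piece. Note also that the commented loop reads `output["choices"][0]["text"]`, which is the plain-completion field, while llama-cpp-python's streamed chat chunks carry a `delta` dict. A minimal sketch of consuming the stream under that assumption, reusing `model`, `SYS_PROMPT`, and `formatted_prompt` from the surrounding file:

```python
# Sketch only: assumes llama-cpp-python's OpenAI-style streaming chat format,
# where each chunk looks like {"choices": [{"delta": {"content": "..."}}]}.
def talk(prompt, history):
    stream = model.create_chat_completion(
        messages=[
            {"role": "system", "content": SYS_PROMPT},
            {"role": "user", "content": formatted_prompt},
        ],
        max_tokens=1000,
        stop=["</s>"],
        stream=True,  # returns a generator of incremental chunks
    )
    text = ""
    for output in stream:
        delta = output["choices"][0]["delta"]
        if "content" in delta:  # the first and last chunks may carry no text
            text += delta["content"]
            yield text  # Gradio re-renders the growing reply on each yield
```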
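The commented-out Hugging Face path in the first hunk (`apply_chat_template`, `add_generation_prompt`, and the `do_sample`/`temperature`/`top_p` notes) fits together roughly as below. This is a sketch, not the app's code: it assumes `model` and `tokenizer` are a Transformers causal-LM pair, the sampling values are illustrative, and `terminators` mirrors the `<|eot_id|>` list built just above the hunk:

```python
# Sketch of the commented-out Transformers path (assumes a HF causal LM,
# not the llama-cpp model used by the live code above).
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the tokens that open the bot's turn
    return_tensors="pt",
).to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),  # Llama-3-style end-of-turn id
]

outputs = model.generate(
    input_ids,
    max_new_tokens=512,        # illustrative budget
    eos_token_id=terminators,  # stop on end-of-sequence or end-of-turn
    do_sample=True,            # sample from the distribution instead of greedy argmax
    temperature=0.6,           # higher temperature -> more randomness
    top_p=0.9,                 # keep only the tokens in the top_p probability mass
)
print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True))
```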
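The deleted `t = Thread(target=model.generate, kwargs=generate_kwargs)` / `t.start()` lines point at the standard Transformers streaming recipe. A hedged sketch of that pattern; `TextIteratorStreamer` and the `stream_reply` helper are assumptions, not code from this diff:

```python
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(input_ids):
    # Hypothetical helper: generate() blocks, so it runs on a background
    # thread while the streamer yields decoded text pieces as they arrive.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(input_ids=input_ids, streamer=streamer, max_new_tokens=512)
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    text = ""
    for new_text in streamer:
        text += new_text
        yield text  # stream partial output to the UI
```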
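The second hunk only trims blank lines around the commented-out throughput report. Reconstructed, that measurement would look roughly like this; the token loop follows the deleted low-level `model.tokenize`/`model.generate` comments (llama-cpp-python style), and the prompt bytes and 64-token cap are illustrative:

```python
import time

start = time.time()
NUM_TOKENS = 0
tokens = model.tokenize(b"Hello, how are you?")  # llama-cpp-python tokenizes bytes
for token in model.generate(tokens):
    NUM_TOKENS += 1
    if NUM_TOKENS >= 64:  # illustrative cap so the loop terminates
        break
time_generate = time.time() - start
print(f'Time for complete generation: {time_generate}s')
print(f'Tokens per second: {NUM_TOKENS/time_generate}')
print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
```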