Namitg02 committed
Commit 7a38f19 · verified · 1 Parent(s): b84e0a2

Update app.py

Files changed (1): app.py +7 -37
app.py CHANGED
@@ -143,50 +143,23 @@ def talk(prompt, history):
         tokenizer.convert_tokens_to_ids("<|eot_id|>") # converts a token string / sequence of tokens into integer ids using the vocabulary
     ]
     # indicates the end of a sequence
-
-    output = model.create_chat_completion(messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}], max_tokens=1000, stop=["</s>"], stream=False)
-    print(output['choices'][0]['message']['content'])
-
-    # for output in stream:
+    import pprint
+    stream = model.create_chat_completion(messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}], max_tokens=1000, stop=["</s>"], stream=True)
+    # print(output['choices'][0]['message']['content'])
+    pprint.pprint(stream)
+    # for output in stream:
     #     text += output["choices"][0]["text"]
     #     yield text
 
-    # model_input = model.create_chat_completion(messages = messages)
-
-    # input_ids = tokenizer.apply_chat_template(
-    #     messages,
-    #     add_generation_prompt=True,
-    #     return_tensors="pt"
-    # )
     # preparing tokens for model input
     # the add_generation_prompt argument tells the template to add tokens that indicate the start of a bot response
-    # print(input_ids)
-    # print("check7")
-    # print(input_ids.dtype)
 
     # calling the model to generate a response based on the message / input
     # do_sample, if set to True, uses strategies to select the next token from the probability distribution over the entire vocabulary
     # temperature controls randomness: more randomness with a higher temperature
     # only the tokens comprising the top_p probability mass are considered for responses
     # this output is a data structure containing all the information returned by generate(), but it can also be used as a tuple or dictionary
-    #
-
-    # print("check10")
-    # t = Thread(target=model.generate, kwargs=generate_kwargs)
-    # to process multiple instances
-    # t.start()
-    # print("check11")
-    # start a thread
-    # outputs = []
-    # outputs = model_input
-    # return outputs
-    # print(model.tokenize(messages))
-    # tokens = model.tokenize(messages)
-    # for token in model.generate(tokens):
-    #     print(model.detokenize([token]))
-    # input_ids = tokenizer(*messages)
-
-    # print(model.generate(tensor([[ 1, 529, 29989, 5205, 29989]])))
+
     # start = time.time()
     # NUM_TOKENS=0
     # print('-'*4+'Start Generation'+'-'*4)
@@ -200,10 +173,7 @@ def talk(prompt, history):
     # print(f'Time for complete generation: {time_generate}s')
     # print(f'Tokens per second: {NUM_TOKENS/time_generate}')
     # print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
-
-
-
-
+
 
 TITLE = "AI Copilot for Diabetes Patients"
 
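Note that with stream=True, create_chat_completion returns a generator of chunks rather than a finished completion, so pprint.pprint(stream) only prints the generator object itself. A minimal sketch of how the chunks could be consumed and yielded back for display, assuming the chunk shape llama-cpp-python uses for streamed chat completions (each chunk's choices[0]["delta"] dict may carry a "content" piece); the stream_reply helper name is hypothetical and not part of the commit:

def stream_reply(stream):
    # Sketch under assumptions, not the commit's code. Assumed chunk shape
    # (llama-cpp-python streamed chat completion):
    #   {"choices": [{"delta": {"content": "..."}}]}
    # The first chunk may carry only the assistant "role" and no "content".
    text = ""
    for chunk in stream:
        piece = chunk["choices"][0].get("delta", {}).get("content")
        if piece:
            text += piece
            yield text  # yield the growing reply so the UI can re-render it

This mirrors the commented-out "for output in stream:" loop left in the hunk, except that streamed chat completions put text under delta["content"] rather than the plain-completion output["choices"][0]["text"].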