Namitg02 committed
Commit 401b243 · verified · 1 Parent(s): e2c10c0

Update app.py

Files changed (1)
  1. app.py +7 -35
app.py CHANGED
@@ -133,28 +133,21 @@ def talk(prompt, history):
 
     # formatted_prompt_with_history = formatted_prompt_with_history[:600] # to avoid memory issue
     # print(formatted_prompt_with_history)
-    messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
+    # messages = [{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}]
     # binding the system context and the new prompt for the LLM
     # the chat template structure should be based on the text generation model's format
     print("check6")
-    print(messages)
-    print("check7")
-    streamer = TextIteratorStreamer(
-        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
-    )
-    # stores print-ready text in a queue, to be used by a downstream application as an iterator; removes special tokens from the generated text
-    # timeout for the text queue; tokenizer for decoding tokens
-    # called by generate_kwargs
 
     terminators = [
         tokenizer.eos_token_id, # End-of-Sequence token that indicates where the model should consider the text sequence complete
         tokenizer.convert_tokens_to_ids("<|eot_id|>") # Converts a token string into a single integer id (or a sequence of ids) using the vocabulary
     ]
     # indicates the end of a sequence
-    text = ""
-    stream = model.create_chat_completion(messages, max_tokens=1000, stop=["</s>"], stream=False)
-    return stream
-    # for output in stream:
+
+    output = model.create_chat_completion(messages=[{"role":"system","content":SYS_PROMPT},{"role":"user","content":formatted_prompt}], max_tokens=1000, stop=["</s>"], stream=True)
+    print(output['choices'][0]['message']['content'])
+
+    # for output in stream:
     # text += output["choices"][0]["text"]
     # yield text
 
@@ -171,16 +164,6 @@ def talk(prompt, history):
     # print("check7")
     # print(input_ids.dtype)
 
-    # generate_kwargs = dict(
-    #     tokens= input_ids) #,
-    #     streamer=streamer,
-    #     do_sample=True,
-    #     eos_token_id=terminators,
-    # )
-
-    # outputs = model.generate(
-    # )
-    # print(outputs)
     # calling the model to generate a response based on the message/input
     # do_sample, if set to True, uses strategies to select the next token from the probability distribution over the entire vocabulary
     # temperature controls randomness; more randomness with higher temperature
@@ -202,6 +185,7 @@ def talk(prompt, history):
     # for token in model.generate(tokens):
     # print(model.detokenize([token]))
     # input_ids = tokenizer(*messages)
+
     # print(model.generate(tensor([[ 1, 529, 29989, 5205, 29989]])))
     # start = time.time()
     # NUM_TOKENS=0
@@ -218,19 +202,7 @@ def talk(prompt, history):
     # print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
 
 
-
-    pd.options.display.max_colwidth = 800
-    print("check13")
-    # outputstring = ''.join(outputs)
 
-    # global historylog
-    # historynew = {
-    #     "Prompt": prompt,
-    #     "Output": outputstring
-    # }
-    # historylog.append(historynew)
-    # return historylog
-    # print(historylog)
 
 
 TITLE = "AI Copilot for Diabetes Patients"
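The new create_chat_completion call sets stream=True, and the commented-out loop beneath it hints at how the streamed output is meant to be consumed. For orientation, here is a minimal sketch of that consumption pattern, assuming the app's model is a llama-cpp-python Llama instance (as create_chat_completion suggests); the helper name stream_reply and its parameters are illustrative and not part of the repository. With stream=True the call returns an iterator of chunks whose text arrives under choices[0]["delta"], whereas choices[0]["message"]["content"] is the shape of a single non-streamed response.

# A minimal sketch, not the app's code: consuming a llama-cpp-python streaming
# chat completion and yielding the growing reply (e.g. from a Gradio generator).
from llama_cpp import Llama  # assumes the app's `model` is a llama_cpp.Llama instance

def stream_reply(model: Llama, sys_prompt: str, user_prompt: str):
    stream = model.create_chat_completion(
        messages=[
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=1000,
        stop=["</s>"],
        stream=True,  # returns an iterator of chunks instead of a single dict
    )
    text = ""
    for chunk in stream:
        # streamed chunks carry text under "delta"; only a non-streamed response
        # uses choices[0]["message"]["content"]
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            text += delta["content"]
            yield text  # a chat UI such as Gradio re-renders the growing string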
 
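The context comments in the diff describe do_sample and temperature in terms of the Transformers generate API, while the terminators list (the EOS token plus <|eot_id|>) is not passed to the new create_chat_completion path. As a hedged sketch only, the same ideas are commonly expressed on a llama-cpp-python chat completion through its temperature and top_p keyword arguments and string stop sequences; the parameter values below are illustrative and not taken from this repository, and model, SYS_PROMPT, and formatted_prompt refer to names already defined in app.py.

# Illustrative mapping only: sampling randomness and termination expressed as
# llama-cpp-python create_chat_completion keyword arguments.
# Assumes model, SYS_PROMPT, and formatted_prompt are the objects defined in app.py.
response = model.create_chat_completion(
    messages=[
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": formatted_prompt},
    ],
    max_tokens=1000,
    temperature=0.6,              # higher temperature -> more randomness when sampling
    top_p=0.9,                    # nucleus sampling over the token probability distribution
    stop=["</s>", "<|eot_id|>"],  # string stop sequences; generation ends at either marker
)
print(response["choices"][0]["message"]["content"])  # non-streamed response shape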