Update app.py
app.py CHANGED
```diff
@@ -143,50 +143,23 @@ def talk(prompt, history):
         tokenizer.convert_tokens_to_ids("<|eot_id|>")  # converts a token string into a single integer id (or a sequence of ids) using the vocabulary
     ]
     # indicates the end of a sequence
-
-
-
-
-    #
+    import pprint
+    stream = model.create_chat_completion(messages=[{"role": "system", "content": SYS_PROMPT}, {"role": "user", "content": formatted_prompt}], max_tokens=1000, stop=["</s>"], stream=True)
+    # print(output['choices'][0]['message']['content'])
+    pprint.pprint(stream)
+    # for output in stream:
     #     text += output["choices"][0]["text"]
     #     yield text
 
-    # model_input = model.create_chat_completion(messages=messages)
-
-    # input_ids = tokenizer.apply_chat_template(
-    #     messages,
-    #     add_generation_prompt=True,
-    #     return_tensors="pt"
-    # )
     # preparing tokens for model input
     # the add_generation_prompt argument tells the template to add tokens that indicate the start of a bot response
-    # print(input_ids)
-    # print("check7")
-    # print(input_ids.dtype)
 
     # calling the model to generate a response based on the message/input
     # do_sample, if set to True, uses strategies to select the next token from the probability distribution over the entire vocabulary
     # temperature controls randomness: more randomness with higher temperature
     # only the tokens comprising the top_p probability mass are considered for responses
     # the output is a data structure containing all the information returned by generate(), but it can also be used as a tuple or dictionary
-
-
-    # print("check10")
-    # t = Thread(target=model.generate, kwargs=generate_kwargs)
-    # to process multiple instances
-    # t.start()
-    # print("check11")
-    # start a thread
-    # outputs = []
-    # outputs = model_input
-    # return outputs
-    # print(model.tokenize(messages))
-    # tokens = model.tokenize(messages)
-    # for token in model.generate(tokens):
-    #     print(model.detokenize([token]))
-    # input_ids = tokenizer(*messages)
-
-    # print(model.generate(tensor([[1, 529, 29989, 5205, 29989]])))
+
     # start = time.time()
     # NUM_TOKENS = 0
     # print('-'*4 + 'Start Generation' + '-'*4)
@@ -200,10 +173,7 @@ def talk(prompt, history):
     # print(f'Time for complete generation: {time_generate}s')
     # print(f'Tokens per second: {NUM_TOKENS/time_generate}')
     # print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
-
-
-
-
+
 
 TITLE = "AI Copilot for Diabetes Patients"
 
```
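The live change builds the stream but then only `pprint`s the generator object, so no tokens are actually consumed; the commented `for output in stream:` loop is the missing piece. Note also that the commented loop reads `output["choices"][0]["text"]`, which is the plain-completion field, while llama-cpp-python's streamed chat chunks carry a `delta` dict. A minimal sketch of consuming the stream under that assumption, reusing `model`, `SYS_PROMPT`, and `formatted_prompt` from the surrounding file:

```python
# Sketch only: assumes llama-cpp-python's OpenAI-style streaming chat format,
# where each chunk looks like {"choices": [{"delta": {"content": "..."}}]}.
def talk(prompt, history):
    stream = model.create_chat_completion(
        messages=[
            {"role": "system", "content": SYS_PROMPT},
            {"role": "user", "content": formatted_prompt},
        ],
        max_tokens=1000,
        stop=["</s>"],
        stream=True,  # returns a generator of incremental chunks
    )
    text = ""
    for output in stream:
        delta = output["choices"][0]["delta"]
        if "content" in delta:  # the first and last chunks may carry no text
            text += delta["content"]
            yield text  # Gradio re-renders the growing reply on each yield
```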
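The commented-out Hugging Face path in the first hunk (`apply_chat_template`, `add_generation_prompt`, and the `do_sample`/`temperature`/`top_p` notes) fits together roughly as below. This is a sketch, not the app's code: it assumes `model` and `tokenizer` are a Transformers causal-LM pair, the sampling values are illustrative, and `terminators` mirrors the `<|eot_id|>` list built just above the hunk:

```python
# Sketch of the commented-out Transformers path (assumes a HF causal LM,
# not the llama-cpp model used by the live code above).
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # append the tokens that open the bot's turn
    return_tensors="pt",
).to(model.device)

terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),  # Llama-3-style end-of-turn id
]

outputs = model.generate(
    input_ids,
    max_new_tokens=512,        # illustrative budget
    eos_token_id=terminators,  # stop on end-of-sequence or end-of-turn
    do_sample=True,            # sample from the distribution instead of greedy argmax
    temperature=0.6,           # higher temperature -> more randomness
    top_p=0.9,                 # keep only the tokens in the top_p probability mass
)
print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True))
```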
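The deleted `t = Thread(target=model.generate, kwargs=generate_kwargs)` / `t.start()` lines point at the standard Transformers streaming recipe. A hedged sketch of that pattern; `TextIteratorStreamer` and the `stream_reply` helper are assumptions, not code from this diff:

```python
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(input_ids):
    # Hypothetical helper: generate() blocks, so it runs on a background
    # thread while the streamer yields decoded text pieces as they arrive.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(input_ids=input_ids, streamer=streamer, max_new_tokens=512)
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    text = ""
    for new_text in streamer:
        text += new_text
        yield text  # stream partial output to the UI
```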
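The second hunk only trims blank lines around the commented-out throughput report. Reconstructed, that measurement would look roughly like this; the token loop follows the deleted low-level `model.tokenize`/`model.generate` comments (llama-cpp-python style), and the prompt bytes and 64-token cap are illustrative:

```python
import time

start = time.time()
NUM_TOKENS = 0
tokens = model.tokenize(b"Hello, how are you?")  # llama-cpp-python tokenizes bytes
for token in model.generate(tokens):
    NUM_TOKENS += 1
    if NUM_TOKENS >= 64:  # illustrative cap so the loop terminates
        break
time_generate = time.time() - start
print(f'Time for complete generation: {time_generate}s')
print(f'Tokens per second: {NUM_TOKENS/time_generate}')
print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
```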