Robin Genolet committed
Commit 9b52308 · 1 Parent(s): 2bfab33

test: generation

Files changed (1):
utils/epfl_meditron_utils.py  +17 -15
utils/epfl_meditron_utils.py CHANGED

@@ -1,8 +1,7 @@
-
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 
 def get_llm_response(repo, filename, model_type, gpu_layers, system_message, prompt):
-    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
     model_name_or_path = "TheBloke/meditron-7B-GPTQ"
     # To use a different branch, change revision
@@ -12,15 +11,23 @@ def get_llm_response(repo, filename, model_type, gpu_layers, system_message, prompt):
         trust_remote_code=False,
         revision="main")
 
-    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
-
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+
+    prompt_template=f'''<|im_start|>system
+{system_message}<|im_end|>
+<|im_start|>user
+{prompt}<|im_end|>
+<|im_start|>assistant
+'''
+
+    print("Template:")
+    print(prompt_template)
+
     print("\n\n*** Generate:")
 
-    #input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
-    #output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
-    #print(tokenizer.decode(output[0]))
-
-    # Inference can also be done using transformers' pipeline
+    input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
+    output = model.generate(inputs=input_ids, temperature=0.01, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
+    print(tokenizer.decode(output[0]))
 
     print("*** Pipeline:")
     pipe = pipeline(
@@ -35,12 +42,7 @@ def get_llm_response(repo, filename, model_type, gpu_layers, system_message, prompt):
         repetition_penalty=1.1
     )
 
-    prompt_template=f'''<|im_start|>system
-{system_message}<|im_end|>
-<|im_start|>user
-{prompt}<|im_end|>
-<|im_start|>assistant
-'''
+
 
     response = pipe(prompt_template)[0]['generated_text']
    print(response)
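
For reference, below is a rough sketch of how get_llm_response in utils/epfl_meditron_utils.py likely reads once this commit is applied, with the three hunks stitched back together. The lines the diff does not show (the model-loading call and most of the pipeline arguments) are filled in here from the usual TheBloke/meditron-7B-GPTQ example code and are assumptions rather than part of this change; likewise, the repo, filename, model_type and gpu_layers parameters are not used anywhere in the code the diff shows.

# Sketch of the post-commit function; lines marked "assumed" are not visible in the diff.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


def get_llm_response(repo, filename, model_type, gpu_layers, system_message, prompt):

    model_name_or_path = "TheBloke/meditron-7B-GPTQ"
    # To use a different branch, change revision
    model = AutoModelForCausalLM.from_pretrained(  # assumed: this call is elided by the diff
        model_name_or_path,
        device_map="auto",
        trust_remote_code=False,
        revision="main")

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

    # ChatML-style prompt used by the meditron chat models
    prompt_template = f'''<|im_start|>system
{system_message}<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
'''

    print("Template:")
    print(prompt_template)

    print("\n\n*** Generate:")

    # New in this commit: direct generate() call with near-greedy sampling
    input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
    output = model.generate(inputs=input_ids, temperature=0.01, do_sample=True,
                            top_p=0.95, top_k=40, max_new_tokens=512)
    print(tokenizer.decode(output[0]))

    print("*** Pipeline:")
    pipe = pipeline(  # assumed: all arguments except repetition_penalty are elided by the diff
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        top_k=40,
        repetition_penalty=1.1
    )

    response = pipe(prompt_template)[0]['generated_text']
    print(response)

With temperature=0.01 and do_sample=True the new generate() call is effectively near-greedy, which is presumably the point of a "test: generation" commit; the pipeline call further down keeps its own (unchanged) sampling settings.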