michaelfeil committed
Commit 0e24490 · verified · 1 Parent(s): 6b53f65

Update README.md

Files changed (1): README.md (+41, -2)
README.md CHANGED
@@ -3,7 +3,7 @@
 ---
 
 Deployment:
-```
+```yaml
 build_commands: []
 external_package_dirs: []
 model_metadata: {}
@@ -31,10 +31,50 @@ trt_llm:
 source: HF
 repo: "baseten/example-Meta-Llama-3-70B-InstructForSequenceClassification"
 revision: "main" # hf revision hash
+# `fp8` or `no_quant` (= fp16) are allowed.
 quantization_type: fp8
 num_builder_gpus: 4
 ```
 
+Usage:
+```python
+import requests
+import os
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("Skywork/Skywork-Reward-Llama-3.1-8B-v0.2")
+
+prompt = "Jane has 12 apples. She gives 4 apples to her friend Mark, then buys 1 more apple, and finally splits all her apples equally among herself and her 2 siblings. How many apples does each person get?"
+# Positive example: high raw score around 13; sigmoid(13) ≈ 0.999998.
+response1 = "1. Jane starts with 12 apples and gives 4 to Mark. 12 - 4 = 8. Jane now has 8 apples.\n2. Jane buys 1 more apple. 8 + 1 = 9. Jane now has 9 apples.\n3. Jane splits the 9 apples equally among herself and her 2 siblings (3 people in total). 9 ÷ 3 = 3 apples each. Each person gets 3 apples."
+# Negative example: low raw score around -9; sigmoid(-9) ≈ 0.0001.
+response2 = "1. Jane starts with 12 apples and gives 4 to Mark. 12 - 4 = 8. Jane now has 8 apples.\n2. Jane buys 1 more apple. 8 + 1 = 9. Jane now has 9 apples.\n3. Jane splits the 9 apples equally among her 2 siblings (2 people in total). 9 ÷ 2 = 4.5 apples each. Each person gets 4 apples."
+
+# Predict API payload: {
+#   "inputs": "What is Deep Learning?",  # str, may be formatted with a chat template
+#   "raw_scores": false,                 # with or without sigmoid activation
+#   "truncate": false,
+#   "truncation_direction": "right"
+# }
+
+for assistant_response in [response1, response2]:
+    # Feel free to parallelize this; requests will be batched in the backend.
+
+    conv = [{"role": "user", "content": prompt}, {"role": "assistant", "content": assistant_response}]
+    conv_formatted = tokenizer.apply_chat_template(conv, tokenize=False)
+    input_json = dict(inputs=conv_formatted, raw_scores=True)
+    resp = requests.post(
+        "https://model-xxxxxx.api.baseten.co/environments/production/sync/predict",
+        headers={"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"},
+        json=input_json,
+    )
+
+    print(resp.json())
+# prints
+# [{'score': 13.714337, 'label': 'LABEL_0'}]
+# [{'score': -9.353895, 'label': 'LABEL_0'}]
+```
+
 Reproduce this model:
 ```python
 #!/usr/bin/env python
@@ -101,5 +141,4 @@ def main():
 
 if __name__ == "__main__":
     main()
-
 ```
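The usage example added above requests `raw_scores=True`, so the endpoint returns pre-sigmoid logits rather than probabilities. A minimal sketch of mapping those raw scores back to probabilities; this is plain math applied to the two scores printed in the example, not Baseten-specific API behavior:

```python
import math

def sigmoid(x: float) -> float:
    # Map a raw reward-model logit to a probability in (0, 1).
    return 1.0 / (1.0 + math.exp(-x))

# The two raw scores printed by the usage example above.
print(sigmoid(13.714337))  # ≈ 0.999999 -> the correct answer scores near 1
print(sigmoid(-9.353895))  # ≈ 0.0000866 -> the flawed answer scores near 0
```

With `"raw_scores": false`, as in the payload comment above, the sigmoid activation is applied server-side instead.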
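The loop in the usage example scores one conversation per request, and its comment notes that parallel requests are batched in the backend. A hedged sketch of client-side fan-out with a thread pool, assuming the same endpoint and payload shape as above; the `score_one` helper, worker count, and placeholder conversations are illustrative, not part of the model's API:

```python
import os
from concurrent.futures import ThreadPoolExecutor

import requests

# Endpoint and auth exactly as in the usage example above.
API_URL = "https://model-xxxxxx.api.baseten.co/environments/production/sync/predict"
HEADERS = {"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"}

def score_one(conv_formatted: str) -> float:
    # Same payload shape as the usage example.
    resp = requests.post(API_URL, headers=HEADERS,
                         json={"inputs": conv_formatted, "raw_scores": True})
    resp.raise_for_status()
    return resp.json()[0]["score"]

# Conversations already formatted with tokenizer.apply_chat_template(...).
conversations = ["<formatted conversation 1>", "<formatted conversation 2>"]
# Issue requests concurrently so the backend can batch them.
with ThreadPoolExecutor(max_workers=8) as pool:
    scores = list(pool.map(score_one, conversations))
print(scores)
```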