Update README.md
README.md CHANGED
@@ -3,7 +3,7 @@
 ---

 Deployment:
-```
+```yaml
 build_commands: []
 external_package_dirs: []
 model_metadata: {}

@@ -31,10 +31,50 @@ trt_llm:
 source: HF
 repo: "baseten/example-Meta-Llama-3-70B-InstructForSequenceClassification"
 revision: "main" # hf revision hash
+# `fp8` or `no_quant` (=fp16) are allowed.
 quantization_type: fp8
 num_builder_gpus: 4
 ```

+Usage:
+```python
+import requests
+import os
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("Skywork/Skywork-Reward-Llama-3.1-8B-v0.2")
+
+prompt = "Jane has 12 apples. She gives 4 apples to her friend Mark, then buys 1 more apple, and finally splits all her apples equally among herself and her 2 siblings. How many apples does each person get?"
+# Positive example: high sigmoid score (~0.999 or above); raw logit around +13.
+response1 = "1. Jane starts with 12 apples and gives 4 to Mark. 12 - 4 = 8. Jane now has 8 apples.\n2. Jane buys 1 more apple. 8 + 1 = 9. Jane now has 9 apples.\n3. Jane splits the 9 apples equally among herself and her 2 siblings (3 people in total). 9 ÷ 3 = 3 apples each. Each person gets 3 apples."
+# Negative example: low sigmoid score (~0.001 or below); raw logit around -9.
+response2 = "1. Jane starts with 12 apples and gives 4 to Mark. 12 - 4 = 8. Jane now has 8 apples.\n2. Jane buys 1 more apple. 8 + 1 = 9. Jane now has 9 apples.\n3. Jane splits the 9 apples equally among her 2 siblings (2 people in total). 9 ÷ 2 = 4.5 apples each. Each person gets 4 apples."
+
+# Predict API: {
+#     "inputs": "What is Deep Learning?",  # str, may be formatted with chat template.
+#     "raw_scores": false,                 # with or without sigmoid activation
+#     "truncate": false,
+#     "truncation_direction": "right"
+# }
+
+for assistant_response in [response1, response2]:
+    # Feel free to parallelize this; requests will be batched in the backend.
+
+    conv = [{"role": "user", "content": prompt}, {"role": "assistant", "content": assistant_response}]
+    conv_formatted = tokenizer.apply_chat_template(conv, tokenize=False)
+    input_json = dict(inputs=conv_formatted, raw_scores=True)
+    resp = requests.post(
+        "https://model-xxxxxx.api.baseten.co/environments/production/sync/predict",
+        headers={"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"},
+        json=input_json,
+    )
+
+    print(resp.json())
+    # prints
+    # [{'score': 13.714337, 'label': 'LABEL_0'}]
+    # [{'score': -9.353895, 'label': 'LABEL_0'}]
+```
+
 Reproduce this model:
 ```python
 #!/usr/bin/env python

@@ -101,5 +141,4 @@ def main():

 if __name__ == "__main__":
     main()
-
 ```
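
A note on the scores in the usage example above: with `raw_scores=True` the endpoint returns logits, and applying the sigmoid recovers the probability-style score the comments refer to. A minimal worked check in Python, using the two raw values printed in the example:

```python
import math

def sigmoid(x: float) -> float:
    # Maps a raw logit to the (0, 1) score returned when "raw_scores" is false.
    return 1.0 / (1.0 + math.exp(-x))

print(sigmoid(13.714337))  # ~0.999999 -> the correct response1 scores near 1.0
print(sigmoid(-9.353895))  # ~0.0000866 -> the flawed response2 scores near 0.0
```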
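
The loop comment notes that requests can be parallelized because they are batched in the backend. Below is a minimal client-side sketch of that, assuming the same endpoint placeholder and API key as the example; `score_all` and the worker count are illustrative choices, not part of the README:

```python
import os
from concurrent.futures import ThreadPoolExecutor

import requests

URL = "https://model-xxxxxx.api.baseten.co/environments/production/sync/predict"
HEADERS = {"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"}

def score(conv_formatted: str) -> dict:
    # One scoring request; concurrent requests are batched server-side.
    resp = requests.post(URL, headers=HEADERS, json=dict(inputs=conv_formatted, raw_scores=True))
    resp.raise_for_status()
    return resp.json()

def score_all(conversations: list[str], max_workers: int = 8) -> list[dict]:
    # `conversations` holds strings produced by tokenizer.apply_chat_template(...).
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        return list(pool.map(score, conversations))
```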
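
Finally, the commented predict-API schema can be exercised directly. A sketch of a request with `raw_scores` left false, so the sigmoid is applied server-side and scores arrive in (0, 1); the payload simply mirrors the schema comment from the usage example:

```python
import os

import requests

resp = requests.post(
    "https://model-xxxxxx.api.baseten.co/environments/production/sync/predict",
    headers={"Authorization": f"Api-Key {os.environ['BASETEN_API_KEY']}"},
    json={
        "inputs": "What is Deep Learning?",  # str; may be formatted with a chat template
        "raw_scores": False,  # sigmoid applied server-side
        "truncate": False,
        "truncation_direction": "right",
    },
)
print(resp.json())
```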