Update README.md
README.md CHANGED
````diff
@@ -42,7 +42,7 @@ generate_text = pipeline(
 )
 
 res = generate_text(
-    "
+    "日本で一番高い山は富士山ですが、二番目に高い山は?",
     min_new_tokens=2,
     max_new_tokens=256,
     do_sample=False,
````
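The hunk header shows that `res = generate_text(...)` operates on a pipeline constructed a few lines above the changed region. For orientation, a minimal sketch of that construction, assuming the usual `transformers.pipeline` arguments for H2O LLM Studio model cards; the `torch_dtype` and `device_map` values are illustrative assumptions, not part of this diff:

```python
import torch
from transformers import pipeline

# Sketch only: trust_remote_code=True lets transformers load the repo's
# custom H2OTextGenerationPipeline; dtype and device placement are assumed.
generate_text = pipeline(
    model="yukismd/JapaneseQuizChatbot_v1",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map={"": "cuda:0"},
)
```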
````diff
@@ -57,11 +57,11 @@ print(res[0]["generated_text"])
 
 You can print a sample prompt after the preprocessing step to see how it is fed to the tokenizer:
 
 ```python
-print(generate_text.preprocess("
+print(generate_text.preprocess("日本で一番高い山は富士山ですが、二番目に高い山は?")["prompt_text"])
 ```
 
 ```bash
-<|prompt
+<|prompt|>日本で一番高い山は富士山ですが、二番目に高い山は?<|endoftext|><|answer|>
 ```
 
 Alternatively, if you prefer not to use `trust_remote_code=True`, you can download [h2oai_pipeline.py](h2oai_pipeline.py), store it alongside your notebook, and construct the pipeline yourself from the loaded model and tokenizer:
````
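The `bash` block above shows the template the added prompt follows: the question wrapped between `<|prompt|>` and `<|endoftext|><|answer|>`. If the string is ever built by hand instead of via `preprocess`, plain formatting reproduces it. A small sketch; `build_prompt` is a hypothetical helper, only the special tokens come from the diff:

```python
def build_prompt(question: str) -> str:
    # Template shown in the diff: <|prompt|>question<|endoftext|><|answer|>
    return f"<|prompt|>{question}<|endoftext|><|answer|>"

# The question means: "The highest mountain in Japan is Mt. Fuji,
# but what is the second highest?"
print(build_prompt("日本で一番高い山は富士山ですが、二番目に高い山は?"))
```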
````diff
@@ -85,7 +85,7 @@ model = AutoModelForCausalLM.from_pretrained(
 generate_text = H2OTextGenerationPipeline(model=model, tokenizer=tokenizer)
 
 res = generate_text(
-    "
+    "日本で一番高い山は富士山ですが、二番目に高い山は?",
     min_new_tokens=2,
     max_new_tokens=256,
     do_sample=False,
````
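For context, the hunk header shows this call sits right after the model load in the `h2oai_pipeline.py` route described above. A sketch of that surrounding setup, assuming h2oai_pipeline.py has been downloaded next to the notebook as the README instructs; the dtype and device arguments are assumptions, not part of this diff:

```python
import torch
from h2oai_pipeline import H2OTextGenerationPipeline
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "yukismd/JapaneseQuizChatbot_v1"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # assumed, not in this diff
    device_map={"": "cuda:0"},  # assumed, not in this diff
)
generate_text = H2OTextGenerationPipeline(model=model, tokenizer=tokenizer)
```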
````diff
@@ -106,7 +106,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 model_name = "yukismd/JapaneseQuizChatbot_v1"  # either local folder or huggingface model name
 # Important: The prompt needs to be in the same format the model was trained with.
 # You can find an example prompt in the experiment logs.
-prompt = "<|prompt
+prompt = "<|prompt|>日本で一番高い山は富士山ですが、二番目に高い山は?<|endoftext|><|answer|>"
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
 model = AutoModelForCausalLM.from_pretrained(model_name)
````
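The changed region ends at the model load; the generation call itself lies outside the diff. To close the loop, a hedged sketch of the remaining steps using only standard `transformers` APIs, with generation arguments mirroring the `min_new_tokens` / `max_new_tokens` / `do_sample` values from the pipeline examples above:

```python
# Tokenize the formatted prompt and generate greedily, mirroring the
# settings shown earlier in this diff.
inputs = tokenizer(prompt, return_tensors="pt")
tokens = model.generate(
    **inputs,
    min_new_tokens=2,
    max_new_tokens=256,
    do_sample=False,
)[0]
answer = tokenizer.decode(tokens, skip_special_tokens=True)
print(answer)
```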