lokinfey commited on
Commit
0e8d45e
·
verified ·
1 Parent(s): 3627032

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +67 -3
README.md CHANGED
@@ -1,3 +1,67 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
4
+
5
+
6
+ ## **Phi-4-14B-ONNX-INT4-GPU**
7
+
8
+
9
+ <b><span style="text-decoration:underline">Note: This is unoffical version,just for test and dev.</span></b>
10
+
11
+ ### **Sample**
12
+
13
+ ```python
14
+
15
# Minimal sample: run Phi-4-14B (ONNX, INT4, GPU) with onnxruntime-genai
# and stream generated tokens to stdout as they are produced.
import onnxruntime_genai as og

# Path to the local Phi-4-14B-ONNX-INT4-GPU model folder.
model_folder = "Your Phi-4-14B-ONNX-INT4-GPU location"

# Load the ONNX model, then build a tokenizer and a streaming detokenizer.
model = og.Model(model_folder)
tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()

# Generation settings: cap the total sequence length and disable the
# shared past/present KV-cache buffer.
search_options = {}
search_options['max_length'] = 2048
search_options['past_present_share_buffer'] = False

# Phi-4 chat format: <|im_start|>{role}<|im_sep|>{content}<|im_end|>.
# (The original sample mixed Phi-3 role tags <|user|>/<|assistant|> into
# ChatML, which Phi-4's tokenizer does not use.)
chat_template = "<|im_start|>user<|im_sep|>{input}<|im_end|><|im_start|>assistant<|im_sep|>"

text = """ Explain 'The goat grazing problem'"""

# Fill the template with the user message and tokenize it.
prompt = f'{chat_template.format(input=text)}'
input_tokens = tokenizer.encode(prompt)

# Configure the generator with the search options and the prompt tokens.
params = og.GeneratorParams(model)
params.set_search_options(**search_options)
params.input_ids = input_tokens

generator = og.Generator(model, params)

# Token-by-token generation loop: compute logits, sample the next token,
# and decode it incrementally so the text streams to the console.
while not generator.is_done():
    generator.compute_logits()
    generator.generate_next_token()

    new_token = generator.get_next_tokens()[0]
    print(tokenizer_stream.decode(new_token), end='', flush=True)
63
+
64
+
65
+
66
+
67
+ ```