Commit
·
9a96e2c
1
Parent(s):
2bd7827
updated README.
Browse files
README.md
CHANGED
@@ -73,7 +73,7 @@ token_indices = torch.load(indices_path)
|
|
73 |
inv_token_indices = torch.load(inv_indices_path)
|
74 |
|
75 |
ids_32k = tokenizer32k.encode('This is an example sentence.')
|
76 |
-
ids_128k = [token_indices[id] for id in ids_32k]
|
77 |
print(f'{ids_32k=}')
|
78 |
print(f'{ids_128k=}')
|
79 |
|
@@ -81,7 +81,7 @@ print(tokenizer128k.decode(ids_128k))
|
|
81 |
|
82 |
|
83 |
ids_128k = tokenizer128k.encode('This is another example sentence.')
|
84 |
-
ids_32k = [inv_token_indices[id] for id in ids_128k]
|
85 |
print(f'{ids_128k=}')
|
86 |
print(f'{ids_32k=}') # tokens that do not exist in the 32k vocab map to -1
|
87 |
|
|
|
73 |
inv_token_indices = torch.load(inv_indices_path)
|
74 |
|
75 |
ids_32k = tokenizer32k.encode('This is an example sentence.')
|
76 |
+
ids_128k = [token_indices[id].item() for id in ids_32k]
|
77 |
print(f'{ids_32k=}')
|
78 |
print(f'{ids_128k=}')
|
79 |
|
|
|
81 |
|
82 |
|
83 |
ids_128k = tokenizer128k.encode('This is another example sentence.')
|
84 |
+
ids_32k = [inv_token_indices[id].item() for id in ids_128k]
|
85 |
print(f'{ids_128k=}')
|
86 |
print(f'{ids_32k=}') # tokens that do not exist in the 32k vocab map to -1
|
87 |
|