k-l-lambda
/

Llama-3.2-1B-vocab32k

Text Generation

text-generation-inference

Model card Files Files and versions Community

k-l-lambda committed on Nov 21, 2024

Commit

9a96e2c

·

1 Parent(s): 2bd7827

updated README.

Files changed (1) hide show

README.md +2 -2

README.md CHANGED Viewed

@@ -73,7 +73,7 @@ token_indices = torch.load(indices_path)
 inv_token_indices = torch.load(inv_indices_path)
 ids_32k = tokenizer32k.encode('This is an example sentence.')
-ids_128k = [token_indices[id] for id in ids_32k]
 print(f'{ids_32k=}')
 print(f'{ids_128k=}')
@@ -81,7 +81,7 @@ print(tokenizer128k.decode(ids_128k))
 ids_128k = tokenizer128k.encode('This is another example sentence.')
-ids_32k = [inv_token_indices[id] for id in ids_128k]
 print(f'{ids_128k=}')
 print(f'{ids_32k=}')	# non-exist tokens in 32k vocab will map to -1

 inv_token_indices = torch.load(inv_indices_path)
 ids_32k = tokenizer32k.encode('This is an example sentence.')
+ids_128k = [token_indices[id].item() for id in ids_32k]
 print(f'{ids_32k=}')
 print(f'{ids_128k=}')
 ids_128k = tokenizer128k.encode('This is another example sentence.')
+ids_32k = [inv_token_indices[id].item() for id in ids_128k]
 print(f'{ids_128k=}')
 print(f'{ids_32k=}')	# non-exist tokens in 32k vocab will map to -1