kevin36524 committed
Commit 73faea1 · verified · 1 Parent(s): 420c2e9

Upload export_smollm.py with huggingface_hub

Files changed (1): export_smollm.py (+355, -0)
export_smollm.py ADDED
import logging
import os
import warnings
from typing import List, Optional, Tuple

import coremltools as ct
import numpy as np
import torch
from transformers.cache_utils import Cache
from transformers.models.llama.modeling_llama import (
    LLAMA_ATTENTION_CLASSES,
    LlamaAttention,
    LlamaConfig,
    LlamaForCausalLM,
    apply_rotary_pos_emb,
    repeat_kv,
)

warnings.filterwarnings("ignore")
logging.getLogger("coremltools").setLevel(logging.ERROR)
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# https://huggingface.co/HuggingFaceTB/SmolLM-360M-Instruct
MODEL_ID: str = "HuggingFaceTB/SmolLM-360M-Instruct"
METADATA_TOKENIZER: str = "co.huggingface.exporters.name"


class SliceUpdateKeyValueCache(Cache):
    def __init__(
        self,
        shape: Tuple[int, ...],
        device="cpu",
        dtype=torch.float32,
    ) -> None:
        """KV cache of shape (#layers, batch_size, #kv_heads, context_size, head_dim)."""
        super().__init__()
        self.past_seen_tokens: int = 0
        self.k_cache: torch.Tensor = torch.zeros(shape, dtype=dtype, device=device)
        self.v_cache: torch.Tensor = torch.zeros(shape, dtype=dtype, device=device)
        self.max_length: int = shape[3]  # context_size dimension

    def update(
        self,
        k_state: torch.Tensor,
        v_state: torch.Tensor,
        layer_idx: int,
        slice_indices: torch.LongTensor,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Update key/value cache tensors for slice [slice_indices[0], slice_indices[1]).
        Return slice of key/value cache tensors from [0, slice_indices[1]).
        """
        if len(slice_indices) != 2:
            raise ValueError(f"Expect tuple of integers [start, end), got {slice_indices=}.")
        begin, end = slice_indices
        self.k_cache[layer_idx, :, : k_state.shape[1], begin:end, :] = k_state
        self.v_cache[layer_idx, :, : v_state.shape[1], begin:end, :] = v_state
        k_cache: torch.Tensor = self.k_cache[layer_idx, :, :, :end, :]
        v_cache: torch.Tensor = self.v_cache[layer_idx, :, :, :end, :]
        return k_cache, v_cache

    def get_seq_length(self, _: int | None = 0) -> int:
        """Get the sequence length of the cache."""
        return self.past_seen_tokens

    def get_max_length(self) -> Optional[int]:
        """Return the maximum sequence length of the cached states, if there is any."""
        return None
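

# Illustrative only (hypothetical helper, not called anywhere in this script): a minimal
# smoke test of SliceUpdateKeyValueCache.update(), showing that a (batch, kv_heads, seq,
# head_dim) block written at [begin, end) comes back as the populated [0, end) prefix.
# The shape numbers are arbitrary and follow the layout in the class docstring above.
def _cache_update_sketch() -> None:
    cache = SliceUpdateKeyValueCache(shape=(1, 1, 2, 8, 4))  # 1 layer, 2 kv heads, ctx 8, head_dim 4
    k = torch.randn(1, 2, 3, 4)  # three new tokens
    v = torch.randn(1, 2, 3, 4)
    k_out, v_out = cache.update(k, v, layer_idx=0, slice_indices=(0, 3))
    assert k_out.shape == (1, 2, 3, 4) and v_out.shape == (1, 2, 3, 4)
    assert torch.equal(k_out, k) and torch.equal(v_out, v)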


class SliceUpdateLlamaAttention(LlamaAttention):
    def __init__(self, config: LlamaConfig, layer_idx: Optional[int] = None):
        super().__init__(config=config, layer_idx=layer_idx)

    @torch.no_grad()
    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: torch.Tensor,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Cache] = None,
        **kwargs,
    ) -> Tuple[torch.Tensor | None, ...]:
        bsz, q_len, _ = hidden_states.size()

        # Project hidden states to query/key/value
        query_states = self.q_proj(hidden_states)
        key_states = self.k_proj(hidden_states)
        value_states = self.v_proj(hidden_states)

        query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
        key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(
            1, 2
        )
        value_states = value_states.view(
            bsz, q_len, self.num_key_value_heads, self.head_dim
        ).transpose(1, 2)

        cos, sin = self.rotary_emb(value_states, position_ids)
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        # Slice-update the key/value cache; see the worked example after this class
        end_step = attention_mask.shape[-1]
        key_states, value_states = past_key_value.update(
            key_states,
            value_states,
            self.layer_idx,
            slice_indices=(end_step - q_len - 1, end_step - 1),
        )

        key_states = repeat_kv(key_states, self.num_key_value_groups)
        value_states = repeat_kv(value_states, self.num_key_value_groups)

        # A lower-triangular mask is only needed while prefilling (q_len > 1); a
        # single-token decode step attends to the whole cached prefix.
        causal_mask = None
        if q_len > 1:
            L, S = query_states.size(-2), key_states.size(-2)
            causal_mask = torch.ones(L, S, dtype=torch.bool).tril(diagonal=0)

        attn_output = torch.nn.functional.scaled_dot_product_attention(
            query_states,
            key_states,
            value_states,
            attn_mask=causal_mask,
            dropout_p=0.0,
            is_causal=False,
        )

        attn_output = attn_output.transpose(1, 2).contiguous()
        attn_output = attn_output.view(bsz, q_len, self.hidden_size)
        attn_output = self.o_proj(attn_output)

        return attn_output, None, None
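

# Worked example of the slice indices used above (comments only, nothing executed; the
# numbers come from the debug trace at the bottom of this file). generate() builds a
# causal mask that is one column wider than the tokens seen so far, so
# end_step = past + q_len + 1 and (end_step - q_len - 1, end_step - 1) is [past, past + q_len):
#   prefill:     q_len = 6, mask width end_step = 7  -> cache rows [0, 6)
#   decode step: q_len = 1, mask width end_step = 8  -> cache row  [6, 7)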


class StatefulLlamaForCausalLM(torch.nn.Module):
    def __init__(self, model_path: str, max_context_size: int = 2048, batch_size: int = 1) -> None:
        super().__init__()

        # Custom attention implementation with a stateful slice-update key/value cache.
        # Override "sdpa" so the class is picked up by
        # transformers.modeling_utils._autoset_attn_implementation.
        LLAMA_ATTENTION_CLASSES["sdpa"] = SliceUpdateLlamaAttention
        self.model = LlamaForCausalLM.from_pretrained(model_path)

        # Register KV cache buffers so they are recognized as Core ML states; the buffer
        # names here correspond to the ct.StateType names used in export()
        config: LlamaConfig = self.model.config
        self.kv_cache_shape: Tuple[int, ...] = (
            config.num_hidden_layers,
            batch_size,
            config.num_key_value_heads,
            max_context_size,
            config.hidden_size // config.num_attention_heads,
        )
        self.kv_cache = SliceUpdateKeyValueCache(shape=self.kv_cache_shape)
        self.register_buffer("keyCache", self.kv_cache.k_cache)
        self.register_buffer("valueCache", self.kv_cache.v_cache)

    @torch.no_grad()
    def forward(
        self,
        input_ids: torch.LongTensor,
        causal_mask: torch.Tensor,
    ) -> torch.Tensor:
        # Number of past seen tokens, used for updating key/value cache slices
        self.kv_cache.past_seen_tokens = causal_mask.shape[-1] - input_ids.shape[-1]
        return self.model(
            input_ids,
            attention_mask=causal_mask,
            past_key_values=self.kv_cache,
            use_cache=True,
        ).logits


def generate() -> None:
    # Construct the PyTorch model and run a short greedy decode to sanity-check it
    max_context_size: int = 2048
    torch_model = StatefulLlamaForCausalLM(MODEL_ID, max_context_size=max_context_size)
    torch_model.eval()

    # Tokenize the prompt and decode output tokens using the same tokenizer
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    # initial_prompt = "Write a christmas Carol"
    initial_prompt = "Write a poem on Apple "

    input_ids = tokenizer(initial_prompt, return_tensors="pt").input_ids
    causal_mask: torch.Tensor = torch.ones((1, 1, 1, input_ids.shape[-1] + 1), dtype=torch.float32)

    # Number of new tokens to generate
    output_length = 20

    is_first_run = True

    # Initialize the output tensor with the prompt tokens
    output_tokens = input_ids

    # Loop until the desired output length is reached
    while output_tokens.shape[-1] < output_length + input_ids.shape[-1]:
        # Prefill with the full prompt on the first call, then feed one token at a time;
        # the KV cache, updated in place, carries the context between calls
        model_inp = output_tokens[:, -1:]
        if is_first_run:
            model_inp = input_ids
            is_first_run = False
        output = torch_model(model_inp, causal_mask)

        # Pick the most likely token IDs (greedy decoding)
        output_ids = torch.argmax(output, dim=-1)

        # Append the newly generated token ID to the output tensor
        output_tokens = torch.cat((output_tokens, output_ids[:, -1, None]), dim=-1)

        # Grow the causal mask so it stays one column wider than the tokens seen so far
        causal_mask = torch.ones((1, 1, 1, output_tokens.shape[-1] + 1), dtype=torch.float32)

    decoded_output = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    print(f"input : {tokenizer.decode(input_ids[0])} output: {decoded_output}")


def export() -> None:
    # Construct model from transformers and trace to TorchScript
    max_context_size: int = 2048
    torch_model = StatefulLlamaForCausalLM(MODEL_ID, max_context_size=max_context_size)
    torch_model.eval()
    input_ids: torch.Tensor = torch.tensor([[19161, 253, 8216, 335, 10910, 216]], dtype=torch.int32)
    # input_ids: torch.Tensor = torch.tensor([[11]], dtype=torch.int32)
    causal_mask: torch.Tensor = torch.ones((1, 1, 1, 7), dtype=torch.float32)
    traced_model = torch.jit.trace(torch_model, [input_ids, causal_mask])

    # Convert traced TorchScript to Core ML format
    query_length = ct.RangeDim(lower_bound=1, upper_bound=max_context_size, default=1)
    end_step_dim = ct.RangeDim(lower_bound=1, upper_bound=max_context_size, default=1)
    inputs: List[ct.TensorType] = [
        ct.TensorType(shape=(1, query_length), dtype=np.int32, name="inputIds"),
        ct.TensorType(
            shape=(1, 1, query_length, end_step_dim),
            dtype=np.float16,
            name="causalMask",
        ),
    ]
    outputs: List[ct.TensorType] = [ct.TensorType(dtype=np.float16, name="logits")]
    states: List[ct.StateType] = [
        ct.StateType(
            wrapped_type=ct.TensorType(shape=torch_model.kv_cache_shape, dtype=np.float16),
            name="keyCache",
        ),
        ct.StateType(
            wrapped_type=ct.TensorType(shape=torch_model.kv_cache_shape, dtype=np.float16),
            name="valueCache",
        ),
    ]

    # Convert model with FP16 precision
    mlmodel_fp16: ct.MLModel = ct.convert(
        traced_model,
        inputs=inputs,
        outputs=outputs,
        states=states,
        minimum_deployment_target=ct.target.iOS18,
        skip_model_load=True,
    )
    mlmodel_fp16._spec.description.metadata.userDefined.update({METADATA_TOKENIZER: MODEL_ID})
    mlmodel_fp16.save("StatefulSmolLM_360M_InstructFP16.mlpackage")

    # Block-wise quantize model weights to int4
    op_config = ct.optimize.coreml.OpLinearQuantizerConfig(
        mode="linear_symmetric",
        dtype="int4",
        granularity="per_block",
        block_size=32,
    )
    config = ct.optimize.coreml.OptimizationConfig(global_config=op_config)
    mlmodel_int4 = ct.optimize.coreml.linear_quantize_weights(mlmodel_fp16, config=config)
    mlmodel_int4._spec.description.metadata.userDefined.update({METADATA_TOKENIZER: MODEL_ID})
    mlmodel_int4.save("StatefulSmolLM_360M_InstructInt4.mlpackage")
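

# Optional sanity check (hypothetical helper, not invoked by the __main__ block below):
# run one prefill step on the exported FP16 package with coremltools' stateful
# prediction API. This is a minimal sketch assuming coremltools >= 8.0 on macOS 15+,
# since the model targets iOS18; the input and mask shapes mirror the tracing example
# in export().
def _run_exported_model_once() -> None:
    mlmodel = ct.models.MLModel("StatefulSmolLM_360M_InstructFP16.mlpackage")
    kv_state = mlmodel.make_state()  # fresh keyCache/valueCache state
    input_ids = np.array([[19161, 253, 8216, 335, 10910, 216]], dtype=np.int32)
    causal_mask = np.ones((1, 1, 1, input_ids.shape[-1] + 1), dtype=np.float16)
    logits = mlmodel.predict(
        {"inputIds": input_ids, "causalMask": causal_mask}, state=kv_state
    )["logits"]
    print("next token id:", int(np.argmax(logits[0, -1])))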


if __name__ == "__main__":
    export()


###
# Sample greedy-decode trace from an earlier debug run (abridged). The prompt there was
# "Here is a poem about Valencia": the first call prefills all six prompt tokens with a
# causal mask of width 7, and every later call feeds a single token with a mask one
# column wider than the tokens accumulated so far, e.g.
#
#   model_inp: tensor([[ 8420,  374,  264, 32794,  911, 60249]])   causal_mask width: 7
#   model_inp: tensor([[11]])                                      causal_mask width: 8
#   model_inp: tensor([[17689]])                                   causal_mask width: 9
#   ...
#   model_inp: tensor([[5326]])                                    causal_mask width: 26
#
# input : Here is a poem about Valencia
# output: Here is a poem about Valencia, Spain. It is a poem about the city of Valencia,
#         Spain. The poem is written in