xiezhe24 committed on
Commit b08d47e · verified · 1 Parent(s): e0a50c6

Upload 3 files

Implemented processor.

Files changed (3)
  1. config.json +7 -5
  2. modeling_qwen2.py +175 -107
  3. processing_qwen2_ts.py +171 -0
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/mnt/bn/mllmhl/sft_checkpoints/qwen2.5-14b-ts-explaints-1124-stage1-sp/checkpoint-400",
+  "_name_or_path": "chatts_release",
   "architectures": [
     "Qwen2TSForCausalLM"
   ],
@@ -7,7 +7,8 @@
   "auto_map": {
     "AutoConfig": "configuration_qwen2.Qwen2TSConfig",
     "AutoModel": "modeling_qwen2.Qwen2TSForCausalLM",
-    "AutoModelForCausalLM": "modeling_qwen2.Qwen2TSForCausalLM"
+    "AutoModelForCausalLM": "modeling_qwen2.Qwen2TSForCausalLM",
+    "AutoProcessor": "processing_qwen2_ts.Qwen2TSProcessor"
   },
   "bos_token_id": 151643,
   "eos_token_id": 151645,
@@ -33,10 +34,11 @@
     "hidden_size": 5120,
     "num_features": 2,
     "num_layers": 5,
-    "patch_size": 16
+    "patch_size": 16,
+    "max_length": 2048
   },
-  "ts_token_end_index": 151665,
-  "ts_token_start_index": 151666,
+  "ts_token_end_index": 151666,
+  "ts_token_start_index": 151665,
   "use_cache": false,
   "use_sliding_window": false,
   "vocab_size": 152064
modeling_qwen2.py CHANGED
@@ -26,7 +26,7 @@
 import inspect
 import math
 import copy
-from typing import List, Optional, Tuple, Union
+from typing import List, Optional, Tuple, Union, Dict, Any
 from dataclasses import dataclass
 
 import torch
@@ -68,6 +68,44 @@ _CHECKPOINT_FOR_DOC = "Qwen/Qwen2-7B-beta"
 _CONFIG_FOR_DOC = "Qwen2TSConfig"
 
 
+@dataclass
+class Qwen2TSCausalLMOutputWithPast(ModelOutput):
+    """
+    Base class for Qwen2TS causal language model (or autoregressive) outputs.
+
+    Args:
+        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
+            Language modeling loss (for next-token prediction).
+        logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
+            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
+        past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
+            Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
+            `(batch_size, num_heads, sequence_length, embed_size_per_head)`)
+
+            Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
+            `past_key_values` input) to speed up sequential decoding.
+        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
+            Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
+            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.
+
+            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
+        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
+            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
+            sequence_length)`.
+
+            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
+            heads.
+        attention_mask (`torch.FloatTensor`, *optional*):
+            Attention mask, used to update attention_mask and position_ids.
+    """
+
+    loss: Optional[torch.FloatTensor] = None
+    logits: torch.FloatTensor = None
+    past_key_values: Optional[List[torch.FloatTensor]] = None
+    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
+    attentions: Optional[Tuple[torch.FloatTensor]] = None
+    attention_mask: Optional[torch.FloatTensor] = None
+
 ########################Naive TS Embedding#####################
 class TimeSeriesEmbedding(nn.Module):
     def __init__(self, config):
@@ -1187,147 +1225,127 @@ class Qwen2TSForCausalLM(Qwen2PreTrainedModel):
 
     def get_decoder(self):
         return self.model
-
-    def _get_real_length(self, timeseries, input_ids):
-        # Return the embed length after inserting timeseries features
-        if timeseries is None:
-            return input_ids.size(1)
-
-        num_time_steps = timeseries.size(1) * timeseries.size(2) // self.config.ts['num_features']
-        num_patches = num_time_steps // self.config.ts['patch_size']
-        special_ts_token_mask_start = input_ids == self.config.ts_token_start_index
-        num_special_ts_tokens = torch.sum(special_ts_token_mask_start, dim=-1)
-        return num_special_ts_tokens * (num_patches - 2) + input_ids.size(1)
-
-    def _get_original_length(self, timeseries, input_ids, past_length):
-        if timeseries is None:
-            if isinstance(past_length, int):
-                original_length = torch.full((input_ids.size(0),), past_length, dtype=torch.long, device=input_ids.device)
-            else:
-                original_length = past_length
-            num_special_ts_tokens_within_past = torch.zeros(input_ids.size(0), dtype=torch.long, device=input_ids.device)
-            return original_length, num_special_ts_tokens_within_past
-
-        patch_size = self.config.ts['patch_size']
-        num_patches = timeseries.size(1) * timeseries.size(2) // patch_size // self.config.ts['num_features']
-        ts_token_start_index = self.config.ts_token_start_index
-
-        ts_mask = (input_ids == ts_token_start_index).long()  # (batch_size, seq_length)
-
-        cumsum_ts = torch.cumsum(ts_mask, dim=1)  # (batch_size, seq_length)
-
-        seq_length = input_ids.size(1)
-        positions = torch.arange(1, seq_length + 1, device=input_ids.device).unsqueeze(0).expand_as(input_ids)  # (batch_size, seq_length)
-
-        transformed_length = positions + cumsum_ts * (num_patches - 2)  # (batch_size, seq_length)
-
-        if isinstance(past_length, int):
-            past_length_tensor = torch.full((input_ids.size(0),), past_length, dtype=torch.long, device=input_ids.device)
-        else:
-            past_length_tensor = past_length.to(input_ids.device)
-
-        mask = transformed_length <= past_length_tensor.unsqueeze(1)  # (batch_size, seq_length)
-
-        original_length = torch.sum(mask, dim=1)  # (batch_size,)
-        original_positions = torch.arange(1, seq_length + 1, device=input_ids.device).unsqueeze(0).expand_as(input_ids)  # (batch_size, seq_length)
-        original_mask = original_positions <= original_length.unsqueeze(1)  # (batch_size, seq_length)
-        ts_within_original_mask = ts_mask.bool() & original_mask.bool()  # (batch_size, seq_length)
-        num_special_ts_tokens_within_past = torch.sum(ts_within_original_mask, dim=1)  # (batch_size,)
-
-        original_length = torch.clamp(original_length, min=0)
-
-        return original_length, num_special_ts_tokens_within_past
-
     def _merge_input_ids_with_time_series_features(
-        self, time_series_features, inputs_embeds, input_ids, attention_mask, labels, patch_cnt
-    ):
-        total_time_steps, embed_dim = time_series_features.shape
+        self, time_series_features, inputs_embeds, input_ids, attention_mask, labels, patch_cnt
+    ):
         batch_size, sequence_length = input_ids.shape
+        _left_padding = torch.any(attention_mask[:, 0] == 0)
+        _right_padding = torch.any(attention_mask[:, -1] == 0)
         left_padding = False
-
+        if batch_size > 1:
+            if _left_padding and not _right_padding:
+                left_padding = True
+            elif not _left_padding and _right_padding:
+                left_padding = False
+            elif not _left_padding and not _right_padding:
+                left_padding = False
+            else:
+                raise ValueError(f"both side of attention_mask has zero, invalid. {attention_mask}")
+        else:
+            if _left_padding and not _right_padding:
+                left_padding = True
+            else:
+                left_padding = False
+
         # 1. Create a mask to know where special time series tokens are
         special_ts_token_mask_start = input_ids == self.config.ts_token_start_index
         special_ts_token_mask_end = input_ids == self.config.ts_token_end_index
         special_ts_token_mask = special_ts_token_mask_start | special_ts_token_mask_end
+
+        # 2. Calculate patch count
         num_special_ts_tokens = torch.sum(special_ts_token_mask_start, dim=-1)
+        total_time_steps, embed_dim = time_series_features.shape
+
         # Correctly calculate the total number of patches per batch
+        patch_index = 0
         num_total_patches = torch.zeros(batch_size, dtype=patch_cnt.dtype, device=patch_cnt.device)
         special_ts_token_mask_start_nonzero = special_ts_token_mask_start.nonzero()
         special_ts_token_mask_start_with_size = special_ts_token_mask_start.clone().long()
-        patch_index = 0
+
+        attn_mask_cnt = attention_mask.sum(dim=-1)
         for i in range(batch_size):
             num_ts_in_batch = num_special_ts_tokens[i]
-            num_total_patches[i] = patch_cnt[patch_index:patch_index + num_ts_in_batch].sum() - 2 * num_ts_in_batch
+            num_total_patches[i] = patch_cnt[patch_index : patch_index + num_ts_in_batch].sum() - 2 * num_ts_in_batch
             for idx in range(patch_index, patch_index + num_ts_in_batch):
-                batch_idx, pos_idx = special_ts_token_mask_start_nonzero[idx]
-                special_ts_token_mask_start_with_size[batch_idx, pos_idx] *= (patch_cnt[idx].item() - 2)
+                b_idx, pos = special_ts_token_mask_start_nonzero[idx]
+                special_ts_token_mask_start_with_size[b_idx, pos] *= (patch_cnt[idx].item() - 2)
             patch_index += num_ts_in_batch
-
-        # Compute the maximum embed dimension, considering both start and end tokens
+            attn_mask_cnt[i] += num_total_patches[i].item()
+
+        # 3. Embedding length
         max_embed_dim = sequence_length + num_total_patches.max()
-
-        # batch_indices, non_ts_indices = torch.where(~special_ts_token_mask)
+
+        # 4. Non ts tokens
         batch_indices, non_ts_indices = torch.where(~special_ts_token_mask)
 
-        # 2. Compute the positions where text should be written
+        # 5. Text token in final text positions
        	new_token_positions = torch.cumsum((special_ts_token_mask_start_with_size + 1), dim=-1) - 1
+
+        # nb_ts_pad
        	nb_ts_pad = max_embed_dim - 1 - new_token_positions[:, -1]
         if left_padding:
-            new_token_positions += nb_ts_pad[:, None]  # offset for left padding
+            new_token_positions += nb_ts_pad[:, None]
+
        	text_to_overwrite = new_token_positions[batch_indices, non_ts_indices]
-
-        # 3. Create the full embedding, already padded to the maximum position
+
+        # 6. Final embedding and attention masks
        	final_embedding = torch.zeros(
            	batch_size, max_embed_dim, embed_dim, dtype=inputs_embeds.dtype, device=inputs_embeds.device
        	)
-        final_attention_mask = torch.zeros(
-            batch_size, max_embed_dim, dtype=attention_mask.dtype, device=inputs_embeds.device
-        )
+
+        final_attention_mask = torch.zeros(batch_size, max_embed_dim, dtype=attention_mask.dtype, device=inputs_embeds.device)
+        for i in range(attention_mask.size(0)):
+            if left_padding:
+                final_attention_mask[i, max_embed_dim - attn_mask_cnt[i] :] = 1
+            else:
+                final_attention_mask[i, : attn_mask_cnt[i]] = 1
+
+        final_labels = None
        	if labels is not None:
            	final_labels = torch.full(
                	(batch_size, max_embed_dim), self.config.ignore_index, dtype=input_ids.dtype, device=input_ids.device
            	)
+
        	target_device = inputs_embeds.device
        	batch_indices, non_ts_indices, text_to_overwrite = (
            	batch_indices.to(target_device),
            	non_ts_indices.to(target_device),
            	text_to_overwrite.to(target_device),
        	)
-        attention_mask = attention_mask.to(target_device)
-
-        # 4. Fill the embeddings based on the mask
+
+        # 7. Move embedding and labels to final positions
        	final_embedding[batch_indices, text_to_overwrite] = inputs_embeds[batch_indices, non_ts_indices]
-        final_attention_mask[batch_indices, text_to_overwrite] = attention_mask[batch_indices, non_ts_indices]
        	if labels is not None:
            	final_labels[batch_indices, text_to_overwrite] = labels[batch_indices, non_ts_indices]
-
-        # 5. Fill the embeddings corresponding to the time series
+
+        # 8. Move time series to final positions
        	ts_to_overwrite = torch.full(
            	(batch_size, max_embed_dim), True, dtype=torch.bool, device=inputs_embeds.device
        	)
        	ts_to_overwrite[batch_indices, text_to_overwrite] = False
+
        	reversed_cumsum = ts_to_overwrite.flip(dims=[-1]).cumsum(-1).flip(dims=[-1]) - 1
        	ts_to_overwrite &= reversed_cumsum >= nb_ts_pad[:, None].to(target_device)
-
+
+        # Check that the number of time series tokens is correct
        	if ts_to_overwrite.sum() != time_series_features.shape[:-1].numel():
            	raise ValueError(
                	f"The input provided to the model are wrong. The number of time series tokens is {torch.sum(special_ts_token_mask_start)} while"
                	f" the number of time series given to the model is {len(patch_cnt)}. This prevents correct indexing and breaks batch generation."
            	)
-
        	final_embedding[ts_to_overwrite] = time_series_features.contiguous().reshape(-1, embed_dim).to(target_device)
-        final_attention_mask |= ts_to_overwrite
+
+        # 9. Calculate position ids
        	position_ids = (final_attention_mask.cumsum(-1) - 1).masked_fill_((final_attention_mask == 0), 1)
-
-        # 6. Mask out the embedding at padding positions
-        batch_indices, pad_indices = torch.where(input_ids == self.config.pad_token_id)
-        indices_to_mask = new_token_positions[batch_indices, pad_indices]
-
-        final_embedding[batch_indices, indices_to_mask] = 0
+        if position_ids.size(-1) < input_ids.size(-1):
+            position_ids = position_ids[:, -input_ids.size(-1) :]
+
+        # 10. Move attention mask to final positions
+        pad_batch_indices, pad_indices = torch.where(input_ids == self.config.pad_token_id)
+        if len(pad_batch_indices) > 0:
+            indices_to_mask = new_token_positions[pad_batch_indices, pad_indices]
+            final_embedding[pad_batch_indices, indices_to_mask] = 0
 
-        if labels is None:
-            final_labels = None
-
        	return final_embedding, final_attention_mask, position_ids, final_labels
 
     @add_start_docstrings_to_model_forward(QWEN2_INPUTS_DOCSTRING)
@@ -1382,10 +1400,8 @@ class Qwen2TSForCausalLM(Qwen2PreTrainedModel):
            inputs_embeds = self.get_input_embeddings()(input_ids)
 
        if timeseries is not None and timeseries.shape[0] > 0:
-            # Disable KV Cache as it has not been implemented yet
-            use_cache = False
+            # use_cache = False
            ts_features, patch_cnt = self.ts_encoder(timeseries)
-
            inputs_embeds = inputs_embeds.to(ts_features.dtype)
 
            inputs_embeds, attention_mask, position_ids, labels = self._merge_input_ids_with_time_series_features(
@@ -1424,14 +1440,63 @@ class Qwen2TSForCausalLM(Qwen2PreTrainedModel):
            output = (logits,) + outputs[1:]
            return (loss,) + output if loss is not None else output
 
-        return CausalLMOutputWithPast(
+
+        return Qwen2TSCausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
+            attention_mask=attention_mask
        )
 
+    def _update_model_kwargs_for_generation(
+        self,
+        outputs: ModelOutput,
+        model_kwargs: Dict[str, Any],
+        is_encoder_decoder: bool = False,
+        num_new_tokens: int = 1,
+    ) -> Dict[str, Any]:
+        # update past_key_values keeping its naming used in model code
+        cache_name, cache = self._extract_past_from_model_output(outputs)
+        model_kwargs[cache_name] = cache
+        if getattr(outputs, "state", None) is not None:
+            model_kwargs["state"] = outputs.state
+
+        # update attention_mask
+        if getattr(outputs, "attention_mask", None) is not None:
+            model_kwargs["attention_mask"] = outputs.attention_mask
+
+        # update token_type_ids with last value
+        if "token_type_ids" in model_kwargs:
+            token_type_ids = model_kwargs["token_type_ids"]
+            model_kwargs["token_type_ids"] = torch.cat([token_type_ids, token_type_ids[:, -1].unsqueeze(-1)], dim=-1)
+
+        if not is_encoder_decoder:
+            # update attention mask
+            if "attention_mask" in model_kwargs:
+                attention_mask = model_kwargs["attention_mask"]
+                model_kwargs["attention_mask"] = torch.cat(
+                    [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1
+                )
+        else:
+            # update decoder attention mask
+            if "decoder_attention_mask" in model_kwargs:
+                decoder_attention_mask = model_kwargs["decoder_attention_mask"]
+                model_kwargs["decoder_attention_mask"] = torch.cat(
+                    [decoder_attention_mask, decoder_attention_mask.new_ones((decoder_attention_mask.shape[0], 1))],
+                    dim=-1,
+                )
+
+        if model_kwargs.get("use_cache", True):
+            model_kwargs["cache_position"] = model_kwargs["cache_position"][-1:] + num_new_tokens
+        else:
+            past_positions = model_kwargs.pop("cache_position")
+            new_positions = torch.arange(
+                past_positions[-1] + 1, past_positions[-1] + num_new_tokens + 1, dtype=past_positions.dtype
+            ).to(past_positions.device)
+            model_kwargs["cache_position"] = torch.cat((past_positions, new_positions))
+        return model_kwargs
 
    def prepare_inputs_for_generation(
        self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, timeseries=None, **kwargs
@@ -1446,20 +1511,23 @@ class Qwen2TSForCausalLM(Qwen2PreTrainedModel):
                cache_length = past_length = past_key_values[0][0].shape[2]
                max_cache_length = None
 
-            # Keep only the unprocessed tokens:
-            # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
-            # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as
-            # input)
-            real_len = self._get_real_length(timeseries, input_ids)
-            origin_past_len, past_num_ts = self._get_original_length(timeseries, input_ids, past_length)
-            if attention_mask is not None and attention_mask.shape[1] > real_len:
+            has_ts = timeseries is not None and len(timeseries) > 0
+
+            if has_ts and kwargs.get("attention_mask") is not None:
+                attention_mask = kwargs["attention_mask"]
+                attention_mask = torch.cat(
+                    [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1
+                )
+
+            # Set attention mask and input_ids
+            if has_ts and past_length > 0:
+                # We have only one token added and timeseries are already inferenced
+                input_ids = input_ids[:, -1:]
+                timeseries = None
+            elif attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]:
                input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :]
-            # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard
-            # input_ids based on the past_length.
-            elif past_length < real_len:
-                input_ids = input_ids[:, origin_past_len:]
-                if timeseries is not None:
-                    timeseries = timeseries[past_num_ts:]
+            elif past_length < input_ids.shape[1]:
+                input_ids = input_ids[:, past_length:]
            # 3 - Otherwise (past_length >= input_ids.shape[1]), let's assume input_ids only has unprocessed tokens.
 
            # If we are about to go beyond the maximum cache length, we need to crop the input attention mask.
@@ -1476,7 +1544,7 @@ class Qwen2TSForCausalLM(Qwen2PreTrainedModel):
            position_ids = attention_mask.long().cumsum(-1) - 1
            position_ids.masked_fill_(attention_mask == 0, 1)
            if past_key_values:
-                position_ids = position_ids[:, -input_ids.size(1) :]
+                position_ids = position_ids[:, -input_ids.shape[1] :]
 
        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
        if inputs_embeds is not None and past_key_values is None:
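
A note on the rewritten _merge_input_ids_with_time_series_features: text positions in the expanded sequence come from a cumulative sum in which every token has width 1, except each <ts> start token, whose width is inflated by its patch count minus 2; the leftover slots are then filled with the time-series patch embeddings. A toy illustration of that indexing step (illustrative only, not part of the committed code):

import torch

# toy sequence: [tok, tok, <ts>, tok, <ts/>, tok]; assume the encoder yields patch_cnt = 5 for the series
ts_start_mask = torch.tensor([[0, 0, 1, 0, 0, 0]])
widths = ts_start_mask.clone()
widths[ts_start_mask == 1] = 5 - 2                          # mirrors patch_cnt[idx].item() - 2
new_token_positions = torch.cumsum(widths + 1, dim=-1) - 1
print(new_token_positions)                                  # tensor([[0, 1, 5, 6, 7, 8]])
# the non-ts tokens land at positions 0, 1, 6 and 8 of the length-9 merged sequence;
# the remaining slots are overwritten with the time-series patch features.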
processing_qwen2_ts.py ADDED
@@ -0,0 +1,171 @@
+# coding=utf-8
+# Copyright 2024 Tsinghua University and ByteDance.
+#
+# Licensed under the MIT License (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://opensource.org/license/mit
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+from typing import List, Union, Tuple, Optional
+import torch
+
+from transformers.feature_extraction_utils import BatchFeature
+from transformers.processing_utils import ProcessorMixin
+from transformers.tokenization_utils_base import (
+    PreTokenizedInput,
+    TextInput,
+    PaddingStrategy,
+)
+
+def sp_encoding(timeseries: np.ndarray, eots_token: bool = True) -> Tuple[np.ndarray, str, dict]:
+    """
+    Encodes a time series with scalar normalization.
+
+    Args:
+        timeseries (np.ndarray): The raw time series data (1D or 2D).
+
+    Returns:
+        result_timeseries (np.ndarray): The encoded time series, shape [seq_len, 1].
+        prompt (str): The placeholder string with offset and scaling info.
+        metadata (dict): Metadata containing the offset and scaling factor.
+    """
+    mean = np.mean(timeseries)
+    scaled_timeseries = timeseries - mean
+    scale_factor = 1.0
+    if np.any(np.abs(scaled_timeseries) >= 3.0):
+        scale_factor = np.max(np.abs(scaled_timeseries)) / 3.0
+        scaled_timeseries /= scale_factor
+
+    prompt = f"[Value Offset: {-mean:.4f}|Value Scaling: {scale_factor:.4f}]<ts>"
+    if eots_token:
+        prompt += '<ts/>'
+
+    result_timeseries = np.stack([scaled_timeseries, np.ones_like(scaled_timeseries)], axis=-1).reshape(-1, 1)
+
+    return result_timeseries, prompt, {"offset": float(-mean), "scale_factor": float(scale_factor)}
+
+class Qwen2TSProcessor(ProcessorMixin):
+    """
+    A processor for ChatTS that integrates text prompt processing and time series encoding.
+    """
+
+    attributes = ["tokenizer"]
+    feature_extractor_class = None  # You can add a feature extractor if needed
+    tokenizer_class = "AutoTokenizer"
+
+    def __init__(self, tokenizer=None):
+        """
+        Args:
+            tokenizer: An optional tokenizer to process text prompts.
+        """
+        super().__init__(tokenizer=tokenizer)
+
+    def __call__(
+        self,
+        text: List[str],
+        timeseries: List[List[np.ndarray]],
+        padding: Union[bool, str, PaddingStrategy] = False,
+        padding_side: str = 'left',
+        vllm_flag: bool = False,
+        **kwargs,
+    ) -> BatchFeature:
+        """
+        Encodes a prompt and its associated time series.
+
+        Args:
+            text (List[str]): The input prompts containing <ts><ts/> placeholders.
+            timeseries (List[np.ndarray]): A list of time series matched to placeholders in the prompt.
+            padding (bool or str or PaddingStrategy, optional): Passed to the tokenizer for text padding.
+            return_tensors (str, optional): "pt" to return PyTorch tensors; None to return NumPy arrays.
+            **kwargs: Additional tokenizer parameters.
+
+        Returns:
+            BatchFeature: Contains processed prompt, encoded time series, and tokenizer outputs.
+        """
+        if type(text) == str:
+            text = [text]
+
+        encoded_ts_arrays = []
+        reconstructed_prompts = []
+        total_ts_cnt = 0
+        for idx, prompt in enumerate(text):
+            # Split prompt by <ts><ts/> placeholders
+            last_ts_cnt = total_ts_cnt
+            prompt_segments = prompt.split("<ts><ts/>")
+            total_ts_cnt = total_ts_cnt + len(prompt_segments) - 1
+
+            # Encode each time series and rebuild the prompt
+            reconstructed_prompt = prompt_segments[0]
+
+            for i, ts in enumerate(timeseries[last_ts_cnt:total_ts_cnt]):
+                encoded_ts, ts_prompt, _ = sp_encoding(ts, eots_token=not vllm_flag)
+                reconstructed_prompt += ts_prompt + prompt_segments[i + 1]
+                # Ensure time series shape [1, seq_len, feature_dim] for batch concatenation
+                encoded_ts_arrays.append(encoded_ts[None, ...])
+
+            reconstructed_prompts.append(reconstructed_prompt)
+
+        if len(timeseries) != len(encoded_ts_arrays):
+            raise ValueError(
+                f"Mismatch between <ts><ts/> placeholders ({total_ts_cnt}) "
+                f"and time series ({len(encoded_ts_arrays)})."
+            )
+
+        if len(encoded_ts_arrays) > 0:
+            # Pad time series to the same length
+            max_length = max(ts.shape[1] for ts in encoded_ts_arrays)
+            padded_ts_arrays = [
+                np.pad(ts, ((0, 0), (0, max_length - ts.shape[1]), (0, 0)), mode="constant", constant_values=0.0)
+                for ts in encoded_ts_arrays
+            ]
+            concatenated_ts = np.concatenate(padded_ts_arrays, axis=0)  # Shape: [batch_size, max_length, feature_dim]
+
+            # Convert to torch
+            concatenated_ts = torch.from_numpy(concatenated_ts).half()
+        else:
+            concatenated_ts = None
+
+        # Tokenize the processed prompt
+        tokenizer_outputs = {}
+        if self.tokenizer is not None:
+            tokenizer_outputs = self.tokenizer(reconstructed_prompts, padding=padding, padding_side=padding_side, **kwargs)
+
+        # Create the final output
+        outputs = {
+            "timeseries": concatenated_ts
+        }
+        outputs.update(tokenizer_outputs)
+
+        return BatchFeature(data=outputs)
+
+    @property
+    def model_input_names(self):
+        """
+        Define the input names expected by the model.
+        """
+        tokenizer_input_names = []
+        if self.tokenizer and hasattr(self.tokenizer, "model_input_names"):
+            tokenizer_input_names = self.tokenizer.model_input_names
+        return list(dict.fromkeys(["processed_prompt", "time_series"] + tokenizer_input_names))
+
+    def batch_decode(self, *args, **kwargs):
+        """
+        This method forwards all its arguments to Qwen2TokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
+        refer to the docstring of this method for more information.
+        """
+        return self.tokenizer.batch_decode(*args, **kwargs)
+
+    def decode(self, *args, **kwargs):
+        """
+        This method forwards all its arguments to Qwen2TokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
+        the docstring of this method for more information.
+        """
+        return self.tokenizer.decode(*args, **kwargs)
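
A rough end-to-end sketch of how the new processor is intended to be used with the model (the checkpoint path and prompt are placeholders; device_map="auto" assumes accelerate is installed; the <ts><ts/> handling follows the code above):

import numpy as np
import torch
from transformers import AutoModelForCausalLM, AutoProcessor

path = "path/to/chatts_checkpoint"  # placeholder
processor = AutoProcessor.from_pretrained(path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(path, trust_remote_code=True, torch_dtype=torch.float16, device_map="auto")

prompt = "Here is a metric:<ts><ts/>. Please describe any anomalies in it."
series = np.sin(np.arange(256) / 10.0)  # one series per <ts><ts/> placeholder

inputs = processor(text=[prompt], timeseries=[series], padding=True, return_tensors="pt").to(model.device)
# inputs holds input_ids / attention_mask from the tokenizer plus the padded "timeseries" tensor
outputs = model.generate(**inputs, max_new_tokens=128)
print(processor.batch_decode(outputs, skip_special_tokens=True)[0])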