Upload 7 files

Browse files

Files changed (3) hide show

README.md +69 -3
modeling_rwkv5.py +56 -37
tokenization_rwkv_world.py +142 -12

README.md CHANGED Viewed

@@ -85,7 +85,7 @@ Assistant:"""
 model = AutoModelForCausalLM.from_pretrained("RWKV/rwkv-5-world-1b5", trust_remote_code=True, torch_dtype=torch.float16).to(0)
 tokenizer = AutoTokenizer.from_pretrained("RWKV/rwkv-5-world-1b5", trust_remote_code=True)
-text = "乌兰察布"
 prompt = generate_prompt(text)
 inputs = tokenizer(prompt, return_tensors="pt").to(0)
@@ -100,8 +100,74 @@ User: hi
 Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
-User: 乌兰察布
-Assistant: 乌兰察布市是中国新疆维吾尔自治区的一个地级市，位于新疆维吾尔自治区西南部，毗邻青海省。乌兰察布市是新疆维吾尔自治区的重要城市之一，也是新疆维吾尔自治区的第二大城市。乌兰察布市是新疆的重要经济中心之一，拥有丰富的自然资源和人口密度，是新疆的重要交通枢纽和商
 ```

 model = AutoModelForCausalLM.from_pretrained("RWKV/rwkv-5-world-1b5", trust_remote_code=True, torch_dtype=torch.float16).to(0)
 tokenizer = AutoTokenizer.from_pretrained("RWKV/rwkv-5-world-1b5", trust_remote_code=True)
+text = "介绍一下大熊猫"
 prompt = generate_prompt(text)
 inputs = tokenizer(prompt, return_tensors="pt").to(0)
 Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
+User: 介绍一下大熊猫
+Assistant: 大熊猫是一种中国特有的哺乳动物，也是中国的国宝之一。它们的外貌特征是圆形的黑白相间的身体，有着黑色的毛发和白色的耳朵。大熊猫的食物主要是竹子，它们会在竹林中寻找竹子，并且会将竹子放在竹笼中进行储存。大熊猫的寿命约为20至30年，但由于栖息地的丧失和人类活动的
+```
+#### Batch Inference
+```python
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+def generate_prompt(instruction, input=""):
+    """Build the prompt text for one request.
+
+    Both arguments are stripped, CRLF is turned into LF and doubled newlines are
+    collapsed. A non-empty `input` selects the Instruction/Input/Response
+    template; otherwise the User/Assistant chat template is returned.
+    """
+    instruction = instruction.strip().replace('\r\n', '\n').replace('\n\n', '\n')
+    input = input.strip().replace('\r\n', '\n').replace('\n\n', '\n')
+    if input:
+        return f"""Instruction: {instruction}
+Input: {input}
+Response:"""
+    else:
+        return f"""User: hi
+Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
+User: {instruction}
+Assistant:"""
+# Load the weights and cast them to float32; neither the model nor the inputs are moved to another device here.
+model = AutoModelForCausalLM.from_pretrained("RWKV/rwkv-5-world-1b5", trust_remote_code=True).to(torch.float32)
+tokenizer = AutoTokenizer.from_pretrained("RWKV/rwkv-5-world-1b5", trust_remote_code=True)
+texts = ["请介绍北京的旅游景点", "介绍一下大熊猫", "乌兰察布"]
+prompts = [generate_prompt(text) for text in texts]
+# padding=True is requested so the prompts can be returned as a single "pt" tensor batch.
+inputs = tokenizer(prompts, return_tensors="pt", padding=True)
+# Only input_ids are handed to generate(); sampling runs with temperature=1.0, top_p=0.3 and top_k=0.
+outputs = model.generate(inputs["input_ids"], max_new_tokens=128, do_sample=True, temperature=1.0, top_p=0.3, top_k=0, )
+# Decode each generated row separately, dropping special tokens.
+for output in outputs:
+    print(tokenizer.decode(output.tolist(), skip_special_tokens=True))
 ```
+Output:
+```shell
+User: hi
+Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
+User: 请介绍北京的旅游景点
+Assistant: 北京是中国的首都，拥有丰富的旅游资源和历史文化遗产。以下是一些北京的旅游景点：
+1. 故宫：位于北京市中心，是明清两代的皇宫，是中国最大的古代宫殿建筑群之一。
+2. 天安门广场：位于北京市中心，是中国最著名的城市广场之一，也是中国最大的城市广场。
+3. 颐和
+User: hi
+Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
+User: 介绍一下大熊猫
+Assistant: 大熊猫是一种生活在中国中部地区的哺乳动物，也是中国的国宝之一。它们的外貌特征是圆形的黑白相间的身体，有着黑色的毛发和圆圆的眼睛。大熊猫是一种濒危物种，目前只有在野外的几个保护区才能看到它们的身影。大熊猫的食物主要是竹子，它们会在竹子上寻找食物，并且可以通
+User: hi
+Assistant: Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.
+User: 乌兰察布
+Assistant: 乌兰察布是中国新疆维吾尔自治区的一个县级市，位于新疆维吾尔自治区中部，是新疆的第二大城市。乌兰察布市是新疆的第一大城市，也是新疆的重要城市之一。乌兰察布市是新疆的经济中心，也是新疆的重要交通枢纽之一。乌兰察布市的人口约为2.5万人，其中汉族占绝大多数。乌
+```

modeling_rwkv5.py CHANGED Viewed

@@ -85,33 +85,46 @@ def rwkv_linear_attention_v5_0(H, S, T, hidden, time_decay, time_first, receptan
     return out, state
-def rwkv_linear_attention_v5_2(H, S, T, n_head, hidden, time_decay, time_first, receptance, key, value, gate, lxw, lxb, ow, state, return_state=False, seq_mode=True):
     time_decay = torch.exp(-torch.exp(time_decay.float())).reshape(-1,1,1).reshape(n_head, -1, 1)
     time_first = time_first.float().reshape(-1,1,1).reshape(n_head, -1, 1)
     lxw = lxw.float()
     lxb = lxb.float()
-    if seq_mode:
-        out = torch.empty((T, H, S), dtype=receptance.dtype, device=receptance.device)
-        for t in range(T):
-            rt = receptance[:,t:t+1,:]
-            kt = key[:,:,t:t+1]
-            vt = value[:,t:t+1,:]
-            at = kt @ vt
-            out[t] = (rt @ (time_first * at + state.squeeze(0))).squeeze(1)
-            state = at + time_decay * state
-        out = out.reshape(T, H*S)
-        out = F.group_norm(out, num_groups=H, weight=lxw, bias=lxb)
-        out = out.to(dtype=hidden.dtype) * gate
-        out = out @ ow
-    else:
-        a = key @ value
-        out = receptance @ (time_first * a + state.squeeze(0))
-        state = a + time_decay * state
-        out = out.flatten()
-        out = F.group_norm(out.unsqueeze(0), num_groups=H, weight=lxw, bias=lxb).squeeze(0)
-        out = out.to(dtype=hidden.dtype) * gate
-        out = out @ ow
     return out, state
@@ -153,7 +166,7 @@ class RwkvSelfAttention(nn.Module):
         self.ln_x = nn.GroupNorm(hidden_size // config.head_size, hidden_size)
     # TODO: maybe jit, otherwise move inside forward
-    def extract_key_value(self, H, S, T, hidden, state=None):
         # Mix hidden with the previous timestep to produce key, value, receptance
         if hidden.size(1) == 1 and state is not None:
             shifted = state[0][:, :, self.layer_id]
@@ -161,25 +174,27 @@ class RwkvSelfAttention(nn.Module):
             shifted = self.time_shift(hidden)
             if state is not None:
                 shifted[:, 0] = state[0][:, :, self.layer_id]
         key = hidden * self.time_mix_key + shifted * (1 - self.time_mix_key)
         value = hidden * self.time_mix_value + shifted * (1 - self.time_mix_value)
         receptance = hidden * self.time_mix_receptance + shifted * (1 - self.time_mix_receptance)
         if self.config.model_version == "5_2":
             gate = hidden* self.time_mix_gate + shifted * (1 - self.time_mix_gate)
-        if hidden.size(1) == 1 and state is not None:
-            receptance = self.receptance(receptance).to(torch.float32).view(H, 1, S)
-            key = self.key(key).to(torch.float32).view(H, S, 1)
-            value = self.value(value).to(torch.float32).view(H, 1, S)
-        else:
-            # https://github.com/BlinkDL/ChatRWKV/blob/main/rwkv_pip_package/src/rwkv/model.py#L693
-            key = self.key(key).to(torch.float32).view(T, H, S).transpose(0, 1).transpose(-2, -1)
-            value = self.value(value).to(torch.float32).view(T, H, S).transpose(0, 1)
-            receptance = self.receptance(receptance).to(torch.float32).view(T, H, S).transpose(0, 1)
         if self.config.model_version == "5_2":
             gate = F.silu(self.gate(gate))
         if state is not None:
             state[0][:, :, self.layer_id] = hidden[:, -1]
@@ -188,17 +203,19 @@ class RwkvSelfAttention(nn.Module):
         return receptance, key, value, state
     def forward(self, hidden, state=None, use_cache=False, seq_mode=True):
         H = self.time_decay.shape[0]
         S = hidden.shape[-1] // H
         T = hidden.shape[1]
         if self.config.model_version == "5_2":
-            receptance, key, value, gate, state = self.extract_key_value(H, S, T, hidden, state=state)
         else:
             receptance, key, value, state = self.extract_key_value(H, S, T, hidden, state=state)
         layer_state = state[1][:, :, :, :, self.layer_id] if state is not None else None
         if self.config.model_version == "5_2":
             rwkv, layer_state = rwkv_linear_attention_v5_2(
             H,
             S,
             T,
@@ -273,6 +290,8 @@ class RwkvFeedForward(nn.Module):
             shifted = self.time_shift(hidden)
             if state is not None:
                 shifted[:, 0] = state[2][:, :, self.layer_id]
         key = hidden * self.time_mix_key + shifted * (1 - self.time_mix_key)
         receptance = hidden * self.time_mix_receptance + shifted * (1 - self.time_mix_receptance)
@@ -594,7 +613,8 @@ class RwkvModel(RwkvPreTrainedModel):
         hidden_states = inputs_embeds
         all_self_attentions = () if output_attentions else None
         all_hidden_states = () if output_hidden_states else None
         for idx, block in enumerate(self.blocks):
@@ -645,7 +665,6 @@ class RwkvModel(RwkvPreTrainedModel):
         self.layers_are_rescaled = not self.training
 @add_start_docstrings(
     """
     The RWKV Model transformer with a language modeling head on top (linear layer with weights tied to the input

     return out, state
+cnt = 0
+def rwkv_linear_attention_v5_2(B, H, S, T, n_head, hidden, time_decay, time_first, receptance, key, value, gate, lxw, lxb, ow, state, return_state=False, seq_mode=True):
+    """Run the RWKV v5.2 linear-attention recurrence over a batch, one timestep at a time.
+
+    Args:
+        B, H, S, T: sizes used to shape the output buffer `(B, T, H, S)` and the
+            group-norm reshape; the attention `forward` takes B and T from `hidden`,
+            H from `time_decay` and S as the hidden width divided by H.
+        n_head: head count used to reshape `time_decay` and `time_first` to `(n_head, -1, 1)`.
+        hidden: only its dtype is read, to cast the normalised output before gating.
+        time_decay, time_first: per-head decay and current-step weighting parameters.
+        receptance, value: indexed here as `(B, H, T, S)`.
+        key: indexed here as `(B, H, S, T)`.
+        gate: multiplied element-wise with the normalised output.
+        lxw, lxb: weight and bias passed to the group norm (`num_groups=H`).
+        ow: output projection applied as `out @ ow`.
+        state: recurrent state added to `time_first * (k @ v)`; presumably shaped
+            `(B, H, S, S)` by the caller (TODO confirm).
+        return_state, seq_mode: accepted by the signature but not read in this body.
+
+    Returns:
+        Tuple `(out, state)`: the projected output and the state after the last timestep.
+    """
+    # exp(-exp(w)) maps the raw decay parameter into the interval (0, 1).
     time_decay = torch.exp(-torch.exp(time_decay.float())).reshape(-1,1,1).reshape(n_head, -1, 1)
     time_first = time_first.float().reshape(-1,1,1).reshape(n_head, -1, 1)
     lxw = lxw.float()
     lxb = lxb.float()
+    # No separate single-token branch: with T == 1 the loop below performs the same update.
+    out = torch.empty((B, T, H, S), dtype=receptance.dtype, device=receptance.device)
+    for t in range(T):
+        rt = receptance[:,:,t:t+1,:]
+        kt = key[:,:,:,t:t+1]
+        vt = value[:,:,t:t+1,:]
+        # Outer product of this step's key column and value row, per head.
+        at = kt @ vt
+        # The current step is weighted by time_first; earlier steps contribute through state.
+        out[:, t] = (rt @ (time_first * at + state)).squeeze(2)
+        # Decay the accumulated state and add the current step for the next iteration.
+        state = at + time_decay * state
+    # Fold batch and time into one axis so group_norm sees one row per (sample, timestep).
+    out = out.reshape(B*T, H*S)
+    out = F.group_norm(out, num_groups=H, weight=lxw, bias=lxb).reshape(B, T, H*S)
+    out = out.to(dtype=hidden.dtype) * gate
+    out = out @ ow
     return out, state
         self.ln_x = nn.GroupNorm(hidden_size // config.head_size, hidden_size)
     # TODO: maybe jit, otherwise move inside forward
+    def extract_key_value(self, B, H, S, T, hidden, state=None):
         # Mix hidden with the previous timestep to produce key, value, receptance
         if hidden.size(1) == 1 and state is not None:
             shifted = state[0][:, :, self.layer_id]
             shifted = self.time_shift(hidden)
             if state is not None:
                 shifted[:, 0] = state[0][:, :, self.layer_id]
+        if len(shifted.size()) == 2:
+            shifted = shifted.unsqueeze(1)
         key = hidden * self.time_mix_key + shifted * (1 - self.time_mix_key)
         value = hidden * self.time_mix_value + shifted * (1 - self.time_mix_value)
         receptance = hidden * self.time_mix_receptance + shifted * (1 - self.time_mix_receptance)
         if self.config.model_version == "5_2":
             gate = hidden* self.time_mix_gate + shifted * (1 - self.time_mix_gate)
+        # if hidden.size(1) == 1 and state is not None:
+        #     receptance = self.receptance(receptance).to(torch.float32).view(B, H, 1, S)
+        #     key = self.key(key).to(torch.float32).view(B, H, S, 1)
+        #     value = self.value(value).to(torch.float32).view(B, H, 1, S)
+        # else:
+        # https://github.com/BlinkDL/ChatRWKV/blob/main/rwkv_pip_package/src/rwkv/model.py#L693
+        key = self.key(key).to(torch.float32).view(B, T, H, S).transpose(1, 2).transpose(-2, -1)
+        value = self.value(value).to(torch.float32).view(B, T, H, S).transpose(1, 2)
+        receptance = self.receptance(receptance).to(torch.float32).view(B, T, H, S).transpose(1, 2)
         if self.config.model_version == "5_2":
             gate = F.silu(self.gate(gate))
         if state is not None:
             state[0][:, :, self.layer_id] = hidden[:, -1]
         return receptance, key, value, state
     def forward(self, hidden, state=None, use_cache=False, seq_mode=True):
+        B = hidden.shape[0]
         H = self.time_decay.shape[0]
         S = hidden.shape[-1] // H
         T = hidden.shape[1]
         if self.config.model_version == "5_2":
+            receptance, key, value, gate, state = self.extract_key_value(B, H, S, T, hidden, state=state)
         else:
             receptance, key, value, state = self.extract_key_value(H, S, T, hidden, state=state)
         layer_state = state[1][:, :, :, :, self.layer_id] if state is not None else None
         if self.config.model_version == "5_2":
             rwkv, layer_state = rwkv_linear_attention_v5_2(
+            B,
             H,
             S,
             T,
             shifted = self.time_shift(hidden)
             if state is not None:
                 shifted[:, 0] = state[2][:, :, self.layer_id]
+        if len(shifted.size()) == 2:
+            shifted = shifted.unsqueeze(1)
         key = hidden * self.time_mix_key + shifted * (1 - self.time_mix_key)
         receptance = hidden * self.time_mix_receptance + shifted * (1 - self.time_mix_receptance)
         hidden_states = inputs_embeds
+        global cnt
+        cnt += 1
         all_self_attentions = () if output_attentions else None
         all_hidden_states = () if output_hidden_states else None
         for idx, block in enumerate(self.blocks):
         self.layers_are_rescaled = not self.training
 @add_start_docstrings(
     """
     The RWKV Model transformer with a language modeling head on top (linear layer with weights tied to the input

tokenization_rwkv_world.py CHANGED Viewed

@@ -107,6 +107,7 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
             self,
             vocab_file,
             errors="replace",
             **kwargs
     ):
         self.add_bos_token = False
@@ -122,11 +123,7 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
             assert len(x) == int(l[l.rindex(' '):])
             sorted += [x]
             self.encoder[idx] = x
-        super().__init__(
-            errors=errors,
-            **kwargs,
-        )
         self.decoder = {}
         for k,v in self.encoder.items():
             self.decoder[v] = int(k)
@@ -136,6 +133,14 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
             _ = self.trie.add(t, val=(t, i))
         self.errors = errors  # how to handle errors in decoding
         self.cache = {}
     @property
     def vocab_size(self):
@@ -143,6 +148,22 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
     def get_vocab(self):
         return dict(self.encoder, **self.added_tokens_encoder)
     def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
         if self.add_bos_token:
@@ -219,14 +240,21 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
                skip_special_tokens: bool = False,
                **kwargs
                ) -> str:
         # Convert inputs to python lists
         token_ids = to_py_obj(token_ids)
         if isinstance(token_ids, int):
             if token_ids in self.all_special_ids and skip_special_tokens:
                 return ""
             return self.encoder.get(token_ids, self.unk_token)
         elif isinstance(token_ids, list):
             out_str = ""
             out_last = 0
             out_tokens = []
@@ -268,6 +296,11 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
     def prepare_for_tokenization(self, text, **kwargs):
         return (text, kwargs)
     def _encode_plus(
         self,
         text: Union[TextInput, EncodedInput],
@@ -352,19 +385,33 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
         verbose: bool = True,
         **kwargs
     ) -> BatchEncoding:
-        def get_input_ids(text):
             if isinstance(text, str):
-                text_id = self._tokenize(text)
-                return text_id
             elif isinstance(text, list) and len(text) > 0 and isinstance(text[0], str):
-                return [self._tokenize(t) for t in text]
             elif isinstance(text, (list, tuple)) and len(text) > 0 and isinstance(text[0], int):
                 return text
             else:
                 raise ValueError(
                     "Input is not valid. Should be a string, a list/tuple of strings or a list/tuple of integers."
                 )
         if return_offsets_mapping:
             raise NotImplementedError(
                 "return_offset_mapping is not available when using Python tokenizers. "
@@ -372,15 +419,29 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
                 "transformers.PreTrainedTokenizerFast."
             )
-        input_ids = []
         for ids_or_pair_ids in batch_text_or_text_pairs:
             if not isinstance(ids_or_pair_ids, (list, tuple)):
                 ids, pair_ids = ids_or_pair_ids, None
             else:
                 ids, pair_ids = ids_or_pair_ids
             first_ids = get_input_ids(ids)
             second_ids = get_input_ids(pair_ids) if pair_ids is not None else None
             input_ids.append((first_ids, second_ids))
         batch_outputs = self._batch_prepare_for_model(
@@ -401,6 +462,75 @@ class RWKVWorldTokenizer(PreTrainedTokenizer):
         )
         return BatchEncoding(batch_outputs)
     def _build_conversation_input_ids(self, conversation: "Conversation") -> List[int]:
         input_ids = []

             self,
             vocab_file,
             errors="replace",
+            pad_token="0",
             **kwargs
     ):
         self.add_bos_token = False
             assert len(x) == int(l[l.rindex(' '):])
             sorted += [x]
             self.encoder[idx] = x
         self.decoder = {}
         for k,v in self.encoder.items():
             self.decoder[v] = int(k)
             _ = self.trie.add(t, val=(t, i))
         self.errors = errors  # how to handle errors in decoding
         self.cache = {}
+        self.first_max_length = 0
+        # pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
+        super().__init__(
+            errors=errors,
+            # pad_token=pad_token,
+            **kwargs,
+        )
     @property
     def vocab_size(self):
     def get_vocab(self):
         return dict(self.encoder, **self.added_tokens_encoder)
+    def add_tokens(self, new_tokens, special_tokens: bool = False):
+        """Register tokens in `self.added_tokens_decoder` under their existing ids.
+
+        Args:
+            new_tokens: a single token or an iterable of tokens.
+            special_tokens: accepted for signature compatibility; not used here.
+
+        Returns:
+            int: the number of tokens that were registered.
+        """
+        # A bare string is one token, not a sequence of characters; objects that are
+        # not iterable at all are likewise treated as a single token.
+        if isinstance(new_tokens, str) or not hasattr(new_tokens, "__iter__"):
+            new_tokens = [new_tokens]
+        registered = 0
+        for token in new_tokens:
+            token_id = self.convert_tokens_to_ids(token)
+            if token_id is None:
+                # NOTE(review): assumes a failed lookup yields None; never store a None key.
+                continue
+            self.added_tokens_decoder[token_id] = token
+            registered += 1
+        return registered
+    def convert_ids_to_tokens(self, ids, skip_special_tokens=False):
+        """Map token ids to tokens, preferring entries in `self.added_tokens_decoder`.
+
+        Args:
+            ids: a single int id or an iterable of ids.
+            skip_special_tokens: when True, ids listed in `self.all_special_ids` are
+                left out of the returned list (only applies to iterable input).
+
+        Returns:
+            A single token for an int input, otherwise a list of tokens.
+        """
+        if isinstance(ids, int):
+            # A single id yields a single token rather than a one-element list.
+            if ids in self.added_tokens_decoder:
+                return self.added_tokens_decoder[ids]
+            return self._convert_id_to_token(ids)
+        # Only look up the special ids when they are actually needed.
+        special_ids = set(self.all_special_ids) if skip_special_tokens else ()
+        tokens = []
+        for id_ in ids:
+            if id_ in special_ids:
+                continue
+            if id_ in self.added_tokens_decoder:
+                tokens.append(self.added_tokens_decoder[id_])
+            else:
+                tokens.append(self._convert_id_to_token(id_))
+        return tokens
     def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
         if self.add_bos_token:
                skip_special_tokens: bool = False,
                **kwargs
                ) -> str:
+        def remove_zeros_from_first_segment(token_ids, first_max_length):
+            """Drop id 0 from the first `first_max_length` positions of a token-id list."""
+            # A single int id cannot be sliced; pass it through so the int branch below handles it.
+            if not isinstance(token_ids, list):
+                return token_ids
+            first_segment_cleaned = [token for token in token_ids[:first_max_length] if token != 0]
+            return first_segment_cleaned + token_ids[first_max_length:]
         # Convert inputs to python lists
         token_ids = to_py_obj(token_ids)
+        token_ids = remove_zeros_from_first_segment(token_ids, self.first_max_length)
         if isinstance(token_ids, int):
             if token_ids in self.all_special_ids and skip_special_tokens:
                 return ""
             return self.encoder.get(token_ids, self.unk_token)
         elif isinstance(token_ids, list):
+            self.first_max_length
             out_str = ""
             out_last = 0
             out_tokens = []
     def prepare_for_tokenization(self, text, **kwargs):
         return (text, kwargs)
+    def _get_padding_truncation_strategies(
+        self, padding=False, truncation=None, max_length=None, pad_to_multiple_of=None, verbose=True, **kwargs
+    ):
+        """Return a fixed strategy: pad to the longest sequence and never truncate.
+
+        Every strategy argument is ignored. The result is the 4-tuple
+        `(padding_strategy, truncation_strategy, max_length, kwargs)` with
+        `max_length` fixed to -1 and `kwargs` passed back unchanged.
+        """
+        padding_strategy = PaddingStrategy.LONGEST
+        truncation_strategy = TruncationStrategy.DO_NOT_TRUNCATE
+        fixed_max_length = -1
+        return padding_strategy, truncation_strategy, fixed_max_length, kwargs
     def _encode_plus(
         self,
         text: Union[TextInput, EncodedInput],
         verbose: bool = True,
         **kwargs
     ) -> BatchEncoding:
+        def get_input_ids(text, max_length=None, pad_token_id=0):
+            """Tokenize `text` (or pass ids through) and pad the result on the left.
+
+            Args:
+                text: a string, a list of strings, or a list/tuple of int ids.
+                max_length: target length; None leaves a single sequence unpadded, and
+                    for a list of strings falls back to the longest tokenized entry.
+                pad_token_id: id prepended to reach `max_length`.
+
+            Raises:
+                ValueError: if `text` is none of the accepted input forms.
+            """
+            def pad_sequence(seq, max_len, pad_tok):
+                # list(seq) lets tuple inputs be padded too; a non-positive gap adds nothing.
+                return [pad_tok] * (max_len - len(seq)) + list(seq)
             if isinstance(text, str):
+                tokens = self._tokenize(text)
+                if max_length is not None:
+                    tokens = pad_sequence(tokens, max_length, pad_token_id)
+                return tokens
             elif isinstance(text, list) and len(text) > 0 and isinstance(text[0], str):
+                tokenized_texts = [self._tokenize(t) for t in text]
+                if max_length is None:
+                    max_length = max(len(t) for t in tokenized_texts)
+                return [pad_sequence(t, max_length, pad_token_id) for t in tokenized_texts]
             elif isinstance(text, (list, tuple)) and len(text) > 0 and isinstance(text[0], int):
+                # Already ids: pad only when shorter than the target, otherwise return as given.
+                if max_length is not None and len(text) < max_length:
+                    return pad_sequence(text, max_length, pad_token_id)
                 return text
             else:
                 raise ValueError(
                     "Input is not valid. Should be a string, a list/tuple of strings or a list/tuple of integers."
                 )
         if return_offsets_mapping:
             raise NotImplementedError(
                 "return_offset_mapping is not available when using Python tokenizers. "
                 "transformers.PreTrainedTokenizerFast."
             )
+        first_max_length = 0
+        second_max_length = 0
         for ids_or_pair_ids in batch_text_or_text_pairs:
             if not isinstance(ids_or_pair_ids, (list, tuple)):
                 ids, pair_ids = ids_or_pair_ids, None
             else:
                 ids, pair_ids = ids_or_pair_ids
             first_ids = get_input_ids(ids)
             second_ids = get_input_ids(pair_ids) if pair_ids is not None else None
+            first_max_length = max(first_max_length, len(first_ids))
+            if second_ids is not None:
+                second_max_length = max(second_max_length, len(second_ids))
+        self.first_max_length = first_max_length
+        input_ids = []
+        for ids_or_pair_ids in batch_text_or_text_pairs:
+            if not isinstance(ids_or_pair_ids, (list, tuple)):
+                ids, pair_ids = ids_or_pair_ids, None
+            else:
+                ids, pair_ids = ids_or_pair_ids
+            first_ids = get_input_ids(ids, max_length=first_max_length)
+            second_ids = get_input_ids(pair_ids, max_length=second_max_length) if pair_ids is not None else None
             input_ids.append((first_ids, second_ids))
         batch_outputs = self._batch_prepare_for_model(
         )
         return BatchEncoding(batch_outputs)
+    def decode(
+        self,
+        token_ids: Union[int, List[int], "np.ndarray", "torch.Tensor", "tf.Tensor"],
+        skip_special_tokens: bool = False,
+        clean_up_tokenization_spaces: bool = None,
+        **kwargs,
+    ) -> str:
+        """
+        Turn token ids into text by delegating to `self._decode`.
+        `token_ids` is forwarded exactly as received; nothing is converted in this method.
+        Args:
+            token_ids (`Union[int, List[int], np.ndarray, torch.Tensor, tf.Tensor]`):
+                The ids to decode, e.g. as produced by the `__call__` method.
+            skip_special_tokens (`bool`, *optional*, defaults to `False`):
+                Forwarded to `_decode`; asks for special tokens to be left out of the text.
+            clean_up_tokenization_spaces (`bool`, *optional*):
+                Forwarded to `_decode` unchanged (including `None`).
+            kwargs (additional keyword arguments, *optional*):
+                Forwarded to `_decode` as well.
+        Returns:
+            `str`: The decoded text.
+        """
+        decode_options = {
+            "skip_special_tokens": skip_special_tokens,
+            "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
+            **kwargs,
+        }
+        return self._decode(token_ids=token_ids, **decode_options)
+    def batch_decode(
+        self,
+        sequences: Union[List[int], List[List[int]], "np.ndarray", "torch.Tensor", "tf.Tensor"],
+        skip_special_tokens: bool = False,
+        clean_up_tokenization_spaces: bool = None,
+        **kwargs,
+    ) -> List[str]:
+        """
+        Decode every entry of `sequences` with `self.decode` and collect the results in order.
+        Args:
+            sequences (`Union[List[int], List[List[int]], np.ndarray, torch.Tensor, tf.Tensor]`):
+                The batch to decode; it is iterated and each item is decoded on its own.
+            skip_special_tokens (`bool`, *optional*, defaults to `False`):
+                Forwarded to `decode` for every item.
+            clean_up_tokenization_spaces (`bool`, *optional*):
+                Forwarded to `decode` for every item, unchanged (including `None`).
+            kwargs (additional keyword arguments, *optional*):
+                Forwarded to `decode` for every item.
+        Returns:
+            `List[str]`: One decoded string per item of `sequences`.
+        """
+        decoded = []
+        for seq in sequences:
+            text = self.decode(
+                seq,
+                skip_special_tokens=skip_special_tokens,
+                clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+                **kwargs,
+            )
+            decoded.append(text)
+        return decoded
     def _build_conversation_input_ids(self, conversation: "Conversation") -> List[int]:
         input_ids = []