Upload ModularStarEncoder

modularStarEncoder.py  +5 -11
CHANGED
@@ -1,4 +1,4 @@
-from transformers import
+from transformers import Starcoder2Model
 import sys
 from config import ModularStarEncoderConfig
 import os
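
The completed import pulls in Starcoder2Model, which presumably serves as the transformer backbone that ModularStarEncoder wraps. A minimal sketch of instantiating such a backbone, assuming a small illustrative Starcoder2Config; the sizes below are placeholders, not the model's real ones:

from transformers import Starcoder2Config, Starcoder2Model

# Illustrative backbone construction; config values are placeholders.
config = Starcoder2Config(
    vocab_size=49152,
    hidden_size=768,
    num_hidden_layers=6,
    num_attention_heads=12,
)
backbone = Starcoder2Model(config)
print(backbone.config.hidden_size)  # 768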
@@ -13,7 +13,6 @@ from transformers.activations import ACT2FN
 from transformers.modeling_utils import PreTrainedModel
 from transformers.utils import (
     ModelOutput,
-
     logging,

 )
@@ -34,9 +33,6 @@ class StarEncoder2PreTrainedModel(PreTrainedModel):
     _supports_sdpa = True
     _supports_cache_class = True

-    # def __init__(self):
-    #     self._supports_flash_attn_2 = True
-    #     super().__init__()


     def _init_weights(self, module):
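
The commented-out __init__ that forced _supports_flash_attn_2 is dropped, while _supports_sdpa and _supports_cache_class remain. In Transformers the attention backend is normally picked at load time rather than hard-coded; a hedged sketch, where the checkpoint id is a placeholder and trust_remote_code assumes the custom modeling code ships with the repo:

from transformers import AutoModel

# Placeholder checkpoint id; attn_implementation selects the attention backend
# ("sdpa" here, or "flash_attention_2" if flash-attn is installed).
model = AutoModel.from_pretrained(
    "org/modular-star-encoder",
    trust_remote_code=True,
    attn_implementation="sdpa",
)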
@@ -81,7 +77,7 @@ class ModularStarEncoderOutput(ModelOutput):
         prediction_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
             Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
         seq_relationship_logits (`torch.FloatTensor` of shape `(batch_size, 2)`):
-            Prediction scores of the
+            Prediction scores of the in-context classification head (scores of True/False continuation
             before SoftMax).
         hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
             Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
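
seq_relationship_logits is now documented as a two-way in-context classification head: for each pair it scores whether the second sequence is a true continuation of the first. A small sketch of consuming those raw scores, assuming only the (batch_size, 2) shape stated in the docstring; the values are made up:

import torch

# Example raw scores with shape (batch_size, 2); values are illustrative.
seq_relationship_logits = torch.tensor([[ 2.3, -1.1],
                                        [-0.4,  1.7]])

probs = seq_relationship_logits.softmax(dim=-1)  # docstring says scores are pre-SoftMax
pred = probs.argmax(dim=-1)                      # 0 = in-context pair, 1 = random pair
print(pred)  # tensor([0, 1])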
@@ -249,11 +245,9 @@ class ModularStarEncoder(StarEncoder2PreTrainedModel):
             config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked),
             the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
         next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
-
-
-
-            - 0 indicates sequence B is a continuation of sequence A,
-            - 1 indicates sequence B is a random sequence.
+            This label is assigned to the in-context loss:
+            - 0 indicates sequence B belongs to the same repository as A,
+            - 1 indicates sequence B is a random repository.
         kwargs (`Dict[str, any]`, optional, defaults to *{}*):
             Used to hide legacy arguments that have been deprecated.

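
The next_sentence_label docstring now describes a repository-level in-context objective rather than BERT-style next-sentence prediction: 0 means sequence B comes from the same repository as A, 1 means it was drawn from a random repository. A sketch of how such labels could be built for a batch; every helper name and snippet here is illustrative, not part of the model's code:

import random
import torch

def make_pair(anchor, same_repo, other_repos):
    # 0 -> B from the same repository as A, 1 -> B from a random repository.
    if random.random() < 0.5 and same_repo:
        return anchor, random.choice(same_repo), 0
    return anchor, random.choice(other_repos), 1

pairs = [make_pair("def add(a, b): return a + b",
                   ["def sub(a, b): return a - b"],
                   ["SELECT * FROM users;"])
         for _ in range(4)]
next_sentence_label = torch.tensor([label for _, _, label in pairs])  # shape (batch_size,)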