Update modeling_custom.py
modeling_custom.py  CHANGED  (+2 -2)
@@ -160,7 +160,7 @@ class LlamaForRewardModelWithGating(LlamaPreTrainedModel):
 
         gating_token_positions = [find_token_for_gating(ids.tolist()) for ids in input_ids]
         prompt_embedding = tokens_hidden_states[dummy_iterator, gating_token_positions, :]
-        gating_output = self.gating(prompt_embedding
+        gating_output = self.gating(prompt_embedding)
 
         #with torch.autocast(device_type=rewards.device.type, dtype=torch.float32):
         # [B, num_quantiles, num_objectives]
@@ -171,7 +171,7 @@ class LlamaForRewardModelWithGating(LlamaPreTrainedModel):
 
         rewards_expectation = rewards.float().mean(dim=2)
 
-        score = torch.sum(gating_output.float() * rewards_expectation, dim=1, keepdim=True)
+        score = torch.sum(gating_output.float() * rewards_expectation.float(), dim=1, keepdim=True)
 
         return CustomOutput(
             # reward_quantiles=reward_quantiles,
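As rendered above, the commit makes two one-line changes: it closes the previously unbalanced parenthesis on the self.gating(prompt_embedding) call, and it adds an explicit .float() cast to rewards_expectation in the gating-weighted sum that produces the final score. Since rewards_expectation is already computed as rewards.float().mean(dim=2), the extra cast mainly makes the float32 dtype explicit at the reduction. The snippet below is a minimal, self-contained sketch of that scoring step; the tensor shapes, the softmax-normalized gating weights, and the bf16 input dtypes are illustrative assumptions, not taken from modeling_custom.py.

```python
import torch

batch_size, num_heads = 2, 4  # num_heads: size of the dimension reduced by dim=1

# Hypothetical gating weights and per-head reward expectations, both [batch_size, num_heads];
# the real model may produce these in a lower-precision dtype such as bf16.
gating_output = torch.rand(batch_size, num_heads, dtype=torch.bfloat16).softmax(dim=-1)
rewards_expectation = torch.randn(batch_size, num_heads, dtype=torch.bfloat16)

# The updated line: cast both factors to float32, then sum over dim=1
# to get one scalar score per example -> [batch_size, 1].
score = torch.sum(gating_output.float() * rewards_expectation.float(), dim=1, keepdim=True)
print(score.shape)  # torch.Size([2, 1])
```

Performing the multiply-and-sum in float32 keeps the per-example score in full precision even when the backbone and gating network run in half precision.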