nicolinho committed
Commit c04147c · verified · 1 Parent(s): 1b1d3a1

Update modeling_custom.py

Files changed (1): modeling_custom.py (+2 -2)
modeling_custom.py CHANGED

@@ -160,7 +160,7 @@ class LlamaForRewardModelWithGating(LlamaPreTrainedModel):
 
         gating_token_positions = [find_token_for_gating(ids.tolist()) for ids in input_ids]
         prompt_embedding = tokens_hidden_states[dummy_iterator, gating_token_positions, :]
-        gating_output = self.gating(prompt_embedding.float())
+        gating_output = self.gating(prompt_embedding)
 
         #with torch.autocast(device_type=rewards.device.type, dtype=torch.float32):
         # [B, num_quantiles, num_objectives]
@@ -171,7 +171,7 @@ class LlamaForRewardModelWithGating(LlamaPreTrainedModel):
 
         rewards_expectation = rewards.float().mean(dim=2)
 
-        score = torch.sum(gating_output.float() * rewards_expectation, dim=1, keepdim=True)
+        score = torch.sum(gating_output.float() * rewards_expectation.float(), dim=1, keepdim=True)
 
         return CustomOutput(
             # reward_quantiles=reward_quantiles,
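
The net effect of the two changes is to run the gating head in the model's native dtype (the .float() cast moves off its input) and to up-cast both factors of the final weighted sum to float32 before the reduction. A minimal sketch of the resulting dtype flow, using hypothetical stand-ins for the gating head and tensor shapes (the real self.gating module and reward tensors are defined elsewhere in modeling_custom.py):

import torch
import torch.nn as nn

# Hypothetical stand-ins for illustration only; not the repo's actual sizes.
B, H = 2, 16                          # batch size, hidden size
num_objectives, num_quantiles = 4, 8

# Stand-in gating head, kept in the model's native dtype (e.g. bf16).
gating = nn.Sequential(nn.Linear(H, num_objectives), nn.Softmax(dim=-1)).to(torch.bfloat16)

# Hidden state at the gating token, in the model's native dtype.
prompt_embedding = torch.randn(B, H, dtype=torch.bfloat16)

# After this commit: the gating head runs without a .float() cast on its input.
gating_output = gating(prompt_embedding)            # [B, num_objectives], bf16

# Per-objective quantile rewards (shape assumed for this sketch).
rewards = torch.randn(B, num_objectives, num_quantiles, dtype=torch.bfloat16)
rewards_expectation = rewards.float().mean(dim=2)   # [B, num_objectives], fp32

# Both operands are cast to fp32 only for the final weighted sum, so the
# gating forward pass stays in the cheap dtype while the score is
# accumulated in full precision.
score = torch.sum(gating_output.float() * rewards_expectation.float(),
                  dim=1, keepdim=True)              # [B, 1], fp32
print(score.dtype, score.shape)                     # torch.float32 torch.Size([2, 1])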