BAAI
/

shunxing1234 committed on
Commit
72802bf
·
1 Parent(s): 9a53f41

Update modeling_aquila.py

Browse files
Files changed (1) hide show
  1. modeling_aquila.py +1 -1
modeling_aquila.py CHANGED
@@ -346,7 +346,7 @@ class AquilaAttention(nn.Module):
346
  value_states = repeat_kv(value_states, self.num_key_value_groups)
347
 
348
  attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
349
-
350
  if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
351
  raise ValueError(
352
  f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
 
346
  value_states = repeat_kv(value_states, self.num_key_value_groups)
347
 
348
  attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
349
+ attn_weights = torch.clamp(attn_weights, min=-1024., max=1024.)
350
  if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
351
  raise ValueError(
352
  f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"