Update modeling_bert.py
modeling_bert.py (+1 -1)
@@ -387,7 +387,7 @@ class BertSelfAttention(nn.Module):
         # Normalize the attention scores to probabilities.
         #attention_probs = nn.functional.softmax(attention_scores, dim=-1)
         attention_probs = softmax_1(attention_scores, dim=-1)
-
+
 
         # This is actually dropping out entire tokens to attend to, which might
         # seem a bit unusual, but is taken from the original Transformer paper.
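The hunk calls softmax_1, which is not part of torch.nn.functional, so it must be a helper defined elsewhere in this fork. Its definition is not shown in the diff; a minimal sketch follows, assuming softmax_1 is the softmax variant that adds 1 to the denominator (equivalent to an implicit extra logit of 0, letting attention heads assign total probability below 1). The body below is an assumption for illustration, not the repository's actual implementation.

import torch

def softmax_1(x: torch.Tensor, dim: int = -1) -> torch.Tensor:
    """Softmax with an extra 1 in the denominator (assumed definition).

    Equivalent to appending a constant 0 logit along `dim`, so each row
    can sum to less than 1 and a head can effectively attend to nothing.
    """
    # Shift by max(logits, 0) for numerical stability; after the shift,
    # the implicit 0 logit contributes exp(-m) to the denominator.
    m = torch.clamp(x.max(dim=dim, keepdim=True).values, min=0.0)
    e = torch.exp(x - m)
    return e / (torch.exp(-m) + e.sum(dim=dim, keepdim=True))

Under this assumption, the changed line behaves like the original softmax call except that attention_probs.sum(dim=-1) can be less than 1 when all attention_scores are strongly negative.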