Commit 3549460

Commit message: activations
1 parent 22c5add · commit 3549460

13 files changed: 57 additions, 422 deletions


paddlenlp/transformers/albert/modeling.py

Lines changed: 5 additions & 42 deletions
@@ -15,18 +15,20 @@
 """Modeling classes for ALBERT model."""

 import math
-from typing import Optional, Tuple, List
-from dataclasses import dataclass
+from typing import Optional, Tuple
+
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddle.nn import Layer
+
 from .. import PretrainedModel, register_base_model
+from ..activations import ACT2FN
 from ..model_outputs import (
     BaseModelOutput,
-    ModelOutput,
     BaseModelOutputWithPooling,
     MaskedLMOutput,
+    ModelOutput,
     MultipleChoiceModelOutput,
     QuestionAnsweringModelOutput,
     SequenceClassifierOutput,
@@ -48,45 +50,6 @@
 dtype_float = paddle.get_default_dtype()


-def get_activation(activation_string):
-    if activation_string in ACT2FN:
-        return ACT2FN[activation_string]
-    else:
-        raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys())))
-
-
-def mish(x):
-    return x * F.tanh(F.softplus(x))
-
-
-def linear_act(x):
-    return x
-
-
-def swish(x):
-    return x * F.sigmoid(x)
-
-
-def gelu_new(x):
-    """
-    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
-    the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
-    """
-    return 0.5 * x * (1.0 + paddle.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * paddle.pow(x, 3.0))))
-
-
-ACT2FN = {
-    "relu": F.relu,
-    "gelu": F.gelu,
-    "gelu_new": gelu_new,
-    "tanh": F.tanh,
-    "sigmoid": F.sigmoid,
-    "mish": mish,
-    "linear": linear_act,
-    "swish": swish,
-}
-
-
 class AlbertForPreTrainingOutput(ModelOutput):
     """
     Output type of [`AlbertForPreTraining`].
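Note: the helpers deleted above now come from a single shared module. A minimal sketch of what paddlenlp/transformers/activations.py presumably exposes, reconstructed from the removed code (the shared file is among the 13 changed files but is not shown in this excerpt, so its exact contents are an assumption):

import math

import paddle
import paddle.nn.functional as F


def mish(x):
    return x * F.tanh(F.softplus(x))


def linear_act(x):
    return x


def swish(x):
    return x * F.sigmoid(x)


def gelu_new(x):
    # Tanh approximation of GELU used by the original Google BERT repo.
    return 0.5 * x * (1.0 + paddle.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * paddle.pow(x, 3.0))))


# Name-to-callable mapping that the modeling files import as ACT2FN.
ACT2FN = {
    "relu": F.relu,
    "gelu": F.gelu,
    "gelu_new": gelu_new,
    "tanh": F.tanh,
    "sigmoid": F.sigmoid,
    "mish": mish,
    "linear": linear_act,
    "swish": swish,
}


def get_activation(activation_string):
    # Resolve a configured name, failing loudly on unknown activations.
    if activation_string in ACT2FN:
        return ACT2FN[activation_string]
    raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys())))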

paddlenlp/transformers/bigbird/modeling.py

Lines changed: 4 additions & 38 deletions
@@ -12,16 +12,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import math
-
-import numpy as np
 import paddle
-from paddle.nn import Linear, Dropout, LayerNorm, LayerList, Layer
-import paddle.nn.functional as F
 import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Dropout, Layer, LayerList, LayerNorm, Linear

-from ..attention_utils import _convert_param_attr_to_list, MultiHeadAttention, AttentionRegistry
 from .. import PretrainedModel, register_base_model
+from ..activations import ACT2FN
+from ..attention_utils import MultiHeadAttention, _convert_param_attr_to_list

 __all__ = [
     "BigBirdModel",
@@ -38,38 +36,6 @@
 ]


-def mish(x):
-    return x * F.tanh(F.softplus(x))
-
-
-def linear_act(x):
-    return x
-
-
-def swish(x):
-    return x * F.sigmoid(x)
-
-
-def gelu_new(x):
-    """
-    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
-    the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
-    """
-    return 0.5 * x * (1.0 + paddle.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * paddle.pow(x, 3.0))))
-
-
-ACT2FN = {
-    "relu": F.relu,
-    "gelu": F.gelu,
-    "gelu_new": gelu_new,
-    "tanh": F.tanh,
-    "sigmoid": F.sigmoid,
-    "mish": mish,
-    "linear": linear_act,
-    "swish": swish,
-}
-
-
 class TransformerEncoderLayer(Layer):
     def __init__(
         self,
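bigbird/modeling.py (like fnet below) keeps only the ACT2FN import. The usual pattern is a dict lookup keyed by the configured activation name; a hypothetical layer illustrating it (FeedForward and its sizes are made up, not code from this commit):

import paddle.nn as nn

from paddlenlp.transformers.activations import ACT2FN


class FeedForward(nn.Layer):
    """Hypothetical example of resolving an activation by name via ACT2FN."""

    def __init__(self, hidden_size=768, intermediate_size=3072, hidden_act="gelu"):
        super().__init__()
        self.dense = nn.Linear(hidden_size, intermediate_size)
        # ACT2FN maps a string such as "gelu", "relu" or "swish" to a callable.
        self.activation = ACT2FN[hidden_act]

    def forward(self, hidden_states):
        return self.activation(self.dense(hidden_states))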

paddlenlp/transformers/electra/modeling.py

Lines changed: 8 additions & 39 deletions
@@ -13,23 +13,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Optional, Tuple
-from dataclasses import dataclass
+from typing import Optional
+
 import paddle
-from paddle import Tensor
 import paddle.nn as nn
 import paddle.nn.functional as F
-from paddle.nn import TransformerEncoderLayer, TransformerEncoder
-from paddle.nn.layer.transformer import _convert_attention_mask
+from paddle import Tensor
+from paddle.nn import TransformerEncoder, TransformerEncoderLayer

 from .. import PretrainedModel, register_base_model
+from ..activations import get_activation
 from ..model_outputs import (
-    BaseModelOutputWithPastAndCrossAttentions,
+    MaskedLMOutput,
+    MultipleChoiceModelOutput,
+    QuestionAnsweringModelOutput,
     SequenceClassifierOutput,
     TokenClassifierOutput,
-    QuestionAnsweringModelOutput,
-    MultipleChoiceModelOutput,
-    MaskedLMOutput,
     tuple_output,
 )

@@ -53,36 +52,6 @@
 ]


-def get_activation(activation_string):
-    if activation_string in ACT2FN:
-        return ACT2FN[activation_string]
-    else:
-        raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys())))
-
-
-def mish(x):
-    return x * F.tanh(F.softplus(x))
-
-
-def linear_act(x):
-    return x
-
-
-def swish(x):
-    return x * F.sigmoid(x)
-
-
-ACT2FN = {
-    "relu": F.relu,
-    "gelu": F.gelu,
-    "tanh": F.tanh,
-    "sigmoid": F.sigmoid,
-    "mish": mish,
-    "linear": linear_act,
-    "swish": swish,
-}
-
-
 class ElectraEmbeddings(nn.Layer):
     """Construct the embeddings from word, position and token_type embeddings."""
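electra/modeling.py imports get_activation rather than defining it locally. Judging from the removed helper, the shared version maps a name to a callable and raises KeyError for unknown names; an illustrative call, assumed rather than taken from this diff:

from paddlenlp.transformers.activations import get_activation

# Resolve the configured hidden activation by name; an unknown name is
# expected to raise KeyError, just as the removed local helper did.
gelu = get_activation("gelu")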

paddlenlp/transformers/fnet/modeling.py

Lines changed: 2 additions & 42 deletions
@@ -13,13 +13,12 @@
 # limitations under the License.
 """Modeling classes for FNet model."""

-import math
 import paddle
 import paddle.nn as nn
-import paddle.nn.functional as F
-from functools import partial
 from paddle.nn import Layer
+
 from .. import PretrainedModel, register_base_model
+from ..activations import ACT2FN

 __all__ = [
     "FNetPretrainedModel",
@@ -34,45 +33,6 @@
 ]


-def get_activation(activation_string):
-    if activation_string in ACT2FN:
-        return ACT2FN[activation_string]
-    else:
-        raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys())))
-
-
-def mish(x):
-    return x * F.tanh(F.softplus(x))
-
-
-def linear_act(x):
-    return x
-
-
-def swish(x):
-    return x * F.sigmoid(x)
-
-
-def gelu_new(x):
-    """
-    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
-    the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
-    """
-    return 0.5 * x * (1.0 + paddle.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * paddle.pow(x, 3.0))))
-
-
-ACT2FN = {
-    "relu": F.relu,
-    "gelu": F.gelu,
-    "gelu_new": gelu_new,
-    "tanh": F.tanh,
-    "sigmoid": F.sigmoid,
-    "mish": mish,
-    "linear": linear_act,
-    "swish": swish,
-}
-
-
 class FNetBasicOutput(Layer):
     def __init__(self, hidden_size, layer_norm_eps):
         super().__init__()
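The consolidation should leave numeric behaviour unchanged. A quick sanity check one might run, assuming the shared mapping keeps the same keys as the removed local dictionaries (including "gelu_new"):

import paddle

from paddlenlp.transformers.activations import ACT2FN

x = paddle.to_tensor([-1.0, 0.0, 1.0])
# The shared "gelu_new" entry should reproduce the removed tanh-approximated GELU.
print(ACT2FN["gelu_new"](x))
print(ACT2FN["swish"](x))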
