47 changes: 5 additions & 42 deletions paddlenlp/transformers/albert/modeling.py
@@ -15,18 +15,20 @@
"""Modeling classes for ALBERT model."""

import math
from typing import Optional, Tuple, List
from dataclasses import dataclass
from typing import Optional, Tuple

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Layer

from .. import PretrainedModel, register_base_model
from ..activations import ACT2FN
from ..model_outputs import (
BaseModelOutput,
ModelOutput,
BaseModelOutputWithPooling,
MaskedLMOutput,
ModelOutput,
MultipleChoiceModelOutput,
QuestionAnsweringModelOutput,
SequenceClassifierOutput,
@@ -48,45 +50,6 @@
dtype_float = paddle.get_default_dtype()


def get_activation(activation_string):
if activation_string in ACT2FN:
return ACT2FN[activation_string]
else:
raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys())))


def mish(x):
return x * F.tanh(F.softplus(x))


def linear_act(x):
return x


def swish(x):
return x * F.sigmoid(x)


def gelu_new(x):
"""
Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
"""
return 0.5 * x * (1.0 + paddle.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * paddle.pow(x, 3.0))))


ACT2FN = {
"relu": F.relu,
"gelu": F.gelu,
"gelu_new": gelu_new,
"tanh": F.tanh,
"sigmoid": F.sigmoid,
"mish": mish,
"linear": linear_act,
"swish": swish,
}


class AlbertForPreTrainingOutput(ModelOutput):
"""
Output type of [`AlbertForPreTraining`].
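Note: the duplicated helpers deleted above (`get_activation`, `mish`, `swish`, `linear_act`, `gelu_new`, `ACT2FN`) are replaced throughout this PR by imports from a shared `..activations` module. A minimal sketch of what that module presumably exposes, reconstructed from the definitions removed in this diff (the actual `paddlenlp/transformers/activations.py` may differ in detail):

```python
# Sketch of the consolidated activations module (assumed), reconstructed from
# the per-model definitions removed in this diff.
import math

import paddle
import paddle.nn.functional as F


def mish(x):
    return x * F.tanh(F.softplus(x))


def linear_act(x):
    return x


def swish(x):
    return x * F.sigmoid(x)


def gelu_new(x):
    """GELU approximation used in the original Google BERT repo (identical to OpenAI GPT).

    See "Gaussian Error Linear Units": https://arxiv.org/abs/1606.08415
    """
    return 0.5 * x * (1.0 + paddle.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * paddle.pow(x, 3.0))))


ACT2FN = {
    "relu": F.relu,
    "gelu": F.gelu,
    "gelu_new": gelu_new,
    "tanh": F.tanh,
    "sigmoid": F.sigmoid,
    "mish": mish,
    "linear": linear_act,
    "swish": swish,
}


def get_activation(activation_string):
    # Look up an activation callable by its config string; unknown names raise KeyError.
    if activation_string in ACT2FN:
        return ACT2FN[activation_string]
    raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys())))
```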
42 changes: 4 additions & 38 deletions paddlenlp/transformers/bigbird/modeling.py
@@ -12,16 +12,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import numpy as np
import paddle
from paddle.nn import Linear, Dropout, LayerNorm, LayerList, Layer
import paddle.nn.functional as F
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Dropout, Layer, LayerList, LayerNorm, Linear

from ..attention_utils import _convert_param_attr_to_list, MultiHeadAttention, AttentionRegistry
from .. import PretrainedModel, register_base_model
from ..activations import ACT2FN
from ..attention_utils import MultiHeadAttention, _convert_param_attr_to_list

__all__ = [
"BigBirdModel",
@@ -38,38 +36,6 @@
]


def mish(x):
return x * F.tanh(F.softplus(x))


def linear_act(x):
return x


def swish(x):
return x * F.sigmoid(x)


def gelu_new(x):
"""
Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
"""
return 0.5 * x * (1.0 + paddle.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * paddle.pow(x, 3.0))))


ACT2FN = {
"relu": F.relu,
"gelu": F.gelu,
"gelu_new": gelu_new,
"tanh": F.tanh,
"sigmoid": F.sigmoid,
"mish": mish,
"linear": linear_act,
"swish": swish,
}


class TransformerEncoderLayer(Layer):
def __init__(
self,
2 changes: 1 addition & 1 deletion paddlenlp/transformers/codegen/modeling.py
@@ -24,11 +24,11 @@
from ...utils.env import CONFIG_NAME
from ...utils.log import logger
from .. import PretrainedModel, register_base_model
from ..activations import ACT2FN
from ..model_outputs import (
BaseModelOutputWithPastAndCrossAttentions,
CausalLMOutputWithCrossAttentions,
)
from ..nezha.modeling import ACT2FN
from .configuration import (
CODEGEN_PRETRAINED_INIT_CONFIGURATION,
CODEGEN_PRETRAINED_RESOURCE_FILES_MAP,
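Note: CodeGen previously reached into `..nezha.modeling` for `ACT2FN`; it now imports the mapping from the shared module. A small usage sketch of resolving an activation by its config string (the absolute import path is inferred from the relative import above; the input tensor is illustrative, not taken from this diff):

```python
import paddle

# Assumed absolute form of the `from ..activations import ACT2FN` import above.
from paddlenlp.transformers.activations import ACT2FN

x = paddle.randn([2, 8])       # illustrative input tensor
act_fn = ACT2FN["gelu_new"]    # resolve the activation by its string name
y = act_fn(x)                  # apply the tanh-approximated GELU
print(y.shape)                 # [2, 8]
```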
3 changes: 2 additions & 1 deletion paddlenlp/transformers/convbert/modeling.py
@@ -17,8 +17,9 @@
import paddle.nn.functional as F
from paddle import tensor
from paddle.nn import Layer
from ..electra.modeling import get_activation

from .. import PretrainedModel, register_base_model
from ..activations import get_activation

__all__ = [
"ConvBertModel",
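Note: ConvBERT previously imported `get_activation` from `..electra.modeling`; it now uses the shared module directly. A brief usage sketch, assuming the behaviour of the per-file implementations removed elsewhere in this diff (lookup in `ACT2FN`, `KeyError` on unknown names):

```python
import paddle

# Assumed absolute form of the `from ..activations import get_activation` import above.
from paddlenlp.transformers.activations import get_activation

gelu = get_activation("gelu")
out = gelu(paddle.to_tensor([0.5, -1.0, 2.0]))

try:
    get_activation("not_an_activation")
except KeyError as err:
    # The removed implementations raised KeyError listing the supported names.
    print(err)
```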
47 changes: 8 additions & 39 deletions paddlenlp/transformers/electra/modeling.py
@@ -13,23 +13,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional, Tuple
from dataclasses import dataclass
from typing import Optional

import paddle
from paddle import Tensor
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import TransformerEncoderLayer, TransformerEncoder
from paddle.nn.layer.transformer import _convert_attention_mask
from paddle import Tensor
from paddle.nn import TransformerEncoder, TransformerEncoderLayer

from .. import PretrainedModel, register_base_model
from ..activations import get_activation
from ..model_outputs import (
BaseModelOutputWithPastAndCrossAttentions,
MaskedLMOutput,
MultipleChoiceModelOutput,
QuestionAnsweringModelOutput,
SequenceClassifierOutput,
TokenClassifierOutput,
QuestionAnsweringModelOutput,
MultipleChoiceModelOutput,
MaskedLMOutput,
tuple_output,
)

@@ -53,36 +52,6 @@
]


def get_activation(activation_string):
if activation_string in ACT2FN:
return ACT2FN[activation_string]
else:
raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys())))


def mish(x):
return x * F.tanh(F.softplus(x))


def linear_act(x):
return x


def swish(x):
return x * F.sigmoid(x)


ACT2FN = {
"relu": F.relu,
"gelu": F.gelu,
"tanh": F.tanh,
"sigmoid": F.sigmoid,
"mish": mish,
"linear": linear_act,
"swish": swish,
}


class ElectraEmbeddings(nn.Layer):
"""Construct the embeddings from word, position and token_type embeddings."""

44 changes: 2 additions & 42 deletions paddlenlp/transformers/fnet/modeling.py
@@ -13,13 +13,12 @@
# limitations under the License.
"""Modeling classes for FNet model."""

import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from functools import partial
from paddle.nn import Layer

from .. import PretrainedModel, register_base_model
from ..activations import ACT2FN

__all__ = [
"FNetPretrainedModel",
@@ -34,45 +33,6 @@
]


def get_activation(activation_string):
if activation_string in ACT2FN:
return ACT2FN[activation_string]
else:
raise KeyError("function {} not found in ACT2FN mapping {}".format(activation_string, list(ACT2FN.keys())))


def mish(x):
return x * F.tanh(F.softplus(x))


def linear_act(x):
return x


def swish(x):
return x * F.sigmoid(x)


def gelu_new(x):
"""
Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
"""
return 0.5 * x * (1.0 + paddle.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * paddle.pow(x, 3.0))))


ACT2FN = {
"relu": F.relu,
"gelu": F.gelu,
"gelu_new": gelu_new,
"tanh": F.tanh,
"sigmoid": F.sigmoid,
"mish": mish,
"linear": linear_act,
"swish": swish,
}


class FNetBasicOutput(Layer):
def __init__(self, hidden_size, layer_norm_eps):
super().__init__()