# Source code for mygrad.nnet.activations.glu

from typing import Optional

from numpy import asarray, ndarray

from mygrad.math.arithmetic.funcs import multiply
from mygrad.tensor_base import Tensor
from mygrad.typing import ArrayLike

from .sigmoid import sigmoid

def glu(x: ArrayLike, axis: int = -1, *, constant: Optional[bool] = None) -> Tensor:
    """Returns the Gated Linear Unit A * σ(B), where A and B are split from `x`.

    Parameters
    ----------
    x : ArrayLike
        The input.

    axis : int, optional (default=-1)
        The axis along which to split the input in half and apply the GLU.
        A size-1 ``numpy.ndarray`` or ``mygrad.Tensor`` holding an integer is
        also accepted and is unwrapped via ``.item()``.

    constant : Optional[bool]
        If ``True``, the returned tensor is a constant (it
        does not back-propagate a gradient).

    Returns
    -------
    mygrad.Tensor
        The result of applying the Gated Linear Unit elementwise to the input.

    Raises
    ------
    TypeError
        If `axis` is not an integer-valued scalar.

    ValueError
        If `x` has an odd length along `axis`, so that the two halves
        produced by the split do not have the same shape.

    Notes
    -----
    The Gated Linear Unit was proposed in the paper
        "Language Modeling with Gated Convolutional Networks"
        Yann Dauphin, Angela Fan, Michael Auli, David Grangier
    available at https://arxiv.org/abs/1612.08083

    The GLU operation splits the input `x` in half along `axis`, storing the first half
    in A and the second in B. The return value is then A ⊙ σ(B), where ⊙ is elementwise
    multiplication and σ is the sigmoid function.

    Examples
    --------
    >>> import mygrad as mg
    >>> from mygrad.nnet.activations import glu
    >>> x = mg.arange(-5., 5.)
    >>> x
    Tensor([-5., -4., -3., -2., -1.,  0.,  1.,  2.,  3.,  4.])

    >>> y = glu(x); y
    Tensor([-2.5       , -2.92423431, -2.64239123, -1.90514825, -0.98201379])

    >>> y.backward()
    >>> x.grad
    array([ 0.5       ,  0.73105858,  0.88079708,  0.95257413,  0.98201379,
           -1.25      , -0.78644773, -0.31498076, -0.09035332, -0.01766271])
    """
    if isinstance(axis, (ndarray, Tensor)):
        axis = axis.item()

    if not isinstance(axis, int):
        raise TypeError(
            f"`axis` must be an integer-valued scalar, got {axis} (type {type(axis)})"
        )

    # `x` is documented as ArrayLike, but the code below needs `.shape` and
    # tuple-of-slices indexing, which plain sequences (lists, tuples) lack.
    # Tensors are deliberately left untouched so that the slices taken from
    # them stay connected to the computational graph.
    if not isinstance(x, (ndarray, Tensor)):
        x = asarray(x)

    midpoint = x.shape[axis] // 2

    # Full slices along every axis; only the entry for `axis` is narrowed.
    first_idx = [slice(None)] * len(x.shape)
    second_idx = list(first_idx)
    first_idx[axis] = slice(0, midpoint)
    second_idx[axis] = slice(midpoint, None)

    first_half = x[tuple(first_idx)]
    second_half = x[tuple(second_idx)]

    # An odd length along `axis` leaves the second half one element longer.
    if first_half.shape != second_half.shape:
        raise ValueError(
            f"The shapes after splitting must be the same but got {first_half.shape} "
            f"and {second_half.shape}"
        )
    return multiply(first_half, sigmoid(second_half), constant=constant)