Source code for mygrad.nnet.initializers.dirac
from typing import Optional
import numpy as np
from mygrad.tensor_base import Tensor
def dirac(*shape: int, dtype=np.float32, constant: Optional[bool] = None) -> Tensor:
    """Initialize a :class:`mygrad.Tensor` according to the Dirac initialization
    procedure described by Zagoruyko and Komodakis.

    Parameters
    ----------
    shape : Sequence[int]
        The shape of the output Tensor, given either as separate integers
        (``dirac(4, 4, 3)``) or as a single sequence (``dirac((4, 4, 3))``).
        Note that ``shape`` must be at least two-dimensional.

    dtype : data-type, optional (default=float32)
        The data type of the output tensor.

    constant : Optional[bool]
        If ``True``, this tensor is treated as a constant, and thus does not
        facilitate back propagation (i.e. its ``grad`` will always be ``None``).

        Defaults to ``False`` for float-type data.
        Defaults to ``True`` for integer-type data.

        Integer-type tensors must be constant.

    Returns
    -------
    mygrad.Tensor, shape=``shape``
        A Tensor, with values initialized according to the Dirac initialization.

    Raises
    ------
    ValueError
        If ``shape`` describes fewer than two dimensions. This includes a
        single integer argument such as ``dirac(5)``.

    Notes
    -----
    Zagoruyko and Komodakis put forward the Dirac initialization in the paper
    "DiracNets: Training Very Deep Neural Networks without Skip Connections"
    https://arxiv.org/abs/1706.00388

    A Tensor I initialized via this should satisfy:
        I ⋆ x = x

    for compatible tensors ``x``, where ``⋆`` indicates convolution. Note that this
    does not guarantee that the convolution will produce ``x``, but it will preserve
    as many channels of the input as possible.

    Concretely, for a shape ``(out, in, k1, ..., kn)`` the returned data is zero
    everywhere except at ``[i, i, k1 // 2, ..., kn // 2]`` for each
    ``i < min(out, in)``, where it is one.
    """
    # A lone non-integer argument is taken to be the full shape. A lone integer
    # stays wrapped in the 1-tuple so the dimensionality check below rejects it
    # with ValueError rather than failing on ``len(int)``.
    if len(shape) == 1 and not isinstance(shape[0], (int, np.integer)):
        shape = shape[0]

    if len(shape) < 2:
        raise ValueError("Dirac initialization requires at least two dimensions")

    tensor = np.zeros(shape, dtype=dtype)

    # Indices 0 .. min(out dim, in dim) - 1 along the two leading axes.
    diagonal = np.arange(min(tensor.shape[:2]))

    # Centre position of every trailing (spatial) axis; these scalar indices
    # broadcast against the ``diagonal`` index arrays.
    centers = tuple(extent // 2 for extent in tensor.shape[2:])

    # tensor[i, i, k1 // 2, k2 // 2, ..., kn // 2] = 1 for each i in ``diagonal``
    tensor[(diagonal, diagonal, *centers)] = 1
    return Tensor(tensor, constant=constant)