from typing import Optional, Union
import numpy as np
from numpy import ndarray
import mygrad as mg
from mygrad.math.misc.ops import MatMul
from mygrad.tensor_base import Tensor, implements_numpy_override
from mygrad.typing import ArrayLike, DTypeLikeReals, Mask
from mygrad.ufuncs import ufunc_creator
from .ops import Abs, Cbrt, Maximum, Minimum, Sqrt
__all__ = [
"abs",
"absolute",
"cbrt",
"clip",
"sqrt",
"maximum",
"minimum",
"matmul",
"multi_matmul",
]
@ufunc_creator(Abs)
def absolute(
x: ArrayLike,
out: Optional[Union[ndarray, Tensor]] = None,
*,
where: Mask = True,
dtype: DTypeLikeReals = None,
constant: Optional[bool] = None,
nan_to_num: bool = True,
) -> Tensor: # pragma: no cover
"""The absolute value, computed elementwise.
This docstring was adapted from that of numpy.absolute [1]_
Parameters
----------
x : ArrayLike
Input array.
out : Optional[Union[Tensor, ndarray]]
A location into which the result is stored. If provided, it must have
a shape that the inputs broadcast to. If not provided or None,
a freshly-allocated tensor is returned.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
nan_to_num : bool, optional (default=True)
If `True` then gradients that would store nans due to the presence of
zeros in `x` will instead store zeros in those places.
where : Mask
This condition is broadcast over the input. At locations where the
condition is True, the ``out`` tensor will be set to the ufunc result.
Elsewhere, the ``out`` tensor will retain its original value.
Note that if an uninitialized `out` tensor is created via the default
``out=None``, locations within it where the condition is False will
remain uninitialized.
dtype : Optional[DTypeLikeReals]
The dtype of the resulting tensor.
Returns
-------
absolute : Tensor
An ndarray containing the absolute value of
each element in `x`.
References
----------
.. [1] Retrieved from https://numpy.org/doc/stable/reference/generated/numpy.absolute.html
Examples
--------
>>> import mygrad as mg
>>> x = mg.array([-1.2, 1.2])
>>> mg.absolute([-1.2, 1.2])
Tensor([ 1.2, 1.2])
The absolute-value function is not differentiable at `x=0.0`.
By default the derivative at this point is treated as 0.
>>> x = mg.tensor([-2.0, 0.0, 2.0])
>>> mg.absolute(x).backward()
>>> x.grad
np.array([-1., 0., 1.])
However a more rigorous behavior can be enabled such that the
undefined derivative will be returned as `nan`.
>>> x = mg.tensor([-2.0, 0.0, 2.0])
>>> mg.absolute(x, nan_to_num=False).backward()
>>> x.grad
np.array([-1., nan, 1.])
Plot the function and its derivate over ``[-10, 10]``:
.. plot::
>>> import mygrad as mg
>>> import matplotlib.pyplot as plt
>>> x = mg.linspace(-5, 5, 100)
>>> y = mg.absolute(x)
>>> plt.title("absolute(x)")
>>> y.backward()
>>> plt.plot(x, x.grad, label="df/dx")
>>> plt.plot(x, y, label="f(x)")
>>> plt.legend()
>>> plt.grid()
>>> plt.show()
"""
...
abs = absolute
@ufunc_creator(Sqrt)
def sqrt(
x: ArrayLike,
out: Optional[Union[ndarray, Tensor]] = None,
*,
where: Mask = True,
dtype: DTypeLikeReals = None,
constant: Optional[bool] = None,
) -> Tensor: # pragma: no cover
"""The square root, elementwise.
This docstring was adapted from that of numpy.sqrt [1]_
Parameters
----------
x : ArrayLike
The values whose square-roots are required.
out : Optional[Union[Tensor, ndarray]]
A location into which the result is stored. If provided, it must have
a shape that the inputs broadcast to. If not provided or None,
a freshly-allocated tensor is returned.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
where : Mask
This condition is broadcast over the input. At locations where the
condition is True, the ``out`` tensor will be set to the ufunc result.
Elsewhere, the ``out`` tensor will retain its original value.
Note that if an uninitialized `out` tensor is created via the default
``out=None``, locations within it where the condition is False will
remain uninitialized.
dtype : Optional[DTypeLikeReals]
The dtype of the resulting tensor.
Returns
-------
y : ndarray
A tensor of the same shape as `x`, containing the positive
square-root of each element in `x`. Negative-valued inputs
produce nans.
Notes
-----
*sqrt* has--consistent with common convention--as its branch cut the
real "interval" [`-inf`, 0), and is continuous from above on it.
A branch cut is a curve in the complex plane across which a given
complex function fails to be continuous.
References
----------
.. [1] Retrieved from https://numpy.org/doc/stable/reference/generated/numpy.sqrt.html
Examples
--------
>>> import mygrad as mg
>>> mg.sqrt([1, 4, 9])
Tensor([ 1., 2., 3.])
>>> mg.sqrt([4, -1, mg.inf])
Tensor([ 2., nan, inf])
"""
...
@ufunc_creator(Cbrt)
def cbrt(
x: ArrayLike,
out: Optional[Union[ndarray, Tensor]] = None,
*,
where: Mask = True,
dtype: DTypeLikeReals = None,
constant: Optional[bool] = None,
) -> Tensor: # pragma: no cover
"""The cube root elementwise.
This docstring was adapted from that of numpy.cbrt [1]_
Parameters
----------
x : ArrayLike
The values whose cube-roots are computed.
out : Optional[Union[Tensor, ndarray]]
A location into which the result is stored. If provided, it must have
a shape that the inputs broadcast to. If not provided or None,
a freshly-allocated tensor is returned.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
where : Mask
This condition is broadcast over the input. At locations where the
condition is True, the ``out`` tensor will be set to the ufunc result.
Elsewhere, the ``out`` tensor will retain its original value.
Note that if an uninitialized `out` tensor is created via the default
``out=None``, locations within it where the condition is False will
remain uninitialized.
dtype : Optional[DTypeLikeReals]
The dtype of the resulting tensor.
Returns
-------
y : ndarray
A tensor of the same shape as `x`, containing the cube
cube-root of each element in `x`.
References
----------
.. [1] Retrieved from https://numpy.org/doc/stable/reference/generated/numpy.cbrt.html
Examples
--------
>>> import mygrad as mg
>>> mg.cbrt([1, 8, 27])
Tensor([ 1., 2., 3.])
"""
...
@ufunc_creator(Maximum)
def maximum(
x1: ArrayLike,
x2: ArrayLike,
out: Optional[Union[ndarray, Tensor]] = None,
*,
where: Mask = True,
dtype: DTypeLikeReals = None,
constant: Optional[bool] = None,
) -> Tensor: # pragma: no cover
"""Pair-wise maximum of tensor elements.
This docstring was adapted from that of numpy.maximum [1]_
Parameters
----------
x1, x2 : ArrayLike
The tensors holding the elements to be compared.
If ``x1.shape != x2.shape``, they must be broadcastable to a common
shape (which becomes the shape of the output).
out : Optional[Union[Tensor, ndarray]]
A location into which the result is stored. If provided, it must have
a shape that the inputs broadcast to. If not provided or None,
a freshly-allocated tensor is returned.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
where : Mask
This condition is broadcast over the input. At locations where the
condition is True, the ``out`` tensor will be set to the ufunc result.
Elsewhere, the ``out`` tensor will retain its original value.
Note that if an uninitialized `out` tensor is created via the default
``out=None``, locations within it where the condition is False will
remain uninitialized.
dtype : Optional[DTypeLikeReals]
The dtype of the resulting tensor.
Returns
-------
y : Tensor
The maximum of `x1` and `x2`, element-wise.
See Also
--------
minimum :
Element-wise minimum of two arrays, propagates NaNs.
Notes
-----
The maximum is equivalent to ``mg.where(x1 >= x2, x1, x2)`` when
neither x1 nor x2 are nans, but it is faster and does proper
broadcasting.
References
----------
.. [1] Retrieved from https://numpy.org/doc/stable/reference/generated/numpy.maximum.html
Examples
--------
>>> import mygrad as mg
>>> mg.maximum([2, 3, 4], [1, 5, 2])
Tensor([2, 5, 4])
>>> mg.maximum(mg.eye(2), [0.5, 2]) # broadcasting
Tensor([[ 1. , 2. ],
[ 0.5, 2. ]])
>>> mg.maximum([mg.nan, 0, mg.nan], [0, mg.nan, mg.nan])
Tensor([nan, nan, nan])
>>> mg.maximum(mg.Inf, 1)
Tensor(inf)
"""
...
@ufunc_creator(Minimum)
def minimum(
x1: ArrayLike,
x2: ArrayLike,
out: Optional[Union[ndarray, Tensor]] = None,
*,
where: Mask = True,
dtype: DTypeLikeReals = None,
constant: Optional[bool] = None,
) -> Tensor: # pragma: no cover
"""Pair-wise minimum of tensor elements.
This docstring was adapted from that of numpy.minimum [1]_
Parameters
----------
x1, x2 : ArrayLike
The tensors holding the elements to be compared.
If ``x1.shape != x2.shape``, they must be broadcastable to a common
shape (which becomes the shape of the output).
out : Optional[Union[Tensor, ndarray]]
A location into which the result is stored. If provided, it must have
a shape that the inputs broadcast to. If not provided or None,
a freshly-allocated tensor is returned.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
where : Mask
This condition is broadcast over the input. At locations where the
condition is True, the ``out`` tensor will be set to the ufunc result.
Elsewhere, the ``out`` tensor will retain its original value.
Note that if an uninitialized `out` tensor is created via the default
``out=None``, locations within it where the condition is False will
remain uninitialized.
dtype : Optional[DTypeLikeReals]
The dtype of the resulting tensor.
Returns
-------
y : Tensor
The minimum of `x1` and `x2`, element-wise.
See Also
--------
maximum :
Element-wise maximum of two arrays, propagates NaNs.
Notes
-----
The minimum is equivalent to ``mg.where(x1 <= x2, x1, x2)`` when
neither x1 nor x2 are NaNs, but it is faster and does proper
broadcasting.
References
----------
.. [1] Retrieved from https://numpy.org/doc/stable/reference/generated/numpy.minimum.html
Examples
--------
>>> import mygrad as mg
>>> mg.minimum([2, 3, 4], [1, 5, 2])
Tensor([1, 3, 2])
>>> mg.minimum(mg.eye(2), [0.5, 2]) # broadcasting
Tensor([[ 0.5, 0. ],
[ 0. , 1. ]])
>>> mg.minimum([mg.nan, 0, mg.nan],[0, mg.nan, mg.nan])
Tensor([nan, nan, nan])
>>> mg.minimum(-mg.Inf, 1)
Tensor(-inf)
"""
...
[docs]@implements_numpy_override()
def clip(
a: ArrayLike,
a_min: Union[ArrayLike, None],
a_max: Union[ArrayLike, None],
out: Optional[Union[np.ndarray, Tensor]] = None,
*,
constant: Optional[bool] = None,
) -> Tensor:
"""Clip (limit) the values in an array.
Given an interval, values outside the interval are clipped to
the interval edges. For example, if an interval of ``[0, 1]``
is specified, values smaller than 0 become 0, and values larger
than 1 become 1.
Equivalent to `mg.minimum(a_max, mg.maximum(a, a_min))``.
No check is performed to ensure ``a_min < a_max``.
This docstring was adapted from that of `numpy.clip`
Parameters
----------
a : ArrayLike
Array containing elements to clip.
a_min : Optional[float, ArrayLike]
Minimum value. If `None`, clipping is not performed on lower
interval edge. Not more than one of `a_min` and `a_max` may be
`None`.
a_max : Optional[float, ArrayLike]
Maximum value. If `None`, clipping is not performed on upper
interval edge. Not more than one of `a_min` and `a_max` may be
`None`. If `a_min` or `a_max` are ArrayLike, then the three
arrays will be broadcasted to match their shapes.
out : Optional[Union[ndarray, Tensor]]
A location into which the result is stored. If provided, it must have
a shape that the inputs broadcast to. If not provided or None, a
freshly-allocated tensor is returned.
constant : bool, optional(default=False)
If ``True``, the returned tensor is a constant (it
does not backpropagate a gradient)
Returns
-------
Tensor
A tensor with the elements of `a`, but where values
< `a_min` are replaced with `a_min`, and those > `a_max`
with `a_max`.
Examples
--------
>>> import mygrad as mg
>>> a = mg.arange(10)
>>> mg.clip(a, 1, 8)
Tensor([1, 1, 2, 3, 4, 5, 6, 7, 8, 8])
>>> a
Tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> mg.clip(a, [3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)
Tensor([3, 4, 2, 3, 4, 5, 6, 7, 8, 8])"""
if np.__version__ < "2.1.0" and a_min is None and a_max is None: # pragma: no cover
raise ValueError("`a_min` and `a_max` cannot both be set to `None`")
if a_min is not None:
a = maximum(a_min, a, out=out, constant=constant)
if a_max is not None:
a = minimum(a_max, a, out=out, constant=constant)
return mg.astensor(a)
@ufunc_creator(MatMul)
def matmul(
x1: ArrayLike,
x2: ArrayLike,
out: Optional[Union[np.ndarray, Tensor]] = None,
*,
dtype: DTypeLikeReals = None,
constant: Optional[bool] = None,
) -> Tensor: # pragma: no cover
r"""
Matrix product of two tensors:
``matmul(x, y)`` is equivalent to ``x @ y``.
This documentation was adapted from ``numpy.matmul``
The behavior depends on the arguments in the following way.
- If both arguments are 2-D they are multiplied like conventional
matrices.
- If either argument is N-D, N > 2, it is treated as a stack of
matrices residing in the last two indexes and broadcast accordingly.
- If the first argument is 1-D, it is promoted to a matrix by
prepending a 1 to its dimensions. After matrix multiplication
the prepended 1 is removed.
- If the second argument is 1-D, it is promoted to a matrix by
appending a 1 to its dimensions. After matrix multiplication
the appended 1 is removed.
Multiplication by a scalar is not allowed, use ``*`` instead. Note that
multiplying a stack of matrices with a vector will result in a stack of
vectors, but matmul will not recognize it as such.
``matmul`` differs from ``numpy.dot`` in two important ways.
- Multiplication by scalars is not allowed.
- Stacks of matrices are broadcast together as if the matrices
were elements.
Parameters
----------
x1 : ArrayLike
x2 : ArrayLike
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
dtype : Optional[DTypeLikeReals]
The dtype of the resulting tensor.
out : Optional[Union[ndarray, Tensor]]
A location into which the result is stored. If provided, it must have
a shape that the inputs broadcast to. If not provided or None,
a freshly-allocated tensor is returned.
Returns
-------
output : mygrad.Tensor
Returns the matrix product of ``x1`` and `x2``.
Raises
------
ValueError
If :
- The last dimension of ``x1`` is not the same size as
the second-to-last dimension of ``x2``.
- If scalar value is passed.
See Also
--------
einsum : Einstein summation convention.
Notes
-----
The matmul function implements the semantics of the `@` operator introduced
in Python 3.5 following PEP465.
Examples
--------
For two 2D tensors, ``matmul(a, b)`` is the matrix product :math:`\sum_{j}{A_{ij} B_{jk}} = F_{ik}`:
>>> import mygrad as mg
>>> a = [[1, 0], [0, 1]]
>>> b = [[4, 1], [2, 2]]
>>> mg.matmul(a, b)
Tensor([[4, 1],
[2, 2]])
For 2-D mixed with 1-D, the result is the matrix-vector product, :math:`\sum_{j}{A_{ij} B_{j}} = F_{i}`:
>>> a = [[1, 0], [0, 1]]
>>> b = [1, 2]
>>> mg.matmul(a, b)
Tensor([1, 2])
Broadcasting is conventional for stacks of arrays. Here ``a`` is treated
like a stack of three 5x6 matrices, and the 6x4 matrix ``b`` is broadcast
matrix-multiplied against each one. This produces a shape-(3, 5, 4) tensor
as a result.
>>> a = mg.arange(3*5*6).reshape((3,5,6))
>>> b = mg.arange(6*4).reshape((6,4))
>>> mg.matmul(a,b).shape
(3, 5, 4)
Scalar multiplication raises an error.
>>> mg.matmul(a, 3)
Traceback (most recent call last):
...
ValueError: Scalar operands are not allowed, use '*' instead"""
...
[docs]def multi_matmul(tensors: ArrayLike, *, constant: Optional[bool] = None) -> Tensor:
"""
Matrix product of two or more tensors calculated in the optimal ordering
Parameters
----------
tensors: Sequence[array_like]
The sequence of tensors to be matrix-multiplied.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
mygrad.Tensor
Returns the matrix product of the tensors provided
Extended Summary
----------------
This documentation was adapted from ``numpy.linalg.multi_dot``
Compute the matrix multiplication of two or more arrays in a single function
call, while automatically selecting the fastest evaluation order.
``multi_matmul`` chains ``matmul`` and uses optimal parenthesization [1]_ [2]_.
Depending on the shapes of the matrices, this can speed up the multiplication a lot.
If the first argument is 1-D it is treated as a row vector.
If the last argument is 1-D it is treated as a column vector.
The other arguments must be 2-D or greater.
Think of `multi_dot` as an optimized version of::
def multi_dot(tensors): return functools.reduce(mg.matmul, tensors)
Raises
------
ValueError
If ``tensors`` contains less than two array_like items.
ValueError
If ``tensor`` other than the first or last is less than two dimensional
See Also
--------
matmul : matrix multiplication with two arguments.
References
----------
.. [1] Cormen, "Introduction to Algorithms", Chapter 15.2, p. 370-378
.. [2] http://en.wikipedia.org/wiki/Matrix_chain_multiplication
Notes
-----
The cost for a matrix multiplication can be calculated with the
following function::
def cost(A, B):
return A.shape[0] * A.shape[1] * B.shape[1]
Let's assume we have three matrices :math:`A_{10x100}, B_{100x5}, C_{5x50}`.
The costs for the two different parenthesizations are as follows::
cost((AB)C) = 10*100*5 + 10*5*50 = 5000 + 2500 = 7500
cost(A(BC)) = 10*100*50 + 100*5*50 = 50000 + 25000 = 75000
Examples
--------
``multi_matmul`` allows you to write:
>>> from mygrad.math.misc.funcs import matmul >>> from mygrad import multi_matmul, Tensor
>>> import numpy as np
>>> # Prepare some random tensors
>>> A = Tensor(np.random.random((10000, 100)))
>>> B = Tensor(np.random.random((100, 1000)))
>>> C = Tensor(np.random.random((1000, 5)))
>>> D = Tensor(np.random.random((5, 333)))
>>> # the actual matrix multiplication
>>> multi_matmul([A, B, C, D]) # computes (A @ (B @ C)) @ D
instead of:
>>> matmul(matmul(matmul(A, B), C), D)
>>> # or
>>> A @ B @ C @ D
"""
for a in tensors:
if not (1 <= a.ndim <= 2):
raise ValueError(
"%d-dimensional tensor given. Tensor must be one or two-dimensional"
% (a.ndim,)
)
n = len(tensors)
if n < 2:
raise ValueError("Expecting at least two arrays.")
elif n == 2:
return matmul(tensors[0], tensors[1], constant=constant)
tensors = [a if isinstance(a, Tensor) else np.asarray(a) for a in tensors]
# save original ndim to reshape the result array into the proper form later
ndim_first, ndim_last = tensors[0].ndim, tensors[-1].ndim
# Explicitly convert vectors to 2D arrays to keep the logic of this function simpler
if tensors[0].ndim == 1:
tensors[0] = mg.expand_dims(
tensors[0],
axis=0,
constant=tensors[0].constant if isinstance(tensors[0], Tensor) else True,
)
if tensors[-1].ndim == 1:
tensors[-1] = mg.expand_dims(
tensors[-1],
axis=1,
constant=tensors[-1].constant if isinstance(tensors[-1], Tensor) else True,
)
if n == 3:
result = _multi_matmul_three(
tensors[0], tensors[1], tensors[2], constant=constant
)
else:
order = _multi_matmul_chain_order(tensors)
result = _multi_matmul(tensors, order, 0, n - 1, constant=constant)
# return proper shape since we possibly added dimensions to the first
# and last arrays
if ndim_first == 1 and ndim_last == 1:
result = result[0, 0]
return result
elif ndim_first == 1 or ndim_last == 1:
result = result.reshape(-1)
return result
else:
return result
def _multi_matmul_three(A, B, C, *, constant=None) -> Tensor:
"""
Find the best order for three arrays and do the multiplication.
"""
a0, a1b0 = A.shape[-2:]
b1c0, c1 = C.shape[-2:]
cost1 = a0 * b1c0 * (a1b0 + c1)
cost2 = a1b0 * c1 * (a0 + b1c0)
if cost1 < cost2:
return matmul(matmul(A, B, constant=constant), C, constant=constant)
else:
return matmul(A, matmul(B, C, constant=constant), constant=constant)
def _multi_matmul_chain_order(arrays):
"""
Return a np.array that encodes the optimal order of multiplications.
The optimal order array is then used by `_multi_matmul()` to do the
multiplication.
The implementation CLOSELY follows Cormen, "Introduction to Algorithms",
Chapter 15.2, p. 370-378. Note that Cormen uses 1-based indices.
cost[i, j] = min([
cost[prefix] + cost[suffix] + cost_mult(prefix, suffix)
for k in range(i, j)])
"""
n = len(arrays)
# p stores the dimensions of the matrices
# Example for p: A_{10x100}, B_{100x5}, C_{5x50} --> p = [10, 100, 5, 50]
# Using -2 to generalize for shapes that are more than 2 dimensions
p = [a.shape[-2] for a in arrays] + [arrays[-1].shape[-1]]
# m is a matrix of costs of the subproblems
# m[i,j]: min number of scalar multiplications needed to compute A_{i..j}
m = np.zeros((n, n), dtype=np.double)
# s is the actual ordering
# s[i, j] is the value of k at which we split the product A_i..A_j
s = np.empty((n, n), dtype=np.intp)
for ind in range(1, n):
for i in range(n - ind):
j = i + ind
m[i, j] = np.inf
for k in range(i, j):
q = m[i, k] + m[k + 1, j] + p[i] * p[k + 1] * p[j + 1]
if q < m[i, j]:
m[i, j] = q
s[i, j] = k # Note that Cormen uses 1-based index
return s
def _multi_matmul(arrays, order, i, j, *, constant=None) -> Tensor:
"""Actually do the multiplication with the given order."""
if i == j:
return arrays[i]
else:
return matmul(
_multi_matmul(arrays, order, i, order[i, j], constant=constant),
_multi_matmul(arrays, order, order[i, j] + 1, j, constant=constant),
constant=constant,
)