"""
This module defines the base tensor class along with all of its essential
attributes and special methods. Public math methods, e.g. ``sum``, ``mean``,
etc., are bound to the Tensor class in ``mygrad.__init__.py``.
"""
from collections import deque
from numbers import Integral, Number
from typing import (
TYPE_CHECKING,
Any,
Callable,
Deque,
Dict,
Iterator,
List,
Optional,
Sequence,
Set,
Tuple,
Type,
TypeVar,
Union,
)
from weakref import ReferenceType, finalize
import numpy as np
import mygrad._utils.duplicating_graph as _dup
import mygrad._utils.graph_tracking as _track
import mygrad._utils.lock_management as _mem
from mygrad._numpy_version import NP_IS_V2
from mygrad._tensor_core_ops.indexing import GetItem, SetItem
from mygrad._utils import WeakRef, WeakRefIterable, collect_all_tensors_and_clear_grads
from mygrad.errors import DisconnectedView
from mygrad.math.arithmetic.ops import (
Add,
Divide,
Multiply,
Negative,
Positive,
Power,
Square,
Subtract,
)
from mygrad.math.misc.ops import MatMul
from mygrad.math.sequential.ops import (
CumProd,
CumSum,
Max,
Mean,
Min,
Prod,
StdDev,
Sum,
Variance,
)
from mygrad.operation_base import Operation, _NoValue
from mygrad.tensor_manip.array_shape.ops import Flatten, Ravel, Reshape, Squeeze
from mygrad.tensor_manip.transpose_like.ops import (
MoveAxis,
SwapAxes,
Tensor_Transpose_Property,
Transpose,
)
from mygrad.typing import ArrayLike, DTypeLike, DTypeLikeReals, Index, Shape
__all__ = ["Tensor", "asarray", "astensor", "implements_numpy_override"]
if TYPE_CHECKING: # pragma: no cover
from mygrad.ufuncs._ufunc_creators import ufunc as mygrad_ufunc
T = TypeVar("T")
CONSTANT_ONLY_DTYPES = (np.integer, np.bool_)
def _resolve_constant(*others: Any, constant: Optional[bool]) -> Optional[bool]:
"""Determines if `constant` should be resolved to True based on `others`.
Otherwise defers to a tensor-creator to handle further resolutions based on dtype.
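Illustrative behavior (a sketch; this is an internal helper):
- ``_resolve_constant(2.0, np.arange(3), constant=None)`` -> ``True`` (no variable tensors among the inputs)
- ``_resolve_constant(Tensor([1.0]), constant=None)`` -> ``None`` (defer to dtype-based inference)
- ``_resolve_constant(Tensor([1.0]), constant=True)`` -> ``True`` (an explicit ``constant`` wins)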
"""
if constant is not None:
return constant
for other in others:
if isinstance(other, Tensor) and not other.constant:
# let subsequent tensor casting infer constant from dtype
return None
# all inputs are constants
return True
def asarray(a: ArrayLike, dtype: DTypeLike = None, order: str = None) -> np.ndarray:
"""Convert the input to an array.
This docstring is adapted from that of ``numpy.asarray``
Parameters
----------
a : array_like
Input data, in any form - including a mygrad tensor - that can be converted to an array. This
includes lists, lists of tuples, tuples, tuples of tuples, tuples
of lists and ndarrays.
dtype : data-type, optional
By default, the data-type is inferred from the input data.
order : {'C', 'F'}, optional
Whether to use row-major (C-style) or
column-major (Fortran-style) memory representation.
Defaults to 'C'.
Returns
-------
out : ndarray
Array interpretation of `a`. No copy is performed if the input
is already an ndarray with matching dtype and order. If `a` is a
subclass of ndarray, a base class ndarray is returned.
Examples
--------
Convert a list into an array:
>>> import mygrad as mg
>>> a = [1, 2]
>>> mg.asarray(a)
array([1, 2])
Convert a tensor into an array. No copy of the
underlying numpy array is created:
>>> t = mg.Tensor([1, 2.])
>>> mg.asarray(t)
array([1., 2.])
>>> t.data is mg.asarray(t)
True
Existing arrays are not copied:
>>> a = np.array([1, 2])
>>> mg.asarray(a) is a
True
If `dtype` is set, array is copied only if dtype does not match:
>>> a = np.array([1, 2], dtype=np.float32)
>>> mg.asarray(a, dtype=np.float32) is a
True
>>> mg.asarray(a, dtype=np.float64) is a
False
Contrary to `asanyarray`, ndarray subclasses are not passed through:
>>> issubclass(np.recarray, np.ndarray)
True
>>> a = np.array([(1.0, 2), (3.0, 4)], dtype='f4,i4').view(np.recarray)
>>> mg.asarray(a) is a
False
>>> np.asanyarray(a) is a
True
"""
if isinstance(a, Tensor):
a = a.data # faster than passing the tensor directly
return np.asarray(a, dtype=dtype, order=order)
def tensor(
arr_like: ArrayLike,
dtype: DTypeLikeReals = None,
*,
constant: Optional[bool] = None,
copy: bool = True,
ndmin: int = 0,
) -> "Tensor":
"""
Create a tensor.
This documentation was adapted from that of ``numpy.array``.
Parameters
----------
arr_like : array_like
A tensor, any object exposing the array interface, an object whose
__array__ method returns a tensor, a real number, any (nested) sequence.
dtype : data-type, optional
The desired data-type for the tensor. Restricted to integer and float type.
If not specified, then the type will be determined as the minimum type required
to hold the objects in the sequence.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant_tensor.grad`` will always
return ``None``).
If a new tensor is returned:
- Defaults to ``False`` for float-type data.
- Defaults to ``True`` for integer-type data.
copy : bool, optional
If true (default), or if a copy is needed to satisfy any of the
other requirements (``dtype``, ``constant``, etc.) then a new tensor
is created from copied data. Otherwise the tensor will be returned
unchanged.
ndmin : int, optional
Specifies the minimum number of dimensions that the resulting
tensor should have. Ones will be prepended to the shape as
needed to meet this requirement.
Returns
-------
out : Tensor
A tensor satisfying the specified requirements.
See Also
--------
empty_like : Return an empty tensor with shape and type of input.
ones_like : Return a tensor of ones with shape and type of input.
zeros_like : Return a tensor of zeros with shape and type of input.
full_like : Return a new tensor with shape of input filled with value.
empty : Return a new uninitialized tensor.
ones : Return a new tensor setting values to one.
zeros : Return a new tensor setting values to zero.
full : Return a new tensor of given shape filled with value.
Examples
--------
>>> import mygrad as mg
>>> mg.tensor([1, 2, 3])
Tensor([1, 2, 3])
Upcasting:
>>> mg.tensor([1, 2, 3.0])
Tensor([ 1., 2., 3.])
More than one dimension:
>>> mg.tensor([[1, 2], [3, 4]])
Tensor([[1, 2],
[3, 4]])
Minimum dimensions 2:
>>> mg.tensor([1, 2, 3], ndmin=2)
Tensor([[1, 2, 3]])
Type provided:
>>> mg.tensor([1, 2, 3], dtype="float32")
Tensor([1., 2., 3.], dtype=float32)
"""
if isinstance(arr_like, Tensor) and copy is False:
if (constant is None or arr_like.constant is constant) and (
dtype is None or (arr_like.dtype == np.dtype(dtype))
):
if not isinstance(ndmin, Integral):
raise TypeError(
f"TypeError: `ndmin` requires a non-negative integer (got type {type(ndmin)})"
)
if ndmin < 0:
ndmin = 0 # numpy does this
if ndmin > arr_like.ndim:
arr_like = arr_like[(*(None for _ in range(ndmin - arr_like.ndim)),)]
# return tensor as-is
return arr_like
return Tensor(arr_like, dtype=dtype, constant=constant, copy=copy, ndmin=ndmin)
def astensor(
t: ArrayLike, dtype: DTypeLikeReals = None, *, constant: Optional[bool] = None
) -> "Tensor":
"""Convert the input to a tensor.
A tensor `t` is returned unchanged - its gradient and computational
graph state preserved - if dtype and constant are compatible.
A copy of the underlying numpy array is created only if dtype is
incompatible or if a non-constant tensor is being created from a constant.
Parameters
----------
t : array_like
Input data, in any form that can be converted to a tensor. This
includes lists, lists of tuples, tuples, tuples of tuples, tuples
of lists and ndarrays.
dtype : data-type, optional
By default, the data-type is inferred from the input data.
constant : Optional[bool]
By default, `constant` is inferred from `t` if `t` is a tensor,
otherwise it defaults to `False`.
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
out : Tensor
Tensor interpretation of `t`. No copy is performed if the input
is already a tensor with matching dtype and constant-flag.
Examples
--------
Convert a list into an array:
>>> import mygrad as mg
>>> import numpy as np
>>> t = [1, 2]
>>> mg.astensor(t)
Tensor([1, 2])
Convert an array into a tensor. No copy of the
underlying numpy array is created:
>>> a = np.array([1.0, 2.0])
>>> mg.astensor(a)
Tensor([1., 2.])
>>> a is mg.astensor(a).data
True
Existing tensors are not copied and their gradients and
computational graphs are preserved:
>>> t1 = 2 * mg.tensor([1, 2])
>>> t2 = mg.astensor(t1)
>>> t1 is t2
True
>>> t1.creator is t2.creator
True
If `dtype` is set, a new tensor is created - with copied data - only
if dtype does not match:
>>> t = mg.Tensor([1, 2], dtype=np.float32)
>>> mg.astensor(t, dtype=np.float32) is t
True
>>> mg.astensor(t, dtype=np.float64) is t
False
Otherwise, if `constant` is set, a new tensor is created (with
no copy of the underlying data) only if constant doesn't match.
>>> t1 = mg.tensor([1., 2.], constant=False)
>>> mg.astensor(t1, constant=False) is t1
True
>>> mg.astensor(t1, constant=True) is t1
False
>>> mg.astensor(t1, constant=True).data is t1.data
True
"""
return tensor(t, dtype=dtype, constant=constant, copy=False, ndmin=0)
_REGISTERED_UFUNC: Dict[np.ufunc, Type["mygrad_ufunc"]] = {}
_REGISTERED_DIFFERENTIABLE_NUMPY_FUNCS: Dict[
Callable[..., np.ndarray], Callable[..., "Tensor"]
] = {}
_REGISTERED_BOOL_ONLY_UFUNC: Set[np.ufunc] = {
np.isnan,
np.isfinite,
np.isinf,
np.isnat,
np.signbit,
np.logical_not,
np.logical_and,
np.logical_or,
np.logical_xor,
np.greater,
np.greater_equal,
np.less,
np.less_equal,
np.equal,
np.not_equal,
}
# These are ufuncs that users might mistake for being differentiable functions;
# for this reason we make explicit the fact that only constant tensors are permitted
# in these operations.
_REGISTERED_CONST_ONLY_UFUNC = {
np.floor_divide,
np.remainder,
np.mod,
np.fmod,
np.divmod,
np.rint,
np.sign,
np.floor,
np.ceil,
np.trunc,
}
_REGISTERED_NO_DIFF_NUMPY_FUNCS: Set[Callable] = {
np.allclose,
np.bincount,
np.can_cast,
np.copyto,
np.isclose,
np.may_share_memory,
np.min_scalar_type,
np.result_type,
np.shares_memory,
np.shape,
}
class implements_numpy_override:
"""Registers a mygrad-based override for a NumPy function of the same name, via
the standard __array_function__ interface. [1]_
Examples
--------
>>> @implements_numpy_override() # np.reshape to be overridden
... def reshape(x, shape):
... # a mygrad-based implementation of numpy.reshape
... print("hello world")
>>> import numpy as np
>>> import mygrad as mg
>>> np.reshape(mg.tensor(1.), 2)
hello world
You can also provide the numpy function explicitly:
>>> import numpy as np
>>> @implements_numpy_override(np.reshape) # np.reshape to be overridden
... def some_function(x, shape):
... pass
References
----------
.. [1] https://numpy.org/devdocs/reference/arrays.classes.html?#numpy.class.__array_function__
"""
__slots__ = ("numpy_func",)
def __init__(self, numpy_func: Optional[Callable] = None):
# if None, `numpy_func` is inferred from the name of the decorated function
self.numpy_func = numpy_func
def __call__(self, wrapped_func: T) -> T:
if self.numpy_func is None:
try:
self.numpy_func = getattr(np, wrapped_func.__name__)
except AttributeError:
raise AttributeError(
f"@implements_numpy_override tried to register an override for the function numpy.{wrapped_func.__name__}, but no "
f"such function exists."
)
_REGISTERED_DIFFERENTIABLE_NUMPY_FUNCS[self.numpy_func] = wrapped_func
return wrapped_func
class _ConstantOnly(ValueError):
pass
def _as_constant_array(t: Union["Tensor", np.ndarray]) -> np.ndarray:
"""Passes through all non-tensor objects and constant tensors. Raises on
non-constant tensors."""
if isinstance(t, Tensor):
if t.constant is False:
raise _ConstantOnly()
return t.data
return t
class Tensor:
"""A numpy-array-like object capable of serving as a node in a computational
graph that supports back-propagation of derivatives via the chain rule.
See the Examples section of the docstring for more details.
Like the numpy array, mygrad's tensor stores data as an N-dimensional array
and provides an interface for accessing, setting, and performing vectorized
operations along the various dimensions of this array. Vectorized operations
support numpy-style broadcasting semantics.
The contents of a tensor can be accessed and written to using all variety
of basic and advanced indexing (along with mixtures of the two).
Creating a Tensor
-----------------
``mygrad.Tensor`` can be passed any "array-like" object of numerical data.
This includes numbers, sequences (e.g. lists), nested sequences, numpy-ndarrays,
and other mygrad-tensors. mygrad also provides familiar numpy-style tensor-creation
functions (e.g. ``mygrad.arange``, ``mygrad.linspace``, etc.)
>>> import mygrad as mg
>>> mg.tensor(2.3) # creating a 0-dimensional tensor
Tensor(2.3)
>>> mg.tensor(np.array([1.2, 3.0])) # casting a numpy-array to a tensor
Tensor([1.2, 3.0])
>>> mg.tensor([[1, 2], [3, 4]]) # creating a 2-dimensional tensor
Tensor([[1, 2],
[3, 4]])
>>> mg.arange(4) # using numpy-style tensor creation functions
Tensor([0, 1, 2, 3])
Creating a non-constant tensor will copy array data:
>>> import numpy as np
>>> arr = np.arange(10.)
>>> t_var = mg.tensor(arr, constant=False)
>>> np.shares_memory(arr, t_var)
False
Creating a constant tensor will not copy the array data:
>>> t_const = mg.tensor(arr, constant=True)
>>> np.shares_memory(arr, t_const)
True
Forward and Back-Propagation
----------------------------
Let's construct a computational graph consisting of two zero-dimensional
tensors, ``x`` and ``y``, which are used to compute an output tensor,
``ℒ``. This is a "forward pass imperative" style for creating a computational
graph - the graph is constructed as we carry out the forward-pass computation.
>>> x = mg.tensor(3.0)
>>> y = mg.tensor(2.0)
>>> ℒ = 2 * x + y ** 2
Invoking ``ℒ.backward()`` signals the computational graph to
compute the total-derivative of ``ℒ`` with respect to each one of its dependent
variables. I.e. ``x.grad`` will store ``dℒ/dx`` and ``y.grad`` will store
``dℒ/dy``. Thus we have back-propagated a gradient from ``ℒ`` through our graph.
Each tensor of derivatives is computed elementwise. That is, if `x = Tensor([x0, x1, x2])`,
then dℒ/dx represents `[dℒ/d(x0), dℒ/d(x1), dℒ/d(x2)]`
>>> ℒ.backward() # computes dℒ/dx and dℒ/dy
>>> x.grad # dℒ/dx
array(2.)
>>> y.grad # dℒ/dy
array(4.)
>>> ℒ.grad # dℒ/dℒ
array(1.)
Once the gradients are computed, the computational graph containing ``x``,
``y``, and ``ℒ`` is cleared automatically. Additionally, involving any
of these tensors in a new computational graph will automatically null
their gradients.
>>> 2 * x
Tensor(6.)
>>> x.grad is None
True
Or, you can use the ``tensor.null_grad()`` method to manually clear a
tensor's gradient
>>> y.null_grad()
Tensor(2.)
>>> y.grad is None
True
Accessing the Underlying NumPy Array
------------------------------------
``mygrad.Tensor`` is a thin wrapper on ``numpy.ndarray``. A tensor's
underlying numpy-array can be accessed via ``.data``:
>>> x = mg.tensor([1, 2])
>>> x.data
array([1, 2])
**Do not modify this underlying array**. Any in-place modifications made to this
array will not be tracked by any computational graph involving that tensor, thus
back-propagation through that tensor will likely be incorrect.
Producing a "View" of a Tensor
------------------------------
MyGrad's tensors exhibit the same view semantics and memory-sharing relationships
as NumPy arrays. I.e. any (non-scalar) tensor produced via basic indexing will share
memory with its parent.
>>> x = mg.tensor([1., 2., 3., 4.])
>>> y = x[:2] # the view: Tensor([1., 2.])
>>> y.base is x
True
>>> np.shares_memory(x, y)
True
Mutating shared data will propagate through views:
>>> y *= -1
>>> x
Tensor([-1., -2., 3., 4.])
>>> y
Tensor([-1., -2.])
And this view relationship will also manifest between the tensors' gradients
>>> (x ** 2).backward()
>>> x.grad
array([-2., -4., 6., 8.])
>>> y.grad
array([-2., -4.])
In-Place Operations are not Efficient
-------------------------------------
It is important to note that, while MyGrad's view semantics promote a rich parity
with NumPy, certain aspects should be avoided in the interest of optimized performance.
Namely, performing in-place operations on tensors is generally not more efficient than
their non-mutating counterparts.
This is because MyGrad has to track the state of tensors that are involved in a computational
graph. Thus a mutated tensor must have its pre-augmented state stored for future reference; this
defeats the performance benefit of writing to an array's memory in-place. This is especially
inefficient if you are mutating a tensor involved with multiple views of the same memory
(by contrast, producing a view of a tensor *is* efficient, as one would expect).
Thus MyGrad supports these NumPy-like in-place semantics not for performance
reasons, but to support convenient and familiar code-patterns and to enable one to
port NumPy code to MyGrad (or, in the future, inject MyGrad tensors into NumPy!!) and get
the exact same behavior.
A final note: MyGrad's in-place operations, when run under :func:`~mygrad.no_autodiff` mode,
do not incur the extra costs noted above, and thus will enjoy the usual performance
benefits of in-place operations.
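For example (a sketch of this usage pattern; both statements mutate ``x`` identically,
but only the first incurs graph-tracking overhead):

>>> x = mg.tensor([1., 2., 3.])
>>> x[:2] *= -1  # tracked: pre-mutation state is stored so backprop stays correct
>>> with mg.no_autodiff:
...     x[:2] *= -1  # untracked: behaves like a plain NumPy-style in-place write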
"""
__array_priority__ = 15.0
def __array_ufunc__(
self, ufunc: Type[np.ufunc], method: str, *inputs: ArrayLike, **kwargs
) -> Union["Tensor", np.ndarray]:
"""An interface provided by NumPy to override the behavior of its ufuncs [1]_.
MyGrad implements its own ufuncs for all differentiable NumPy ufuncs.
Non-differentiable numpy ufuncs simply get called on the underlying arrays of tensors and
will return ndarrays.
The differentiability - or lack thereof - of ufuncs may not be obvious to end users.
Thus potentially ambiguous ufuncs (e.g. `numpy.ceil`) will be made to raise on non-constant
tensors so that the lack of differentiability is made obvious to the users. This design decision
is made in the same spirit as requiring integer-dtype tensors be constant.
References
----------
.. [1] https://numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
Examples
--------
NumPy ufuncs that represent differentiable operations are overloaded by MyGrad tensors
so that they support backprop
>>> import mygrad as mg
>>> import numpy as np
>>> x = mg.tensor([1., 2.])
This calls ``mygrad.sin`` under the hood.
>>> np.sin(x) # returns a tensor
Tensor([0.84147098, 0.90929743])
>>> np.sin(x).backward()
>>> x.grad # stores d(sin(x))/dx @ x = [1., 2.]
array([ 0.54030231, -0.41614684])
Specifying a dtype, a ``where`` mask, and an in-place target (via ``out``, given as
an array or a tensor) are all supported.
>>> x = mg.tensor([1., 2.])
>>> y = mg.tensor([-1., -1.])
>>> np.exp(x, where=[False, True], out=y)
Tensor([-1. , 7.3890561])
>>> y.backward()
>>> x.grad
array([0. , 7.3890561])
Non-differentiable NumPy ufuncs simply operate on the ndarrays that are wrapped
by MyGrad tensors; these return ndarrays, which will appropriately and explicitly
serve as constants elsewhere in a computational graph.
>>> x = mg.tensor([1., 2.])
>>> np.less_equal(x, 1)
array([ True, False])
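Potentially-ambiguous non-differentiable ufuncs (e.g. ``numpy.floor``) raise when passed
non-constant tensors; the following illustrates that restriction (the exact error text
may vary):

>>> np.floor(mg.tensor([1.5, 2.5]))
Traceback (most recent call last):
    ...
ValueError: <ufunc 'floor'> cannot involve non-constant mygrad tensors.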
"""
out = kwargs.pop("out", (None,))
if len(out) > 1: # pragma: no cover
raise ValueError(
"mygrad does not support in-place operations with more that one target"
)
(out,) = out
out: Optional[Union[np.ndarray, "Tensor"]]
try:
# differentiable ufunc implemented by mygrad
return getattr(_REGISTERED_UFUNC[ufunc], method)(*inputs, **kwargs, out=out)
except KeyError:
pass
# non-differentiable ufuncs get called on numpy arrays stored by tensors
if ufunc in _REGISTERED_BOOL_ONLY_UFUNC:
caster = asarray
elif ufunc in _REGISTERED_CONST_ONLY_UFUNC:
# the presence of non-constant tensors will raise
caster = _as_constant_array
else: # pragma: no cover
return NotImplemented
try:
if out is not None:
kwargs["out"] = caster(out)
# returns ndarray
return getattr(ufunc, method)(*(caster(t) for t in inputs), **kwargs)
except _ConstantOnly:
raise ValueError(
f"{repr(ufunc)} cannot involve non-constant mygrad tensors."
)
def __array_function__(
self, func: Callable[..., np.ndarray], types, args, kwargs
) -> Union["Tensor", np.ndarray]:
if func in _REGISTERED_DIFFERENTIABLE_NUMPY_FUNCS:
return _REGISTERED_DIFFERENTIABLE_NUMPY_FUNCS[func](*args, **kwargs)
elif func in _REGISTERED_NO_DIFF_NUMPY_FUNCS:
return func(
*(t.data if isinstance(t, Tensor) else t for t in args),
**{
k: (v.data if isinstance(v, Tensor) else v)
for k, v in kwargs.items()
},
)
else: # pragma: no cover
return NotImplemented
def __array__(
self, dtype: DTypeLike = None, copy: Optional[bool] = None
) -> np.ndarray:
if NP_IS_V2:
return np.asarray(self.data, dtype=dtype, copy=copy)
else: # pragma: no cover
if copy is None:
copy = False
return np.array(self.data, dtype=dtype, copy=copy)
def __init__(
self,
x: ArrayLike,
*,
dtype: DTypeLikeReals = None,
constant: Optional[bool] = None,
copy: bool = True,
ndmin: int = 0,
_creator: Optional[Operation] = None,
_base: Optional["Tensor"] = None,
):
"""
Parameters
----------
x : ArrayLike
Input data, in any form that can be converted to an array. This
includes numbers, sequences, nested sequences, numpy-ndarrays,
and mygrad-tensors.
dtype : DTypeLikeReals
`int`, `float`, or a real-valued numpy data type. By default the
data type is inferred from ``x`` via ``numpy.asarray(x)``.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. `self.grad` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
copy : Optional[bool]
Determines if the incoming array-data will be copied.
ndmin : int, optional
Specifies the minimum number of dimensions that the resulting
array should have. Ones will be prepended to the shape as
needed to meet this requirement.
Notes
-----
The following are parameters reserved only for internal use:
_creator : Optional[mygrad.Operation]
The operation-instance whose forward pass produced `self`. Should not
be set manually by users.
_base : Optional[Tensor]
Points to the tensor that ``self`` shares memory with.
"""
if constant is not None and not isinstance(constant, bool):
raise TypeError(f"`constant` must be a boolean value, got: {constant}")
self._creator: Optional[Operation] = _creator
if not NP_IS_V2: # pragma: no cover
self.data = np.array(x, dtype=dtype, copy=copy, ndmin=ndmin)
else:
if copy is False:
self.data = np.asarray(x, dtype=dtype)
if not isinstance(ndmin, Integral):
raise TypeError(
f"'{type(ndmin)}' object cannot be interpreted as an integer"
)
if ndmin and self.data.ndim < ndmin:
self.data = self.data[
(*(None for _ in range(ndmin - self.data.ndim)),)
]
else:
self.data = np.array(x, dtype=dtype, copy=copy, ndmin=ndmin)
dtype = self.data.dtype.type
is_float = issubclass(dtype, np.floating) # faster than `numpy.issubdtype`
if not is_float and _track.TRACK_GRAPH:
# No need to constrain dtypes if we aren't tracking the graph.
# Also, it is nice to enable complex arithmetic through mygrad
# functions that are wrapped in no_autodiff
if not issubclass(dtype, CONSTANT_ONLY_DTYPES):
raise TypeError(
f"Tensor data must be of an floating type, integer type, or boolean type, "
f"received {dtype}"
)
elif constant is False:
raise ValueError("Integer-valued tensors must be treated as constants.")
if constant is None:
# non-float: default constant -> True
# float: default constant -> False
constant = not is_float
self._constant = constant
self._grad = None # type: Union[None, np.ndarray]
# track all operations that this tensor participates in
self._ops: Set[WeakRef[Operation]] = set()
# base points to the initial tensor that owns the memory of this
# tensor
self._base = _base # type: Optional[Tensor]
# stores all of the tensors that are a view of this tensor
self._view_children = WeakRefIterable() # type: WeakRefIterable[Tensor]
# Used to reflect the view of the gradient associated with that of `self.base`.
# This is a means of distinguishing between the gradient set on `self` as
# part of backpropagation and the view of the gradient of its base.
self._view_grad: Optional[np.ndarray] = None
@property
def grad(self) -> Optional[np.ndarray]:
"""
Returns the derivative of ``ℒ`` with respect to this tensor.
``ℒ`` is the terminal node in the computational graph from which
``ℒ.backward()`` was invoked.
If this tensor is a view of another tensor then their gradients
will exhibit the same memory-sharing relationship as their data.
Returns
-------
dℒ/dx: numpy.ndarray
The gradient of the terminal node in a computational graph
with respect to this tensor. The shape of this numpy array
matches ``self.shape``
Examples
--------
>>> import mygrad as mg
>>> x = mg.Tensor([1.0, 2.0])
Prior to backpropagation tensors have ``None`` set for their gradients.
>>> x.grad is None
True
Now we trigger backpropagation...
>>> ℒ = x ** 2
>>> ℒ.backward()
and we see that ``x.grad`` stores dℒ/dx
>>> x.grad # dℒ/dx
array([2., 4.])
Now we will demonstrate the relationship between the gradient of a view tensor
and that of its base.
>>> base = mg.Tensor([1.0, 2.0, 3.0])
>>> view = base[:2]; view
Tensor([1., 2.])
>>> ℒ = base ** 2
>>> ℒ.backward()
Although ``view`` is not directly involved in the computation in ``ℒ``,
and thus would not typically store a gradient due to ``ℒ.backward()``,
it shares memory with ``base`` and thus it stores a gradient in correspondence
to this "view relationship". I.e. because ``view == base[:2]``, then we expect
to find that ``view.grad == base.grad[:2]``.
>>> base.grad
array([2., 4., 6.])
>>> view.grad
array([2., 4.])
>>> view.grad.base is base.grad
True
The reasoning here is that, because a base tensor and its view share the same
array data, varying an element in that data implies that both the base
tensor and the view will change (assuming the variation occurs specifically in
a shared region). It follows that the base tensor's gradient must share the same
relationship with the view-tensor's gradient, since these are measures of "cause and effect"
associated with varying elements of data (albeit infinitesimally).
"""
if self._base is None:
return self._grad
if self._view_grad is not None and self._view_grad.base is self._base._grad:
# view grad has been computed already
return self._view_grad
if self._base._grad is None or self._creator is None:
# ``self`` had its graph, connecting it to its base, cleared.
# ``self._view_grad`` can't be computed without this info.
return None
(view_parent,) = self._creator.variables
# recursively fetches grad from parent
grad = view_parent.grad
with _track.no_autodiff:
self._view_grad = self._replay_op(grad).data if grad is not None else None
return self._view_grad
def astype(
self,
dtype: DTypeLikeReals,
casting="unsafe",
copy: bool = True,
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""Copy of the tensor with the specified dtype.
The resulting tensor is not involved in any computational graph
and has no gradient associated with it.
This docstring was adapted from that of ``ndarray.astype``.
Parameters
----------
dtype : Union[type, str]
The real-valued numeric data type. This can be a numpy dtype or
a corresponding string identifier.
casting : Literal['no', 'equiv', 'safe', 'same_kind', 'unsafe']
Controls what kind of data casting may occur. Defaults to ‘unsafe’ for backwards compatibility.
- ‘no’ means the data types should not be cast at all.
- ‘equiv’ means only byte-order changes are allowed.
- ‘safe’ means only casts which can preserve values are allowed.
- ‘same_kind’ means only safe casts or casts within a kind, like float64 to float32, are allowed.
- ‘unsafe’ means any data conversions may be done.
copy : bool, optional (default=True)
By default, astype always returns a newly allocated array. If this is set to false, and
the ``dtype`` and ``constant`` requirements are satisfied, the input tensor is returned
instead of a copy.
constant : Optional[bool]
If specified, determines if the returned tensor is a constant.
Otherwise this argument is inferred from the original tensor.
Returns
-------
Tensor
The resulting tensor with the specified data type.
References
----------
.. [1] Retrieved from: https://numpy.org/doc/stable/reference/generated/numpy.ndarray.astype.html
Examples
--------
>>> import mygrad as mg
>>> import numpy as np
>>> x = mg.arange(10); x
Tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
Using a string to specify the data type:
>>> x.astype("float32")
Tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32)
Specifying a numpy data type object, and specifying that the
tensor is to be treated as a constant:
>>> x.astype(np.int8, constant=True)
Tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int8)
"""
cast_data = self.data.astype(dtype=dtype, casting=casting, copy=copy)
if cast_data is self.data and (constant is None or self.constant is constant):
return self
return type(self)(cast_data, copy=False, constant=constant)
@classmethod
def _op(
cls,
Op: Type[Operation],
*input_vars: ArrayLike,
op_args: Optional[Sequence] = None,
op_kwargs: Optional[Dict[str, Any]] = None,
constant: Optional[bool] = None,
out: Optional[Union[np.ndarray, "Tensor"]] = None,
):
"""Wraps operations performed between tensors: f(a, b, ...).
For developer use only.
Parameters
----------
Op : Type[Operation]
Operation-class, used to perform forward-pass on `input_vars`.
input_vars : Tuple[array_like, ...]
An arbitrary number of input-tensors. These can take any form that
can be converted to an array. This includes numbers, sequences, nested
numerical sequences, numpy-ndarrays, and mygrad-tensors.
op_args : Optional[Tuple[Any, ...]]
Arbitrary positional arguments passed to the operation's forward pass.
op_kwargs : Optional[Dict[str, Any]]
Arbitrary keyword arguments passed to the operation's forward pass.
constant : Optional[bool]
If True, the resulting Tensor is a constant.
out: Optional[Union[np.ndarray, "Tensor"]]
The target where the output (an ndarray) of the operation will be written.
The operation will raise if `out` is a read-only array.
There is an exception to this if a tensor is provided, in which case the
operation does not write to its underlying memory but rather triggers
"in-place semantics" so that the computational graph behaves as if the
tensor was mutated. See ``Tensor._in_place_op`` for more details.
Returns
-------
mygrad.Tensor
The tensor-result of the operation's forward-pass."""
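# For orientation (an illustrative note, dev-use only): ``x + y`` is routed through
# ``Tensor._op(Add, x, y)`` via ``__add__`` defined below; the other arithmetic
# dunder methods in this class funnel through this classmethod in the same way.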
if out is not None:
if isinstance(out, tuple):
if len(out) > 1: # pragma: no cover
raise ValueError(
"mygrad does not support in-place operations with more that one target"
)
(out,) = out
if isinstance(out, Tensor):
out._in_place_op(
Op,
*input_vars,
op_args=op_args,
op_kwargs=op_kwargs,
constant=constant,
)
return out
_uniques_bases_then_arrs = ()
tensor_vars = tuple(
cls(var, constant=True, copy=False) if not isinstance(var, Tensor) else var
for var in input_vars
)
# cast all input-vars to tensors
if _track.TRACK_GRAPH and _mem.MEM_GUARD:
# lock memory of array data
_uniques_bases_then_arrs = WeakRefIterable(
_mem.lock_arr_writeability(x)
for x in _mem.unique_arrs_and_bases(tensor_vars)
)
if op_args is None:
op_args = tuple()
if op_kwargs is None:
op_kwargs = {}
f = Op()
try:
if out is None:
op_out: np.ndarray = f(*tensor_vars, *op_args, **op_kwargs)
else:
op_out: np.ndarray = f(*tensor_vars, *op_args, **op_kwargs, out=out)
except Exception as e:
if _track.TRACK_GRAPH and _mem.MEM_GUARD:
_mem.release_writeability_lock_on_op(_uniques_bases_then_arrs)
raise e
if not _track.TRACK_GRAPH:
# execute operation without tracking creator or any graph
# information
return cls(
op_out,
constant=constant, # constant not determined by graph info
copy=False,
_creator=None,
_base=None,
)
# points to parent tensor that op-output is a view of
base = None # type: Optional[Tensor]
# If output of op is a view - tracks the tensor var that is
# the parent of the view
parent_var: Optional[Tensor] = None
# Determine whether or not op was a view; if so, `base`
# points to parent Tensor
op_out_base = op_out.base
if f.can_return_view and op_out_base is not None:
vars_can_share_mem = (
isinstance(var, (np.ndarray, Tensor)) for var in input_vars
)
for can_share_mem, parent_var in zip(vars_can_share_mem, tensor_vars):
if not can_share_mem:
continue
parent_data = parent_var.data
parent_data_base = parent_data.base
if (
(op_out_base is parent_data)
or (op_out_base is parent_data_base)
or (op_out is parent_data)
):
if parent_var._base is not None and parent_var._creator is None:
parent_var._base = None
base = parent_var if parent_var.base is None else parent_var.base
break
else:
parent_var = None
for v in input_vars:
if isinstance(v, Tensor):
# tensor's graph has been cleared, but its base lingers
if v._base is not None and v._creator is None:
v._base = None
if base is None:
# non-view ops clear grads
v._grad = None
v._view_grad = None
if base is not None:
# we need to be able to replay view-ops for doing in-place operations
# on graphs with views
f.replay_args = op_args
f.replay_kwargs = op_kwargs
f.replay_force_constant = constant
# record graph information
if constant is None:
if any(not var.constant for var in tensor_vars):
constant = None
else:
constant = True
# record that a variable participated in that op
ref_f = ReferenceType(f) # type: WeakRef[Operation]
for var in tensor_vars:
var._ops.add(ref_f)
tensor_out = cls(
op_out,
constant=constant,
copy=False,
_creator=f,
_base=base,
)
if parent_var is not None:
parent_var._view_children.append(tensor_out)
if _mem.MEM_GUARD:
if out is not None and tensor_out.data.base is not None:
_mem.lock_arr_writeability(tensor_out.data.base)
_uniques_bases_then_arrs.append(tensor_out.data.base)
_mem.lock_arr_writeability(tensor_out.data)
tensor_refs = _uniques_bases_then_arrs
tensor_refs.append(tensor_out.data)
finalize(f, _mem.release_writeability_lock_on_op, tensor_refs)
return tensor_out
def _replay_op(self, *input_vars: ArrayLike) -> "Tensor":
"""*dev use only*
Replays the op that produced `self` - called on the specified
input vars"""
if self.creator is None:
raise DisconnectedView(
"``Tensor._replay_op(...)`` was called on a tensor without a creator."
"\nPlease report this error at: https://github.com/rsokl/MyGrad/issues"
)
return self._op(
type(self.creator),
*input_vars,
op_args=self.creator.replay_args,
op_kwargs=self.creator.replay_kwargs,
constant=self.creator.replay_force_constant,
)
def backward(self, grad: Optional[ArrayLike] = None):
"""Trigger backpropagation and compute the derivatives of this tensor.
Designating this tensor as the tensor ℒ, compute dℒ/dx for all (non-constant) tensors
that preceded ℒ in its computational graph, and store each of these derivatives in ``x.grad``
respectively.
Once back-propagation is finished, the present tensor is removed from all computational
graphs, and the preceding graph is cleared.
If ℒ is a non-scalar tensor (i.e. ``ℒ.ndim`` is greater than 0), then calling
``ℒ.backward()`` will behave as if ℒ was first reduced to a scalar via summation. I.e. it
will behave identically to ``ℒ.sum().backward()``; this ensures that each element of any
dℒ/dx will represent a derivative of a scalar function.
Parameters
----------
grad : Optional[array_like] (must be broadcast-compatible with ``self``)
By default, the present tensor is treated as the terminus of the computational graph (ℒ).
Otherwise, one can specify a "downstream" derivative, representing ``dℒ/d(self)``.
This can be used to effectively connect otherwise separate computational graphs.
Examples
--------
>>> import mygrad as mg
>>> x = mg.tensor(2.0)
>>> y = mg.tensor(3.0)
>>> w = x * y
>>> ℒ = 2 * w
>>> ℒ.backward() # computes dℒ/dℒ, dℒ/dw, dℒ/dy, and dℒ/dx
>>> ℒ.grad # dℒ/dℒ == 1 by identity
array(1.)
>>> w.grad # dℒ/dw
array(2.)
>>> y.grad # dℒ/dy = dℒ/dw * dw/dy
array(4.)
>>> x.grad # dℒ/dx = dℒ/dw * dw/dx
array(6.)
Calling ``ℒ.backward()`` from a non-scalar tensor is equivalent
to first summing that tensor.
>>> tensor = mg.tensor([2.0, 4.0, 8.0])
>>> ℒ = tensor * tensor[::-1] # [x0*x2, x1*x1, x2*x0]
>>> ℒ.backward() # behaves like ℒ = x0*x2 + x1*x1 + x2*x0
>>> tensor.grad
array([16., 8., 4.])
>>> tensor = mg.Tensor([2.0, 4.0, 8.0])
>>> ℒ = tensor * tensor[::-1]
>>> ℒ.sum().backward()
>>> tensor.grad
array([16., 8., 4.])
Specifying a value for ``grad``
>>> x = mg.Tensor(1.)
>>> x.backward(2.)
>>> x.grad # Would normally be dℒ/dℒ == 1
array(2.)
"""
if not _track.TRACK_GRAPH:
return
if self.constant:
self.clear_graph()
return
topo_sorted_tensors: Deque["Tensor"] = deque([])
seen: Set[int] = set()
collect_all_tensors_and_clear_grads(self, seen, topo_sorted_tensors)
# don't set self._grad yet because there is a grad-clearing step that
# occurs during graph creation
if grad is not None:
# `self` is guaranteed to be a tensor of floats
# so we can simply cast `grad` to be the same dtype
_grad = asarray(grad, dtype=self.dtype)
if _grad.shape != self.shape:
try:
# See if grad can broadcast to `self`
# raises ValueError if not
_grad = np.multiply(
np.full_like(self.data, fill_value=1.0),
_grad,
dtype=self.dtype,
)
if _grad.shape != self.shape:
# mutual broadcasting occurred
raise ValueError()
except ValueError:
raise ValueError(
f"`tensor.backward(grad)` was passed a gradient with an incompatible shape.\n"
f"`grad` must be broadcast-compatible with `tensor.shape={self.shape}`\n"
f"Got `grad.shape={_grad.shape}`"
)
else:
_grad = np.full_like(self.data, fill_value=1.0)
self._grad = _grad
if self.creator is not None:
for t in topo_sorted_tensors:
t._backward()
self.clear_graph()
def _backward(self):
"""
**For dev-use only**
If `self` has accumulated incoming gradients from all operations in the terminal node's
computational graph, back-propagate the accumulated gradient to the creator of `self`.
Raises
------
AssertionError
Raises if the tensor and its associated gradient possess different shapes.
Raises if `_backward` triggered on a tensor with gradient of `None`.
"""
assert self._grad is not None, (
f"backprop, post grad-accumulation, was triggered "
f"on a tensor with no gradient"
f"\n{self}"
f"\nid {id(self._ops)}"
f"\ngrad: {self.grad}"
f"\ncreator: {self.creator}"
f"\nops: {self._ops}"
f"\nbase: {self.base}"
)
assert self._grad.shape == self.shape, (
f"A tensor and its associated gradient must possess the same shape. Got:"
f"\ntensor-shape: {self.shape}"
f"\ngrad-shape: {self._grad.shape}"
)
if self._creator is not None:
self._creator.backward(self._grad)
return
def null_grad(self, *, _clear_view_info: bool = False) -> "Tensor":
"""Sets this tensor's gradient to be ``None``.
This operation is performed in-place, but a reference to the
tensor is returned in order to permit mapping semantics.
Also removes any ``base`` reference from disconnected views.
Returns
-------
self
Examples
--------
>>> import mygrad as mg
>>> x = mg.Tensor(2.)
>>> (x ** 2).backward()
>>> x.grad
array(4.)
>>> x.null_grad() # returns a reference of `x`
Tensor(2.0)
>>> x.grad is None
True"""
self._view_grad = None
self._grad = None
if _clear_view_info:
if self._base is not None and self._creator is None:
self._base = None
return self
def null_gradients(self, clear_graph: bool = True):
"""
**Deprecated: Tensors will automatically have their computational graphs cleared during backprop.
Simply involving a tensor in a new computational graph will null its gradient.**
Sets the gradient for this tensor and for all preceding tensors in the computation graph
to ``None``.
Additionally, the computational graph that terminates in this tensor can also be cleared
during this process.
Parameters
----------
clear_graph : bool, optional (default=True)
If ``True`` clear the computational graph in addition to nulling the gradients.
Notes
-----
It is advised to clear the computational graph when nulling gradients, i.e. invoke
``null_gradients(clear_graph=True)`` (or simply ``null_gradients()``). This de-references
all intermediate operations and tensors in the computational graph and thus permits
garbage collection - freeing the memory that was used by the computational graph.
Examples
--------
>>> import mygrad as mg
>>> x = mg.tensor(2.0)
>>> y = mg.tensor(3.0)
>>> w = x * y
>>> f = 2 * w
>>> f.backward() # computes df/df, df/dw, df/dy, and df/dx
>>> any(tensor.grad is None for tensor in (f, w , x, y))
False
>>> f.null_gradients() # set tensor.grad to None for all tensors in the graph
>>> all(tensor.grad is None for tensor in (f, w , x, y))
True
"""
import warnings
warnings.warn(
"`tensor.null_gradients()` is deprecated. Calling it will raise an error "
"in future versions of MyGrad. A tensor will automatically "
"have its gradient nulled if you use it in a new computational graph. "
"Or, you can call `tensor.null_grad()` to null that individual tensor's "
"gradient.",
FutureWarning,
)
def clear_graph(self):
"""
Removes the current tensor – and tensors above it – from their shared
computational graph.
This de-references all operations involved in the graph and the intermediate
tensors that were created by it. Arrays whose memory was locked by the
computational graph will have their writeability restored.
Examples
--------
>>> import mygrad as mg
>>> import numpy as np
>>> x = np.array([1., 2.])
>>> y = mg.multiply(2., x)
>>> x.flags.writeable, y.creator
(False, <mygrad.math.arithmetic.ops.Multiply at 0x224f89cac48>)
>>> y.clear_graph()
>>> x.flags.writeable, y.creator
(True, None)
"""
if self._base is not None:
# "pull" on grad to force views to update their
# gradients from upstream before the graph info
# gets cleared
_ = self.grad
self._view_children.clear()
self._ops.clear()
if self._creator is None:
return
creator = self._creator
self._creator = None # marks tensor as "visited" during graph-traversal
for var in creator.variables: # type: "Tensor"
var.clear_graph()
@property
def constant(self) -> bool:
"""If ``True``, this tensor is a constant; it will not propagate any gradient.
Additionally, any tensor that is derived solely from constant tensors will also
be a constant.
Integer-valued tensors, Python scalars, and NumPy arrays are treated as constant
tensors when included in MyGrad computational graphs.
Returns
-------
bool
Examples
--------
Constant-tensors do not back-propagate gradients:
>>> import mygrad as mg
>>> x = mg.Tensor([1., 2.], constant=True)
>>> y = mg.Tensor([0., 3.], constant=False)
>>> f = x * y
>>> f.backward()
>>> x.grad is None # x has no gradient
True
>>> y.grad
array([1., 2.])
A tensor that is derived solely from constant tensors is also
a constant:
>>> import numpy as np
>>> x = mg.Tensor([1., 2.], constant=True)
>>> y = mg.Tensor([0., 3.], constant=True)
>>> z = (x + y) ** 2 - np.array([8., 7.])
>>> z.constant
True
Integer-valued tensors are treated as constants
>>> mg.Tensor([1, 2]).constant
True
"""
return self._constant
@property
def creator(self) -> Optional[Operation]:
"""The ``Operation`` instance that produced ``self``.
Returns
-------
creator : Optional[Operation]
The operation-instance that created the tensor, or `None`.
Examples
--------
>>> import mygrad as mg
>>> x = mg.Tensor(3)
>>> x.creator is None
True
>>> y = mg.Tensor(2)
>>> z = x * y # Multiply(x, y) -> z
>>> z.creator
<mygrad.math.arithmetic.ops.Multiply at 0x2df5a130438>
"""
return self._creator
def __len__(self) -> int:
return len(self.data)
def __contains__(self, item) -> bool:
return self.data.__contains__(item)
def __getitem__(self, item: Index) -> "Tensor":
return self._op(GetItem, self, op_args=(item,))
def __iter__(self) -> Iterator["Tensor"]:
# In the same way that numpy doesn't let you iterate over 0-dimensional
# arrays, don't allow iteration over 0-dimensional tensors.
if self.ndim == 0:
raise TypeError("iteration over a 0-d tensor")
return iter(self[n] for n in range(len(self)))
def _in_place_op(
self,
inplace_op: Type[Operation],
*input_vars: ArrayLike,
op_args: Optional[Sequence] = None,
op_kwargs: Optional[Dict] = None,
constant: Optional[bool] = None,
):
if _track.TRACK_GRAPH is False:
return self._op(
inplace_op,
*input_vars,
op_args=op_args,
op_kwargs=op_kwargs,
constant=constant,
out=self.data,
)
#
# **********************************************************************************
# The way that in-place updates work in MyGrad is that any tensor that
# is about to undergo a mutation gets "cloned". Each resulting "placeholder"
# is used to represent that tensor in any non-view operations that the tensor
# was participating in. This ensures that the stateful computational graph
# is not corrupted by this mutation.
#
# Once the placeholders have been created, they have permanently replaced the
# roles of their counterparts within the computational graph. Furthermore, they
# exist only internally to the computational graph and thus cannot be the
# targets of subsequent views or in-place updates.
#
# At this point, the "original" tensors merely reserve the publicly-available
# Tensor-instances (husks) that the users will access. We eventually need to
# populate these husks with the appropriate augmented contents and graph-history.
#
# Thus this method will compute the in-place operation on a new tensor, and
# will create a new, internal computational graph involving the base tensor
# affected by the mutation and any of its view-children. These tensors represent
# the mutated tensors that the users expect to have access to.
#
# We must connect this new computational graph to the preceding one – the one
# involving the placeholders; this way we can backpropagate appropriately and
# through all influencers.
#
# Finally we mirror each of these new tensors into the husks of the publicly
# -available tensors and reroute the computational graph through them so that
# the user sees that all of the relevant tensors have been augmented, and that
# they are connected to the appropriate "history" such that backprop occurs
# without error or inaccuracy.
#
#
# For illustration, consider the following graph:
#
# ... x------[square]-- y = x**2
# \
# ---[slice]-- z = view-x
# \
# ---[mul]-- w = 3 * z
#
# Now suppose that we mutate `x` with `x[:] = 0`. This is a simpler case than
# mutating a view of `x`, since `x` is already the base tensor.
# - This should not affect `y`
# - It should affect `z` (the view of `x`)
# - It should *not* affect `w`, which depends on `z` in a "static" way.
# I.e. the value for `w` is already resolved and is not a view of z or x.
#
#
# As prescribed above, we will make the placeholders: px and pz, and we
# will reroute the operations that statically depend on the old values of x and z
# through these placeholders.
#
# Next we will have `x` point to a mutated version of itself, in accord with the
# in-place update being performed, and we will subsequently recreate any
# views of x (i.e. z), based off of this mutated tensor.
#
# The resulting graph is:
#
# ---[slice]-- z = view-x
# /
# -----[set-item] -- x = px.copy()[:]=0
# /
# ... px------[square]-- y = px**2
# \
# ---[slice]-- pz = view-px
# \
# ---[mul]-- w = 3 * pz
#
# Note that px and pz are strictly *internal* tensors; they cannot be accessed for
# use in any further operations, whereas `x` and `z` are available for further use.
#
# **********************************************************************************
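# A user-level sketch of the scenario illustrated above (illustrative only;
# not executed here):
#
#   x = mg.tensor([1., 2., 3.])
#   y = x ** 2        # statically depends on the old value of x
#   z = x[:2]         # view of x
#   w = 3 * z         # statically depends on the old value of z
#   x[:] = 0          # x and z now hold zeros, while backprop through
#                     # y and w still flows through the pre-mutation
#                     # placeholders (px and pz above)
#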
#
# Replace base and all of its views with "placeholder" tensors;
# they serve as internal references to all tensors pre-mutation
# and will preserve ops relying on the un-mutated tensors.
#
# These placeholder tensors are never publicly-available and thus cannot
# be involved directly in future in-place updates
# In Tensor._op, any tensor entering an op has its grad/view-info cleared
# We must do this here up front since we need to consume information
# about ``self``
self.null_grad(_clear_view_info=True)
if self._base is not None and not self._base._view_children:
self._base = None
graph = _dup.DuplicatingGraph(self if self.base is None else self.base)
# Create copy of base so that mutation has no impact on the
# state of any ops depending on it or its views
mutant_base = graph.base.tensor.copy()
mutant_base.data.flags.writeable = (
graph.base.tensor.data.flags.writeable
or _mem.array_is_tracked(graph.base.tensor.data)
)
# Create view of base in correspondence to relationship
# that `self` has to base. Mutating this view will mutate
# base appropriately
inplace_target = mutant_base
# stores view-fn sequence from base -> in-place target
view_fn_sequence: List[Callable[[np.ndarray], np.ndarray]] = []
with _track.no_autodiff:
# get view sequence from base -> in-place target
for node in graph.get_path_to_base(self)[::-1][1:]: # skip base
# need to point to place-holder replay op to avoid creating
# forwards references to downstream tensors
f = node.placeholder._replay_op
if self.base is not None:
# need sequence of view-ops
view_fn_sequence.append(_track.no_autodiff(f, to_numpy=True))
inplace_target = f(inplace_target)
# Constant info was not propagated through no-autodiff mode.
# It must be inferred from the original tensor
inplace_target._constant = mutant_base.constant
mutant_base_data = mutant_base.data
del mutant_base
try:
with _mem.mem_guard_off:
placeholder_mutant_view = (
self._op( # will raise if original data not writeable
inplace_op,
*(graph.get_placeholder_if_exists(t) for t in input_vars),
op_args=op_args,
op_kwargs=op_kwargs,
constant=constant,
out=inplace_target.data,
)
)
except Exception as e:
graph.restore_old_graph()
raise e
placeholder_mutant_view._constant = inplace_target._constant
if _mem.MEM_GUARD:
_mem.force_lock_tensor_and_creators(placeholder_mutant_view)
if placeholder_mutant_view.creator.where is not True:
# An operation like `multiply(x, y, where=mask, out=z)` occurred.
# `placeholder_mutant_view` is the mutated version of `z`.
# We need to connect the upstream version of `z` to the computational
# graph so that `~mask * dℒ/dz` backprops to it, whereas `mask * dℒ/dz`
# will backprop to `x` and `y`.
#
# This is basically an alternative to treating
# `multiply(x, y, where=mask, out=z)`
# like a three-input operation, which adds complexity to the implementation
# of every op that supports `where` and `out`.
#
# old-z ---------------------
# | |
# multiply(x, y, where=mask, out=z) |
# | |
# z --------------------
# | |
# ApplyMask
# |
# z
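# A user-level sketch of the case handled here (illustrative only):
#   x = mg.tensor([1., 2.]); y = mg.tensor([3., 4.]); z = mg.tensor([-1., -1.])
#   np.multiply(x, y, where=[True, False], out=z)
#   # z is now Tensor([3., -1.]): the masked-out entry keeps the old value of z,
#   # so ~mask * dℒ/dz flows to the upstream placeholder via ApplyMask, while
#   # mask * dℒ/dz flows to x and y.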
with _mem.mem_guard_off:
placeholder_mutant_view = type(self)._op(
_dup.ApplyMask,
placeholder_mutant_view, # gets passed through unchanged
# ~mask * grad backprops to upstream placeholder
graph[self].placeholder,
op_kwargs={
"mask": placeholder_mutant_view.creator.where,
},
)
# Connect public base tensor to placeholder graph via the mutated placeholder
# tensor `out`.
if self.base is None:
# The current graph:
# base-p --> | inplace | --> vp'
# Becomes:
# base-p --> | inplace | --> base'
#
# The base tensor itself was the target of the in-place operation,
# thus we need simply mirror original base against the mutant placeholder.
# This effectively connects the original base to the placeholder graph
mutant_base = placeholder_mutant_view
else:
# in-place operation occurred on a view; must connect mutated base
# to graph and then reproduce downstream views
#
# The current graph:
# vp --> | inplace | --> vp'
#
# Becomes:
#
# vp --> | inplace | --> vp' --> | |
# | unview | --> base'
# base-p -----------------------> | |
#
# I.e. the mutated base is a combination of the placeholder
# base and of the mutant view.
mutant_base = type(self)._op(
_dup.UnView,
graph.base.placeholder,
placeholder_mutant_view,
op_kwargs={
# Copy to avoid upstream placeholder mutant view sharing memory
# with downstream mutant base
"mutant_base_data": mutant_base_data,
"view_fn_sequence": view_fn_sequence,
},
)
del placeholder_mutant_view
# The original base now points to the augmented array data
# and has the InPlaceOp as its creator
_dup.mirror_tensor(source=mutant_base, target=graph.base.tensor)
del mutant_base
# Now that the base-tensor has been incorporated into the graph,
# recreate the view-graph and reroute all tensors from previous
# graph to their downstream counterparts
#
# Note that iterating in a topologically-ordered way is critical
# here: each parent is updated before creating one of its children
#
# Iteration is always based off of the placeholders' relative positions
# in the graph since this will never be mutated.
for node in graph:
if node.parent is None:
continue
view = node.tensor._replay_op(node.parent)
_dup.mirror_tensor(source=view, target=node.tensor)
node.parent._view_children.append(node.tensor)
@property
def shape(self) -> Shape:
"""Tuple of tensor dimension-sizes.
Sizes are reported in row-major order.
Returns
-------
Tuple[int, ...]
Examples
--------
>>> import mygrad as mg
>>> x = mg.Tensor([1, 2, 3, 4]) # axis-0 has size 4
>>> x.shape
(4,)
>>> y = mg.Tensor([[1, 2, 3], # axis-0 has size 2, axis-1 has size 3
... [4, 5, 6]])
>>> y.shape
(2, 3)
The shape attribute can also be set to reshape the tensor in-place
>>> y.shape = (1, 6, 1)
>>> y
Tensor([[[1],
[2],
[3],
[4],
[5],
[6]]])
See Also
--------
mygrad.reshape : similar function
Tensor.reshape : similar method"""
return self.data.shape
@shape.setter
def shape(self, newshape: Union[int, Shape]):
# Even though this op cannot mutate views, we still must
# do graph-replaying here so that views can still reference
# this tensor, but with the proper reshaping mediating them.
#
# E.g.
# x = arange(10) # shape-(10,)
# y = x[:6] # shape-(6,)
# x.shape = (2, 5) # shape-(2, 5)
#
# y.base points to the shape-(2,5) array
# even though y is a view of the flat array
#
# thus we need to play this graph as
# (history)
# |
# placeholder shape-(10,)
# |-reshape
# x shape-(2,5)
# |-reshape
# placeholder shape-(10,)
# |-getitem
# y shape-(6,)
if not _track.TRACK_GRAPH:
self.data.shape = newshape
return
if newshape == self.shape:
return
old_shape = self.shape
# raise here if the shape is not compatible
self.data.shape = newshape
self.data.shape = old_shape
# create placeholders for self and all of its view-children
graph = _dup.DuplicatingGraph(self)
# need to iterate over all nodes now before we tinker
# with the view children
nodes = tuple(graph)
# reshape placeholder of self
out = graph.base.placeholder.reshape(newshape)
# Store contents of `out` in `self` and replace `out` in
# graph with `self`
out._base = graph.base.placeholder.base
_dup.mirror_tensor(source=out, target=self)
_dup.reroute_ops_through(source=out, target=self)
del out
# although `self` is a view of placeholder, placeholder
# is strictly an internal tensor, so we won't expose it as
# base
graph.base.placeholder._view_children.append(self)
base = graph.base.placeholder.base
if base is not None:
# if `self` was a view, we need to update that parent's
# view children so that it points to the placeholder
creator = graph.base.placeholder.creator.variables[0]
creator._view_children = WeakRefIterable(
[
w if w is not self else graph.base.placeholder
for w in graph.base.placeholder._view_children
]
)
# Undo the reshape, and place this as the tensor joining
# the reshaped `self` with the views of unshaped `self`
unshaped = self.reshape(old_shape)
for node in nodes:
if node.parent is None:
continue
# direct what would be views of `self` to be views of `unshaped`,
# which translates the mutated shape of `self` to the original
# shape used to create the views
parent = node.parent if node.parent is not self else unshaped
view = node.tensor._replay_op(parent)
_dup.mirror_tensor(source=view, target=node.tensor)
_dup.reroute_ops_through(source=view, target=node.tensor)
parent._view_children.append(node.tensor)
def __setitem__(self, key: Index, value: ArrayLike):
self._in_place_op(SetItem, self, value, op_args=(key,))
def __add__(self, other: ArrayLike) -> "Tensor":
return self._op(Add, self, other)
def __iadd__(self, other: ArrayLike) -> "Tensor":
self._in_place_op(Add, self, other)
return self
def __radd__(self, other: ArrayLike) -> "Tensor":
return self._op(Add, other, self)
def __sub__(self, other: ArrayLike) -> "Tensor":
return self._op(Subtract, self, other)
def __isub__(self, other: ArrayLike) -> "Tensor":
self._in_place_op(Subtract, self, other)
return self
def __rsub__(self, other: ArrayLike) -> "Tensor":
return self._op(Subtract, other, self)
def __truediv__(self, other: ArrayLike) -> "Tensor":
return self._op(Divide, self, other)
def __rtruediv__(self, other: ArrayLike) -> "Tensor":
return self._op(Divide, other, self)
def __floordiv__(self, other: ArrayLike) -> np.ndarray:
return np.floor_divide(self, other)
def __rfloordiv__(self, other: ArrayLike) -> np.ndarray:
return np.floor_divide(other, self)
def __itruediv__(self, other: ArrayLike) -> "Tensor":
self._in_place_op(Divide, self, other)
return self
def __mul__(self, other: ArrayLike) -> "Tensor":
return self._op(Multiply, self, other)
def __imul__(self, other: ArrayLike) -> "Tensor":
self._in_place_op(Multiply, self, other)
return self
def __rmul__(self, other: ArrayLike) -> "Tensor":
return self._op(Multiply, other, self)
def __matmul__(self, other: ArrayLike) -> "Tensor":
return self._op(MatMul, self, other)
def __rmatmul__(self, other: ArrayLike) -> "Tensor":
return self._op(MatMul, other, self)
def __pow__(self, other: ArrayLike):
if isinstance(other, Number) or (
isinstance(other, np.ndarray) and other.ndim == 0
):
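            # x ** 1 and x ** 2 are special-cased to the cheaper Positive
            # and Square ops rather than routing through the general Power op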
if other == 1:
return self._op(Positive, self)
elif other == 2:
return self._op(Square, self)
return self._op(Power, self, other)
def __ipow__(self, other: ArrayLike) -> "Tensor":
if isinstance(other, Number) or (
isinstance(other, np.ndarray) and other.ndim == 0
):
if other == 1:
self._in_place_op(Positive, self)
return self
elif other == 2:
self._in_place_op(Square, self)
return self
self._in_place_op(Power, self, other)
return self
def __rpow__(self, other: ArrayLike):
return self._op(Power, other, self)
def __neg__(self):
return self._op(Negative, self)
def __pos__(self):
return self._op(Positive, self)
def __repr__(self) -> str:
return repr(self.data).replace("array", "Tensor").replace("\n", "\n ")
def __copy__(self) -> "Tensor":
"""Produces a copy of ``self`` with ``copy.creator=None``.
Copies of the underlying numpy data array and gradient array are created.
Returns
-------
Tensor
"""
return self.copy()
def copy(self, *, constant: Optional[bool] = None) -> "Tensor":
"""Produces a copy of ``self`` with ``copy.creator=None``.
Copies of the underlying numpy data array and gradient array are created.
No information regarding the tensor's participation in the computational
graph are copied.
Parameters
----------
constant : Optional[bool]
If specified, sets the ``constant`` flag of the copy; otherwise the
copy inherits ``self.constant``.
Returns
-------
Tensor
Examples
--------
>>> import mygrad as mg
>>> x = mg.Tensor(3.0)
>>> y = x * 2
>>> y.backward()
>>> y_copy = y.copy()
>>> y_copy
Tensor(6.)
>>> y_copy.grad
array(1.)
>>> y_copy.creator is None
True
"""
copy = Tensor(
np.copy(self.data),
constant=(self.constant if constant is None else constant),
)
copy._grad = np.copy(self._grad) if self._grad is not None else None
return copy
def item(self) -> Union[int, float]:
"""Copy an element of a tensor to a standard Python scalar and return it.
Note that the returned object does not support back-propagation.
Returns
-------
z : Standard Python scalar object
A copy of the specified element of the tensor as a suitable
Python scalar
Examples
--------
>>> import mygrad as mg
>>> x = mg.Tensor([22.2])
>>> x.item()
22.2
>>> type(x.item())
float"""
if self.size > 1:
raise ValueError("can only convert a tensor of size 1 to a Python scalar")
return self.data.item()
def __float__(self) -> float:
if self.size > 1:
raise TypeError("can only convert a tensor of size 1 to a Python scalar")
return float(self.data)
def __int__(self) -> int:
if self.size > 1:
raise TypeError("can only convert a tensor of size 1 to a Python scalar")
return int(self.data)
def __index__(self) -> int:
"""Return self converted to an integer, if self is suitable for use as an index
into a list."""
return self.data.__index__()
def flatten(self, *, constant: Optional[bool] = None) -> "Tensor":
"""Return a copy of the tensor collapsed into one dimension.
This docstring was adapted from ``numpy.ndarray.flatten``.
Parameters
----------
constant : bool, optional(default=False)
If ``True``, the returned tensor is a constant (it
does not back-propagate a gradient)
Returns
-------
mygrad.Tensor
A copy of the input tensor, flattened to one dimension.
Notes
-----
To return a flattened view of the tensor, use ``x.reshape(-1)``.
Examples
--------
>>> import mygrad as mg
>>> x = mg.Tensor([[1, 2],
... [3, 4]])
>>> x.flatten()
Tensor([1, 2, 3, 4])
"""
return Tensor._op(Flatten, self, constant=constant)
@property
def base(self) -> Optional["Tensor"]:
"""
A reference to the base tensor that the present tensor is a view of.
If this tensor owns its memory, then this returns ``None``.
Examples
--------
The base of a tensor that owns its memory is ``None``:
>>> import mygrad as mg
>>> x = mg.arange(5)
>>> x.base is None
True
Slicing creates a view, whose memory is shared with x:
>>> y = x[2:]
>>> y.base is x
True
>>> y.data.base is x.data
True
A view of a view has the same base as its "parent"
>>> z = y[:]
>>> z.base is x
True
The behavior of ``Tensor.base`` departs from that of ``ndarray.base`` in that
mygrad will never create an "internal" tensor to serve as a base; e.g.
>>> import numpy as np
>>> np.reshape(2., (1,)).base
array(2.)
>>> mg.reshape(2., (1,)).base is None
True
"""
return self._base
@property
def size(self) -> int:
"""
Number of elements in the tensor, i.e., the product of the tensor's
dimensions.
Returns
-------
int
Examples
--------
>>> import mygrad as mg
>>> x = mg.zeros((3, 5, 2)) # creates a tensor with 3x5x2 (= 30) elements
>>> x.size
30
"""
return self.data.size
@property
def ndim(self) -> int:
"""Number of tensor dimensions. I.e. the number
of indices that must be supplied to uniquely specify
an element in the tensor.
Returns
-------
int
Examples
--------
>>> import mygrad as mg
>>> x = mg.Tensor([1, 2, 3])
>>> x.ndim
1
>>> x[0] # a single index identifies an element in `x`
Tensor(1)
>>> y = mg.Tensor([[1, 2, 3],
... [4, 5, 6]])
>>> y.ndim
2
>>> y[0, 0]  # two indices are required to identify an element in `y`
Tensor(1)"""
return self.data.ndim
@property
def dtype(self) -> np.dtype:
"""Data-type of the tensor's elements.
Returns
-------
numpy dtype object
Examples
--------
>>> import mygrad as mg
>>> x = mg.Tensor([[0, 1],
... [2, 3]])
>>> x.dtype
dtype('int32')
>>> type(x.dtype)
<type 'numpy.dtype'>"""
return self.data.dtype
def reshape(
self, *newshape: Union[int, Shape], constant: Optional[bool] = None
) -> "Tensor":
"""Returns a tensor with a new shape, without changing its data.
This docstring was adapted from ``numpy.reshape``
Parameters
----------
*newshape : Union[int, Tuple[int, ...]]
The new shape should be compatible with the original shape. If
an integer, then the result will be a 1-D tensor of that length.
One shape dimension can be -1. In this case, the value is
inferred from the length of the tensor and remaining dimensions.
constant : bool, optional(default=False)
If ``True``, the returned tensor is a constant (it
does not back-propagate a gradient)
Returns
-------
mygrad.Tensor
``a`` with its shape changed. A new tensor is returned.
Notes
-----
``reshape`` utilizes C-ordering, meaning that it reads & writes elements using
C-like index ordering; the last axis index changing fastest, and, proceeding
in reverse order, the first axis index changing slowest.
Examples
--------
>>> import mygrad as mg
>>> a = mg.Tensor([[1, 2, 3], [4, 5, 6]])
>>> a.reshape(6)
Tensor([1, 2, 3, 4, 5, 6])
>>> a.reshape(3, -1)  # the unspecified value is inferred to be 2
Tensor([[1, 2],
[3, 4],
[5, 6]])
"""
if not newshape:
raise TypeError("reshape() takes at least 1 argument (0 given)")
if hasattr(newshape[0], "__iter__"):
if len(newshape) > 1:
raise TypeError("an integer is required")
newshape = newshape[0]
return Tensor._op(Reshape, self, op_args=(newshape,), constant=constant)
@property
def T(self) -> "Tensor":
"""Same as self.transpose(), except that self is returned if self.ndim < 2 and
a view of the underlying data is utilized whenever possible.
Returns
-------
Tensor
Examples
--------
>>> import mygrad as mg
>>> y = mg.Tensor([[1, 2, 3],
... [4, 5, 6]])
>>> y.T
Tensor([[1, 4],
[2, 5],
[3, 6]])
"""
return self._op(Tensor_Transpose_Property, self)
def __eq__(self, other: ArrayLike) -> np.ndarray:
return np.ndarray.__eq__(self.data, asarray(other))
def __ne__(self, other: ArrayLike) -> np.ndarray:
return np.ndarray.__ne__(self.data, asarray(other))
def __lt__(self, other: ArrayLike) -> np.ndarray:
return np.ndarray.__lt__(self.data, asarray(other))
def __le__(self, other: ArrayLike) -> np.ndarray:
return np.ndarray.__le__(self.data, asarray(other))
def __gt__(self, other: ArrayLike) -> np.ndarray:
return np.ndarray.__gt__(self.data, asarray(other))
def __ge__(self, other: ArrayLike) -> np.ndarray:
return np.ndarray.__ge__(self.data, asarray(other))
def __imatmul__(self, other): # pragma: no cover
raise TypeError(
"In-place matrix multiplication is not (yet) supported. "
"Use 'a = a @ b' instead of 'a @= b'"
)
def sum(
self,
axis: Optional[Union[int, Tuple[int, ...]]] = None,
keepdims: bool = False,
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""
Sum of tensor elements over a given axis.
Parameters
----------
axis : Optional[int, Tuple[ints, ...]]
Axis or axes along which a sum is performed. The default,
axis=None, will sum all of the elements of the input tensor. If
axis is negative it counts from the last to the first axis.
If axis is a tuple of ints, a sum is performed on all of the axes
specified in the tuple instead of a single axis or all the axes as
before.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the input tensor.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
sum_along_axis : mygrad.Tensor
A Tensor with the same shape as `self`, with the specified
axis/axes removed. If `self` is a 0-d tensor, or if `axis` is None,
a 0-dim Tensor is returned.
See Also
--------
mygrad.sum : Equivalent function.
cumsum : Cumulative sum of array elements.
mean, average
Notes
-----
Arithmetic is modular when using integer types, and no error is
raised on overflow.
The sum of an empty tensor is the neutral element 0:
>>> mygrad.sum([])
Tensor(0.0)
Examples
--------
>>> import mygrad as mg
>>> import numpy as np
>>> x = mg.tensor([1., 1.])
>>> x.sum()
Tensor(2.0)
>>> x = mg.tensor([0.5, 0.7, 0.2, 1.5])
>>> x.sum()
Tensor(2.9)
>>> x = mg.tensor([[0, 1], [0, 5]])
>>> x.sum()
Tensor(6)
>>> x.sum(axis=0)
Tensor([0, 6])
>>> x.sum(axis=1)
Tensor([1, 5])
"""
return Tensor._op(
Sum, self, op_kwargs={"axis": axis, "keepdims": keepdims}, constant=constant
)
def prod(
self,
axis: Optional[Union[int, Tuple[int, ...]]] = None,
keepdims: bool = False,
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""
Return the product of array elements over given axes.
Parameters
----------
axis : Optional[Union[int, Tuple[int, ...]]]
Axis or axes along which to operate. By default, flattened input is used.
keepdims : bool, optional (default=False)
If this is set to True, the axes which are reduced are left in the
result as dimensions with size one. With this option, the result
will broadcast correctly against the input array.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
product_along_axis : mygrad.Tensor
A tensor shaped as ``a`` but with the specified axis removed.
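Examples
--------
Behavior mirrors ``numpy.prod`` applied to the underlying array:
>>> import mygrad as mg
>>> x = mg.tensor([[1., 2.], [3., 4.]])
>>> x.prod()
Tensor(24.)
>>> x.prod(axis=1)
Tensor([ 2., 12.])
"""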
return Tensor._op(
Prod,
self,
op_kwargs={"axis": axis, "keepdims": keepdims},
constant=constant,
)
def cumprod(
self,
axis: Optional[Union[int, Tuple[int, ...]]] = None,
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""
Return the cumulative product of elements along a given axis.
This docstring was adapted from the official numpy documentation
Parameters
----------
axis : Optional[int]
Axis along which the cumulative product is computed. By default
the input is flattened.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
mygrad.Tensor
Notes
-----
Arithmetic is modular when using integer types, and no error is
raised on overflow.
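Examples
--------
Behavior mirrors ``numpy.cumprod`` applied to the underlying array:
>>> import mygrad as mg
>>> x = mg.tensor([1., 2., 3.])
>>> x.cumprod()
Tensor([1., 2., 6.])
"""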
return Tensor._op(CumProd, self, op_kwargs={"axis": axis}, constant=constant)
def cumsum(
self,
axis: Optional[Union[int, Tuple[int, ...]]] = None,
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""
Return the cumulative sum of the elements along a given axis.
This docstring was adapted from the official numpy documentation
Parameters
----------
axis : int, optional
Axis along which the cumulative sum is computed. The default
(None) is to compute the cumsum over the flattened array.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
mygrad.Tensor
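Examples
--------
Behavior mirrors ``numpy.cumsum`` applied to the underlying array:
>>> import mygrad as mg
>>> x = mg.tensor([1., 2., 3.])
>>> x.cumsum()
Tensor([1., 3., 6.])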
"""
return Tensor._op(CumSum, self, op_kwargs={"axis": axis}, constant=constant)
def mean(
self,
axis: Optional[Union[int, Tuple[int, ...]]] = None,
keepdims: bool = False,
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""
Mean of tensor elements over a given axis.
Parameters
----------
axis : Optional[int, Tuple[ints, ...]]
Axis or axes along which a mean is performed. The default,
axis=None, will mean all of the elements of the input tensor. If
axis is negative it counts from the last to the first axis.
If axis is a tuple of ints, a mean is performed on all of the axes
specified in the tuple instead of a single axis or all the axes as
before.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the input tensor.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
mean_along_axis : Tensor
A Tensor with the same shape as `self`, with the specified
axis/axes removed. If `self` is a 0-d tensor, or if `axis` is None,
a 0-dim Tensor is returned.
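Examples
--------
Behavior mirrors ``numpy.mean`` applied to the underlying array:
>>> import mygrad as mg
>>> x = mg.tensor([[1., 2.], [3., 4.]])
>>> x.mean()
Tensor(2.5)
>>> x.mean(axis=0)
Tensor([2., 3.])
>>> x.mean(axis=1)
Tensor([1.5, 3.5])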
"""
return Tensor._op(
Mean,
self,
op_kwargs={"axis": axis, "keepdims": keepdims},
constant=constant,
)
def std(
self,
axis: Optional[Union[int, Tuple[int, ...]]] = None,
ddof: int = 0,
keepdims: bool = False,
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""
Compute the standard deviation along the specified axis.
Returns the standard deviation of the array elements, a measure of the
spread of a distribution. The standard deviation is computed for the
flattened array by default, otherwise over the specified axis.
Parameters
----------
axis : Optional[Union[int, Tuple[int, ...]]]
Axis or axes along which the variance is computed. The default is to
compute the variance of the flattened array.
ddof : int, optional (default=0)
"Delta Degrees of Freedom": the divisor used in the calculation is
``N - ddof``, where ``N`` represents the number of elements. By
default `ddof` is zero.
keepdims : bool, optional (default=False)
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the input array.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
std : mygrad.Tensor
Notes
-----
The standard deviation is the square root of the average of the squared
deviations from the mean, i.e., ``std = sqrt(mean(abs(x - x.mean())**2))``.
The average squared deviation is normally calculated as ``x.sum() / N``,
where ``N = len(x)``. If, however, ``ddof`` is specified, the divisor
``N - ddof`` is used instead. In standard statistical practice, ``ddof=1``
provides an unbiased estimator of the variance of a hypothetical infinite
population. ``ddof=0`` provides a maximum likelihood estimate of the
variance for normally distributed variables.
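Examples
--------
Behavior mirrors ``numpy.std`` applied to the underlying array:
>>> import mygrad as mg
>>> x = mg.tensor([1., 3.])
>>> x.std()
Tensor(1.)
>>> x = mg.tensor([[1., 3.], [2., 6.]])
>>> x.std(axis=1)
Tensor([1., 2.])
"""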
return Tensor._op(
StdDev,
self,
op_kwargs={"axis": axis, "keepdims": keepdims, "ddof": ddof},
constant=constant,
)
def var(
self,
axis: Optional[Union[int, Tuple[int, ...]]] = None,
ddof: int = 0,
keepdims: bool = False,
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""
Compute the variance along the specified axis.
Returns the variance of the array elements, a measure of the spread of a
distribution. The variance is computed for the flattened array by
default, otherwise over the specified axis.
Parameters
----------
axis : Optional[int, Tuple[int, ...]]
Axis or axes along which the variance is computed. The default is to
compute the variance of the flattened array.
ddof : int, optional (default=0)
"Delta Degrees of Freedom": the divisor used in the calculation is
``N - ddof``, where ``N`` represents the number of elements. By
default `ddof` is zero.
keepdims : bool, optional (default=False)
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the input array.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
variance : mygrad.Tensor
Notes
-----
The variance is the average of the squared deviations from the mean,
i.e., ``var = mean(abs(x - x.mean())**2)``.
The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
If, however, `ddof` is specified, the divisor ``N - ddof`` is used
instead. In standard statistical practice, ``ddof=1`` provides an
unbiased estimator of the variance of a hypothetical infinite population.
``ddof=0`` provides a maximum likelihood estimate of the variance for
normally distributed variables.
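Examples
--------
Behavior mirrors ``numpy.var`` applied to the underlying array:
>>> import mygrad as mg
>>> x = mg.tensor([[1., 2.], [3., 4.]])
>>> x.var()
Tensor(1.25)
>>> x.var(axis=0)
Tensor([1., 1.])
>>> x.var(axis=1)
Tensor([0.25, 0.25])
"""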
return Tensor._op(
Variance,
self,
op_kwargs={"axis": axis, "keepdims": keepdims, "ddof": ddof},
constant=constant,
)
def max(
self,
axis: Optional[Union[int, Tuple[int, ...]]] = None,
keepdims: bool = False,
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""
Return the maximum of a tensor or maximum along its axes.
Parameters
----------
axis : Optional[int, Tuple[int, ...]]
Axis or axes along which to operate. By default, flattened input is used.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the input tensor.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
max : mygrad.Tensor
Maximum of `a`. If `axis` is None, the result is a 0-D tensor.
Examples
--------
>>> import mygrad as mg
>>> import numpy as np
>>> a = mg.arange(4).reshape((2,2))
>>> a
Tensor([[0, 1],
[2, 3]])
>>> mg.amax(a) # Maximum of the flattened array
Tensor(3)
>>> mg.amax(a, axis=0) # Maxima along the first axis
Tensor([2, 3])
>>> mg.amax(a, axis=1) # Maxima along the second axis
Tensor([1, 3])
>>> b = mg.arange(5, dtype=float)
>>> b[2] = np.nan
>>> mg.amax(b)
Tensor(nan)
"""
return Tensor._op(
Max,
self,
op_kwargs={"axis": axis, "keepdims": keepdims, "dtype": _NoValue},
constant=constant,
)
def min(
self,
axis: Optional[Union[int, Tuple[int, ...]]] = None,
keepdims: bool = False,
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""
Return the minimum of a tensor or minimum along its axes.
Parameters
----------
axis : Optional[int, Tuple[int, ...]]
Axis or axes along which to operate. By default, flattened input is used.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the input tensor.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
min : mygrad.Tensor
Minimum of `a`. If `axis` is None, the result is a 0-D tensor.
Examples
--------
>>> import mygrad as mg
>>> import numpy as np
>>> a = mg.arange(4).reshape((2,2))
>>> a
Tensor([[0, 1],
[2, 3]])
>>> mg.amin(a) # Minimum of the flattened array
Tensor(0)
>>> mg.amin(a, axis=0) # Minima along the first axis
Tensor([0, 1])
>>> mg.amin(a, axis=1) # Minima along the second axis
Tensor([0, 2])
>>> b = mg.arange(5, dtype=float)
>>> b[2] = np.nan
>>> mg.amin(b)
Tensor(nan)
"""
return Tensor._op(
Min,
self,
op_kwargs={"axis": axis, "keepdims": keepdims, "dtype": _NoValue},
constant=constant,
)
def swapaxes(
self, axis1: int, axis2: int, *, constant: Optional[bool] = None
) -> "Tensor":
"""Interchange two axes of a tensor.
Parameters
----------
axis1 : int
First axis.
axis2 : int
Second axis.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
mygrad.Tensor
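Examples
--------
Behavior mirrors ``numpy.swapaxes`` applied to the underlying array:
>>> import mygrad as mg
>>> x = mg.zeros((1, 2, 3))
>>> x.swapaxes(0, 2).shape
(3, 2, 1)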
"""
return Tensor._op(SwapAxes, self, op_args=(axis1, axis2), constant=constant)
def transpose(
self: ArrayLike, *axes: int, constant: Optional[bool] = None
) -> "Tensor":
"""Permute the dimensions of a tensor.
Parameters
----------
axes : int
By default, reverse the dimensions, otherwise permute the axes
according to the values given.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
mygrad.Tensor
`a` with its axes permuted. A new tensor is returned.
Examples
--------
>>> import mygrad as mg
>>> a = mg.tensor([[1, 2], [3, 4]])
>>> a
Tensor([[1, 2],
[3, 4]])
>>> a.transpose()
Tensor([[1, 3],
[2, 4]])
>>> a.transpose((1, 0))
Tensor([[1, 3],
[2, 4]])
>>> a.transpose(1, 0)
Tensor([[1, 3],
[2, 4]])"""
if not axes:
axes = None
elif hasattr(axes[0], "__iter__") or axes[0] is None:
if len(axes) > 1:
raise TypeError(
f"'{type(axes[0])}' object cannot be interpreted as an integer"
)
axes = axes[0]
return Tensor._op(Transpose, self, op_args=(axes,), constant=constant)
def moveaxis(
self,
source: Union[int, Tuple[int, ...]],
destination: Union[int, Tuple[int, ...]],
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""Move axes of a tensor to new positions. Other axes remain in their
original order.
Parameters
----------
source : Union[int, Sequence[int]]
Original positions of the axes to move. These must be unique.
destination : Union[int, Sequence[int]]
Destination positions for each of the original axes. These must also be
unique.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
result : mygrad.Tensor
Tensor with moved axes. The underlying array data is a view of the
input tensor's data.
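Examples
--------
Behavior mirrors ``numpy.moveaxis`` applied to the underlying array:
>>> import mygrad as mg
>>> x = mg.zeros((3, 4, 5))
>>> x.moveaxis(0, -1).shape
(4, 5, 3)
>>> x.moveaxis(-1, 0).shape
(5, 3, 4)
"""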
return Tensor._op(
MoveAxis, self, op_args=(source, destination), constant=constant
)
def squeeze(
self,
axis: Optional[Union[int, Tuple[int, ...]]] = None,
*,
constant: Optional[bool] = None,
) -> "Tensor":
"""
Remove single-dimensional entries from the shape of a tensor.
This docstring was adapted from ``numpy.squeeze``
Parameters
----------
axis : Optional[int, Tuple[int, ...]]
Selects a subset of the single-dimensional entries in the
shape. If an axis is selected with shape entry greater than
one, an error is raised.
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
mygrad.Tensor
Raises
------
ValueError
If ``axis`` is not ``None``, and an axis being squeezed is not of length 1
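Examples
--------
Behavior mirrors ``numpy.squeeze`` applied to the underlying array:
>>> import mygrad as mg
>>> x = mg.zeros((1, 3, 1))
>>> x.squeeze().shape
(3,)
>>> x.squeeze(axis=0).shape
(3, 1)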
"""
return Tensor._op(Squeeze, self, op_args=(axis,), constant=constant)
def ravel(self, *, constant: Optional[bool] = None) -> "Tensor":
"""
Flattens contents of a tensor into a contiguous 1-D array. A copy is made only if needed.
This docstring was adapted from ``numpy.ravel``.
Parameters
----------
constant : Optional[bool]
If ``True``, this tensor is treated as a constant, and thus does not
facilitate back propagation (i.e. ``constant.grad`` will always return
``None``).
Defaults to ``False`` for float-type data.
Defaults to ``True`` for integer-type data.
Integer-type tensors must be constant.
Returns
-------
mygrad.Tensor
Notes
-----
``ravel`` utilizes C-ordering, meaning that it reads & writes elements using
C-like index ordering; the last axis index changing fastest, and, proceeding
in reverse order, the first axis index changing slowest.
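Examples
--------
Behavior mirrors ``numpy.ravel`` applied to the underlying array:
>>> import mygrad as mg
>>> x = mg.tensor([[1., 2.], [3., 4.]])
>>> x.ravel()
Tensor([1., 2., 3., 4.])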
"""
return Tensor._op(Ravel, self, constant=constant)
def argmax(
self, axis: Optional[int] = None, out: Optional[np.ndarray] = None
) -> np.ndarray:
"""Returns the indices of the maximum values along an axis.
Parameters
----------
axis: int, optional
By default, the index is into the flattened array, otherwise along the specified axis.
out: numpy.array, optional
If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype.
Returns
-------
numpy.ndarray[int]"""
return np.argmax(self.data, axis, out)
def argmin(
self, axis: Optional[int] = None, out: Optional[np.ndarray] = None
) -> np.ndarray:
"""Returns the indices of the minimum values along an axis.
Parameters
----------
axis: int, optional
By default, the index is into the flattened array, otherwise along the specified axis.
out: numpy.array, optional
If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype.
Returns
-------
numpy.ndarray[int]"""
return np.argmin(self.data, axis, out)
def any(
self,
axis: Optional[Union[int, Tuple[int, ...]]] = None,
out: Optional[np.ndarray] = None,
keepdims: bool = False,
) -> np.ndarray:
"""Test whether any array or Tensor element along a given axis evaluates to True.
Returns single boolean if `axis` is ``None``
This documentation was adapted from ``numpy.any``
Parameters
----------
axis : None or int or tuple of ints, optional
Axis or axes along which a logical OR reduction is performed.
The default (``axis=None``) is to perform a logical OR over all
the dimensions of the input array. `axis` may be negative, in
which case it counts from the last to the first axis.
If this is a tuple of ints, a reduction is performed on multiple
axes, instead of a single axis or all the axes as before.
out : ndarray, optional
Alternate output array in which to place the result. It must have
the same shape as the expected output and its type is preserved
(e.g., if it is of type float, then it will remain so, returning
1.0 for True and 0.0 for False, regardless of the type of `a`).
See `ufuncs-output-type` for more details.
keepdims : bool, optional
If this is set to True, the axes which are reduced are left
in the result as dimensions with size one. With this option,
the result will broadcast correctly against the input array.
If the default value is passed, then `keepdims` will not be
passed through to the `any` method of sub-classes of
`ndarray`, however any non-default value will be. If the
sub-class' method does not implement `keepdims` any
exceptions will be raised.
Returns
-------
any : bool or ndarray
A new boolean or `ndarray` is returned unless `out` is specified,
in which case a reference to `out` is returned.
See Also
--------
numpy.any : Equivalent function.
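Examples
--------
The result is a numpy array; this method does not back-propagate.
Behavior mirrors ``numpy.any`` applied to the underlying array:
>>> import mygrad as mg
>>> x = mg.tensor([[True, False], [False, False]])
>>> x.any(axis=0)
array([ True, False])
>>> x.any(axis=1)
array([ True, False])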
"""
return np.any(self.data, axis=axis, out=out, keepdims=keepdims)
def clip(
self,
a_min: ArrayLike,
a_max: ArrayLike,
out: Optional[Union[np.ndarray, "Tensor"]] = None,
*,
constant: Optional[bool] = None,
) -> "Tensor": # pragma: no cover
"""Clip (limit) the values in an array.
Given an interval, values outside the interval are clipped to
the interval edges. For example, if an interval of ``[0, 1]``
is specified, values smaller than 0 become 0, and values larger
than 1 become 1.
Equivalent to ``mg.minimum(a_max, mg.maximum(a, a_min))``.
No check is performed to ensure ``a_min < a_max``.
This docstring was adapted from that of `numpy.clip`
Parameters
----------
a_min : Optional[float, ArrayLike]
Minimum value. If `None`, clipping is not performed on lower
interval edge. Not more than one of `a_min` and `a_max` may be
`None`.
a_max : Optional[float, ArrayLike]
Maximum value. If `None`, clipping is not performed on upper
interval edge. Not more than one of `a_min` and `a_max` may be
`None`. If `a_min` or `a_max` are ArrayLike, then the three
arrays will be broadcasted to match their shapes.
out : Optional[Union[ndarray, Tensor]]
A location into which the result is stored. If provided, it must have
a shape that the inputs broadcast to. If not provided or None, a
freshly-allocated tensor is returned.
constant : bool, optional(default=False)
If ``True``, the returned tensor is a constant (it
does not backpropagate a gradient)
Returns
-------
Tensor
A tensor with the elements of `a`, but where values
< `a_min` are replaced with `a_min`, and those > `a_max`
with `a_max`.
Examples
--------
>>> import mygrad as mg
>>> a = mg.arange(10)
>>> a
Tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> a.clip(1, 8)
Tensor([1, 1, 2, 3, 4, 5, 6, 7, 8, 8])
>>> a.clip([3, 4, 1, 1, 1, 4, 4, 4, 4, 4], 8)
Tensor([3, 4, 2, 3, 4, 5, 6, 7, 8, 8])"""
# The actual implementation is bound to the Tensor class in mygrad.__init__;
# this stub only provides the signature and docstring.
...