"""
Directional Variogram
"""
import numpy as np
from scipy.spatial.distance import pdist
from .Variogram import Variogram
from skgstat import plotting
from .MetricSpace import MetricSpace, MetricSpacePair
[docs]
class DirectionalVariogram(Variogram):
    """DirectionalVariogram Class
    Calculates a variogram of the separating distances in the given
    coordinates and relates them to one of the semi-variance measures of the
    given dependent values.
    The directional version of a Variogram will only form paris of points
    that share a specified spatial relationship.
    """
[docs]
    def __init__(self,
                 coordinates=None,
                 values=None,
                 estimator='matheron',
                 model='spherical',
                 dist_func='euclidean',
                 bin_func='even',
                 normalize=False,
                 fit_method='trf',
                 fit_sigma=None,
                 directional_model='triangle',
                 azimuth=0,
                 tolerance=45.0,
                 bandwidth='q33',
                 use_nugget=False,
                 maxlag=None,
                 n_lags=10,
                 verbose=False,
                 **kwargs
                 ):
        r"""Variogram Class
        Directional Variogram. Works similar to the base class, but one can
        define a ``azimuth`` and a ``tolerance`` angle to only account for
        point pairs in a user-defined direction. Directional variograms are
        used to describe anisotropy in the data.
        Parameters
        ----------
        coordinates : numpy.ndarray
            Array of shape (m, n). Will be used as m observation points of
            n-dimensions. This variogram can be calculated on 1 - n
            dimensional coordinates. In case a 1-dimensional array is passed,
            a second array of same length containing only zeros will be
            stacked to the passed one.
        values : numpy.ndarray
            Array of values observed at the given coordinates. The length of
            the values array has to match the m dimension of the coordinates
            array. Will be used to calculate the dependent variable of the
            variogram.
        estimator : str, callable
            String identifying the semi-variance estimator to be used.
            Defaults to the Matheron estimator. Possible values are:
              * matheron        [Matheron, default]
              * cressie         [Cressie-Hawkins]
              * dowd            [Dowd-Estimator]
              * genton          [Genton]
              * minmax          [MinMax Scaler]
              * entropy         [Shannon Entropy]
            If a callable is passed, it has to accept an array of absolute
            differences, aligned to the 1D distance matrix (flattened upper
            triangle) and return a scalar, that converges towards small
            values for similarity (high covariance).
        model : str
            String identifying the theoretical variogram function to be used
            to describe the experimental variogram. Can be one of:
              * spherical       [Spherical, default]
              * exponential     [Exponential]
              * gaussian        [Gaussian]
              * cubic           [Cubic]
              * stable          [Stable model]
              * matern          [Matérn model]
              * nugget          [nugget effect variogram]
        dist_func : str
            String identifying the distance function. Defaults to
            'euclidean'. Can be any metric accepted by
            scipy.spatial.distance.pdist. Additional parameters are not (yet)
            passed through to pdist. These are accepted by pdist for some of
            the metrics. In these cases the default values are used.
        bin_func : str
            .. versionchanged:: 0.3.8
                added 'fd', 'sturges', 'scott', 'sqrt', 'doane'
            String identifying the binning function used to find lag class
            edges. All methods calculate bin edges on the interval [0, maxlag[.
            Possible values are:
                * `'even'` (default) finds `n_lags` same width bins
                * `'uniform'` forms `n_lags` bins of same data count
                * `'fd'` applies Freedman-Diaconis estimator to find `n_lags`
                * `'sturges'` applies Sturge's rule to find `n_lags`.
                * `'scott'` applies Scott's rule to find `n_lags`
                * `'doane'` applies Doane's extension to Sturge's rule to find `n_lags`
                * `'sqrt'` uses the square-root of :func:`distance <skgstat.Variogram.distance>`. as `n_lags`.
            More details are given in the documentation for :func:`set_bin_func <skgstat.Variogram.set_bin_func>`.
        normalize : bool
            Defaults to False. If True, the independent and dependent
            variable will be normalized to the range [0,1].
        fit_method : str
            String identifying the method to be used for fitting the
            theoretical variogram function to the experimental. More info is
            given in the Variogram.fit docs. Can be one of:
                * 'lm': Levenberg-Marquardt algorithm for unconstrained
                  problems. This is the faster algorithm, yet is the fitting of
                  a variogram not unconstrianed.
                * 'trf': Trust Region Reflective function for non-linear
                  constrained problems. The class will set the boundaries
                  itself. This is the default function.
        fit_sigma : numpy.ndarray, str
            Defaults to None. The sigma is used as measure of uncertainty
            during variogram fit. If fit_sigma is an array, it has to hold
            n_lags elements, giving the uncertainty for all lags classes. If
            fit_sigma is None (default), it will give no weight to any lag.
            Higher values indicate higher uncertainty and will lower the
            influcence of the corresponding lag class for the fit.
            If fit_sigma is a string, a pre-defined function of separating
            distance will be used to fill the array. Can be one of:
                * 'linear': Linear loss with distance. Small bins will have
                  higher impact.
                * 'exp': The weights decrease by a e-function of distance
                * 'sqrt': The weights decrease by the squareroot of distance
                * 'sq': The weights decrease by the squared distance.
            More info is given in the Variogram.fit_sigma documentation.
        directional_model : string, function
            The model used for selecting all points fulfilling the
            directional constraint of the Variogram. A predefined
            model can be selected by passing the model name as string.
            Optionally a callable accepting the difference vectors
            between points in polar form as angles and distances and
            returning a mask array can be passed. In this case, the
            azimuth, tolerance and bandwidth has to be incorporated by
            hand into the model.
                * 'compass': includes points in the direction of the
                  azimuth at given tolerance. The bandwidth parameter will be
                  ignored.
                * 'triangle': constructs a triangle with an angle of
                  tolerance at the point of interest and union an rectangle
                  parallel to azimuth, once the hypotenuse length reaches
                  bandwidth.
                * 'circle': constructs a half circle touching the point of
                  interest, dislocating the center at the distance of
                  bandwidth in the direction of azimuth. The half circle is
                  union with an rectangle parallel to azimuth.
            Visual representations, usage hints and implementation specifics
            are given in the documentation.
        azimuth : float
            The azimuth of the directional dependence for this Variogram,
            given as an angle in **degree**. The East of the coordinate
            plane is set to be at 0° and is counted clockwise to 180° and
            counter-clockwise to -180°. Only Points lying in the azimuth of a
            specific point will be used for forming point pairs.
        tolerance : float
            The tolerance is given as an angle in **degree**- Points being
            dislocated from the exact azimuth by half the tolerance will be
            accepted as well. It's half the tolerance as the point may be
            dislocated in the positive and negative direction from the azimuth.
        bandwidth : float
            Maximum tolerance acceptable in **coordinate units**, which is
            usually meter. Points at higher distances may be far dislocated
            from the azimuth in terms of coordinate distance, as the
            tolerance is defined as an angle. THe bandwidth defines a maximum
            width for the search window. It will be perpendicular to and
            bisected by the azimuth.
        use_nugget : bool
            Defaults to False. If True, a nugget effet will be added to all
            Variogram.models as a third (or fourth) fitting parameter. A
            nugget is essentially the y-axis interception of the theoretical
            variogram function.
        maxlag : float, str
            Can specify the maximum lag distance directly by giving a value
            larger than 1. The binning function will not find any lag class
            with an edge larger than maxlag. If 0 < maxlag < 1, then maxlag
            is relative and maxlag * max(Variogram.distance) will be used.
            In case maxlag is a string it has to be one of 'median', 'mean'.
            Then the median or mean of all Variogram.distance will be used.
            Note maxlag=0.5 will use half the maximum separating distance,
            this is not the same as 'median', which is the median of all
            separating distances
        n_lags : int
            Specify the number of lag classes to be defined by the binning
            function.
        verbose : bool
            Set the Verbosity of the class. Not Implemented yet.
        Keyword Arguments
        -----------------
        entropy_bins : int, str
            .. versionadded:: 0.3.7
            If the `estimator <skgstat.Variogram.estimator>` is set to
            `'entropy'` this argument sets the number of bins, that should be
            used for histogram calculation.
        percentile : int
            .. versionadded:: 0.3.7
            If the `estimator <skgstat.Variogram.estimator>` is set to
            `'entropy'` this argument sets the percentile to be used.
        """
        # Before we do anything else, make kwargs available
        self._kwargs = self._validate_kwargs(**kwargs)
        # FIXME: Call __init__ of baseclass?
        # No, because the sequence at which the arguments get initialized
        # does matter. There is way too much transitive dependence, thus
        # it was easiest to copy the init over.
        self._direction_mask_cache = None
        if not isinstance(coordinates, MetricSpace):
            coordinates = np.asarray(coordinates)
            coordinates = MetricSpace(coordinates.copy(), dist_func)
            # FIXME: Currently _direction_mask / _angles / _euclidean_dist don't get correctly calculated for sparse dspaces
            # coordinates = MetricSpace(coordinates.copy(), dist_func, maxlag if maxlag and not isinstance(maxlag, str) and maxlag >= 1 else None)
        else:
            assert self.dist_func == coordinates.dist_metric, "Distance metric of variogram differs from distance metric of coordinates"
            assert coordinates.max_dist is None
        # Set coordinates
        self._X = coordinates
        # pairwise difference
        self._diff = None
        # set verbosity
        self.verbose = verbose
        # declare a flag to mark if this is a covariogram
        # this is set to None, as set_values will figure out
        self._is_cross = None
        self._co_variable = None
        # set values
        self._values = None
        # calc_diff = False here, because it will be calculated by fit() later
        self.set_values(values=values, calc_diff=False)
        # distance matrix
        self._dist = None
        # set distance calculation function
        self._dist_func_name = None
        self.set_dist_function(func=dist_func)
        # Angles and euclidean distances used for direction mask calculation
        self._angles = None
        self._euclidean_dist = None
        # lags and max lag
        self.n_lags = n_lags
        self._maxlag = None
        self.maxlag = maxlag
        # estimator can be function or a string
        self._estimator = None
        self.set_estimator(estimator_name=estimator)
        # model can be function or a string
        self._model = None
        self.set_model(model_name=model)
        # azimuth direction
        self._azimuth = None
        self.azimuth = azimuth
        # azimuth tolerance
        self._tolerance = None
        self.tolerance = tolerance
        # tolerance bandwidth
        self._bandwidth = None
        self.bandwidth = bandwidth
        # set the directional model
        self._directional_model = None
        self._is_model_custom = False
        self.set_directional_model(model_name=directional_model)
        # the binning settings
        self._bin_func = None
        self._groups = None
        self._bins = None
        self.set_bin_func(bin_func=bin_func)
        # specify if the lag should be given absolute or relative to the maxlag
        self._normalized = normalize
        # set the fitting method and sigma array
        self.fit_method = fit_method
        self._fit_sigma = None
        self.fit_sigma = fit_sigma
        # set if nugget effect shall be used
        self.use_nugget = use_nugget
        # set attributes to be filled during calculation
        self.cov = None
        self.cof = None
        # settings, not reachable by init (not yet)
        self._cache_experimental = False
        # do the preprocessing and fitting upon initialization
        # Note that fit() calls preprocessing
        self.fit(force=True)
        # finally check if any of the uncertainty propagation kwargs are set
        self._experimental_conf_interval = None
        self._model_conf_interval = None
        if 'obs_sigma' in self._kwargs:
            self._propagate_obs_sigma() 
[docs]
    def preprocessing(self, force=False):
        self._calc_direction_mask_data(force)
        self._calc_diff(force=force)
        self._calc_groups(force=force) 
[docs]
    def _calc_direction_mask_data(self, force=False):
        r"""
        Calculate directional mask data.
        For this, the angle between the vector between the two
        points, and east (see comment about self.azimuth) is calculated.
        The result is stored in self._angles and contains the angle of each
        point pair vector to the x-axis in radians.
        Parameters
        ----------
        force : bool
            If True, a new calculation of all angles is forced, even if they
            are already in the cache.
        Notes
        -----
        The masked data is in radias, while azimuth is given in degrees.
        For the Vector between a point pair A,B :math:`\overrightarrow{AB}=u` and the
        x-axis, represented by vector :math:`\overrightarrow{e} = [1,0]`, the angle
        :math:`\Theta` is calculated like:
        .. math::
            cos(\Theta) = \frac{u \circ e}{|e| \cdot |[1,0]|}
        See Also
        --------
        `azimuth <skgstat.DirectionalVariogram.azimuth>`_
        """
        # FIXME: This should be optimized for the sparse case (range << bbox(coordinates)),
        # i.e. use the MetricSpace in self._X
        # check if already calculated
        if self._angles is not None and not force:
            return
        # if self.coordinates is of just one dimension, concat zeros.
        if self.coordinates.ndim == 1:
            _x = np.vstack(zip(self.coordinates, np.zeros(len(self.coordinates))))
        elif self.coordinates.ndim == 2:
            _x = self.coordinates
        else:
            raise NotImplementedError('N-dimensional coordinates cannot be handled')
        # for angles, we need Euklidean distance,
        # no matter which distance function is used
        # if self._dist_func_name == "euclidean":
        #    self._euclidean_dist = scipy.spatial.distance.squareform(self.distance_matrix)
        # else:
        self._euclidean_dist = pdist(_x, "euclidean")
        # Calculate the angles
        # (a - b).[1,0] = ||a - b|| * ||[1,0]|| * cos(v)
        # cos(v) = (a - b).[1,0] / ||a - b||
        # cos(v) = (a.[1,0] - b.[1,0]) / ||a - b||
        scalar = pdist(np.array([np.dot(_x, [1, 0])]).T, np.subtract)
        pos_angles = np.arccos(scalar / self._euclidean_dist)
        # cos(v) for [2,1] and [2, -1] is the same,
        # but v is not (v vs -v), fix that.
        ydiff = pdist(np.array([np.dot(_x, [0, 1])]).T, np.subtract)
        # store the angle or negative angle, depending on the
        # amount of the x coordinate
        self._angles = np.where(ydiff >= 0, pos_angles, -pos_angles) 
    @property
    def azimuth(self):
        """Direction azimuth
        Main direction for the selection of points in the formation of point
        pairs. East of the coordinate plane is defined to be 0° and then the
        azimuth is set clockwise up to 180°and count-clockwise to -180°.
        Parameters
        ----------
        angle : float
            New azimuth angle in **degree**.
        Raises
        ------
        ValueError : in case angle < -180° or angle > 180
        """
        return self._azimuth
    @azimuth.setter
    def azimuth(self, angle):
        if angle < -180 or angle > 180:
            raise ValueError('The azimuth is an angle in degree and has to '
                             'meet -180 <= angle <= 180')
        else:
            self._azimuth = angle
        # reset groups and mask cache on azimuth change
        self._direction_mask_cache = None
        self._groups = None
    @property
    def tolerance(self):
        """Azimuth tolerance
         Tolerance angle of how far a point can be off the azimuth for being
         still counted as directional. A tolerance angle will be applied to
         the azimuth angle symmetrically.
         Parameters
         ----------
         angle : float
             New tolerance angle in **degree**. Has to meet 0 <= angle <= 360.
         Raises
         ------
         ValueError : in case angle < 0 or angle > 360
         """
        return self._tolerance
    @tolerance.setter
    def tolerance(self, angle):
        if angle < 0 or angle > 360:
            raise ValueError('The tolerance is an angle in degree and has to '
                             'meet 0 <= angle <= 360')
        else:
            self._tolerance = angle
        # reset groups and mask on tolerance change
        self._direction_mask_cache = None
        self._groups = None
    @property
    def bandwidth(self):
        """Tolerance bandwidth
        New bandwidth parameter. As the tolerance from azimuth is given as an
        angle, point pairs at high distances can be far off the azimuth in
        coordinate distance. The bandwidth limits this distance and has the
        unnit of the coordinate system.
        Parameters
        ----------
        width : float
            Positive coordinate distance.
        Raises
        ------
        ValueError : in case width is negative
        """
        if self._bandwidth is None:
            return 0
        else:
            return self._bandwidth
    @bandwidth.setter
    def bandwidth(self, width):
        # check if quantiles is given
        if isinstance(width, str):
            # TODO document and handle more exceptions
            q = int(width[1:])
            self._bandwidth = np.percentile(self.distance, q)
        elif width < 0:
            raise ValueError('The bandwidth cannot be negative.')
        elif width > np.max(self.distance):
            print('The bandwidth is larger than the maximum separating '
                  'distance. Thus it will have no effect.')
        else:
            self._bandwidth = width
        # reset groups and direction mask cache on bandwidth change
        self._direction_mask_cache = None
        self._groups = None
[docs]
    def set_directional_model(self, model_name):
        """Set new directional model
        The model used for selecting all points fulfilling the
        directional constraint of the Variogram. A predefined model
        can be selected by passing the model name as string.
        Optionally a callable accepting the difference vectors between
        points in polar form as angles and distances and returning a
        mask array can be passed. In this case, the azimuth, tolerance
        and bandwidth has to be incorporated by hand into the model.
        The predefined options are:
        * 'compass': includes points in the direction of the azimuth at given
          tolerance. The bandwidth parameter will be ignored.
        * 'triangle': constructs a triangle with an angle of tolerance at the
          point of interest and union an rectangle parallel to azimuth,
          once the hypotenuse length reaches bandwidth.
        * 'circle': constructs a half circle touching the point of interest,
          dislocating the center at the distance of bandwidth in the
          direction of azimuth. The half circle is union with an rectangle
          parallel to azimuth.
        Visual representations, usage hints and implementation specifics
        are given in the documentation.
        Parameters
        ----------
        model_name : string, callable
            The name of the predefined model (string) or a function
            that accepts angle and distance arrays and returns a mask
            array.
        """
        # handle predefined models
        if isinstance(model_name, str):
            if model_name.lower() == 'compass':
                self._directional_model = self._compass
            elif model_name.lower() == 'triangle':
                self._directional_model = self._triangle
            elif model_name.lower() == 'circle':
                self._directional_model = self._circle
            else:
                raise ValueError('%s is not a valid model.' % model_name)
        # handle callable
        elif callable(model_name):
            self._directional_model = model_name
        else:
            raise ValueError('The directional model has to be identified by a '
                             'model name, or it has to be the search area '
                             'itself')
        # reset the groups as the directional model changed
        self._groups = None 
    @property
    def bins(self):
        if self._bins is None:
            # get the distances
            d = self.distance.copy()
            d[np.where(~self._direction_mask())] = np.nan
            self._bins, n = self.bin_func(d, self._n_lags, self.maxlag)
            # if the binning function returned an N, the n_lags need
            # to be adjusted directly (not through the setter)
            if n is not None:
                self._n_lags = n
        return self._bins.copy()
    def _calc_groups(self, force=False):
        super(DirectionalVariogram, self)._calc_groups(force=force)
        # set to outside maxlag group
        self._groups[np.where(~self._direction_mask())] = -1
#    @jit
[docs]
    def _direction_mask(self, force=False):
        """Directional Mask
        Array aligned to self.distance masking all point pairs which shall be
        ignored for binning and grouping. The one dimensional array contains
        all row-wise point pair combinations from the upper or lower triangle
        of the distance matrix in case either of both is directional.
        Returns
        -------
        mask : numpy.array
            Array aligned to self.distance giving for each point pair
            combination a boolean value whether the point are directional or
            not.
        """
        if force or self._direction_mask_cache is None:
            self._direction_mask_cache = self._directional_model(self._angles, self._euclidean_dist)
        return self._direction_mask_cache 
[docs]
    def pair_field(self, ax=None, cmap="gist_rainbow", points='all', add_points=True, alpha=0.3, **kwargs):  # pragma: no cover
        """
        Plot a pair field.
        Plot a network graph for all point pairs that fulfill the direction
        filter and lie within each others search area.
        Parameters
        ----------
        ax : matplotlib.Subplot
            A matplotlib Axes object to plot the pair field onto.
            If ``None``, a new new matplotlib figure will be created.
        cmap : string
            Any color-map name that is supported by matplotlib
        points : 'all', int, list
            If not ``'all'``, only the given coordinate (int) or
            list of coordinates (list) will be plotted. Recommended, if
            the input data is quite large.
        add_points : bool
            If True (default) The coordinates will be added as black points.
        alpha : float
            Alpha value for the colors to make overlapping vertices
            visualize better. Defaults to ``0.3``.
        """
        # get the backend
        used_backend = plotting.backend()
        if used_backend == 'matplotlib':
            return plotting.matplotlib_pair_field(self, ax=ax, cmap=cmap, points=points, add_points=add_points, alpha=alpha, **kwargs)
        elif used_backend == 'plotly':
            return plotting.plotly_pair_field(self, fig=ax, points=points, add_points=add_points, alpha=alpha, **kwargs) 
[docs]
    def _triangle(self, angles, dists):
        r"""Triangular Search Area
        Construct a triangular bounded search area for building directional
        dependent point pairs. The Search Area will be located onto the
        current point of interest and the local x-axis is rotated onto the
        azimuth angle.
        Parameters
        ----------
        angles, dists : numpy.array
            Vectors between point pairs in polar form (angle relative
            to east in radians, length in coordinate space units)
        Returns
        -------
        mask : numpy.array(bool)
            Point pair mask, indexed as the results of
            scipy.spatial.distance.pdist are.
        Notes
        -----
        .. code-block:: text
                 C
                /|\
             a / | \ a
              /__|h_\
             A   c   B
        The point of interest is C and c is the bandwidth. The angle at C
        (gamma) is the tolerance. From this, a and then h can be calculated.
        When rotated into the local coordinate system, the two points needed
        to build the search area A,B are A := (h, 1/2 c) and B:= (h, -1/2 c)
        a can be calculated like:
        .. math::
            a = \frac{c}{2 * sin\left(\frac{\gamma}{2}\right)}
        See Also
        --------
        DirectionalVariogram._compass
        DirectionalVariogram._circle
        """
        absdiff = np.abs(angles + np.radians(self.azimuth))
        absdiff = np.where(absdiff > np.pi, absdiff - np.pi, absdiff)
        absdiff = np.where(absdiff > np.pi / 2, np.pi - absdiff, absdiff)
        in_tol = absdiff <= np.radians(self.tolerance / 2)
        in_band = self.bandwidth / 2 >= np.abs(dists * np.sin(np.abs(angles + np.radians(self.azimuth))))
        return in_tol & in_band 
    def _circle(self, angles, dists):
        r"""Circular Search Area
        Construct a half-circled bounded search area for building directional
        dependent point pairs. The Search Area will be located onto the
        current point of interest and the local x-axis is rotated onto the
        azimuth angle.
        The radius of the half-circle is set to half the bandwidth.
        Parameters
        ----------
        angles, dists : numpy.array
            Vectors between point pairs in polar form (angle relative
            to east in radians, length in coordinate space units)
        Returns
        -------
        mask : numpy.array(bool)
            Point pair mask, indexed as the results of
            scipy.spatial.distance.pdist are.
        Raises
        ------
        ValueError : In case the DirectionalVariogram.bandwidth is None or 0.
        See Also
        --------
        DirectionalVariogram._triangle
        DirectionalVariogram._compass
        """
        raise NotImplementedError
[docs]
    def _compass(self, angles, dists):
        r"""Compass direction direction mask
        Construct a search area for building directional dependent point
        pairs. The compass search area will **not** be bounded by the
        bandwidth. It will include all point pairs at the azimuth direction
        with a given tolerance. The Search Area will be located onto the
        current point of interest and the local x-axis is rotated onto the
        azimuth angle.
        Parameters
        ----------
        angles, dists : numpy.array
            Vectors between point pairs in polar form (angle relative
            to east in radians, length in coordinate space units)
        Returns
        -------
        mask : numpy.array(bool)
            Point pair mask, indexed as the results of
            scipy.spatial.distance.pdist are.
        See Also
        --------
        DirectionalVariogram._triangle
        DirectionalVariogram._circle
        """
        absdiff = np.abs(angles + np.radians(self.azimuth))
        absdiff = np.where(absdiff > np.pi, absdiff - np.pi, absdiff)
        absdiff = np.where(absdiff > np.pi / 2, np.pi - absdiff, absdiff)
        return absdiff <= np.radians(self.tolerance / 2)