"""
Variogram class
"""
import copy
import inspect
import warnings
from typing import Iterable, Callable, Union, Tuple
import numpy as np
from pandas import DataFrame
from scipy.optimize import curve_fit, minimize, OptimizeWarning
from scipy.spatial.distance import pdist, squareform
from scipy import stats, sparse
from sklearn.isotonic import IsotonicRegression
from skgstat import estimators, models, binning
from skgstat import plotting
from skgstat.util import shannon_entropy
from .MetricSpace import MetricSpace, ProbabalisticMetricSpace
from skgstat.interfaces.gstools import skgstat_to_gstools, skgstat_to_krige
class Variogram(object):
"""Variogram Class
Calculates a variogram of the separating distances in the given
coordinates and relates them to one of the semi-variance measures of
the given dependent values.
"""
def __init__(self,
coordinates=None,
values=None,
estimator='matheron',
model='spherical',
dist_func='euclidean',
bin_func='even',
normalize=False,
fit_method='trf',
fit_sigma=None,
use_nugget=False,
maxlag=None,
samples=None,
n_lags=10,
verbose=False,
**kwargs,
):
r"""Variogram Class
Parameters
----------
coordinates : numpy.ndarray, MetricSpace
.. versionchanged:: 0.5.0
now accepts MetricSpace
Array of shape (m, n). Will be used as m observation points of
n dimensions. The variogram can be calculated on 1- to
n-dimensional coordinates. In case a 1-dimensional array is passed,
a second array of same length containing only zeros will be
stacked to the passed one.
For very large datasets, you can set maxlag to only calculate
distances within the maximum lag in a sparse matrix.
Alternatively you can supply a MetricSpace (optionally with a
`max_dist` set for the same effect). This is useful if you're
creating many different variograms for different measured
parameters that are all measured at the same set of coordinates,
as distances will only be calculated once, instead of once per
variogram.
values : numpy.ndarray
.. versionchanged:: 1.0.5
Now accepts co-variables for calculating cross variograms.
Array of values observed at the given coordinates. The length of
the values array has to match the m dimension of the coordinates
array. Will be used to calculate the dependent variable of the
variogram.
If the values are of shape ``(n_samples, 2)``, a cross-variogram
will be calculated. This assumes the main variable and the
co-variable to be co-located under Markov-model 1 assumptions,
meaning the variables need to be conditionally independent.
estimator : str, callable
String identifying the semi-variance estimator to be used.
Defaults to the Matheron estimator. Possible values are:
* matheron [Matheron, default]
* cressie [Cressie-Hawkins]
* dowd [Dowd-Estimator]
* genton [Genton]
* minmax [MinMax Scaler]
* entropy [Shannon Entropy]
If a callable is passed, it has to accept an array of absolute
differences, aligned to the 1D distance matrix (flattened upper
triangle) and return a scalar that converges towards small
values for similar (highly covariant) data.
model : str | Callable
.. versionchanged:: 1.0.12
Added support for sum of models (e.g., "spherical+gaussian"), or custom model (Callable). Using
`fit_bounds` to optimize the fit is recommended for custom models, and can be useful for sum of models.
String or callable identifying the theoretical variogram function to be used
to describe the experimental variogram. Can be one of:
* spherical [Spherical, default]
* exponential [Exponential]
* gaussian [Gaussian]
* cubic [Cubic]
* stable [Stable model]
* matern [Matérn model]
* nugget [nugget effect variogram]
Any number of these theoretical models can be summed using "+" iteratively, e.g. "spherical+cubic+matern".
The nugget parameters of the models are removed except for the last model (sum of nuggets = single nugget).
dist_func : str
String identifying the distance function. Defaults to
'euclidean'. Can be any metric accepted by
scipy.spatial.distance.pdist. Additional parameters are not (yet)
passed through to pdist. These are accepted by pdist for some of
the metrics. In these cases the default values are used.
bin_func : str | Callable | Iterable
.. versionchanged:: 0.3.8
added 'fd', 'sturges', 'scott', 'sqrt', 'doane'
.. versionchanged:: 0.3.9
added 'kmeans', 'ward'
String identifying the binning function used to find lag class
edges. All methods calculate bin edges on the interval [0, maxlag).
Possible values are:
* `'even'` (default) finds `n_lags` same width bins
* `'uniform'` forms `n_lags` bins of same data count
* `'fd'` applies Freedman-Diaconis estimator to find `n_lags`
* `'sturges'` applies Sturges' rule to find `n_lags`.
* `'scott'` applies Scott's rule to find `n_lags`
* `'doane'` applies Doane's extension to Sturges' rule
to find `n_lags`
* `'sqrt'` uses the square-root of
:func:`distance <skgstat.Variogram.distance>`
as `n_lags`.
* `'kmeans'` uses KMeans clustering to find well supported bins
* `'ward'` uses hierarchical clustering to find
minimum-variance clusters.
More details are given in the documentation for
:func:`set_bin_func <skgstat.Variogram.set_bin_func>`.
normalize : bool
Defaults to False. If True, the independent and dependent
variable will be normalized to the range [0,1].
fit_method : str | None
.. versionchanged:: 0.3.10
Added 'ml' and 'custom'
String identifying the method to be used for fitting the
theoretical variogram function to the experimental. If None
is passed, the fit does not run. More info is given in the
Variogram.fit docs. Can be one of:
* 'lm': Levenberg-Marquardt algorithm for unconstrained
problems. This is the faster algorithm, yet fitting a
variogram is not an unconstrained problem.
* 'trf': Trust Region Reflective function for non-linear
constrained problems. The class will set the boundaries
itself. This is the default function.
* 'ml': Maximum-Likelihood estimation. With the current
implementation only the Nelder-Mead solver for
unconstrained problems is implemented. This will estimate
the variogram parameters from a Gaussian parameter space
by minimizing the negative log-likelihood.
* 'manual': Manual fitting. You can set the range, sill and
nugget either directly to the
:func:`fit <skgstat.Variogram.fit>` function, or as
`fit_` prefixed keyword arguments on Variogram instantiation.
fit_sigma : numpy.ndarray, str
Defaults to None. The sigma is used as measure of uncertainty
during variogram fit. If fit_sigma is an array, it has to hold
n_lags elements, giving the uncertainty for all lag classes. If
fit_sigma is None (default), it will give no weight to any lag.
Higher values indicate higher uncertainty and will lower the
influence of the corresponding lag class for the fit.
If fit_sigma is a string, a pre-defined function of separating
distance will be used to fill the array. Can be one of:
* 'linear': Linear loss with distance. Small bins will have
higher impact.
* 'exp': The weights decrease by an e-function of distance
* 'sqrt': The weights decrease by the square-root of distance
* 'sq': The weights decrease by the squared distance.
More info is given in the Variogram.fit_sigma documentation.
use_nugget : bool
Defaults to False. If True, a nugget effect will be added to all
Variogram.models as a third (or fourth) fitting parameter. A
nugget is essentially the y-axis intercept of the theoretical
variogram function. For a sum of models, the nugget is defined
in the last model.
maxlag : float, str
Can specify the maximum lag distance directly by giving a value
larger than 1. The binning function will not find any lag class
with an edge larger than maxlag. If 0 < maxlag < 1, then maxlag
is relative and maxlag * max(Variogram.distance) will be used.
In case maxlag is a string it has to be one of 'median', 'mean'.
Then the median or mean of all Variogram.distance will be used.
Note that maxlag=0.5 will use half the maximum separating distance;
this is not the same as 'median', which is the median of all
separating distances.
samples : float, int
If set to a non-None value, point pairs are sampled
randomly. Two random subsets of all points are chosen, and
the distance matrix is calculated only between these two
subsets. The size of each subset is set by `samples`: if <
1 it specifies a fraction of all points, if >= 1 it
specifies the number of points in each subset.
n_lags : int
Specify the number of lag classes to be defined by the binning
function.
verbose : bool
Set the verbosity of the class. Not implemented yet.
Keyword Arguments
-----------------
entropy_bins : int, str
.. versionadded:: 0.3.7
If the `estimator <skgstat.Variogram.estimator>` is set to
`'entropy'` this argument sets the number of bins that should be
used for histogram calculation.
percentile : int
.. versionadded:: 0.3.7
If the `estimator <skgstat.Variogram.estimator>` is set to
`'entropy'` this argument sets the percentile to be used.
binning_random_state : int, None
.. versionadded:: 0.3.9
If :func:`bin_func <skgstat.Variogram.set_bin_func>` is `'kmeans'`
this can overwrite the seed for the initial guess of the cluster
centroids. Note that K-Means is not deterministic and is therefore
seeded to 42 here. You can pass `None` to disable this behavior,
but use it with care, as you will get different results.
binning_agg_func : str
.. versionadded:: 0.3.10
If :func:`bin_func <skgstat.Variogram.set_bin_func>` is `'ward'`
this keyword argument can switch from default mean aggregation to
median aggregation for calculating the cluster centroids.
obs_sigma : int, float
.. versionadded:: 0.6.0
If set, the Variogram will use this sigma as the standard deviation
of the observations passed as values. Using a MonteCarlo simulation
the uncertainties are propagated into the experimental variogram.
If present, the plot will indicate the confidence interval as
error bars around the experimental variogram.
fit_bounds: 2-tuple of array_like or Bounds, optional
.. versionadded:: 1.0.12
Lower and upper bounds on parameters passed to scipy.optimize.curve_fit.
Order is typically (range, sill, nugget) or (range, sill, smoothness, nugget) for individual models, or
(range1, sill1, nugget1, range2, sill2, nugget2) for a sum of 2 models.
Recommended for custom models, where bounds cannot be determined logically.
For internal models, defaults to known min/max values for the sill (0, max variance), range (0, max lag)
and smoothness (0, 2) or (0, 20) for stable and matern, respectively.
fit_p0: array_like, optional
.. versionadded:: 1.0.12
Initial guess for the parameters passed to scipy.optimize.curve_fit.
Same order as for fit_bounds.
Defaults to upper bounds values. For custom models, if no bounds are defined, defaults to 1.
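Examples
--------
A minimal usage sketch; the coordinates and values below are
illustrative random data, not part of the library:
>>> import numpy as np
>>> from skgstat import Variogram
>>> rng = np.random.default_rng(42)
>>> coords = rng.uniform(0, 100, (150, 2))
>>> vals = rng.normal(10, 2, 150)
>>> V = Variogram(coords, vals, model='spherical', n_lags=15)
>>> params = V.describe()  # dict with e.g. 'effective_range', 'sill'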
"""
# Before we do anything else, make kwargs available
self._kwargs = self._validate_kwargs(**kwargs)
# handle the coordinates
self._1d = False
if not isinstance(coordinates, MetricSpace):
coordinates = np.asarray(coordinates)
# handle 1D coords
if len(coordinates.shape) < 2:
coordinates = np.column_stack((
coordinates,
np.zeros(len(coordinates))
))
self._1d = True
# handle maxlag for MetricSpace
if maxlag and not isinstance(maxlag, str) and maxlag >= 1:
_maxlag = maxlag
else:
_maxlag = None
if samples is None:
coordinates = MetricSpace(
coordinates.copy(),
dist_func,
_maxlag
)
else:
coordinates = ProbabalisticMetricSpace(
coordinates.copy(),
dist_func, _maxlag,
samples=samples,
rnd=self._kwargs.get("binning_random_state", None)
)
elif dist_func != coordinates.dist_metric:
raise AttributeError((
"Distance metric of variogram differs "
"from distance metric of coordinates"
))
# Set coordinates
self._X = coordinates
# pairwise differences
self._diff = None
# set verbosity
self.verbose = verbose
# declare a flag to mark if this is a covariogram
# this is set to None, as set_values will figure it out.
self._is_cross = None
self._co_variable = None
# set values
self._values = None
# calc_diff = False here, because it will be calculated by fit() later
self.set_values(values=values, calc_diff=False)
# lags and max lag
self._n_lags_passed_value = n_lags
self._n_lags = None
self.n_lags = n_lags
self._maxlag = None
self.maxlag = maxlag
# harmonize model placeholder
self._harmonize = False
# estimator can be a function or a string
self._estimator = None
self.set_estimator(estimator_name=estimator)
# the binning settings
self._bin_func_name = None
self._bin_func = None
self._groups = None
self._bins = None
self._bin_count = None
self.set_bin_func(bin_func=bin_func)
# Needed for harmonized models, no need to
# run if there is no fit
if fit_method is not None:
self.preprocessing(force=True)
# set if nugget effect shall be used
self._use_nugget = None
self.use_nugget = use_nugget
# model can be a function or a string
self._model = None
self._model_name = None
self._is_model_custom = False
self.set_model(model_name=model)
# specify if the lag should be given absolute or relative to the maxlag
self._normalized = normalize
# set the fitting method and sigma array
self._fit_method = None
if fit_method is not None:
self.fit_method = fit_method
self._fit_sigma = None
self.fit_sigma = fit_sigma
# set attributes to be filled during calculation
self.cov = None
self.cof = None
# settings, not reachable by init (not yet)
self._cache_experimental = False
# do the preprocessing and fitting upon initialization
# Note that fit() calls preprocessing
fit_bounds = self._kwargs.get('fit_bounds') # returns None if empty
fit_p0 = self._kwargs.get('fit_p0')
self.fit(force=True, bounds=fit_bounds, p0=fit_p0)
# finally check if any of the uncertainty propagation kwargs are set
self._experimental_conf_interval = None
self._model_conf_interval = None
if 'obs_sigma' in self._kwargs:
self._propagate_obs_sigma()
@property
def coordinates(self):
"""Coordinates property
Array of observation locations the variogram is built for. This
property has no setter. If you want to change the coordinates,
use a new Variogram instance.
Returns
-------
coordinates : numpy.array
"""
return self._X.coords
@property
def metric_space(self):
r"""
.. versionadded:: 0.5.6
:class:`MetricSpace <skgstat.MetricSpace>` representation of the
input coordinates. A :class:`MetricSpace <skgstat.MetricSpace>`
can be used to pass pre-calculated coordinates to other
:class:`Variogram <skgstat.Variogram>` instances.
Returns
-------
metric_space : skgstat.MetricSpace
See Also
--------
Variogram.coordinates : coordinate representation
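Examples
--------
A sketch of reusing pre-calculated distances for a second,
co-located variable (the arrays are illustrative):
>>> import numpy as np
>>> from skgstat import Variogram
>>> coords = np.random.uniform(0, 50, (100, 2))
>>> vals_a, vals_b = np.random.normal(5, 1, (2, 100))
>>> V1 = Variogram(coords, vals_a)
>>> V2 = Variogram(V1.metric_space, vals_b)  # distances calculated once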
"""
return self._X
@metric_space.setter
def metric_space(self, value):
raise AttributeError('MetricSpace is read-only.')
@property
def dim(self):
"""
Input coordinates dimensionality.
"""
if self._1d:
return 1
return self.coordinates.shape[1]
@property
def values(self):
"""Values property
Array of observations the variogram is built for. The setter of this
property utilizes the Variogram.set_values function for setting new
arrays.
Returns
-------
values : numpy.ndarray
See Also
--------
Variogram.set_values
"""
return self._values
@values.setter
def values(self, values):
self.set_values(values=values)
@property
def value_matrix(self):
"""Value matrix
Returns a matrix of pairwise differences in absolute values. The
matrix will have the shape (m, m) with m = len(Variogram.values).
Note that Variogram.values holds the values themselves, while the
value_matrix consists of their pairwise differences.
Returns
-------
values : numpy.ndarray
Matrix of pairwise absolute differences of the values.
See Also
--------
Variogram._diff
"""
return squareform(self.pairwise_diffs)
def set_values(self, values, calc_diff=True):
"""Set new values
Will set the passed array as new value array. This array has to be of
same length as the first axis of the coordinates array. The Variogram
class does only accept one dimensional arrays.
On success all fitting parameters are deleted and the pairwise
differences are recalculated.
Raises :py:class:`ValueError`s on shape mismatches and a Warning
.. versionchanged::
Now a warnings.warn message is thrown if all input data is the same
Parameters
----------
values : numpy.ndarray
Returns
-------
void
Raises
------
ValueError : raised if the values array shape does not match the
coordinates array, or more than one dimension given
Warning : raised if all input values are the same
See Also
--------
Variogram.values
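Examples
--------
A sketch of replacing the observations on an existing instance
``V`` (``new_vals`` is illustrative and has to match the number
of coordinates):
>>> new_vals = np.random.normal(8, 2, len(V.coordinates))
>>> V.set_values(new_vals)  # resets the fit, recalculates differences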
"""
# check dimensions
if not len(values) == len(self.coordinates): # pragma: no cover
raise ValueError('The length of the values array has to match ' +
'the length of coordinates')
# use an array
_y = np.asarray(values)
# check if this should be a cross-variogram
# TODO: run tests for this
if _y.ndim > 2:
raise ValueError(
"values has to be 1d (classic variogram) or 2d dimensional (cross-variogram)"
)
elif _y.ndim == 2:
if _y.shape[1] > 2:
raise ValueError(
"Use the utility function to create a grid of cross-variograms"
)
elif _y.shape[1] == 2:
# set the co-variable
self._co_variable = _y[:, 1].flatten()
# generate a warning if the cross variogram flag was set to False
if self._is_cross is not None and not self._is_cross:
warnings.warn("You passed two variables as observation and " +
"effectively turned this instance into a cross-variogram.")
self._is_cross = True
# by definition, the first axis is the observation
_y = _y[:, 0].flatten()
else:
self._is_cross = False
# check if all input values are the same
if len(set(_y)) < 2:
self.__single_input = True
warnings.warn('All input values are the same.')
else:
self.__single_input = False
# reset fitting parameter
self.cof, self.cov = None, None
self._diff = None
# set new values
self._values = np.asarray(_y)
# recalculate the pairwise differences
if calc_diff:
self._calc_diff(force=True)
@property
def pairwise_diffs(self):
"""
.. versionadded:: 1.0.4
Pairwise residual differences of the input data.
Use this property instead of the Variogram._diff attribute, as
future releases will hold multiple difference targets to
implement cross-variograms.
"""
if self._diff is None:
self.preprocessing()
return self._diff
@property
def bin_func(self):
"""Binning function
Returns an instance of the function used for binning the separating
distances into the given amount of bins. All binning functions share
the signature func(distances, n, maxlag).
The setter of this property utilizes the Variogram.set_bin_func to
set a new function.
Returns
-------
binning_function : function
See Also
--------
Variogram.set_bin_func
"""
return self._bin_func
@bin_func.setter
def bin_func(self, bin_func):
self.set_bin_func(bin_func=bin_func)
def set_bin_func(self, bin_func: Union[str, Iterable, Callable[[np.ndarray, float, float], Tuple[np.ndarray, float]]]):
r"""Set binning function
Sets a new binning function to be used. The new binning method is set
by either a string identifying the new function to be used, or an
iterable containing the bin edges, or any function that can compute
bins from the distances, number of lags and maximum lag.
The string can be one of: ['even', 'uniform', 'fd',
'sturges', 'scott', 'sqrt', 'doane'].
If the number of lag classes should be estimated automatically, it is
recommended to use 'sturges' for small, normally distributed samples
and 'fd' or 'scott' for large datasets, where 'fd' is more robust to
outliers. 'sqrt' is by far the fastest estimator. 'doane' is an
extension of Sturges' rule for non-normally distributed data.
.. versionchanged:: 0.3.8
added 'fd', 'sturges', 'scott', 'sqrt', 'doane'
.. versionchanged:: 0.3.9
added 'kmeans', 'ward'
.. versionchanged:: 0.4.0
added 'stable_entropy'
.. versionchanged:: 0.4.1
refactored local wrapper function definition. The wrapper to
pass kwargs to the binning functions is now implemented as
an instance method, to make it pickleable.
.. versionchanged:: 0.6.5
added iterable and function as arguments to allow for custom
bins.
Parameters
----------
bin_func : str | Iterable | Callable
Can be one of:
* 'even'
* 'uniform'
* 'fd'
* 'sturges'
* 'scott'
* 'sqrt'
* 'doane'
* 'kmeans'
* 'ward'
* 'stable_entropy'
Returns
-------
void
Notes
-----
**`'even'`**: Use skgstat.binning.even_width_lags for using
n_lags lags of equal width up to maxlag.
**`'uniform'`**: Use skgstat.binning.uniform_count_lags for using
n_lags lags up to maxlag in which the pairwise differences
follow a uniform distribution.
**`'sturges'`**: estimates the number of evenly distributed lag
classes (n) from the sample size (s) by Sturges' rule [101]_:
.. math::
n = \log_2 s + 1
**`'scott'`**: estimates the lag class widths (h) by
Scott's rule [102]_:
.. math::
h = \sigma \left(\frac{24\sqrt{\pi}}{n}\right)^{\frac{1}{3}}
**`'sqrt'`**: estimates the number of lags (n) as the square-root
of the number of distances (s):
.. math::
n = \sqrt{s}
**`'fd'`**: estimates the lag class widths (h) using the
Freedman Diaconis estimator [103]_:
.. math::
h = 2\frac{IQR}{n^{1/3}}
**`'doane'`**: estimates the number of evenly distributed lag classes
using Doane's extension to Sturges' rule [104]_:
.. math::
n = 1 + \log_{2}(s) + \log_2\left(1 + \frac{|g|}{k}\right)
g = E\left[\left(\frac{x - \mu_g}{\sigma}\right)^3\right]
k = \sqrt{\frac{6(s - 2)}{(s + 1)(s + 3)}}
**`'kmeans'`**: This method will search for `n` clusters in the
distance matrix. The cluster centroids are used to calculate the
upper edges of the lag classes, by setting it to half of the distance
between two neighboring clusters. Note: This does not necessarily
result in even width bins.
**`'ward'`** uses a hierarchical clustering algorithm to iteratively
merge pairs of clusters until there are only `n` remaining clusters.
The merging is done by minimizing the variance for the merged cluster.
**`'stable_entropy'`** will adjust `n` bin edges by minimizing the
absolute differences between each lag's Shannon Entropy. This will
lead to uneven bin widths. Each lag class value distribution will be
of comparable intrinsic uncertainty from an information theoretic
point of view, which makes the semi-variances quite comparable.
However, it is not guaranteed that the binning makes any sense
from a geostatistical point of view, as the first lags might be way
too wide.
See Also
--------
Variogram.bin_func
skgstat.binning.uniform_count_lags
skgstat.binning.even_width_lags
skgstat.binning.auto_derived_lags
skgstat.binning.kmeans
skgstat.binning.ward
sklearn.cluster.KMeans
sklearn.cluster.AgglomerativeClustering
References
----------
.. [101] Scott, D.W. (2009), Sturges' rule. WIREs Comp Stat, 1:
303-306. https://doi.org/10.1002/wics.35
.. [102] Scott, D.W. (2010), Scott's rule. WIREs Comp Stat, 2:
497-502. https://doi.org/10.1002/wics.103
.. [103] Freedman, David, and Persi Diaconis (1981), "On the
histogram as a density estimator: L 2 theory." Zeitschrift
für Wahrscheinlichkeitstheorie und verwandte Gebiete 57.4:
453-476.
.. [104] Doane, D. P. (1976). Aesthetic frequency classifications.
The American Statistician, 30(4), 181-183.
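Examples
--------
A sketch of passing custom bin edges to an existing instance
``V`` (the edge values are illustrative); this also updates
maxlag and n_lags:
>>> V.set_bin_func([5, 10, 20, 40, 80])
>>> V.n_lags
5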
"""
# handle strings
if isinstance(bin_func, str):
fname = bin_func.lower()
# switch the input
if fname == 'even':
self._bin_func = binning.even_width_lags
elif fname == 'uniform':
self._bin_func = binning.uniform_count_lags
# remove the n_lags if they will be adjusted on call
else:
# reset lags for adjusting algorithms
if fname not in ('kmeans', 'ward', 'stable_entropy'):
self._n_lags = None
# use the wrapper for all but even and uniform
self._bin_func = self._bin_func_wrapper
bin_func_name = bin_func
elif isinstance(bin_func, Callable): # pragma: no cover
self._bin_func = bin_func
bin_func_name = 'custom_func'
elif isinstance(bin_func, Iterable):
self._bin_func = None
bin_func_name = 'custom_bin_edges'
else:
raise AttributeError('bin_func has to be of type string, iterable or callable.')
# store the name
self._bin_func_name = bin_func_name
# reset groups, bins and bin count
self._groups = None
self._bin_count = None
if isinstance(bin_func, str) or isinstance(bin_func, Callable):
self._bins = None
# if the input is an iterable with bin edges, we need to write self._bins here
else:
self._bins = bin_func
self._maxlag = max(bin_func)
self._n_lags = sum(1 for e in bin_func)
self.cof, self.cov = None, None
def _bin_func_wrapper(self, distances, n, maxlag):
"""
Wrapper around the call of the actual binning method.
This is needed to pass keyword arguments to kmeans or
stable_entropy binning methods, and respect the slightly
different function signature of auto_derived_lags.
"""
if self._bin_func_name.lower() == 'kmeans':
return binning.kmeans(distances, n, maxlag, **self._kwargs)
elif self._bin_func_name.lower() == 'ward':
return binning.ward(distances, n, maxlag)
elif self._bin_func_name.lower() == 'stable_entropy':
return binning.stable_entropy_lags(
distances,
n,
maxlag,
**self._kwargs
)
else:
return binning.auto_derived_lags(
distances,
self._bin_func_name,
maxlag
)
@property
def normalized(self):
return self._normalized
@normalized.setter
def normalized(self, status):
# set the new value
self._normalized = status
@property
def bins(self):
"""Distance lag bins
Independent variable of the experimental variogram sample.
The bins are the upper edges of all calculated distance lag
classes. If you need bin centers, use
:func:`get_empirical <skgstat.Variogram.get_empirical>`.
Returns
-------
bins : numpy.ndarray
1D array of the distance lag classes.
See Also
--------
Variogram.get_empirical
"""
# if bins are not calculated, do it
if self._bins is None:
self._bins, n = self.bin_func(self.distance, self._n_lags, self.maxlag)
# if the binning function returned an N, the n_lags need
# to be adjusted directly (not through the setter)
if n is not None:
self._n_lags = n
return self._bins.copy()
@bins.setter
def bins(self, bins):
# set the new bins
self._bins = np.asarray(bins)
# clean the groups as they are not valid anymore
self._bin_count = None
self._groups = None
self.cov = None
self.cof = None
@property
def n_lags(self):
"""Number of lag bins
Pass the number of lag bins to be used on
this Variogram instance. This will reset
the grouping index and fitting parameters
"""
if self._n_lags is None:
self._n_lags = len(self.bins)
return self._n_lags
@n_lags.setter
def n_lags(self, n):
# TODO: here accept strings and implement some optimum methods
# string are not implemented yet
if isinstance(n, str): # pragma: no cover
raise NotImplementedError('n_lags string values not implemented')
# n_lags is int
elif isinstance(n, int):
if n < 1:
raise ValueError('n_lags has to be a positive integer')
# set parameter
self._n_lags = n
# reset the bins
self._bins = None
# else
else:
raise ValueError('n_lags has to be a positive integer')
# if there are no errors, store the passed value
self._n_lags_passed_value = n
# reset the groups and bin count
self._groups = None
self._bin_count = None
# reset the fitting
self.cof = None
self.cov = None
@property
def bin_count(self):
if self._bin_count is None:
self._bin_count = np.fromiter((g.size for g in self.lag_classes()), dtype=int)
return self._bin_count
@property
def estimator(self):
return self._estimator
@estimator.setter
def estimator(self, value):
self.set_estimator(estimator_name=value)
def set_estimator(self, estimator_name):
# reset the fitting
self.cof, self.cov = None, None
if isinstance(estimator_name, str):
if estimator_name.lower() == 'matheron':
self._estimator = estimators.matheron
elif estimator_name.lower() == 'cressie':
self._estimator = estimators.cressie
elif estimator_name.lower() == 'dowd':
self._estimator = estimators.dowd
elif estimator_name.lower() == 'genton':
self._estimator = estimators.genton
elif estimator_name.lower() == 'minmax':
self._estimator = estimators.minmax
elif estimator_name.lower() == 'percentile':
self._estimator = estimators.percentile
elif estimator_name.lower() == 'entropy':
self._estimator = estimators.entropy
else:
raise ValueError(
(
'Variogram estimator %s is not understood, please '
'provide the function.'
) % estimator_name
)
elif callable(estimator_name): # pragma: no cover
self._estimator = estimator_name
else:
raise ValueError('The estimator has to be a string or callable.')
@property
def model(self):
return self._model
@model.setter
def model(self, value):
self.set_model(model_name=value)
def set_model(self, model_name):
"""
Set model as the new theoretical variogram function.
"""
# reset the fitting
self.cof, self.cov = None, None
if isinstance(model_name, str):
# at first reset harmonize
self._harmonize = False
if model_name.lower() == 'harmonize':
mname = 'harmonize'
self._harmonize = True
self._model = self._build_harmonized_model()
elif "+" in model_name:
mname = ''.join(model_name.split()).lower() # Remove all whitespaces
self._model = self._build_sum_models(mname)
elif hasattr(models, model_name.lower()):
mname = model_name.lower()
self._model = getattr(models, mname)
else:
raise ValueError(
(
'The theoretical Variogram function %s is not'
' understood, please provide the function'
) % model_name
)
# Set model name attribute
self._model_name = mname
else: # pragma: no cover
self._is_model_custom = True
self._model = model_name
self._model_name = model_name.__name__
def _get_argpos_sum_models(self, list_model_names):
"""
Get argument slice position (list of slices) for the sum of models from a list of model names (list of strings).
"""
# Get the models
list_models = [getattr(models, model_name.lower()).py_func for model_name in list_model_names]
# Get the number of arguments per model (e.g., [3, 4, 4])
nb_args_per_model = np.array([len(inspect.getfullargspec(model).args) for model in list_models])
# We remove the nugget and lags parameters, except nugget for the last model (sum of nuggets = single nugget)
nb_args_per_model -= 2
nb_args_per_model[-1] += 1
# Compute cumulative number of args removing 2 args everywhere (all lags and nuggets, last one will compensate)
cum_args_minus_lag = np.cumsum(nb_args_per_model)
# Prepare argument slices to distribute to submodels
args_indices = np.insert(cum_args_minus_lag, 0, 0) # We add the first index of 0
args_slices = [(slice(args_indices[i], args_indices[i + 1])) for i in range(len(args_indices) - 1)]
return args_slices
def _build_sum_models(self, sum_models_name: str):
"""
Build sum of theoretical models, variogram-decorated function.
"""
# Remove all whitespaces in the string, in case the user wrote something like "spherical + gaussian"
sum_models_name = ''.join(sum_models_name.split()).lower()
# Get individual model names
list_model_names = sum_models_name.split("+")
# Check that all models exist in the "models" module
if not all(hasattr(models, model_name.lower()) for model_name in list_model_names):
raise ValueError(
(
'One of the theoretical models in the list "%s" is not'
' understood, please provide existing model names separated by "+".'
) % ", ".join(list_model_names)
)
# First, build the models from their py_func (ignoring variogram decorator) and get args per model
list_models = [getattr(models, model_name.lower()).py_func for model_name in list_model_names]
# Get the argument positions for the model sum (function uses model names to be called by other methods)
args_slices = self._get_argpos_sum_models(list_model_names=list_model_names)
# Distribute first argument (lag) and use all others in order (nugget ignored when last argument not passed)
@models.variogram
def sum_models(h, *args):
return sum(list_models[i](h, *args[args_slices[i]]) for i in range(len(list_models)))
return sum_models
def _build_harmonized_model(self):
x = self.bins
y = self.experimental
_x = x[~np.isnan(y)]
_y = y[~np.isnan(y)]
regr = IsotonicRegression(increasing=True).fit(_x, _y)
# create the model function
def harmonize(x):
"""Monotonized Variogram
Return the isotonic harmonized experimental variogram.
This means, the experimental variogram is monotonic
after harmonization.
The harmonization is done following Hinterding (2003),
using the PAVA algorithm (Barlow and Bartholomew, 1972).
Returns
-------
gamma : numpy.ndarray
monotonized experimental variogram
References
----------
Barlow, R., D. Bartholomew, et al. (1972): Statistical
Inference Under Order Restrictions. John Wiley
and Sons, New York.
Hinterding, A. (2003): Entwicklung hybrider Interpolations-
verfahren für den automatisierten Betrieb am Beispiel
meteorologischer Größen. Dissertation, Institut für
Geoinformatik, Westfälische Wilhelms-Universität Münster,
IfGIprints, Münster. ISBN: 3-936616-12-4
"""
if isinstance(x, (list, tuple, np.ndarray)):
return regr.transform(x)
else:
return regr.transform([x])
return harmonize
@property
def use_nugget(self):
"""
Use a nugget effect on this Variogram instance.
If disabled, the automatic fitting procedures will omit the nugget and
not use it as a model parameter.
Note
----
If :func:`fit_method <skgstat.Variogram.fit_method>` is set to
``'manual'`` and a nugget parameter is passed to
:func:`fit <skgstat.Variogram.fit>`, use_nugget will be set to True.
Returns
-------
use_nugget : bool
"""
return self._use_nugget
@use_nugget.setter
def use_nugget(self, enable_nugget):
if not isinstance(enable_nugget, bool):
raise ValueError('use_nugget has to be of type bool.')
# set new value
self._use_nugget = enable_nugget
@property
def dist_function(self):
return self._X.dist_metric
@classmethod
def wrapped_distance_function(cls, dist_func, x, **kwargs):
return pdist(X=x, metric=dist_func, **kwargs)
@dist_function.setter
def dist_function(self, func):
self.set_dist_function(func=func)
def set_dist_function(self, func):
"""Set distance function
Set the function used for distance calculation. func can either be a
callable or a string. The ranked distance function is not implemented
yet. Strings will be forwarded to the scipy.spatial.distance.pdist
function as the metric argument.
If func is a callable, it has to return the upper triangle of the
distance matrix as a flat array (like the pdist function).
Parameters
----------
func : string, callable
Returns
-------
void
"""
# reset the fitting
self.cof, self.cov = None, None
if isinstance(func, str): # pragma: no cover
if func.lower() == 'rank':
raise NotImplementedError
elif not callable(func):
raise ValueError('Input not supported. Pass a string or callable.')
# re-calculate distances
self._X = MetricSpace(self._X.coords, func, self._X.max_dist)
@property
def distance(self):
# handle sparse matrix
if isinstance(self.distance_matrix, sparse.spmatrix):
return self.triangular_distance_matrix.data
# Turn it back to triangular form not to have duplicates
return squareform(self.distance_matrix)
@property
def triangular_distance_matrix(self):
"""
Like distance_matrix, but with zeros below the diagonal.
Only defined if distance_matrix is a sparse matrix.
"""
if not isinstance(self.distance_matrix, sparse.spmatrix):
raise RuntimeWarning("Only available for sparse coordinates.")
m = self.distance_matrix
c = m.tocsc()
c.data = c.indices
rows = c.tocsr()
filt = sparse.csr_matrix(
(m.indices < rows.data, m.indices, m.indptr),
m.shape
)
return m.multiply(filt)
@property
def distance_matrix(self):
return self._X.dists
@property
def maxlag(self):
"""
Maximum lag distance to be considered in this Variogram instance.
You can limit the distance at which point pairs are calculated.
There are three possible ways to do that: first, in absolute lag
units, by passing a number larger than one. Secondly, a number
``0 < maxlag < 1`` can be set, which will use this share of the
maximum distance as maxlag. Lastly, a string can be set: ``'mean'`` and ``'median'``
for the mean or median value of the distance matrix.
Notes
-----
This setting is largely flexible, but all options except the
absolute limit in lag units need the full distance matrix to be
calculated. Hence, it does **not** speed up the calculation
of large distance matrices, just the estimation of the variogram.
Thus, if you pre-calculate the distance matrix using
:class:`MetricSpace <skgstat.MetricSpace>`, only absolute
limits can be used.
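Examples
--------
A sketch of the three ways to set the maximum lag on an existing
instance ``V`` (values are illustrative):
>>> V.maxlag = 250.0     # absolute, in lag units
>>> V.maxlag = 0.6       # 60% of the maximum separating distance
>>> V.maxlag = 'median'  # median of all separating distances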
"""
return self._maxlag
@maxlag.setter
def maxlag(self, value):
# reset fitting
self.cof, self.cov = None, None
# remove bins, groups, and bin count
self._bins = None
self._groups = None
self._bin_count = None
# set new maxlag
if value is None:
self._maxlag = None
elif isinstance(value, str):
if value == 'median':
self._maxlag = np.median(self.distance)
elif value == 'mean':
self._maxlag = np.mean(self.distance)
elif value < 1:
self._maxlag = value * np.max(self.distance)
else:
self._maxlag = value
@property
def fit_method(self):
"""
.. versionadded:: 0.6.2
Set the fit method to be used for this Variogram instance.
Possible values are:
* ``'trf'`` - Trust-Region Reflective (default)
* ``'lm'`` - Levenberg-Marquardt
* ``'ml'`` - Maximum Likelihood estimation
* ``'manual'`` - Manual fitting by setting the parameters
.. versionchanged:: 0.6.6
Passing `None` will prevent the fitting procedure from
running.
See Also
--------
scipy.optimize.minimize
scipy.optimize.curve_fit
Variogram.fit
Notes
-----
The default method (TRF) is a bounded least squares method,
that sets constraints to the value space of all parameters.
All methods use an initial guess for all used parameters.
This is :func:`max(bins) <skgstat.Variogram.bins>` for
the range,
:func:`max(experimental) <skgstat.Variogram.experimental>`
for the sill, ``20`` for the Matérn smoothness, ``2`` for
the stable model shape and ``1`` for the nugget if used.
"""
return self._fit_method
@fit_method.setter
def fit_method(self, value):
if value not in ('lm', 'ml', 'trf', 'manual'):
raise AttributeError(
"fit_method has to be one of ['lm', 'ml', 'trf', 'manual']"
)
# value is fine - the 'manual' check must not drop the coefficients
elif value == 'manual':
self._fit_method = 'manual'
# Trust Region Reflective is bounded and cannot fit constant input
elif value == 'trf' and self.__single_input:
raise AttributeError("'trf' is bounded and therefore not supported when all values are the same.")
# otherwise - refit
else:
# set method
self._fit_method = value
# reset fit
self.cof = None
self.cov = None
@property
def fit_sigma(self):
r"""Fitting Uncertainty
Set or calculate an array of observation uncertainties aligned to the
Variogram.bins. These will be used to weight the observations in the
cost function, which divides the residuals by their uncertainty.
When setting fit_sigma, the array of uncertainties itself can be
given, or one of the strings:
['linear', 'exp', 'sqrt', 'sq', 'entropy'].
The parameters described below refer to the setter of this property.
.. versionchanged:: 0.3.11
added the 'entropy' option.
Parameters
----------
sigma : string, array
Sigma can either be an array of discrete uncertainty values,
which have to align to the Variogram.bins, or of type string.
Then, the weights for fitting are calculated as a function of
(lag) distance.
* **sigma='linear'**: The residuals get weighted by the lag
distance normalized to the maximum lag distance, denoted as
:math:`w_n`
* **sigma='exp'**: The residuals get weighted by the function:
:math:`w = e^{1 / w_n}`
* **sigma='sqrt'**: The residuals get weighted by the function:
:math:`w = \sqrt{w_n}`
* **sigma='sq'**: The residuals get weighted by the function:
:math:`w = w_n^2`
* **sigma='entropy'**: Calculates the Shannon Entropy as
intrinsic uncertainty of each lag class.
Returns
-------
void
Notes
-----
The cost function is defined as:
.. math::
\chi^2 = \sum \left(\frac{r}{\sigma}\right)^2
where r are the residuals between the experimental variogram and the
modeled values for the same lag. Following this function, a small
sigma will increase the influence of that residual, while a very
large sigma will cause the observation to be ignored.
See Also
--------
scipy.optimize.curve_fit
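Examples
--------
A sketch of down-weighting larger lags on an existing instance
``V`` before refitting:
>>> V.fit_sigma = 'linear'  # uncertainty grows with lag distance
>>> V.fit()                 # refit using the new weights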
"""
# unweighted
if self._fit_sigma is None:
return None
# discrete uncertainties
elif isinstance(self._fit_sigma, (list, tuple, np.ndarray)):
if len(self._fit_sigma) == len(self._bins):
return self._fit_sigma
else:
raise AttributeError('len(fit_sigma) != len(bins)')
# linear function of distance
elif self._fit_sigma == 'linear':
return self.bins / np.max(self.bins)
# e function of distance
elif self._fit_sigma == 'exp':
return 1. / np.exp(1. / (self.bins / np.max(self.bins)))
# sqrt function of distance
elif self._fit_sigma == 'sqrt':
return np.sqrt(self.bins / np.max(self.bins))
# squared function of distance
elif self._fit_sigma == 'sq':
return (self.bins / np.max(self.bins)) ** 2
# entropy
elif self._fit_sigma == 'entropy':
# get the binning using scotts rule
bins = np.histogram_bin_edges(self.distance, 'scott')
# get the maximum entropy
# hmax = np.log2(len(self.distance))
# apply the entropy
h = np.asarray([shannon_entropy(grp, bins) for grp in self.lag_classes() if len(grp) > 0])
return 1. / h
else:
raise ValueError(
"fit_sigma is not understood. It has to be an " +
"array or one of ['linear', 'exp', 'sqrt', 'sq', 'entropy']."
)
@fit_sigma.setter
def fit_sigma(self, sigma):
self._fit_sigma = sigma
# remove fitting parameters
self.cof = None
self.cov = None
@property
def is_cross_variogram(self) -> bool:
"""Read-only flag indicating if the current instance is a cross-variogram"""
return self._is_cross
def update_kwargs(self, **kwargs):
"""
.. versionadded:: 0.3.7
Update the keyword arguments of this Variogram instance.
The keyword arguments will be validated first and then update the
existing kwargs. That means you can pass only the kwargs which
need to be updated.
.. note::
Updating the kwargs does not force a preprocessing circle.
Any affected intermediate result, that might be cached internally,
will not make use of updated kwargs. Make a call to
:func:`preprocessing(force=True) <skgstat.Variogram.preprocessing>`
to force a clean re-calculation of the Variogram instance.
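Examples
--------
A sketch of updating a single keyword argument and forcing a
clean recalculation afterwards:
>>> V.update_kwargs(entropy_bins=25)
>>> V.preprocessing(force=True)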
"""
old = self._kwargs
# update the keyword-arguments
updated = self._validate_kwargs(**kwargs)
old.update(updated)
self._kwargs = old
def _validate_kwargs(self, **kwargs):
"""
.. versionadded:: 0.3.7
This function currently does nothing.
It will be used in the future, as soon as the Variogram takes
more kwargs. Then, these can be checked here.
"""
return kwargs
def lag_groups(self):
"""Lag class groups
Returns a mask array with as many elements as self._diff has,
identifying the lag class group for each pairwise difference. Can be
used to extract all pairwise values within the same lag bin.
Returns
-------
numpy.ndarray
See Also
--------
Variogram.lag_classes
"""
if self._groups is None:
self._calc_groups()
return self._groups
def lag_classes(self):
"""Iterate over the lag classes
Generates an iterator over all lag classes. Can be zipped with
Variogram.bins to identify the lag.
.. versionchanged:: 0.3.6
yields an empty array for empty lag groups now
Returns
-------
iterable
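Examples
--------
A sketch of pairing every lag class with its upper bin edge:
>>> for edge, grp in zip(V.bins, V.lag_classes()):
...     print(edge, grp.size)  # doctest: +SKIP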
"""
# get the diffs
diffs = self.pairwise_diffs
# get the groups
groups = self.lag_groups()
# yield all groups
for i in range(len(self.bins)):
yield diffs[np.where(groups == i)]
def preprocessing(self, force=False):
"""Preprocessing function
Prepares all input data for the fit and transform functions. Namely,
the distances are calculated and the value differences. Then the
binning is set up and bin edges are calculated. If any of the listed
subsets are already prepared, their processing is skipped. This
behaviour can be changed by the force parameter. This will cause a
clean preprocessing.
Parameters
----------
force : bool
If set to True, all preprocessing data sets will be deleted. Use
it in case you need a clean preprocessing.
Returns
-------
void
"""
# call the _calc functions
self._calc_diff(force=force)
self._calc_groups(force=force)
def fit(self, force=False, method=None, sigma=None, bounds=None, p0=None, **kwargs):
"""Fit the variogram
The fit function will fit the theoretical variogram function to the
experimental. The preprocessed distance matrix, pairwise differences
and binning will not be recalculated, if already done. This could be
forced by setting the force parameter to true.
In case you call the fit function directly with method or sigma,
the parameters set on Variogram instantiation will be
overwritten. All other keyword arguments will be passed to
the scipy.optimize.curve_fit function.
.. versionchanged:: 0.3.10
added 'ml' and 'custom' method.
.. versionchanged:: 1.0.1
use_nugget is now flagged implicitly, whenever a nugget > 0 is
passed in manual fitting.
Parameters
----------
force : bool
If set to True, a clean preprocessing of the distance matrix,
pairwise differences and the binning will be forced. Default is
False.
method : string
A string identifying one of the implemented fitting procedures.
Can be one of:
* 'lm': Levenberg-Marquardt algorithm implemented in the
scipy.optimize.leastsq function.
* 'trf': Trust Region Reflective algorithm implemented in
scipy.optimize.least_squares(method='trf')
* 'ml': Maximum-Likelihood estimation. With the current
implementation only the Nelder-Mead solver for unconstrained
problems is implemented. This will estimate the variogram
parameters from a Gaussian parameter space by minimizing
the negative log-likelihood.
* 'manual': Manual fitting. You can set the range, sill and
nugget either directly to the
:func:`fit <skgstat.Variogram.fit>` function, or as
`fit_` prefixed keyword arguments on Variogram instantiation.
sigma : string, array
Uncertainty array for the bins. Has to have the same dimension as
self.bins. Refer to Variogram.fit_sigma for more information.
bounds: 2-tuple of array_like or Bounds, optional
Lower and upper bounds on parameters passed to scipy.optimize.curve_fit.
Order is typically (range, sill, nugget) or (range, sill, smoothness, nugget) for individual models, or
(range1, sill1, nugget1, range2, sill2, nugget2) for a sum of 2 models.
Recommended for custom models, where bounds cannot be determined logically.
For internal models, defaults to known min/max values for the sill (0, max variance), range (0, max lag)
and smoothness (0, 2) or (0, 20) for stable and matern, respectively.
p0: array_like, optional
Initial guess for the parameters passed to scipy.optimize.curve_fit.
Same order as for fit_bounds.
Defaults to upper bounds values. For custom models, if no bounds are defined, defaults to 1.
Returns
-------
void
See Also
--------
scipy.optimize.minimize
scipy.optimize.curve_fit
scipy.optimize.leastsq
scipy.optimize.least_squares
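Examples
--------
A sketch of a manual fit on an existing instance ``V``
(parameter values are illustrative):
>>> V.fit(method='manual', range=120.0, sill=5.5, nugget=0.5)
>>> V.cof
[120.0, 5.5, 0.5]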
"""
# store the old cof
if self.cof is None:
old_params = {}
else:
old_params = self.describe()
# delete the last cov and cof
if force:
self.cof = None
self.cov = None
# if force, force a clean preprocessing
self.preprocessing(force=force)
# load the data
x = np.array(self.bins)
y = self.experimental
# overwrite fit setting if new params are given
if method is not None:
self.fit_method = method
if sigma is not None:
self.fit_sigma = sigma
# remove nans
_x = x[~np.isnan(y)]
_y = y[~np.isnan(y)]
# check if method is manual and a nugget was passed
if self.fit_method == 'manual' and kwargs.get('nugget', False):
self.use_nugget = True
# handle harmonized models
if self._harmonize:
_x = np.linspace(0, np.max(_x), 100)
_y = self._model(_x)
# get the params
s = 0.95 * np.nanmax(_y)
with warnings.catch_warnings():
warnings.simplefilter('ignore')
r = np.argwhere(_y >= s)[0][0]
n = _y[0] if self.use_nugget else 0.0
# set the params
self.cof = [r, s, n]
return
# For a supported model, wrap the function depending on nugget and get logical bounds
if not self._is_model_custom:
# Switch the method
# wrap the model to include or exclude the nugget
if self.use_nugget:
def wrapped(*args):
return self._model(*args)
else:
def wrapped(*args):
return self._model(*args, 0)
# get p0
if bounds is None:
bounds = (0, self.__get_fit_bounds(x, y))
if p0 is None:
p0 = np.asarray(bounds[1])
# Else, inspect the function for the number of arguments
else:
# The number of arguments of argspec minus one is what we initialized
argspec = inspect.getfullargspec(self._model.__wrapped__)
nb_args = len(argspec.args) - 1
if bounds is None:
warnings.warn("Parameter bounds cannot be logically derived for a custom model during the fit. "
"User bounds can be passed using fit(..., bounds=) or Variogram(..., fit_bounds=).", UserWarning)
bounds = ([-np.inf] * nb_args, [np.inf] * nb_args)
if p0 is None:
# If all bounds are infinite (not defined, pass 1)
if bounds == ([-np.inf] * nb_args, [np.inf] * nb_args):
p0 = np.ones(nb_args)
# Else pass the upper bounds
else:
p0 = np.asarray(bounds[1])
def wrapped(*args):
return self._model(*args)
# Trust Region Reflective
if self.fit_method == 'trf':
self.cof, self.cov = curve_fit(
wrapped,
_x, _y,
method='trf',
sigma=self.fit_sigma,
p0=p0,
bounds=bounds,
**kwargs
)
# Levenberg-Marquardt
elif self.fit_method == 'lm':
self.cof, self.cov = curve_fit(
wrapped,
_x, _y,
method='lm',
sigma=self.fit_sigma,
p0=p0,
**kwargs
)
# maximum-likelihood
elif self.fit_method == 'ml':
# check if the probabilities must be weighted
if self.fit_sigma is None:
sigma = np.ones(self.bins.size)
else:
sigma = 1 / self.fit_sigma
# define the loss function to be minimized
def ml(params):
# predict
pred = [wrapped(_, *params) for _ in _x]
# get the probabilities of _y
p = [stats.norm.logpdf(_p, loc=o, scale=1.) for _p, o in zip(pred, _y)]
# weight the probs
return - np.sum(p * sigma)
# apply maximum likelihood estimation by minimizing ml
result = minimize(ml, p0 * 0.5, method='SLSQP', bounds=[(0, _) for _ in p0])
if not result.success: # pragma: no cover
raise OptimizeWarning('Maximum Likelihood could not estimate parameters.')
else:
# set the result
self.cof = result.x
# manual fitting
elif self.fit_method == 'manual':
# TODO: here, the Error could only be raised if cof was None so far
r = kwargs.get('range', self._kwargs.get('fit_range', old_params.get('effective_range')))
s = kwargs.get('sill', self._kwargs.get('fit_sill', old_params.get('sill')))
# if not given raise an AttributeError
if r is None or s is None:
raise AttributeError('For manual fitting, you need to pass the \
variogram parameters either to fit or to the Variogram \
instance.\n parameter need to be prefixed with fit_ if \
passed to __init__.')
# get the nugget
n = kwargs.get('nugget', self._kwargs.get('fit_nugget', old_params.get('nugget', 0.0)))
# check if a s parameter is needed
if self._model_name in ('stable', 'matern'):
if self._model_name == 'stable':
s2 = kwargs.get('shape', self._kwargs.get('fit_shape', old_params.get('shape', 2.0)))
if self._model_name == 'matern':
s2 = kwargs.get('shape', self._kwargs.get('fit_shape', old_params.get('smoothness', 2.0)))
# set
self.cof = [r, s, s2, n]
else:
self.cof = [r, s, n]
@property
def fitted_model(self):
"""Fitted Model
Returns a callable that takes a distance value and returns a
semivariance. This model is fitted to the current Variogram
parameters. The function will be interpreted at return time with the
parameters hard-coded into the function code.
Returns
-------
model : callable
The current semivariance model fitted to the current Variogram
model parameters.
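Examples
--------
A sketch of evaluating the fitted model at arbitrary lags on an
existing instance ``V``:
>>> import numpy as np
>>> model = V.fitted_model
>>> gamma = model(np.linspace(0, V.bins.max(), 5))  # doctest: +SKIP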
"""
if self.cof is None:
self.fit(force=True)
return self.fitted_model_function(self._model, self.cof)
@classmethod
def fitted_model_function(cls, model, cof=None, **kw):
if cof is None:
# Make sure to keep this synchronized with the output
# of describe()!
cof = [kw["effective_range"], kw["sill"]]
if "smoothness" in kw:
cof.append(kw["smoothness"])
if "shape" in kw:
cof.append(kw["shape"])
if kw["nugget"] != 0:
cof.append(kw["nugget"])
if not callable(model):
if model == "harmonize":
raise ValueError("Please supply the actual harmonized model directly")
else:
model = model.lower()
model = getattr(models, model)
if model.__name__ == "harmonize":
code = """fitted_model = lambda x: model(x)"""
else:
code = """fitted_model = lambda x: model(x, %s)""" % \
(', '.join([str(_) for _ in cof]))
# run the code
loc = dict(model=model)
exec(code, loc, loc)
return loc['fitted_model']
def _format_values_stack(self, values: np.ndarray) -> np.ndarray:
"""
Create a numpy column stack to calculate differences between two value arrays.
The format function will handle sparse matrices, as these do not include
pairwise differences that are separated beyond maxlag.
The dense numpy.array matrices contain all point pairs.
"""
# handle sparse matrix
if isinstance(self.distance_matrix, sparse.spmatrix):
# get triangular distance matrices
c = r = self.triangular_distance_matrix
# get the sparse CSR matrix
if not isinstance(c, sparse.csr_matrix):
c = c.tocsr()
if not isinstance(r, sparse.csc_matrix):
r = r.tocsc()
# get the cols
Vcol = sparse.csr_matrix(
(values[c.indices], c.indices, c.indptr)
)
# The sparse rows are always the observations
Vrow = sparse.csc_matrix(
(values[r.indices], r.indices, r.indptr)
).tocsr()
# self._diff will have same shape as self.distances, even
# when that's not in diagonal format...
# Note: it might be compelling to do np.abs(Vrow -
# Vcol).data instead here, but that might optimize away
# some zeros, leaving _diff in a different shape
return np.abs(Vrow.data - Vcol.data)
else:
# Append a column of zeros to make pdist happy
# euclidean: sqrt((a-b)**2 + (0-0)**2) == sqrt((a-b)**2)
return pdist(
np.column_stack((values, np.zeros(len(values)))),
metric="euclidean"
)
def _calc_diff(self, force=False):
"""
Calculates the pairwise differences for all coordinate locations.
If the Variogram is a cross-variogram, the differences are calculated
between the main variable (self.values) and the co-variable.
.. versionchanged:: 1.0.5
calculates co-variate differences
Returns
-------
void
"""
# already calculated
if self._diff is not None and not force:
return
# format into column-stack for faster calculation
diffs = self._format_values_stack(self.values)
# check if this is a cross-variogram
if self.is_cross_variogram:
co_diffs = self._format_values_stack(self._co_variable)
# multiply to cross-difference
diffs *= co_diffs
# set the new differences
self._diff = diffs
def _calc_groups(self, force=False):
"""Calculate the lag class mask array
Returns
-------
"""
# already calculated
if self._groups is not None and not force:
return
# get the bin edges and distances
bin_edges = self.bins
d = self.distance
# -1 is the group for distances outside maxlag
self._groups = np.ones(len(d), dtype=int) * -1
for i, bounds in enumerate(zip([0] + list(bin_edges), bin_edges)):
self._groups[np.where((d >= bounds[0]) & (d < bounds[1]))] = i
def clone(self):
"""Deep copy of self
Return a deep copy of self.
Returns
-------
Variogram
"""
return copy.deepcopy(self)
@property
def experimental(self):
"""Experimental Variogram
Array of experimental (empirical) semivariance values. The array
length will be aligned to Variogram.bins. The current
Variogram.estimator has been used to calculate the values. Depending
on the setting of Variogram.harmonize (True | False), either
Variogram._experimental or Variogram.isotonic will be returned.
Returns
-------
vario : numpy.ndarray
Array of the experimental semi-variance values aligned to
Variogram.bins.
See Also
--------
Variogram._experimental
Variogram.isotonic
"""
return self._experimental
@property
def _experimental(self):
"""
Calculates the experimental variogram from the current lag classes.
It handles the special case of the `'entropy'` and `'percentile'`
estimators, which take an additional argument.
.. versionchanged:: 0.3.6
replaced the for-loops with :func:`fromiter <numpy.fromiter>`
.. versionchanged:: 0.3.7
makes use of `kwargs <skgstat.Variogram._kwargs>` for
specific estimators now
Returns
-------
experimental : np.ndarray
1D array of the experimental variogram values. Has same length
as :func:`bins <skgstat.Variogram.bins>`
"""
if self._estimator.__name__ == 'entropy':
# get the parameter from kwargs, if not set use 50
N = self._kwargs.get('entropy_bins', 50)
# we need to use N - 1, as the last edge is inclusive
if isinstance(N, int):
N -= 1
bins = np.histogram_bin_edges(self.distance, bins=N)
# define the mapper to the estimator function
def mapper(lag_values):
return self._estimator(lag_values, bins=bins)
elif self._estimator.__name__ == 'percentile':
if self._kwargs.get('percentile', False):
p = self._kwargs.get('percentile')
def mapper(lag_values):
return self._estimator(lag_values, p=p)
else:
mapper = self._estimator
else:
mapper = self._estimator
# return the mapped result
return np.fromiter(map(mapper, self.lag_classes()), dtype=float)
def get_empirical(self, bin_center=False):
"""Empirical variogram
Returns a tuple of dependent and independent sample values, this
:class:`Variogram <skgstat.Variogram>` is estimated for.
This is a tuple of the current :func:`bins <skgstat.Variogram.bins>`
and :func:`experimental <skgstat.Variogram.experimental>`
semi-variance values. By default the upper bin edges are used.
This can be set to bin center by the `bin_center` argument.
Parameters
----------
bin_center : bool
If set to `True`, the center of each distance lag bin is
used instead of the upper limit (default).
Returns
-------
bins : numpy.ndarray
1D array of :func:`n_lags <skgstat.Variogram.n_lags>`
distance lag bins.
experimental : numpy.ndarray
1D array of :func:`n_lags <skgstat.Variogram.n_lags>`
experimental semi-variance values.
See Also
--------
Variogram.bins
Variogram.experimental
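Examples
--------
A sketch of retrieving the empirical variogram at bin centers
from an existing instance ``V``:
>>> lags, gamma = V.get_empirical(bin_center=True)
>>> len(lags) == len(gamma) == V.n_lags
True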
"""
# get the bins and experimental values
bins = self.bins
experimental = self.experimental
# align bin centers
if bin_center:
# get the bin centers
bins = np.subtract(bins, np.diff([0] + bins.tolist()) / 2)
# return
return bins, experimental
def __get_fit_bounds(self, x, y):
"""
Return the bounds for parameter space in fitting a variogram model.
The bounds depend on the model that is used.
Returns
-------
list
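Notes
-----
As an illustration of the logic below: for a plain
``'spherical'`` model with ``use_nugget=True``, the returned
list is ``[max(x), max(y), 0.99 * max(y)]``, bounding the
range, the sill and the nugget, respectively.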
"""
all_mname = self._model_name
# for a sum of models, create a list
if "+" in all_mname:
list_mname = all_mname.split("+")
else:
list_mname = [all_mname]
# we append all bounds (for one or several models)
all_bounds = []
for i, mname in enumerate(list_mname):
# use range, sill and smoothness parameter
if mname == 'matern':
# a is max(x), C0 is max(y), s is limited to 20
bounds = [np.nanmax(x), np.nanmax(y), 20.]
# use range, sill and shape parameter
elif mname == 'stable':
# a is max(x), C0 is max(y), s is limited to 2
bounds = [np.nanmax(x), np.nanmax(y), 2.]
# use only sill
elif mname == 'nugget':
# a is max(x)
bounds = [np.nanmax(x)]
# use range and sill
else:
# a is max(x), C0 is max(y)
bounds = [np.nanmax(x), np.nanmax(y)]
# if use_nugget is True add the nugget (for the last model only in case it is a sum)
if self.use_nugget and i == (len(list_mname) - 1):
bounds.append(0.99*np.nanmax(y))
all_bounds += bounds
return all_bounds
def data(self, n=100, force=False):
"""Theoretical variogram function
Calculate the experimental variogram and apply the binning. On
success, the variogram model will be fitted and applied to n lag
values. Returns the lags and the calculated semi-variance values.
If force is True, a clean preprocessing and fitting run will be
executed.
Parameters
----------
n : integer
length of the lags array to be used for fitting. Defaults to 100,
which will be fine for most plots
force: boolean
If True, the preprocessing and fitting will be executed as a
clean run. This will force all intermediate results to be
recalculated. Defaults to False
Returns
-------
variogram : tuple
first element is the created lags array
second element is the array of calculated semi-variance values
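Examples
--------
A minimal sketch evaluating the fitted model on 50 lags
(the random sample is illustrative only):
>>> import numpy as np
>>> from skgstat import Variogram
>>> rng = np.random.default_rng(42)
>>> V = Variogram(rng.random((80, 2)), rng.random(80))
>>> lags, semivar = V.data(n=50)
>>> len(lags) == len(semivar) == 50
True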
"""
# force a clean preprocessing if needed
self.preprocessing(force=force)
# calculate the experimental variogram
_bin = self.bins
x = np.linspace(0, np.float64(np.nanmax(_bin)), n)
# fit if needed
if self.cof is None:
self.fit(force=force)
return x, self._model(x, *self.cof)
def _propagate_obs_sigma(self):
"""
Observation uncertainties are propagated into the experimental
variogram.
"""
# due to circular import, import here
from skgstat.util.uncertainty import propagate
# TODO: load and propagate percentile settings
self._experimental_conf_interval = propagate(
self,
'values',
self._kwargs['obs_sigma'],
use_bounds=True
)
@property
def residuals(self):
"""Model residuals
Calculate the model residuals, defined as the differences
between the theoretical model values and the experimental
variogram at the corresponding lag values.
.. deprecated:: 1.0.4
residuals can be ambiguous, thus the property is renamed to model_residuals
Returns
-------
numpy.ndarray
"""
warnings.warn(
"residuals is deprecated and will be removed. Please use Variogram.model_residuals",
DeprecationWarning
)
return self.model_residuals
@property
def model_residuals(self) -> np.ndarray:
"""
Calculate the model residuals, defined as the differences
between the theoretical model values and the experimental
variogram at the corresponding lag values.
Returns
-------
residuals : numpy.ndarray
"""
# get the deviations
experimental, model = self.model_deviations()
# calculate the residuals
return np.fromiter(
map(lambda x, y: x - y, model, experimental), float
)
@property
def mean_residual(self):
"""Mean Model residuals
Calculates the mean of the absolute deviations between the
experimental variogram and the theoretical model values.
Returns
-------
float
"""
return np.nanmean(np.fromiter(map(np.abs, self.model_residuals), float))
@property
def rmse(self):
r"""RMSE
Calculate the Root Mean squared error between the experimental
variogram and the theoretical model values at corresponding lags.
Can be used as a fitting quality measure.
Returns
-------
float
See Also
--------
Variogram.residuals
Notes
-----
The RMSE is implemented like:
.. math::
RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^{N} (x_i - y_i)^2}
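Examples
--------
A sketch of inspecting fit quality measures on synthetic
data (the actual values depend on the sample at hand):
>>> import numpy as np
>>> from skgstat import Variogram
>>> rng = np.random.default_rng(42)
>>> V = Variogram(rng.random((80, 2)), rng.random(80))
>>> V.rmse  # doctest: +SKIP
>>> V.mae  # doctest: +SKIP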
"""
return self.root_mean_square
@property
def mse(self):
r"""MSE
Calculate the Mean squared error between the experimental
variogram and the theoretical model values at corresponding lags.
Can be used as a fitting quality measure.
Returns
-------
float
See Also
--------
Variogram.residuals
Notes
-----
The MSE is implemented like:
.. math::
MSE = \frac{1}{N} \sum_{i=1}^{N} (x_i - y_i)^2
"""
# get the deviations
experimental, model = self.model_deviations()
# get the mean
mse = np.mean(np.power(np.subtract(experimental, model), 2))
return mse
@property
def mae(self):
r"""RMSE
Calculate the Mean absolute error between the experimental
variogram and the theoretical model values at corresponding lags.
Can be used as a fitting quality measure.
Returns
-------
float
See Also
--------
Variogram.residuals
Notes
-----
The MAE is implemented like:
.. math::
MAE = \frac{1}{N} \sum_{i=1}^{N} |x_i - y_i|
"""
# get the deviations
experimental, model = self.model_deviations()
# get the mean
mae = np.mean(np.abs(np.subtract(experimental, model)))
return mae
@property
def nrmse(self):
r"""NRMSE
Calculate the normalized root mean squared error between the
experimental variogram and the theoretical model values at
corresponding lags. Can be used as a fitting quality measure.
Returns
-------
float
See Also
--------
Variogram.residuals
Variogram.rmse
Notes
-----
The NRMSE is implemented as:
.. math::
NRMSE = \frac{RMSE}{mean(y)}
where RMSE is Variogram.rmse and y is Variogram.experimental
"""
return self.rmse / np.nanmean(self.experimental)
@property
def root_mean_square(self):
"""Root Mean Square (RMS) of the residuals
Calculates the square root of the mean of squared residuals.
Returns
-------
float
Root Mean Square of the residuals.
"""
return np.sqrt(np.nanmean(np.square(self.model_residuals)))
@property
def residual_sum_of_squares(self):
"""Residual Sum of Squares (RSS)
Calculates the sum of squared differences between the experimental
variogram and theoretical model values.
Returns
-------
float
Residual sum of squares (RSS), a measure of the overall model fit
representing the sum of squared deviations between the observed
experimental variogram and the corresponding theoretical model values.
"""
return np.nansum(np.square(self.model_residuals))
@property
def rss(self):
return self.residual_sum_of_squares
@property
def nrmse_r(self):
r"""NRMSE
Alternative normalized root mean squared error between the
experimental variogram and the theoretical model values at
corresponding lags. Can be used as a fitting quality measure.
Returns
-------
float
See Also
--------
Variogram.rmse
Variogram.nrmse
Notes
-----
Unlike Variogram.nrmse, nrmse_r is not normalized to the mean of y,
but to the difference between the maximum of y and its mean:
.. math::
NRMSE_r = \frac{RMSE}{max(y) - mean(y)}
"""
_y = self.experimental
return self.rmse / (np.nanmax(_y) - np.nanmean(_y))
@property
def r(self):
"""
Pearson correlation of the fitted Variogram
:return:
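Notes
-----
Implemented as:
.. math::
    r = \frac{\sum_{i=1}^{N} (x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum_{i=1}^{N} (x_i - \bar{x})^2 \sum_{i=1}^{N} (y_i - \bar{y})^2}}
where :math:`x` is the experimental variogram and :math:`y`
are the corresponding theoretical model values.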
"""
# get the experimental and theoretical variogram and calculate means
experimental, model = self.model_deviations()
mx = np.nanmean(experimental)
my = np.nanmean(model)
# calculate the single pearson correlation terms
term1 = np.nansum(np.fromiter(
map(lambda x, y: (x-mx) * (y-my), experimental, model), float
))
t2x = np.nansum(
np.fromiter(map(lambda x: (x-mx)**2, experimental), float)
)
t2y = np.nansum(
np.fromiter(map(lambda y: (y-my)**2, model), float)
)
return term1 / (np.sqrt(t2x * t2y))
@property
def NS(self):
"""
Nash Sutcliffe efficiency of the fitted Variogram
:return:
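Notes
-----
Implemented as:
.. math::
    NS = 1 - \frac{\sum_{i=1}^{N} (x_i - y_i)^2}{\sum_{i=1}^{N} (x_i - \bar{x})^2}
where :math:`x` is the experimental variogram and :math:`y`
are the corresponding theoretical model values.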
"""
experimental, model = self.model_deviations()
mx = np.nanmean(experimental)
# calculate the single nash-sutcliffe terms
term1 = np.nansum(np.fromiter(
map(lambda x, y: (x - y)**2, experimental, model),
float
)
)
term2 = np.nansum(np.fromiter(
map(lambda x: (x - mx)**2, experimental),
float
)
)
return 1 - (term1 / term2)
@property
def aic(self) -> float:
"""
"""
from skgstat.util.cross_validation import aic
return aic(self)
@property
def bic(self) -> float:
"""
"""
from skgstat.util.cross_validation import bic
return bic(self)
def model_deviations(self):
"""Model Deviations
Calculate the deviations between the experimental variogram and the
recalculated values for the same bins using the fitted theoretical
variogram function. Can be utilized to calculate a quality measure
for the variogram fit.
Returns
-------
deviations : tuple
first element is the experimental variogram
second element contains the corresponding values of the
theoretical model.
"""
# get the experimental values and their bin bounds
_exp = self.experimental
_bin = self.bins
# get the model parameters
param = self.describe()
if 'error' in param:
raise RuntimeError('The variogram cannot be calculated.')
# calculate the model values at bin bounds
_model = self.transform(_bin)
return _exp, _model
def cross_validate(
self,
method: str = 'jacknife',
n: int = None,
metric: str = 'rmse',
seed=None
) -> float:
"""
Cross validation of the variogram model by means of Kriging.
Right now, this function can only utilize a jacknife (leave-one-out)
cross validation and will only use the builtin OrdinaryKriging
method (not yet the to_gs_krige interface).
Parameters
----------
method : str
Right now, 'jacknife' is the only possible input.
n : int
The number of points to be included in the cross-validation.
If None (default), all points will be used.
metric : str
Metric used for cross-validation.
Can be root mean square error (rmse), mean squared error (mse)
or mean absolute error (mae).
seed : int
If n is not None, the random selection of input data for the
cross-validation can be seeded.
Returns
-------
metric : float
The cross-validation result as specified above.
See Also
--------
skgstat.util.cross_validation.jacknife
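Examples
--------
A usage sketch on synthetic data (the result depends on the
sample at hand):
>>> import numpy as np
>>> from skgstat import Variogram
>>> rng = np.random.default_rng(42)
>>> V = Variogram(rng.random((50, 2)), rng.random(50))
>>> V.cross_validate(n=25, metric='mae', seed=42)  # doctest: +SKIP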
"""
# not sure how to solve the circular import here
from skgstat.util import cross_validation
if method == 'jacknife':
return cross_validation.jacknife(
self,
n=n,
metric=metric,
seed=seed
)
else:
raise AttributeError(f"A method '{method}' is not implemented.")
def describe(self, short=False, flat=False):
"""Variogram parameters
Return a dictionary of the variogram parameters.
.. versionchanged:: 0.3.7
describe now returns all init parameters as the
`describe()['params']` key and all keyword arguments as
`describe()['kwargs']`. This output can be suppressed
by setting `short=True`.
Parameters
----------
short : bool
If `True`, the `'params'` and `'kwargs'` keys will be
omitted. Defaults to `False`.
flat : bool
If `True`, the `'params'` and `'kwargs'` nested `dict`s
will be distributed to the main `dict` to return a
flat `dict`. Defaults to `False`
Returns
-------
parameters : dict
Returns fitting parameters of the theoretical variogram
model along with the init parameters of the
`Variogram <skgstat.Variogram>` instance.
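Examples
--------
A sketch of the short output form for the default spherical
model (the fitted values depend on the sample):
>>> import numpy as np
>>> from skgstat import Variogram
>>> rng = np.random.default_rng(42)
>>> V = Variogram(rng.random((80, 2)), rng.random(80))
>>> par = V.describe(short=True)
>>> 'effective_range' in par and 'sill' in par
True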
"""
# fit, if not already done
if self.cof is None:
self.fit(force=True)
# scale sill and range
maxlag = np.nanmax(self.bins)
maxvar = np.nanmax(self.experimental)
# get the fitting coefficients
cof = self.cof
# build the dict
def fnname(fn):
if callable(fn):
name = "%s.%s" % (fn.__module__, fn.__name__)
else:
name = fn
if name.startswith("skgstat."):
return name.split(".")[-1]
return name
rdict = dict(
model=fnname(self._model) if not self._harmonize else "harmonize",
estimator=fnname(self._estimator),
dist_func=fnname(self.dist_function))
def create_dict_for_model(model_name, cof, maxlag, maxvar, use_nugget, id=''):
"""
Create dictionary of parameters for an individual model.
Optionally, append a model ID to the key (for the case of summed models).
"""
# id appended to the param key name to differentiate models for a sum
if id != '':
id = '_' + id
d = {
'normalized_effective_range' + id: cof[0] * maxlag,
'normalized_sill' + id: cof[1] * maxvar,
'normalized_nugget' + id: cof[-1] * maxvar if use_nugget else 0,
'effective_range' + id: cof[0],
'sill' + id: cof[1],
'nugget' + id: cof[-1] if use_nugget else 0,
}
# handle s parameters for matern and stable model
if model_name == 'matern':
d['smoothness' + id] = cof[2]
elif model_name == 'stable':
d['shape' + id] = cof[2]
return d
# for a custom model: we list the optimized params for the function wrapped by the variogram decorator
if self._is_model_custom:
custom_arg_names = inspect.getfullargspec(self._model.__wrapped__).args
all_params = {"param"+str(i+1)+"_"+custom_arg_names[i+1]: cof[i] for i in range(len(custom_arg_names) - 1)}
# for a sum of models
elif "+" in self._model_name:
list_model_names = self._model_name.split("+")
list_argslices = self._get_argpos_sum_models(list_model_names=list_model_names)
all_params = {}
# add the parameters for each model, with parameter suffix from 1 to the total number
for i in range(len(list_model_names)):
model_params = create_dict_for_model(model_name=list_model_names[i], cof=cof[list_argslices[i]],
maxlag=maxlag, maxvar=maxvar, use_nugget=self.use_nugget, id=str(i+1))
all_params.update(model_params)
# for a single model
else:
all_params = create_dict_for_model(model_name=self._model_name, cof=cof, maxlag=maxlag, maxvar=maxvar,
use_nugget=self.use_nugget)
# update dictionary
rdict.update(all_params)
# add other stuff if not short version requested
if not short:
kwargs = self._kwargs
params = dict(
estimator=self._estimator.__name__,
model=self._model_name,
dist_func=str(self.dist_function),
bin_func=self._bin_func_name,
normalize=self.normalized,
fit_method=self._fit_method,
fit_sigma=self.fit_sigma,
use_nugget=self.use_nugget,
maxlag=self.maxlag,
n_lags=self._n_lags_passed_value,
verbose=self.verbose
)
# update or append the params
if flat:
rdict.update(params)
rdict.update(kwargs)
else:
rdict['params'] = params
rdict['kwargs'] = kwargs
# return
return rdict
@property
def parameters(self):
"""
Extract just the variogram parameters range, sill
and nugget from the
:func:`describe <skgstat.Variogram.describe>` output.
Returns
-------
params : list
[range, sill, nugget] for most models and
[range, sill, shape, nugget] for matern and stable model.
[range1, sill1, nugget1, range2, sill2, nugget2] for a sum of 2 models.
[param1, param2, param3, ...] in order for a custom model.
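Examples
--------
For the default spherical model, the list can be unpacked
directly (a sketch on synthetic data):
>>> import numpy as np
>>> from skgstat import Variogram
>>> rng = np.random.default_rng(42)
>>> V = Variogram(rng.random((80, 2)), rng.random(80))
>>> effective_range, sill, nugget = V.parameters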
"""
d = self.describe()
if 'error' in d:
return [None, None, None]
def get_params_list_from_dict(d, model_name, id=''):
# ID used to differentiate params for a sum of models
if id != '':
id = '_'+id
# Get specific smoothness and shape parameters for matern and stable
if model_name == 'matern':
return list([
d['effective_range'+id],
d['sill'+id],
d['smoothness'+id],
d['nugget'+id]
])
elif model_name == 'stable':
return list([
d['effective_range'+id],
d['sill'+id],
d['shape'+id],
d['nugget'+id]
])
# Get nugget for only-nugget
elif model_name == 'nugget':
return list([d['nugget'+id]])
# Or get classic parameters
else:
return list([
d['effective_range'+id],
d['sill'+id],
d['nugget'+id]
])
# For a custom model, just pass cof in order
if self._is_model_custom:
list_params = self.cof
# Get parameters for a sum of models
elif '+' in self._model_name:
list_model_names = self._model_name.split('+')
list_params = []
for i in range(len(list_model_names)):
params = get_params_list_from_dict(d, model_name=list_model_names[i], id=str(i+1))
list_params += params
# Or for a single model
else:
list_params = get_params_list_from_dict(d, model_name=self._model_name)
return list_params
def to_DataFrame(self, n=100, force=False):
"""Variogram DataFrame
Returns the fitted theoretical variogram as a pandas.DataFrame
instance. The n and force parameter control the calculation,
refer to the data function for more info.
.. deprecated:: 1.0.10
The return value of this function will change with a future release
Parameters
----------
n : integer
length of the lags array to be used for fitting. Defaults to 100,
which will be fine for most plots
force: boolean
If True, the preprocessing and fitting will be executed as a
clean run. This will force all intermediate results to be
recalculated. Defaults to False
Returns
-------
pandas.DataFrame
See Also
--------
Variogram.data
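Examples
--------
A sketch on synthetic data; the value column is named after
the current model:
>>> import numpy as np
>>> from skgstat import Variogram
>>> rng = np.random.default_rng(42)
>>> V = Variogram(rng.random((80, 2)), rng.random(80))
>>> df = V.to_DataFrame(n=50)  # doctest: +SKIP
>>> list(df.columns)  # doctest: +SKIP
['lags', 'spherical']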
"""
warnings.warn('The return value of this function will change in a future release.', FutureWarning)
lags, data = self.data(n=n, force=force)
return DataFrame({
'lags': lags,
self._model_name: data}
).copy()
def to_gs_krige(self, **kwargs):
"""
Instantiate a GSTools Krige class.
This can only export isotropic models.
Note: the `fit_variogram` is always set to `False`
Parameters
----------
variogram : skgstat.Variogram
Scikit-GStat Variogram instance
**kwargs
Keyword arguments forwarded to GSTools Krige.
Refer to :any:`Krige <gstools.krige.Krige>` to
learn about all possible options.
Note that the `fit_variogram` parameter will
always be False.
Raises
------
ImportError
When GSTools is not installed.
ValueError
When GSTools version is not v1.3 or greater.
ValueError
When given Variogram model is not supported ('harmonize').
Warns
-----
Warning
If the Variogram is a cross-variogram
Returns
-------
:any:`Krige`
Instantiated GSTools Krige class.
See Also
--------
gstools.Krige
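Examples
--------
A sketch, assuming GSTools (v1.3 or greater) is installed
and ``V`` is a fitted Variogram; ``gridx`` and ``gridy`` are
hypothetical target coordinates:
>>> krige = V.to_gs_krige()  # doctest: +SKIP
>>> field, krige_var = krige([gridx, gridy])  # doctest: +SKIP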
"""
# fit
self.fit()
return skgstat_to_krige(self, **kwargs)
def plot(self, axes=None, grid=True, show=True, hist=True):
"""Variogram Plot
Plot the experimental variogram, the fitted theoretical function and
a histogram for the lag classes. The axes attribute can be used to
pass a list of AxesSubplots or a single instance to the plot
function. Then these Subplots will be used. If only a single instance
is passed, the hist attribute will be ignored as only the variogram
will be plotted anyway.
.. versionchanged:: 0.4.0
This plot can be plotted with the plotly plotting backend
Parameters
----------
axes : list, tuple, array, AxesSubplot or None
If None, the plot function will create a new matplotlib figure.
Otherwise a single instance or a list of AxesSubplots can be
passed to be used. If a single instance is passed, the hist
attribute will be ignored.
grid : bool
Defaults to True. If True a custom grid will be drawn through
the lag class centers
show : bool
Defaults to True. If True, the show method of the passed or
created matplotlib Figure will be called before returning the
Figure. This should be set to False when used in a notebook,
as a returned Figure object will be plotted anyway.
hist : bool
Defaults to True. If False, the creation of a histogram for the
lag classes will be suppressed.
Returns
-------
matplotlib.Figure
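Examples
--------
A sketch for notebook use, assuming a fitted Variogram
instance ``V``; show is suppressed to avoid a doubled figure:
>>> fig = V.plot(show=False, hist=False)  # doctest: +SKIP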
"""
# get the backend
used_backend = plotting.backend()
if used_backend == 'matplotlib':
return plotting.matplotlib_variogram_plot(
self,
axes=axes,
grid=grid,
show=show,
hist=hist
)
elif used_backend == 'plotly':
return plotting.plotly_variogram_plot(
self,
fig=axes,
grid=grid,
show=show,
hist=hist
)
# if we reach this line, something is wrong with the plotting backend
raise ValueError('The plotting backend has an undefined state.')
def scattergram(self, ax=None, show=True, **kwargs): # pragma: no cover
"""Scattergram plot
Groups the values by lags and plots the head and tail values
of all point pairs within the groups against each other.
This can be used to investigate the distribution of the
value residuals.
.. versionchanged:: 0.4.0
This plot can be plotted with the plotly plotting backend
Parameters
----------
ax : matplotlib.Axes, plotly.graph_objects.Figure
If None, a new plotting Figure will be created. If given,
it has to be an instance of the used plotting backend, which
will be used to plot on.
show : boolean
If True (default), the `show` method of the Figure will be
called. Can be set to False to prevent duplicated plots in
some environments.
Returns
-------
fig : matplotlib.Figure, plotly.graph_objects.Figure
Resulting figure, depending on the plotting backend
"""
# get the backend
used_backend = plotting.backend()
if used_backend == 'matplotlib':
return plotting.matplotlib_variogram_scattergram(
self,
ax=ax,
show=show,
**kwargs
)
elif used_backend == 'plotly':
return plotting.plotly_variogram_scattergram(
self,
fig=ax,
show=show,
**kwargs
)
# if we reach this line, something is wrong with the plotting backend
raise ValueError('The plotting backend has an undefined state.')
def location_trend(self, axes=None, show=True, **kwargs):
"""Location Trend plot
Plots the values over each dimension of the coordinates in a scatter
plot. This will visually show correlations between the values and any
of the coordinate dimensions. If there is a value dependence on the
location, this would violate the intrinsic hypothesis. This is a
weaker form of second-order stationarity.
.. versionchanged:: 0.4.0
This plot can be plotted with the plotly plotting backend
Parameters
----------
axes : list
Can be None (default) or a list of matplotlib.AxesSubplots. If a
list is passed, the location trend plots will be plotted on the
given instances. Note that the length of the list has to match
the dimensionality of the coordinates array. In case 3D
coordinates are used, three subplots have to be given.
show : boolean
If True (default), the `show` method of the Figure will be
called. Can be set to False to prevent duplicated plots in
some environments.
Keyword Arguments
-----------------
add_trend_line : bool
.. versionadded:: 0.3.5
If set to `True`, the class will fit a linear model to each
coordinate dimension and output the model along with a
calculated R². With high R² values, you should consider
rejecting the input data, or transforming it.
.. note::
Right now, this is only supported for ``'plotly'`` backend
Returns
-------
fig : matplotlib.Figure, plotly.graph_objects.Figure
The figure produced by the function. Depends on the
current backend.
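Examples
--------
A sketch with the plotly backend, which supports the
``add_trend_line`` keyword (assumes a fitted Variogram ``V``):
>>> from skgstat import plotting
>>> plotting.backend('plotly')  # doctest: +SKIP
>>> fig = V.location_trend(add_trend_line=True, show=False)  # doctest: +SKIP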
"""
# get the backend
used_backend = plotting.backend()
if used_backend == 'matplotlib':
return plotting.matplotlib_location_trend(
self,
axes=axes,
show=show,
**kwargs
)
elif used_backend == 'plotly':
return plotting.plotly_location_trend(
self,
fig=axes,
show=show,
**kwargs
)
# if we reach this line, something is wrong with the plotting backend
raise ValueError('The plotting backend has an undefined state.')
def distance_difference_plot(self, ax=None, plot_bins=True, show=True):
"""Raw distance plot
Plots all absolute value differences of all point pair combinations
over their separating distance, without sorting them into a lag.
.. versionchanged:: 0.4.0
This plot can be plotted with the plotly plotting backend
Parameters
----------
ax : None, AxesSubplot
If None, a new matplotlib.Figure will be created. In case a
Figure was already created, pass the Subplot to use as ax argument.
plot_bins : bool
If True (default) the bin edges will be included into the plot.
show : bool
If True (default), the show method of the Figure will be called
before returning the Figure. Can be set to False, to avoid
doubled figure rendering in Jupyter notebooks.
Returns
-------
matplotlib.pyplot.Figure
"""
# get the backend
used_backend = plotting.backend()
if used_backend == 'matplotlib':
return plotting.matplotlib_dd_plot(
self,
ax=ax,
plot_bins=plot_bins,
show=show
)
elif used_backend == 'plotly':
return plotting.plotly_dd_plot(
self,
fig=ax,
plot_bins=plot_bins,
show=show
)
# if we reach this line, something is wrong with the plotting backend
raise ValueError('The plotting backend has an undefined state.')
def __repr__(self): # pragma: no cover
"""
Textual representation of this Variogram instance.
:return:
"""
try:
_name = self._model_name
_b = int(len(self.bins))
except Exception:
return "< abstract Variogram >"
return "< %s Semivariogram fitted to %d bins >" % (_name, _b)
def __str__(self): # pragma: no cover
"""String Representation
Descriptive representation of this Variogram instance that shall give
the main variogram parameters in a print statement.
Returns
-------
description : str
String description of the variogram instance. Described by the
Variogram parameters.
"""
par = self.describe()
_sill = np.nan if 'error' in par else par['sill']
_range = np.nan if 'error' in par else par['effective_range']
_nugget = np.nan if 'error' in par else par['nugget']
s = "{0} Variogram\n".format(par['model'])
s += "-" * (len(s) - 1) + "\n"
s += """Estimator: %s
\rEffective Range: %.2f
\rSill: %.2f
\rNugget: %.2f
""" % (par['estimator'], _range, _sill, _nugget)
return s