# Source code for giddy.rank

"""
Rank and spatial rank mobility measures.
"""
__author__ = "Sergio J. Rey <sjsrey@gmail.com>, Wei Kang <weikang9009@gmail.com>"

__all__ = [
    "SpatialTau",
    "Tau",
    "Theta",
    "Tau_Local",
    "Tau_Local_Neighbor",
    "Tau_Local_Neighborhood",
    "Tau_Regional",
]

from scipy.stats.mstats import rankdata
from scipy.special import erfc
import numpy as np
import scipy as sp
from libpysal import weights


class Theta:
    """
    Regime mobility measure. :cite:`Rey2004a`

    For sequence of time periods Theta measures the extent to which rank
    changes for a variable measured over n locations are in the same
    direction within mutually exclusive and exhaustive partitions (regimes)
    of the n locations.

    Theta is defined as the sum of the absolute sum of rank changes within
    the regimes over the sum of all absolute rank changes.

    Parameters
    ----------
    y            : array
                   (n, k) with k>=2, successive columns of y are later moments
                   in time (years, months, etc).
    regime       : array
                   (n, ), values corresponding to which regime each
                   observation belongs to.
    permutations : int
                   number of random spatial permutations to generate for
                   computationally based inference.

    Attributes
    ----------
    ranks        : array
                   ranks of the original y array (by columns).
    regimes      : array
                   the original regimes array.
    total        : array
                   (k-1, ), the total number of rank changes for each of the
                   k periods.
    max_total    : int
                   the theoretical maximum number of rank changes for n
                   observations.
    theta        : array
                   (k-1,), the theta statistic for each of the k-1 intervals.
    permutations : int
                   the number of permutations.
    pvalue_left  : float
                   p-value for test that observed theta is significantly lower
                   than its expectation under complete spatial randomness.
    pvalue_right : float
                   p-value for test that observed theta is significantly
                   greater than its expectation under complete spatial
                   randomness.

    Examples
    --------
    >>> import libpysal as ps
    >>> from giddy.rank import Theta
    >>> import numpy as np
    >>> f = ps.io.open(ps.examples.get_path("mexico.csv"))
    >>> vnames = ["pcgdp%d" % dec for dec in range(1940, 2010, 10)]
    >>> y = np.transpose(np.array([f.by_col[v] for v in vnames]))
    >>> regime = np.array(f.by_col['esquivel99'])
    >>> np.random.seed(10)
    >>> t = Theta(y, regime, 999)
    >>> t.theta
    array([[0.41538462, 0.28070175, 0.61363636, 0.62222222, 0.33333333,
            0.47222222]])
    >>> t.pvalue_left
    array([0.307, 0.077, 0.823, 0.552, 0.045, 0.735])
    >>> t.total
    array([130., 114.,  88.,  90.,  90.,  72.])
    >>> t.max_total
    512
    """

    def __init__(self, y, regime, permutations=999):
        # Column-wise ranks of y; successive columns are later time periods.
        ranks = rankdata(y, axis=0)
        self.ranks = ranks
        n, k = y.shape
        # Rank changes between consecutive periods: shape (n, k-1).
        ranks_d = ranks[:, list(range(1, k))] - ranks[:, list(range(k - 1))]
        self.ranks_d = ranks_d
        # FIX: was sp.unique — the top-level scipy.unique alias was removed
        # from SciPy; np.unique is the supported API.
        regimes = np.unique(regime)
        self.regimes = regimes
        # Total absolute rank change per period (denominator of theta).
        self.total = sum(abs(ranks_d))
        # Theoretical maximum total rank change for n observations.
        self.max_total = sum([abs(i - n + i - 1) for i in range(1, n + 1)])
        # FIX: _calc was previously invoked twice, discarding the first result.
        self.theta = self._calc(regime)
        self.permutations = permutations
        if permutations:
            # FIX: use a local alias instead of monkey-patching the numpy
            # module namespace (previously: np.perm = np.random.permutation).
            perm = np.random.permutation
            sim = np.array([self._calc(perm(regime)) for i in range(permutations)])
            self.theta.shape = (1, len(self.theta))
            # Observed value is prepended as row 0 of the reference set.
            sim = np.concatenate((self.theta, sim))
            self.sim = sim
            den = permutations + 1.0
            self.pvalue_left = (sim <= sim[0]).sum(axis=0) / den
            self.pvalue_right = (sim > sim[0]).sum(axis=0) / den
            self.z = (sim[0] - sim.mean(axis=0)) / sim.std(axis=0)

    def _calc(self, regime):
        """Share of total rank change occurring within regimes, per period."""
        within = [abs(sum(self.ranks_d[regime == reg])) for reg in self.regimes]
        return np.array(sum(within) / self.total)
class Tau:
    """
    Kendall's Tau is based on a comparison of the number of pairs of n
    observations that have concordant ranks between two variables.

    Parameters
    ----------
    x : array
        (n, ), first variable.
    y : array
        (n, ), second variable.

    Attributes
    ----------
    tau : float
          The classic Tau statistic.
    tau_p : float
            asymptotic p-value.

    Notes
    -----
    Modification of algorithm suggested by :cite:`Christensen2005`.
    PySAL/giddy implementation uses a list based representation of a binary
    tree for the accumulation of the concordance measures. Ties are handled
    by this implementation (in other words, if there are ties in either x,
    or y, or both, the calculation returns Tau_b, if no ties classic Tau is
    returned.)

    Examples
    --------
    >>> from scipy.stats import kendalltau
    >>> from giddy.rank import Tau
    >>> x1 = [12, 2, 1, 12, 2]
    >>> x2 = [1, 4, 7, 1, 0]
    >>> kt = Tau(x1, x2)
    >>> kt.tau
    -0.47140452079103173
    >>> kt.tau_p
    0.24821309157521476
    >>> tau, p = kendalltau(x1, x2)
    >>> tau
    -0.4714045207910316
    >>> p
    0.2827454599327748
    """

    def __init__(self, x, y):
        res = self._calc(x, y)
        self.tau = res[0]
        self.tau_p = res[1]
        self.concordant = res[2]
        self.discordant = res[3]
        self.extraX = res[4]
        self.extraY = res[5]

    def _calc(self, x, y):
        """
        List based implementation of binary tree algorithm for concordance
        measure after :cite:`Christensen2005`.
        """
        x = np.array(x)
        y = np.array(y)
        n = len(y)
        # Sort observation ids lexicographically on (x, y).
        perm = list(range(n))
        perm.sort(key=lambda a: (x[a], y[a]))
        ExtraY = 0  # pairs tied only on y
        ExtraX = 0  # pairs tied only on x
        ACount = 0  # concordant contributions for current i
        BCount = 0  # pairs tied on x (current run)
        CCount = 0  # discordant contributions for current i
        # FIX: removed a duplicated `DCount = 0` initialization present in
        # the original (DCount was assigned twice in a row).
        DCount = 0  # pairs tied on y within the current x run
        ECount = 1  # pairs tied on both x and y within the current run
        Concordant = 0
        Discordant = 0
        # ids for left child
        li = [None] * (n - 1)
        # ids for right child
        ri = [None] * (n - 1)
        # number of left descendants for a node
        ld = np.zeros(n)
        # number of values equal to value i
        nequal = np.zeros(n)
        for i in range(1, n):
            NumBefore = 0
            NumEqual = 1
            x0 = x[perm[i - 1]]
            y0 = y[perm[i - 1]]
            x1 = x[perm[i]]
            y1 = y[perm[i]]
            if x0 != x1:
                # New x value: reset tie counters for the new run.
                DCount = 0
                ECount = 1
            else:
                if y0 == y1:
                    ECount += 1
                else:
                    DCount += ECount
                    ECount = 1
            # Insert observation i into the binary tree keyed on y, counting
            # how many previously-inserted values precede / equal it.
            root = 0
            inserting = True
            while inserting:
                current = y[perm[i]]
                if current > y[perm[root]]:
                    # right branch
                    NumBefore += 1 + ld[root] + nequal[root]
                    if ri[root] is None:
                        # insert as right child to root
                        ri[root] = i
                        inserting = False
                    else:
                        root = ri[root]
                elif current < y[perm[root]]:
                    # increment number of left descendants
                    ld[root] += 1
                    if li[root] is None:
                        # insert as left child to root
                        li[root] = i
                        inserting = False
                    else:
                        root = li[root]
                elif current == y[perm[root]]:
                    NumBefore += ld[root]
                    NumEqual += nequal[root] + 1
                    nequal[root] += 1
                    inserting = False
            ACount = NumBefore - DCount
            BCount = NumEqual - ECount
            CCount = i - (ACount + BCount + DCount + ECount - 1)
            ExtraY += DCount
            ExtraX += BCount
            Concordant += ACount
            Discordant += CCount
        cd = Concordant + Discordant
        num = Concordant - Discordant
        # Tau_b denominator: adjusts for ties in x and in y.
        tau = num / np.sqrt((cd + ExtraX) * (cd + ExtraY))
        # Asymptotic variance of tau under the null, two-sided p via erfc.
        v = (4.0 * n + 10) / (9.0 * n * (n - 1))
        z = tau / np.sqrt(v)
        pval = erfc(np.abs(z) / 1.4142136)  # follow scipy
        return tau, pval, Concordant, Discordant, ExtraX, ExtraY
class SpatialTau:
    """
    Spatial version of Kendall's rank correlation statistic.

    Kendall's Tau is based on a comparison of the number of pairs of n
    observations that have concordant ranks between two variables. The
    spatial Tau decomposes these pairs into those that are spatial neighbors
    and those that are not, and examines whether the rank correlation is
    different between the two sets relative to what would be expected under
    spatial randomness.

    Parameters
    ----------
    x : array
        (n, ), first variable.
    y : array
        (n, ), second variable.
    w : W
        spatial weights object.
    permutations : int
        number of random spatial permutations for computationally based
        inference.

    Attributes
    ----------
    tau : float
          The classic Tau statistic.
    tau_spatial : float
                  Value of Tau for pairs that are spatial neighbors.
    taus : array
           (permutations, 1), simulated tau_spatial values under random
           spatial permutations (same permutation used for start and ending
           period); only set if permutations>0.
    pairs_spatial : int
                    Number of spatial pairs.
    concordant : float
                 Number of concordant pairs.
    concordant_spatial : float
                         Number of concordant pairs that are spatial
                         neighbors.
    extraX : float
             Number of extra X pairs.
    extraY : float
             Number of extra Y pairs.
    discordant : float
                 Number of discordant pairs.
    discordant_spatial : float
                         Number of discordant pairs that are spatial
                         neighbors.
    tau_spatial_psim : float
                       one-sided pseudo p-value for observed tau_spatial
                       under the null of spatial randomness of rank exchanges
                       (if permutations>0).

    Notes
    -----
    Algorithm has two stages. The first calculates classic Tau using a list
    based implementation of the algorithm from :cite:`Christensen2005`.
    Second stage calculates concordance measures for neighboring pairs of
    locations using a modification of the algorithm from :cite:`Press2007`.
    See :cite:`Rey2014` for details.

    Examples
    --------
    >>> import libpysal as ps
    >>> import numpy as np
    >>> from giddy.rank import SpatialTau
    >>> f = ps.io.open(ps.examples.get_path("mexico.csv"))
    >>> vnames = ["pcgdp%d" % dec for dec in range(1940, 2010, 10)]
    >>> y = np.transpose(np.array([f.by_col[v] for v in vnames]))
    >>> regime = np.array(f.by_col['esquivel99'])
    >>> w = ps.weights.block_weights(regime)
    >>> np.random.seed(12345)
    >>> res = [SpatialTau(y[:,i], y[:,i+1], w, 99) for i in range(6)]
    >>> for r in res:
    ...     ev = r.taus.mean()
    ...     "%8.3f %8.3f %8.3f" % (r.tau_spatial, ev, r.tau_spatial_psim)
    ...
    '   0.397    0.659    0.010'
    '   0.492    0.706    0.010'
    '   0.651    0.772    0.020'
    '   0.714    0.752    0.210'
    '   0.683    0.705    0.270'
    '   0.810    0.819    0.280'
    """

    def __init__(self, x, y, w, permutations=0):
        # Binary weights so s0 counts neighbor links directly.
        w.transform = "b"
        self.n = len(x)
        # Stage one: classic (aspatial) Tau over all pairs.
        res = Tau(x, y)
        self.tau = res.tau
        self.tau_p = res.tau_p
        self.concordant = res.concordant
        self.discordant = res.discordant
        self.extraX = res.extraX
        self.extraY = res.extraY
        # Stage two: concordance restricted to neighboring pairs.
        res = self._calc(x, y, w)
        self.tau_spatial = res[0]
        # s0 counts each undirected neighbor pair twice for binary weights.
        self.pairs_spatial = int(w.s0 / 2.0)
        self.concordant_spatial = res[1]
        self.discordant_spatial = res[2]
        if permutations > 0:
            taus = np.zeros(permutations)
            ids = np.arange(self.n)
            for r in range(permutations):
                rids = np.random.permutation(ids)
                taus[r] = self._calc(x[rids], y[rids], w)[0]
            self.taus = taus
            self.tau_spatial_psim = pseudop(taus, self.tau_spatial, permutations)

    def _calc(self, x, y, w):
        """Concordance over neighboring pairs only.

        Returns [tau_g, gc, gd]: spatially restricted tau, and counts of
        concordant and discordant neighbor pairs.
        """
        # n1/n2: neighbor pairs untied in x / in y (tie-adjusted denominators)
        # iS: concordant minus discordant; gc: concordant count.
        n1 = n2 = iS = gc = 0
        # NOTE: the original built a dict of visited (i, j) pairs that was
        # never read; it has been removed as dead code.
        for i in w.id_order:
            xi = x[i]
            yi = y[i]
            for j in w.neighbors[i]:
                if i < j:  # visit each undirected pair once
                    xj = x[j]
                    yj = y[j]
                    dx = xi - xj
                    dy = yi - yj
                    dxdy = dx * dy
                    if dxdy != 0:
                        n1 += 1
                        n2 += 1
                        if dxdy > 0.0:
                            gc += 1
                            iS += 1
                        else:
                            iS -= 1
                    else:
                        # tied in x and/or y: count only the untied margin
                        if dx != 0.0:
                            n1 += 1
                        if dy != 0.0:
                            n2 += 1
        tau_g = iS / (np.sqrt(n1) * np.sqrt(n2))
        gd = gc - iS
        return [tau_g, gc, gd]
def pseudop(sim, observed, nperm):
    """Pseudo p-value of ``observed`` against a permutation distribution.

    Counts simulated values at least as large as the observed one; if that
    tail probability exceeds 0.5, the opposite tail is reported instead, so
    the returned value is always the smaller (one-sided) tail.
    """
    n_at_least = int((sim >= observed).sum())
    p = (n_at_least + 1.0) / (nperm + 1.0)
    if p > 0.5:
        # observed sits in the lower tail: report that side instead
        p = (nperm - n_at_least + 1.0) / (nperm + 1.0)
    return p
class Tau_Local:
    """
    Local version of the classic Tau.

    Decomposition of the classic Tau into local components.

    Parameters
    ----------
    x : array
        (n, ), first variable.
    y : array
        (n, ), second variable.

    Attributes
    ----------
    n : int
        number of observations.
    tau : float
          The classic Tau statistic.
    tau_local : array
                (n, ), local concordance (local version of the classic tau).
    S : array
        (n, n), concordance matrix, s_{i,j}=1 if observation i and j are
        concordant, s_{i,j}=-1 if observation i and j are discordant, and
        s_{i,j}=0 otherwise.

    Notes
    -----
    The equation for calculating local concordance statistic can be found
    in :cite:`Rey2016` Equation (9).

    Examples
    --------
    >>> import libpysal as ps
    >>> import numpy as np
    >>> from giddy.rank import Tau_Local, Tau
    >>> np.random.seed(10)
    >>> f = ps.io.open(ps.examples.get_path("mexico.csv"))
    >>> vnames = ["pcgdp%d" % dec for dec in range(1940, 2010, 10)]
    >>> y = np.transpose(np.array([f.by_col[v] for v in vnames]))
    >>> r = y / y.mean(axis=0)
    >>> tau_local = Tau_Local(r[:,0], r[:,1])
    >>> tau_local.tau
    0.6612903225806451
    >>> tau_classic = Tau(r[:,0], r[:,1])
    >>> tau_classic.tau
    0.6612903225806451
    """

    def __init__(self, x, y):
        self.n = len(x)
        x = np.asarray(x)
        y = np.asarray(y)
        # Column vectors so broadcasting yields all pairwise differences.
        col_x = x.reshape(self.n, 1)
        col_y = y.reshape(self.n, 1)
        # Product of pairwise differences: >0 concordant, <0 discordant.
        products = (col_x - col_x.T) * (col_y - col_y.T)
        self.S = 1 * (products > 0) - 1 * (products < 0)
        # Classic Tau: net concordance over the n(n-1) ordered pairs.
        self.tau = self.S.sum() * 1.0 / (self.n * (self.n - 1))
        # Local Tau: each row's net concordance over its n-1 partners.
        row_sums = self.S.sum(axis=1)
        self.tau_local = row_sums * 1.0 / (self.n - 1)
class Tau_Local_Neighbor:
    """
    Neighbor set LIMA.

    Local concordance relationships between a focal unit and its neighbors.
    A decomposition of local Tau into neighbor and non-neighbor components.

    Parameters
    ----------
    x : array
        (n, ), first variable.
    y : array
        (n, ), second variable.
    w : W
        spatial weights object.
    permutations : int
        number of random spatial permutations for computationally based
        inference.

    Attributes
    ----------
    n : int
        number of observations.
    tau_local : array
                (n, ), local concordance (local version of the classic tau).
    S : array
        (n, n), concordance matrix, s_{i,j}=1 if observation i and j are
        concordant, s_{i,j}=-1 if observation i and j are discordant, and
        s_{i,j}=0 otherwise.
    tau_ln : array
             (n, ), observed neighbor set LIMA values.
    tau_ln_weights : array
                     (n, ), weights for neighbor set LIMA at each location.
                     GIMA is the weighted average of neighbor set LIMA.
    tau_ln_sim : array
                 (n, permutations), neighbor set LIMA values for permuted
                 samples (if permutations>0).
    tau_ln_pvalues : array
                     (n, ), one-sided pseudo p-values for observed neighbor
                     set LIMA values under the null that the concordance
                     relationship between the focal state and its neighbors
                     is not different from what could be expected from
                     randomly distributed rank changes.
    sign : array
           (n, ), values indicate concordant or discordant: 1 concordant,
           -1 discordant.

    Notes
    -----
    The equation for calculating neighbor set LIMA statistic can be found
    in :cite:`Rey2016` Equation (16).

    Examples
    --------
    >>> import libpysal as ps
    >>> import numpy as np
    >>> from giddy.rank import Tau_Local_Neighbor, SpatialTau
    >>> np.random.seed(10)
    >>> f = ps.io.open(ps.examples.get_path("mexico.csv"))
    >>> vnames = ["pcgdp%d" % dec for dec in range(1940, 2010, 10)]
    >>> y = np.transpose(np.array([f.by_col[v] for v in vnames]))
    >>> r = y / y.mean(axis=0)
    >>> regime = np.array(f.by_col['esquivel99'])
    >>> w = ps.weights.block_weights(regime)
    >>> res = Tau_Local_Neighbor(r[:,0], r[:,1], w, permutations=999)
    >>> (res.tau_ln * res.tau_ln_weights).sum()  # global spatial tau
    0.39682539682539675
    >>> res1 = SpatialTau(r[:,0], r[:,1], w, permutations=999)
    >>> res1.tau_spatial
    0.3968253968253968
    """

    def __init__(self, x, y, w, permutations=0):
        x = np.asarray(x)
        y = np.asarray(y)
        self.n = len(x)
        w.transform = "b"
        self.tau_ln, self.tau_ln_weights = self._calc(x, y, w)
        concor_sign = np.ones(self.n)
        concor_sign[self.tau_ln < 0] = -1
        self.sign = concor_sign.astype(int)
        if permutations > 0:
            tau_ln_sim = np.zeros((self.n, permutations))
            tau_ln_pvalues = np.zeros(self.n)
            for i in range(self.n):
                obs_i = self.tau_ln[i]  # observed value i LIMA statistic
                yr = np.zeros_like(y)
                # FIX: was np.zeros_like(y) — the buffer that receives x
                # values must carry x's dtype, or x would be silently cast.
                xr = np.zeros_like(x)
                # Conditional permutation: hold observation i fixed and
                # shuffle all the others.
                rids = np.arange(self.n)
                rids = np.delete(rids, i)
                for j in range(permutations):
                    pids = np.random.permutation(rids)
                    xr[i] = x[i]
                    xr[rids] = x[pids]
                    yr[i] = y[i]
                    yr[rids] = y[pids]
                    tau_ln_sim[i, j] = self._calc(xr, yr, w, i)
                larger = (tau_ln_sim[i] >= obs_i).sum()
                smaller = (tau_ln_sim[i] <= obs_i).sum()
                # One-sided pseudo p-value from the smaller tail.
                tau_ln_pvalues[i] = (np.min([larger, smaller]) + 1.0) / (
                    1 + permutations
                )
            self.tau_ln_sim = tau_ln_sim
            self.tau_ln_pvalues = tau_ln_pvalues

    def _calc_r(self, xi, yi, xj, yj, w):
        """Pairwise concordance sign: 1 concordant, -1 discordant, 0 tied."""
        dx = xi - xj
        dy = yi - yj
        dxdy = dx * dy
        if dxdy != 0:
            if dxdy > 0.0:
                return 1
            else:
                return -1
        else:
            return 0

    def _calc(self, x, y, w, i=None):
        """Neighbor set LIMA for one unit (if i given) or all units.

        Returns tau_ln for unit i, or (tau_ln, tau_ln_weights) arrays for
        the full set of observations.
        """
        if i is not None:
            iS_local = 0
            for j in w.neighbors[i]:
                iS_local += self._calc_r(x[i], y[i], x[j], y[j], w)
            tau_ln = iS_local * 1.0 / w.cardinalities[i]
            return tau_ln
        else:
            tau_ln = np.zeros(self.n)
            tau_ln_weights = np.zeros(self.n)
            for i in w.id_order:
                iS_local = 0
                for j in w.neighbors[i]:
                    iS_local += self._calc_r(x[i], y[i], x[j], y[j], w)
                tau_ln[i] = iS_local * 1.0 / w.cardinalities[i]
                # Weight each location by its share of all neighbor links.
                tau_ln_weights[i] = w.cardinalities[i] * 1.0 / w.s0
            return tau_ln, tau_ln_weights
class Tau_Local_Neighborhood:
    """
    Neighborhood set LIMA.

    An extension of neighbor set LIMA. Consider local concordance
    relationships for a subset of states, defined as the focal state and
    its neighbors.

    Parameters
    ----------
    x : array
        (n, ), first variable.
    y : array
        (n, ), second variable.
    w : W
        spatial weights object.
    permutations : int
        number of random spatial permutations for computationally based
        inference.

    Attributes
    ----------
    n : int
        number of observations.
    tau_local : array
                (n, ), local concordance (local version of the classic tau).
    S : array
        (n, n), concordance matrix, s_{i,j}=1 if observation i and j are
        concordant, s_{i,j}=-1 if observation i and j are discordant, and
        s_{i,j}=0 otherwise.
    tau_lnhood : array
                 (n, ), observed neighborhood set LIMA values.
    tau_lnhood_sim : array
                     (n, permutations), neighborhood set LIMA values for
                     permuted samples (if permutations>0).
    tau_lnhood_pvalues : array
                         (n, 1), one-sided pseudo p-values for observed
                         neighborhood set LIMA values under the null that
                         the concordance relationships for a subset of
                         states, defined as the focal state and its
                         neighbors, is different from what would be expected
                         from randomly distributed rank changes.
    sign : array
           (n, ), values indicate concordant or discordant: 1 concordant,
           -1 discordant.

    Notes
    -----
    The equation for calculating neighborhood set LIMA statistic can be
    found in :cite:`Rey2016` Equation (22).

    Examples
    --------
    >>> import libpysal as ps
    >>> from giddy.rank import Tau_Local_Neighborhood
    >>> import numpy as np
    >>> np.random.seed(10)
    >>> f = ps.io.open(ps.examples.get_path("mexico.csv"))
    >>> vnames = ["pcgdp%d" % dec for dec in range(1940, 2010, 10)]
    >>> y = np.transpose(np.array([f.by_col[v] for v in vnames]))
    >>> r = y / y.mean(axis=0)
    >>> regime = np.array(f.by_col['esquivel99'])
    >>> w = ps.weights.block_weights(regime)
    >>> res = Tau_Local_Neighborhood(r[:,0], r[:,1], w, permutations=999)
    >>> res.tau_lnhood[:4]
    array([0.06666667, 0.6       , 0.2       , 0.8       ])
    """

    def __init__(self, x, y, w, permutations=0):
        x = np.asarray(x)
        y = np.asarray(y)
        # Pairwise concordance matrix S from the local Tau decomposition.
        res = Tau_Local(x, y)
        self.n = res.n
        self.S = res.S
        self.tau_local = res.tau_local
        w.transform = "b"
        tau_lnhood = np.zeros(self.n)
        for i in range(self.n):
            # Neighborhood set: the focal unit plus its neighbors.
            neighbors_i = [i]
            neighbors_i.extend(w.neighbors[i])
            n_i = len(neighbors_i)
            # Submatrix of S restricted to the neighborhood set.
            sh_i = self.S[neighbors_i, :][:, neighbors_i]
            # Neighborhood set LIMA
            tau_lnhood[i] = sh_i.sum() * 1.0 / (n_i * (n_i - 1))
        self.tau_lnhood = tau_lnhood
        concor_sign = np.ones(self.n)
        concor_sign[self.tau_lnhood < 0] = -1
        self.sign = concor_sign.astype(int)
        if permutations > 0:
            tau_lnhood_sim = np.zeros((self.n, permutations))
            tau_lnhood_pvalues = np.zeros(self.n)
            for i in range(self.n):
                obs_i = self.tau_lnhood[i]
                rids = list(range(self.n))
                rids.remove(i)
                # NOTE: removed a dead `larger = 0` pre-initialization and a
                # pointless `neighbors_i_second = neighbors_i` alias from the
                # original loop body.
                for j in range(permutations):
                    # Random neighborhood of the same size as the observed one.
                    np.random.shuffle(rids)
                    neighbors_i = [i]
                    neighbors_i.extend(rids[: len(w.neighbors[i])])
                    n_i = len(neighbors_i)
                    sh_i = self.S[neighbors_i, :][:, neighbors_i]
                    tau_lnhood_sim[i, j] = sh_i.sum() * 1.0 / (n_i * (n_i - 1))
                larger = (tau_lnhood_sim[i] >= obs_i).sum()
                smaller = (tau_lnhood_sim[i] <= obs_i).sum()
                # One-sided pseudo p-value from the smaller tail.
                tau_lnhood_pvalues[i] = (np.min([larger, smaller]) + 1.0) / (
                    1 + permutations
                )
            self.tau_lnhood_sim = tau_lnhood_sim
            self.tau_lnhood_pvalues = tau_lnhood_pvalues
class Tau_Regional:
    """
    Inter and intraregional decomposition of the classic Tau.

    Parameters
    ----------
    x : array
        (n, ), first variable.
    y : array
        (n, ), second variable.
    regime : array
             (n, ), ids of which regime an observation belongs to.
    permutations : int
        number of random spatial permutations for computationally based
        inference.

    Attributes
    ----------
    n : int
        number of observations.
    S : array
        (n, n), concordance matrix, s_{i,j}=1 if observation i and j are
        concordant, s_{i,j}=-1 if observation i and j are discordant, and
        s_{i,j}=0 otherwise.
    tau_reg : array
              (k, k), observed concordance matrix with diagonal elements
              measuring concordance between units within a regime and the
              off-diagonal elements denoting concordance between
              observations from a specific pair of different regimes.
    tau_reg_sim : array
                  (permutations, k, k), concordance matrices for permuted
                  samples (if permutations>0).
    tau_reg_pvalues : array
                      (k, k), one-sided pseudo p-values for observed
                      concordance matrix under the null that income mobility
                      were random in its spatial distribution.

    Notes
    -----
    The equation for calculating inter and intraregional Tau statistic can
    be found in :cite:`Rey2016` Equation (27).

    Examples
    --------
    >>> import libpysal as ps
    >>> import numpy as np
    >>> from giddy.rank import Tau_Regional
    >>> np.random.seed(10)
    >>> f = ps.io.open(ps.examples.get_path("mexico.csv"))
    >>> vnames = ["pcgdp%d" % dec for dec in range(1940, 2010, 10)]
    >>> y = np.transpose(np.array([f.by_col[v] for v in vnames]))
    >>> regime = np.array(f.by_col['esquivel99'])
    >>> res = Tau_Regional(y[:,0], y[:,-1], regime, permutations=999)
    >>> res.tau_reg[0]
    array([1.        , 0.25      , 0.5       , 0.6       , 0.83333333,
           0.6       , 1.        ])
    """

    def __init__(self, x, y, regime, permutations=0):
        x = np.asarray(x)
        y = np.asarray(y)
        # Pairwise concordance matrix S from the local Tau decomposition.
        res = Tau_Local(x, y)
        self.n = res.n
        self.S = res.S
        reg = np.array(regime).flatten()
        ur = np.unique(reg).tolist()
        k = len(ur)
        # P: (k, n) regime-membership indicator matrix.
        P = np.zeros((k, self.n))
        for i, r in enumerate(reg):
            P[ur.index(r), i] = 1  # construct P matrix
        # Block weights: W marks within-regime pairs, WH the rest
        # (excluding the diagonal).
        w = weights.block_weights(regime)
        w.transform = "b"
        W = w.full()[0]
        WH = np.ones((self.n, self.n)) - np.eye(self.n) - W
        # inter and intraregional decomposition of Tau for the observed value
        self.tau_reg = self._calc(W, WH, P, self.S)
        if permutations > 0:
            tau_reg_sim = np.zeros((permutations, k, k))
            larger = np.zeros((k, k))
            smaller = np.zeros((k, k))
            rids = np.arange(len(x))
            for i in range(permutations):
                np.random.shuffle(rids)
                res = Tau_Local(x[rids], y[rids])
                tau_reg_sim[i] = self._calc(W, WH, P, res.S)
                larger += np.greater_equal(tau_reg_sim[i], self.tau_reg)
                smaller += np.less_equal(tau_reg_sim[i], self.tau_reg)
            # Element-wise smaller tail gives the one-sided pseudo p-value.
            m = np.less(smaller, larger)
            pvalues = (1 + m * smaller + (1 - m) * larger) / (1.0 + permutations)
            self.tau_reg_sim = tau_reg_sim
            self.tau_reg_pvalues = pvalues

    def _calc(self, W, WH, P, S):
        """Aggregate pairwise concordance S into a (k, k) regime matrix.

        Numerator sums S over each regime pair; denominator counts the
        within-regime (W) plus between-regime (WH) pairs, i.e. all
        off-diagonal pairs for that regime combination.
        """
        nomi = np.dot(P, np.dot(S, P.T))
        denomi = np.dot(P, np.dot(W, P.T)) + np.dot(P, np.dot(WH, P.T))
        T = nomi / denomi
        return T