Skip to content

distance_covariances module

Distance covariance and partial distance covariance à la Szekely and Rizzo; evaluation and tests of independence and conditional independence:

  • DcovResults, PdcovResults: classes for distance covariances
  • dcov_dcor: evaluates the distance covariance and correlation of two random variables
  • pdcov_pdcor: evaluates the partial distance covariance and correlation of X and Y given Z
  • pvalue_dcov: test of no dependence between X and Y
  • pvalue_pdcov: test of no dependence between X and Y given Z.

dcov_dcor(X, Y, unbiased=False)

evaluate the distance covariance and correlation of X and Y

Parameters:

Name Type Description Default
X ndarray

n observations of a random variable or vector

required
Y ndarray

n observations of a random variable or vector

required
unbiased bool

if True, we use the Szekely and Rizzo 2014 formula

False

Returns:

Type Description
DcovResults

dCov^2(X,Y) and dCor^2(X,Y)

Source code in bs_python_utils/distance_covariances.py
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def dcov_dcor(X: np.ndarray, Y: np.ndarray, unbiased: bool = False) -> DcovResults:
    """
    Compute the squared distance covariance and distance correlation of `X` and `Y`.

    Each sample is turned into a distance array by `_compute_distances`, which is
    then double-decentered by `_double_decenter`; all three products below
    are taken with `_dcov_prod` on the decentered arrays.

    Args:
        X: `n` observations of a random variable or vector
        Y: `n` observations of a random variable or vector
        unbiased: if `True`, we use the Szekely and Rizzo 2014 formula

    Returns:
        a `DcovResults` instance holding `dCov^2(X,Y)`, `dCor^2(X,Y)`,
        the two decentered arrays, the `unbiased` flag, and the statistic
        `n * dCov^2(X,Y)`
    """
    dist_x = _compute_distances(X)
    # the sample size is read off the first dimension of the distance array
    n_obs = dist_x.shape[0]
    dd_x = _double_decenter(dist_x, unbiased)
    dd_y = _double_decenter(_compute_distances(Y), unbiased)

    dcov_sq = _dcov_prod(dd_x, dd_y, unbiased)
    # normalise by the square root of the product of the two self-products
    dvar_x = _dcov_prod(dd_x, dd_x, unbiased)
    dvar_y = _dcov_prod(dd_y, dd_y, unbiased)
    dcor_sq = dcov_sq / sqrt(dvar_x * dvar_y)

    return DcovResults(
        dcov=dcov_sq,
        dcor=dcor_sq,
        X_dd=dd_x,
        Y_dd=dd_y,
        unbiased=unbiased,
        dcov_stat=n_obs * dcov_sq,
    )

pdcov_pdcor(X, Y, Z)

evaluate the partial distance covariance and correlation of X and Y given Z

Parameters:

Name Type Description Default
X ndarray

n observations of a random variable or vector

required
Y ndarray

n observations of a random variable or vector

required
Z ndarray

n observations of a random variable or vector

required

Returns:

Type Description
PdcovResults

a PdcovResults instance

Source code in bs_python_utils/distance_covariances.py
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
def pdcov_pdcor(X: np.ndarray, Y: np.ndarray, Z: np.ndarray) -> PdcovResults:
    """
    Compute the partial distance covariance and correlation of `X` and `Y` given `Z`.

    The helpers are always called with `unbiased=True`.

    Args:
        X: `n` observations of a random variable or vector
        Y: `n` observations of a random variable or vector
        Z: `n` observations of a random variable or vector

    Returns:
        a `PdcovResults` instance with the partial distance covariance,
        the partial distance correlation, the statistic `n * pdcov`
        (with `n = X.shape[0]`), and the three decentered arrays
    """
    use_unbiased = True

    # distances, then double decentering, for X, Y and Z in that order
    dd_x, dd_y, dd_z = (
        _double_decenter(_compute_distances(sample), use_unbiased)
        for sample in (X, Y, Z)
    )

    # the six pairwise products of the decentered arrays
    prod_xx = _dcov_prod(dd_x, dd_x, use_unbiased)
    prod_xy = _dcov_prod(dd_x, dd_y, use_unbiased)
    prod_yy = _dcov_prod(dd_y, dd_y, use_unbiased)
    prod_xz = _dcov_prod(dd_x, dd_z, use_unbiased)
    prod_yz = _dcov_prod(dd_y, dd_z, use_unbiased)
    prod_zz = _dcov_prod(dd_z, dd_z, use_unbiased)

    # remove from each product the part that goes through Z
    partial_cov = prod_xy - (prod_xz * prod_yz) / prod_zz
    resid_var_x = prod_xx - prod_xz * prod_xz / prod_zz
    resid_var_y = prod_yy - prod_yz * prod_yz / prod_zz
    partial_cor = partial_cov / sqrt(resid_var_x * resid_var_y)

    return PdcovResults(
        pdcov=partial_cov,
        pdcor=partial_cor,
        pdcov_stat=X.shape[0] * partial_cov,
        X_dd=dd_x,
        Y_dd=dd_y,
        Z_dd=dd_z,
    )

pvalue_dcov(dcov_results, ndraws=199)

test of no dependence between X and Y

Parameters:

Name Type Description Default
dcov_results DcovResults

results from dcov_dcor

required
ndraws int

the number of draws we use

199

Returns:

Type Description
float

the bootstrapped p-value of the test

Source code in bs_python_utils/distance_covariances.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def pvalue_dcov(dcov_results: DcovResults, ndraws: int = 199) -> float:
    """
    Test of no dependence between `X` and `Y`.

    The p-value is `(1 + k) / (1 + ndraws)`, where `k` counts the values
    returned by `_dcov_bootstrap` that are strictly larger than the
    observed statistic `dcov_results.dcov_stat`.

    Args:
        dcov_results:  results from `dcov_dcor`
        ndraws: the number of draws we use

    Returns:
        the bootstrapped  p-value of the test
    """
    observed_stat = dcov_results.dcov_stat
    boot_stats = _dcov_bootstrap(
        dcov_results.X_dd, dcov_results.Y_dd, dcov_results.unbiased, ndraws
    )
    # number of bootstrap statistics strictly above the observed one
    n_above = cast(int, np.sum(observed_stat < boot_stats))
    return (n_above + 1.0) / (ndraws + 1.0)

pvalue_pdcov(pdcov_results, ndraws=199)

test of no dependence between X and Y given Z

Parameters:

Name Type Description Default
pdcov_results PdcovResults

the results of pdcov_pdcor

required
ndraws int

the number of draws we use

199

Returns:

Type Description
float

the bootstrapped p-value of the test

Source code in bs_python_utils/distance_covariances.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
def pvalue_pdcov(pdcov_results: PdcovResults, ndraws: int = 199) -> float:
    """
    Test of no dependence between `X` and `Y` given `Z`.

    The p-value is `(1 + k) / (1 + ndraws)`, where `k` counts the values
    returned by `_pdcovs_bootstrap` that are strictly larger than the
    observed statistic `pdcov_results.pdcov_stat`.

    Args:
        pdcov_results: the results of `pdcov_pdcor`
        ndraws: the number of draws we use

    Returns:
        the bootstrapped  p-value of the test
    """
    observed_stat = pdcov_results.pdcov_stat
    boot_stats = _pdcovs_bootstrap(
        pdcov_results.X_dd, pdcov_results.Y_dd, pdcov_results.Z_dd, ndraws
    )
    # number of bootstrap statistics strictly above the observed one
    n_above = cast(int, np.sum(observed_stat < boot_stats))
    return (n_above + 1.0) / (ndraws + 1.0)