Skip to content

bivariate_quantiles module

This takes in observations of a bivariate random variable y and computes vector quantiles and vector ranks à la Chernozhukov-Galichon-Hallin-Henry (Ann. Stats. 2017).

Note

if the math looks strange in the documentation, just reload the page.

The sequence of steps is as follows:

  • choose a number of Chebyshev nodes for numerical integration and optimize the weights: v = solve_for_v(y, n_nodes)
  • to obtain the \((u_1,u_2)\) quantiles for \((u_1, u_2)\in [0,1]\), run qtiles_y = bivariate_quantiles_v(y, v, u1, u2)
  • to compute the vector ranks for all points in the sample (the barycenters of the cells in the power diagram): ranks_y = bivariate_ranks_v(y, v, n_nodes)

Steps 1 and 2 can be combined: qtiles_y = bivariate_quantiles(y, v, u1, u2, n_nodes)

Steps 1 and 3 can be combined: ranks_y = bivariate_ranks(y, n_nodes)

bivariate_quantiles(y, u, n_nodes=32, verbose=False)

computes the bivariate quantiles of y at the quantiles u

Parameters:

Name Type Description Default
y ndarray

the observations, an (n, 2) matrix

required
u ndarray

the quantiles at which to compute the bivariate quantiles, an (m, 2) matrix

required
n_nodes int

the number of nodes to use for the quadrature

32
verbose bool

if True, print some information

False

Returns:

Type Description
ndarray

an (m, 2) matrix of bivariate quantiles

Source code in bs_python_utils/bivariate_quantiles.py
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def bivariate_quantiles(
    y: np.ndarray, u: np.ndarray, n_nodes: int = 32, verbose: bool = False
) -> np.ndarray:
    """computes the bivariate quantiles of `y` at the quantiles `u`

    Args:
        y: the observations, an `(n, 2)` matrix
        u: the quantiles at which to compute the bivariate quantiles,
            an `(m, 2)` matrix
        n_nodes: the number of nodes to use for the quadrature
        verbose: if `True`, print some information

    Returns:
        an `(m, 2)` matrix of bivariate quantiles
    """
    v = solve_for_v_(y, n_nodes, verbose)
    return bivariate_quantiles_v(y, u, v)

bivariate_quantiles_v(y, u, v)

computes the vector quantiles of y at values u, given the converged v

Parameters:

Name Type Description Default
y ndarray

the observations, an (n,2) matrix

required
u ndarray

the values where we want the quantiles, an (m,2) matrix in \([0,1]\)

required
v ndarray

the converged values of the weights, an n-vector

required

Returns:

Type Description
ndarray

an (m,2) matrix with the quantiles of y at the values u

Source code in bs_python_utils/bivariate_quantiles.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def bivariate_quantiles_v(y: np.ndarray, u: np.ndarray, v: np.ndarray) -> np.ndarray:
    """computes the vector quantiles of `y` at values `u`, given the converged `v`

    Args:
        y: the observations, an `(n,2)` matrix
        u: the values where we want the quantiles, an `(m,2)` matrix in $[0,1]$
        v: the converged values of the weights, an `n`-vector

    Returns:
        an `(m,2)` matrix with the quantiles of `y` at the values `u`
    """
    net_val = u @ y.T - v
    k_max = np.argmax(net_val, 1)
    return cast(np.ndarray, y[k_max])

bivariate_ranks(y, n_nodes=32, verbose=False)

computes the bivariate ranks of y

Parameters:

Name Type Description Default
y ndarray

the observations, an (n, 2) matrix

required
n_nodes int

the number of nodes to use for the quadrature

32
verbose bool

if True, print some information

False

Returns:

Type Description
ndarray

the (n, 2) matrix of bivariate average ranks

Source code in bs_python_utils/bivariate_quantiles.py
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
def bivariate_ranks(
    y: np.ndarray, n_nodes: int = 32, verbose: bool = False
) -> np.ndarray:
    """computes the bivariate ranks of `y`

    Args:
        y: the observations, an `(n, 2)` matrix
        n_nodes: the number of nodes to use for the quadrature
        verbose: if `True`, print some information

    Returns:
        the `(n, 2)` matrix of bivariate average ranks
    """
    v = solve_for_v_(y, n_nodes, verbose)
    return bivariate_ranks_v(y, v, n_nodes)

bivariate_ranks_v(y, v, n_nodes=32, presorted=False)

computes the vector ranks of y, given the converged v

Parameters:

Name Type Description Default
y ndarray

the observations, an (n,2) matrix

required
v ndarray

the converged values of the weights, an n-vector

required
n_nodes int

the number of nodes for Chebyshev integration

32
presorted bool

if True, then y and v are sorted by increasing y[:, 1].

False

Returns:

Type Description
ndarray

an (n,2) matrix with the average ranks of y

Source code in bs_python_utils/bivariate_quantiles.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
def bivariate_ranks_v(
    y: np.ndarray, v: np.ndarray, n_nodes: int = 32, presorted: bool = False
) -> np.ndarray:
    """computes the vector ranks of `y`, given the converged `v`

    Args:
        y: the observations, an `(n,2)` matrix
        v: the converged values of the weights, an `n`-vector
        n_nodes: the number of nodes for Chebyshev integration
        presorted: if `True`, then `y` and `v` are sorted by increasing `y[:, 1]`.

    Returns:
        an `(n,2)` matrix with the average ranks of `y`
    """
    n, d = y.shape

    if d != 2:
        bs_error_abort(f"only works for 2-dimensional y, not for {d}")

    interval01 = Interval(0.0, 1.0)
    u1_nodes, u1_weights = cheb_get_nodes_1d(interval01, n_nodes)

    if presorted:
        sort_order = np.arange(n)
        y_sorted = y
        v_sorted = v
    else:
        sort_order = np.argsort(y[:, 1])
        y_sorted = y[sort_order, :]
        v_sorted = v[sort_order]

    a_mat, b_mat = _compute_ab(y_sorted, v_sorted)

    average_ranks = np.zeros((n, 2))

    for k in range(n):
        left_bounds, right_bounds = _compute_u2_bounds(k, u1_nodes, a_mat, b_mat)
        pos_diffs = np.maximum(right_bounds - left_bounds, 0.0)
        pos_diffs_sq = np.maximum(
            right_bounds * right_bounds - left_bounds * left_bounds, 0.0
        )
        prob_k = pos_diffs @ u1_weights
        average_ranks[sort_order[k], 0] = ((u1_nodes * pos_diffs) @ u1_weights) / prob_k
        average_ranks[sort_order[k], 1] = ((pos_diffs_sq @ u1_weights) / 2.0) / prob_k

    return average_ranks