Skip to content

min_distance_utils module

Utility programs used in min_distance.py.

MDEResults dataclass

The results from minimum-distance estimation and testing.

Parameters:

Name Type Description Default
X int

the number of types of men

required
Y int

the number of types of women

required
K int

the number of bases

required
number_households int

the number of households in the sample

required
estimated_coefficients np.ndarray

the estimated coefficients

required
varcov_coefficients np.ndarray

their eetimated var-covar

required
stderrs_coefficients np.ndarray

their estimated stderrs

required
estimated_Phi np.ndarray

the estimated joint surplus

required
test_statistic float

the value of the misspecification statistic

required
test_pvalue float

the p-value of the test

required
ndf int

the number of degrees of freedom

required
parameterized_entropy bool | None

True if the derivative of the entropy has unknown parameters

False
Source code in cupid_matching/min_distance_utils.py
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
@dataclass
class MDEResults:
    """
    The results from minimum-distance estimation and testing.

    Args:
        X: the number of types of men
        Y: the number of types of women
        K: the number of bases
        number_households: the number of households in the sample
        estimated_coefficients: the estimated coefficients
        varcov_coefficients: their eetimated var-covar
        stderrs_coefficients: their estimated stderrs
        estimated_Phi: the estimated joint surplus
        test_statistic: the value of the misspecification statistic
        test_pvalue: the p-value of the test
        ndf: the number of degrees of freedom
        parameterized_entropy: True if the derivative of the entropy has unknown parameters
    """

    X: int
    Y: int
    K: int
    number_households: int
    estimated_coefficients: np.ndarray
    varcov_coefficients: np.ndarray
    stderrs_coefficients: np.ndarray
    estimated_Phi: np.ndarray
    test_statistic: float
    test_pvalue: float
    ndf: int
    parameterized_entropy: bool | None = False

    def __str__(self):
        line_stars = "*" * 80 + "\n"
        if self.parameterized_entropy:
            n_alpha = self.estimated_coefficients.size - self.K
            entropy_str = f"     The entropy has {n_alpha} parameters."
        else:
            entropy_str = "     The entropy is parameter-free."
            n_alpha = 0
        model_str = f"The data has {self.number_households} households\n\n"
        model_str += f"The model has {self.X}x{self.Y} margins\n {entropy_str} \n"
        model_str += f"We use {self.K} basis functions.\n\n"
        repr_str = line_stars + model_str
        repr_str += "The estimated coefficients (and their standard errors) are\n\n"
        if self.parameterized_entropy:
            for i, coeff in enumerate(self.estimated_coefficients[:n_alpha]):
                repr_str += (
                    f"   alpha({i + 1}): {coeff: > 10.3f}  "
                    + f"({self.stderrs_coefficients[i]: .3f})\n"
                )
            repr_str += "\n"
        for i, coeff in enumerate(self.estimated_coefficients[n_alpha:]):
            repr_str += (
                f"   base {i + 1}: {coeff: > 10.3f} "
                + f"({self.stderrs_coefficients[n_alpha + i]: .3f})\n"
            )
        repr_str += "\nSpecification test:\n"
        repr_str += (
            f"   the value of the test statistic is {self.test_statistic: > 10.3f}\n"
        )
        repr_str += (
            f"     for a chi2({self.ndf}), the p-value is {self.test_pvalue: > 10.3f}\n"
        )
        return repr_str + line_stars

    def print_results(
        self, true_coeffs: np.ndarray | None = None, n_alpha: int = 0
    ) -> None | float:
        estimates = self.estimated_coefficients
        stderrs = self.stderrs_coefficients

        if true_coeffs is not None:
            repr_str = (
                "The true and estimated coefficients "
                + "(and their standard errors) are\n\n"
            )
            for i, coeff in enumerate(estimates[:n_alpha]):
                repr_str += f"   alpha({i + 1}): {true_coeffs[i]: > 10.3f}"
                repr_str += f"{coeff: > 10.3f}  ({stderrs[i]: > 10.3f})\n"
                repr_str += "\n"
            for i, coeff in enumerate(estimates[n_alpha:]):
                j = n_alpha + i
                repr_str += (
                    f"   base {i + 1}: {true_coeffs[j]: > 10.3f}  "
                    + f"{coeff: > 10.3f}  ({stderrs[j]: > 10.3f})\n"
                )
            print_stars(repr_str)
            discrepancy = npmaxabs(true_coeffs - estimates)
            print_stars(
                "The largest difference between true and estimated coefficients is"
                f" {discrepancy: .2e}"
            )
        else:
            repr_str = (
                "The estimated coefficients " + "(and their standard errors) are\n\n"
            )
            for i, coeff in enumerate(estimates[:n_alpha]):
                repr_str + f"{coeff: > 10.3f}  ({stderrs[i]: > 10.3f})\n"
                repr_str += "\n"
            for i, coeff in enumerate(estimates[n_alpha:]):
                j = n_alpha + i
                repr_str += f"{coeff: > 10.3f}  ({stderrs[j]: > 10.3f})\n"
            print_stars(repr_str)

        repr_str = "\nSpecification test:\n"
        repr_str += (
            "   the value of the test statistic is "
            + f"{self.test_statistic: > 10.3f}\n"
        )
        repr_str += (
            f"     for a chi2({self.ndf}), "
            + f"the p-value is {self.test_pvalue: > 10.3f}\n"
        )
        print_stars(repr_str)
        if true_coeffs is not None:
            return cast(float, discrepancy)
        return None

check_args_mde(muhat, phi_bases)

check that the arguments to the MDE are consistent

Source code in cupid_matching/min_distance_utils.py
19
20
21
22
23
24
25
26
27
28
29
30
31
def check_args_mde(muhat: Matching, phi_bases: np.ndarray) -> tuple[int, int, int]:
    """check that the arguments to the MDE are consistent"""
    muxyhat, *_ = muhat.unpack()
    X, Y = muxyhat.shape
    ndims_phi = phi_bases.ndim
    if ndims_phi != 3:
        bs_error_abort(f"phi_bases should have 3 dimensions, not {ndims_phi}")
    Xp, Yp, K = phi_bases.shape
    if Xp != X or Yp != Y:
        bs_error_abort(
            f"phi_bases should have shape ({X}, {Y}, {K}) not ({Xp}, {Yp}, {K})"
        )
    return X, Y, K

check_indep_phi_no_singles(D2_phi, X, Y)

check that the double difference of the phi matrix has full column rank; if so, return it

Parameters:

Name Type Description Default
D2_phi np.ndarray

an \((X*Y, K)\) matrix of double differences

required
X int

number of types of men

required
Y int

number of types of women

required

Returns:

Type Description
None

nothing

Source code in cupid_matching/min_distance_utils.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def check_indep_phi_no_singles(D2_phi: np.ndarray, X: int, Y: int) -> None:
    """check that the double difference of the phi matrix has full column rank;
        if so, return it

    Args:
        D2_phi: an $(X*Y, K)$ matrix of double differences
        X: number of types of men
        Y: number of types of women

    Returns:
        nothing
    """
    K = D2_phi.shape[1]
    actual_rank = np.linalg.matrix_rank(D2_phi)  # Compute the matrix rank
    if actual_rank != D2_phi.shape[1]:
        bs_error_abort(
            f"We have {K} basis functions but phi_mat only has rank {actual_rank}."
        )

compute_estimates(M, S_mat, d)

Returns the QGLS estimates and their variance-covariance.

Parameters:

Name Type Description Default
M np.ndarray

an (XY,p) matrix

required
S_mat np.ndarray

an (XY, XY) weighting matrix

required
d np.ndarray

an XY-vector

required

Returns:

Type Description
tuple[np.ndarray, np.ndarray]

the p-vector of estimates and their estimated (p,p) variance

Source code in cupid_matching/min_distance_utils.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
def compute_estimates(
    M: np.ndarray, S_mat: np.ndarray, d: np.ndarray
) -> tuple[np.ndarray, np.ndarray]:
    """Returns the QGLS estimates and their variance-covariance.

    Args:
        M: an (XY,p) matrix
        S_mat: an (XY, XY) weighting matrix
        d: an XY-vector

    Returns:
        the p-vector of estimates and their estimated (p,p) variance
    """
    M_T = M.T
    M_S_d = M_T @ S_mat @ d
    M_S_M = M_T @ S_mat @ M
    est_coeffs = -spla.solve(M_S_M, M_S_d)
    varcov_coeffs = spla.inv(M_S_M)
    return est_coeffs, varcov_coeffs

get_initial_weighting_matrix(parameterized_entropy, initial_weighting_matrix, XY)

returns the initial weighting matrix for the MDE when the entropy is parameterized

Parameters:

Name Type Description Default
parameterized_entropy bool

if True, the entropy has unknown parameters

required
initial_weighting_matrix np.ndarray | None

the initial weighting matrix, if provided

required
XY int

= X*Y

required

Returns:

Type Description
np.ndarray | None

the initial_weighting_matrix, or None if the entropy is not parameterized.

Source code in cupid_matching/min_distance_utils.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def get_initial_weighting_matrix(
    parameterized_entropy: bool, initial_weighting_matrix: np.ndarray | None, XY: int
) -> np.ndarray | None:
    """returns the initial weighting matrix for the MDE when the entropy is parameterized

    Args:
        parameterized_entropy: if `True`, the entropy has unknown parameters
        initial_weighting_matrix: the initial weighting matrix, if provided
        XY: = X*Y

    Returns:
        the initial_weighting_matrix, or None if the entropy is not parameterized.
    """
    if parameterized_entropy:
        if initial_weighting_matrix is None:
            print_stars(
                "Using the identity matrix as weighting matrix in the first step."
            )
            S_mat = np.eye(XY)
        else:
            S_mat = initial_weighting_matrix
        return S_mat
    else:
        return None

get_optimal_weighting_matrix(muhat, hessians_both, no_singles=False, D2_mat=None)

compute the \(S^st\) matrix used in the second step of the MDE

Parameters:

Name Type Description Default
muhat Matching

the observed Matching

required
hessians_both np.ndarray

the Hessian of the entropy function

required
Source code in cupid_matching/min_distance_utils.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def get_optimal_weighting_matrix(
    muhat: Matching,
    hessians_both: np.ndarray,
    no_singles: bool = False,
    D2_mat: np.ndarray | None = None,
) -> np.ndarray:
    """compute the $S^\ast$ matrix used in the second step of the MDE

    Args:
        muhat: the observed `Matching`
        hessians_both: the Hessian of the entropy function
    """
    var_muhat = variance_muhat(muhat)
    var_munm = var_muhat.var_munm
    var_entropy_gradient = hessians_both @ var_munm @ hessians_both.T
    if no_singles:
        if D2_mat is None:
            bs_error_abort("D2_mat should not be None when no_singles is True")
        else:
            var_entropy_gradient = D2_mat @ var_entropy_gradient @ D2_mat.T
    S_mat = spla.inv(var_entropy_gradient)
    return cast(np.ndarray, S_mat)

make_D2_matrix(X, Y)

create the double difference matrix for use w/o singles

Parameters:

Name Type Description Default
X int

number of types of men

required
Y int

number of types of women

required

Returns:

Type Description
tuple[np.ndarray, int]

an (r, XY) matrix and its rank r

Source code in cupid_matching/min_distance_utils.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def make_D2_matrix(X: int, Y: int) -> tuple[np.ndarray, int]:
    """create the double difference matrix for use w/o singles

    Args:
        X: number of types of men
        Y: number of types of women

    Returns:
        an (r, XY) matrix and  its rank r
    """
    XY = X * Y
    D2_mat = np.ones((XY, XY)) / XY + np.eye(XY)
    for x in range(X):
        slice_x = slice(x * Y, x * Y + Y)
        D2_mat[slice_x, slice_x] -= 1.0 / Y
    for y in range(Y):
        slice_y = slice(y, XY, Y)
        D2_mat[slice_y, slice_y] -= 1.0 / X
    rank_D2 = np.linalg.matrix_rank(D2_mat)
    rng = np.random.default_rng(453)
    A_mat = rng.uniform(size=(rank_D2, XY))
    D2_mat = A_mat @ D2_mat
    rank_D2 = np.linalg.matrix_rank(D2_mat)
    print(f"\nThe rank of the double difference matrix D2 is {rank_D2}.\n")
    return D2_mat, rank_D2

make_hessian_mde(hessian_components_mumu, hessian_components_mur)

reconstitute the Hessian of the entropy function from its components

Parameters:

Name Type Description Default
hessian_components_mumu ThreeArrays

the components of the Hesssian wrt \((\mu,\mu)\)

required
hessian_components_mur TwoArrays

the components of the Hesssian wrt \((\mu,r)\)

required

Returns:

Type Description
np.ndarray

np.ndarray: description

Source code in cupid_matching/min_distance_utils.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def make_hessian_mde(
    hessian_components_mumu: ThreeArrays, hessian_components_mur: TwoArrays
) -> np.ndarray:
    """reconstitute the Hessian of the entropy function from its components

    Args:
        hessian_components_mumu:  the components of the Hesssian wrt $(\\mu,\\mu)$
        hessian_components_mur: the components of the Hesssian wrt $(\\mu,r)$

    Returns:
        np.ndarray: _description_
    """
    hessian_mumu = fill_hessianMuMu_from_components(hessian_components_mumu)
    hessian_mur = fill_hessianMuR_from_components(hessian_components_mur)
    hessians_both = np.concatenate((hessian_mumu, hessian_mur), axis=1)
    return cast(np.ndarray, hessians_both)