Skip to content

poisson_glm_utils module

Utilities for Poisson GLM.

PoissonGLMResults dataclass

Stores and formats the estimation results.

Parameters:

Name Type Description Default
X int

int

required
Y int

int

required
K int

int

required
number_households int

int

required
number_individuals int

int

required
estimated_gamma np.ndarray

np.ndarray

required
varcov_gamma np.ndarray

np.ndarray

required
stderrs_gamma np.ndarray

np.ndarray

required
estimated_beta np.ndarray

np.ndarray

required
estimated_u np.ndarray

np.ndarray

required
estimated_v np.ndarray

np.ndarray

required
varcov_beta np.ndarray

np.ndarray

required
stderrs_beta np.ndarray

np.ndarray

required
stderrs_u np.ndarray

np.ndarray

required
stderrs_v np.ndarray

np.ndarray

required
estimated_Phi np.ndarray

np.ndarray

required
Source code in cupid_matching/poisson_glm_utils.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
@dataclass
class PoissonGLMResults:
    """Stores and formats the estimation results.

    Args:
        X: int
        Y: int
        K: int
        number_households: int
        number_individuals: int
        estimated_gamma: np.ndarray
        varcov_gamma: np.ndarray
        stderrs_gamma: np.ndarray
        estimated_beta: np.ndarray
        estimated_u: np.ndarray
        estimated_v: np.ndarray
        varcov_beta: np.ndarray
        stderrs_beta: np.ndarray
        stderrs_u: np.ndarray
        stderrs_v: np.ndarray
        estimated_Phi: np.ndarray
    """

    X: int
    Y: int
    K: int
    number_households: int
    number_individuals: int
    estimated_gamma: np.ndarray
    varcov_gamma: np.ndarray
    stderrs_gamma: np.ndarray
    estimated_beta: np.ndarray
    varcov_beta: np.ndarray
    estimated_u: np.ndarray
    estimated_v: np.ndarray
    stderrs_beta: np.ndarray
    stderrs_u: np.ndarray
    stderrs_v: np.ndarray
    estimated_Phi: np.ndarray

    def __str__(self):
        line_stars = "*" * 80 + "\n"
        print_stars("Estimating a Choo and Siow model by Poisson GLM.")
        model_str = f"The data has {self.number_households} households\n\n"
        model_str += f"We use {self.K} basis functions.\n\n"
        repr_str = line_stars + model_str
        repr_str += (
            "The estimated basis coefficients (and their standard errors) are\n\n"
        )
        for i in range(self.K):
            repr_str += (
                f"   base_{i + 1}: {self.estimated_beta[i]: > 10.3f}  "
                + f"({self.stderrs_beta[i]: .3f})\n"
            )
        repr_str += "The estimated utilities of men (and their standard errors) are\n\n"
        for i in range(self.X):
            repr_str += (
                f"   u_{i + 1}: {self.estimated_u[i]: > 10.3f}  "
                + f"({self.stderrs_u[i]: .3f})\n"
            )
        repr_str += (
            "The estimated utilities of women (and their standard errors) are\n\n"
        )
        for i in range(self.Y):
            repr_str += (
                f"   v {i + 1}: {self.estimated_v[i]: > 10.3f}  "
                + f"({self.stderrs_v[i]: .3f})\n"
            )
        return repr_str + line_stars

    def print_results(
        self,
        lambda_true: np.ndarray | None = None,
        u_true: np.ndarray | None = None,
        v_true: np.ndarray | None = None,
    ) -> float | None:
        estimates_beta = self.estimated_beta
        stderrs_beta = self.stderrs_beta

        if lambda_true is None:
            repr_str = "The  estimated coefficients "
            repr_str += "(and their standard errors) are\n\n"
            for i, coeff in enumerate(estimates_beta):
                repr_str += f" {coeff: > 10.3f}  ({stderrs_beta[i]: > 10.3f})\n"
            print_stars(repr_str)
        else:
            repr_str = "The  true and estimated coefficients "
            repr_str += "(and their standard errors) are\n\n"
            for i, coeff in enumerate(estimates_beta):
                repr_str += f"   base {i + 1}: {lambda_true[i]: > 10.3f} "
                repr_str += f" {coeff: > 10.3f}  ({stderrs_beta[i]: > 10.3f})\n"
            print_stars(repr_str)

        self.report_utilities("men", u_true)
        self.report_utilities("women", v_true)

        if lambda_true is None:
            return None
        else:
            discrepancy = npmaxabs(lambda_true - estimates_beta)
            print_stars(
                "The largest difference between true and estimated coefficients is"
                f" {discrepancy: .2e}"
            )
            return cast(float, discrepancy)

    def report_utilities(self, gender: str, utils_true: np.ndarray | None) -> None:
        if gender not in ["men", "women"]:
            bs_error_abort(f"gender can only be 'men' or 'women', not {gender}")
        utils_estimates = self.estimated_u if gender == "men" else self.estimated_v
        utils_stderrs = self.stderrs_u if gender == "men" else self.stderrs_v
        util_prefix = "u" if gender == "men" else "v"
        if utils_true is None:
            repr_str = f"The estimated utilities for {gender} "
            repr_str += "(and their standard errors) are:\n\n"
            for i, coeff in enumerate(utils_estimates):
                repr_str += f"   {util_prefix}_{i + 1}: "
                repr_str += f" {coeff: > 10.3f}  ({utils_stderrs[i]: > 10.3f})\n"
            print_stars(repr_str)
        else:
            repr_str = f"The true and estimated utilities for {gender} "
            repr_str += "(and their standard errors) are:\n\n"
            for i, coeff in enumerate(utils_estimates):
                repr_str += f"   {util_prefix}_{i + 1}: {utils_true[i]: > 10.3f} "
                repr_str += f" {coeff: > 10.3f}  ({utils_stderrs[i]: > 10.3f})\n"
            print_stars(repr_str)

prepare_data(muhat, var_muhat, no_singles=False)

Normalizes the matching patterns and stacks them. We rescale the data so that the total number of individuals is one.

Parameters:

Name Type Description Default
muhat Matching

the observed Matching

required
var_muhat VarianceMatching

the variance-covariance object for the observed matching

required
no_singles bool

if True, we do not observe singles

False

Returns:

Type Description
np.ndarray

the stacked, normalized muxy, mux0, mu0y (the latter two are zero if no_singles)

VarianceMatching

the corresponding variance-covariance matrix

int

the number of households

int

the number of individuals

Source code in cupid_matching/poisson_glm_utils.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def prepare_data(
    muhat: Matching,
    var_muhat: VarianceMatching,
    no_singles: bool = False,
) -> tuple[np.ndarray, VarianceMatching, int, int]:
    """Normalizes the matching patterns and stacks them.
    We rescale the data so that the total number of individuals is one.

    Args:
        muhat: the observed Matching
        var_muhat: the variance-covariance object for the observed matching
        no_singles: if True, we do not observe singles

    Returns:
        the stacked, normalized `muxy, mux0, mu0y` (the latter two are zero if `no_singles`)
        the corresponding variance-covariance matrix
        the number of households
        the number of individuals
    """
    muxy, mux0, mu0y, *_ = muhat.unpack()
    n_couples = np.sum(muxy)
    if no_singles:
        mux0 = np.zeros(mux0.shape)
        mu0y = np.zeros(mu0y.shape)
    n_households = n_couples + np.sum(mux0) + np.sum(mu0y)
    n_individuals = n_households + n_couples
    # rescale the data so that the total number of individuals is one
    muhat_norm = np.concatenate([muxy.flatten(), mux0, mu0y]) / n_individuals
    n_indivs2 = n_individuals * n_individuals
    var_muhat_norm = var_divide(var_muhat, n_indivs2)
    return (
        muhat_norm,
        var_muhat_norm,
        n_households,
        n_individuals,
    )