`poisson_glm` module¶

Estimates the semilinear homoskedastic Choo and Siow (2006) model using a Poisson GLM.

`choo_siow_poisson_glm(muhat, phi_bases, no_singles=False, tol=1e-12, max_iter=10000, verbose=1)` ¶

Estimates the semilinear homoskedastic Choo and Siow (2006) model using a Poisson GLM.

Parameters:

Name	Type	Description	Default
`muhat`	`Matching`	the observed Matching	required
`phi_bases`	`np.ndarray`	an (X, Y, K) array of bases	required
`no_singles`	`bool`	if True, we do not observe the singles	`False`
`tol`	`float \| None`	tolerance level for `linear_model.PoissonRegressor.fit`	`1e-12`
`max_iter`	`int \| None`	maximum number of iterations for `linear_model.PoissonRegressor.fit`	`10000`
`verbose`	`int \| None`	defines how much output we want (0 = least)	`1`

Returns:

Type	Description
`PoissonGLMResults`	a `PoissonGLMResults` instance

Example

n_households = 1e6
X, Y, K = 4, 3, 6
# we setup a quadratic set of basis functions
phi_bases = np.zeros((X, Y, K))
phi_bases[:, :, 0] = 1
for x in range(X):
    phi_bases[x, :, 1] = x
    phi_bases[x, :, 3] = x * x
    for y in range(Y):
        phi_bases[x, y, 4] = x * y
for y in range(Y):
    phi_bases[:, y, 2] = y
    phi_bases[:, y, 5] = y * y

lambda_true = np.random.randn(K)
phi_bases = np.random.randn(X, Y, K)
Phi = phi_bases @ lambda_true

# we simulate a Choo and Siow sample from a population
#  with equal numbers of men and women of each type
n = np.ones(X)
m = np.ones(Y)
choo_siow_instance = ChooSiowPrimitives(Phi, n, m)
mus_sim = choo_siow_instance.simulate(n_households)
muxy_sim, mux0_sim, mu0y_sim, n_sim, m_sim = mus_sim.unpack()

results = choo_siow_poisson_glm(mus_sim, phi_bases)

# compare true and estimated parameters
results.print_results(
    lambda_true,
    u_true=-np.log(mux0_sim / n_sim),
    v_true=-np.log(mu0y_sim / m_sim)
)

Source code in cupid_matching/poisson_glm.py

def choo_siow_poisson_glm(
    muhat: Matching,
    phi_bases: np.ndarray,
    no_singles: bool = False,
    tol: float | None = 1e-12,
    max_iter: int | None = 10000,
    verbose: int | None = 1,
) -> PoissonGLMResults:
    """Estimates the semilinear homoskedastic Choo and Siow (2006) model
        using a Poisson GLM.

    The function stacks the design matrix `Z` of the Poisson regression
    (columns for the `u` terms, the `v` terms, and the `K` basis functions),
    fits an unpenalized `linear_model.PoissonRegressor` without intercept,
    and computes a sandwich variance-covariance `A^{-1} B A^{-1}`
    for the estimated coefficients.

    Args:
        muhat: the observed Matching
        phi_bases: an (X, Y, K) array of bases
        no_singles: if True, we do not observe the singles
        tol: tolerance level for `linear_model.PoissonRegressor.fit`
        max_iter: maximum number of iterations
            for `linear_model.PoissonRegressor.fit`
        verbose: defines how much output we want (0 = least)

    Returns:
        a `PoissonGLMResults` instance; its `estimated_gamma` holds
        the coefficients exactly as fitted by the regression
        (`u` terms first, then `v` terms, then the `K` coefficients
        of the bases), while `estimated_u` and `estimated_v` are corrected
        for the normalization of the margins.

    Example:
        ```py
        n_households = 1e6
        X, Y, K = 4, 3, 6
        # we setup a quadratic set of basis functions
        phi_bases = np.zeros((X, Y, K))
        phi_bases[:, :, 0] = 1
        for x in range(X):
            phi_bases[x, :, 1] = x
            phi_bases[x, :, 3] = x * x
            for y in range(Y):
                phi_bases[x, y, 4] = x * y
        for y in range(Y):
            phi_bases[:, y, 2] = y
            phi_bases[:, y, 5] = y * y

        lambda_true = np.random.randn(K)
        # NOTE: this replaces the quadratic bases built above with random ones
        phi_bases = np.random.randn(X, Y, K)
        Phi = phi_bases @ lambda_true

        # we simulate a Choo and Siow sample from a population
        #  with equal numbers of men and women of each type
        n = np.ones(X)
        m = np.ones(Y)
        choo_siow_instance = ChooSiowPrimitives(Phi, n, m)
        mus_sim = choo_siow_instance.simulate(n_households)
        muxy_sim, mux0_sim, mu0y_sim, n_sim, m_sim = mus_sim.unpack()

        results = choo_siow_poisson_glm(mus_sim, phi_bases)

        # compare true and estimated parameters
        results.print_results(
            lambda_true,
            u_true=-np.log(mux0_sim / n_sim),
            v_true=-np.log(mu0y_sim / m_sim)
        )
        ```

    """
    X, Y, K = phi_bases.shape
    XY = X * Y

    # the vector of weights for the Poisson regression:
    # 2 for each of the XY rows of couples, 1 for each of the X + Y rows of singles
    w = (
        2 * np.ones(XY)
        if no_singles
        else np.concatenate((2 * np.ones(XY), np.ones(X + Y)))
    )
    # reshape the bases
    phi_mat = make_XY_K_mat(phi_bases)

    id_X = np.eye(X)
    id_Y = np.eye(Y)
    ones_X = np.ones((X, 1))
    ones_Y = np.ones((Y, 1))
    if no_singles:
        Z_unweighted = np.hstack(
            [-np.kron(id_X, ones_Y), -np.kron(ones_X, id_Y), phi_mat]
        )
        # we need to normalize u_1 = 0, so we delete the first column
        Z_unweighted = Z_unweighted[:, 1:]
    else:
        zeros_XK = np.zeros((X, K))
        zeros_YK = np.zeros((Y, K))
        zeros_XY = np.zeros((X, Y))
        zeros_YX = np.zeros((Y, X))
        # rows: the XY couples, then the X single men, then the Y single women
        Z_unweighted = np.vstack(
            [
                np.hstack([-np.kron(id_X, ones_Y), -np.kron(ones_X, id_Y), phi_mat]),
                np.hstack([-id_X, zeros_XY, zeros_XK]),
                np.hstack([zeros_YX, -id_Y, zeros_YK]),
            ]
        )
    w_col = w.reshape((-1, 1))
    Z = Z_unweighted / w_col

    var_muhat = variance_muhat(muhat)
    (
        muhat_norm,
        var_muhat_norm,
        n_households,
        n_individuals,
    ) = prepare_data(muhat, var_muhat, no_singles=no_singles)

    clf = linear_model.PoissonRegressor(
        fit_intercept=False,
        tol=tol,
        verbose=verbose,
        alpha=0,
        max_iter=max_iter,
    )
    if no_singles:
        # only the first XY entries (the couples) are used as observations
        muxyhat_norm = muhat_norm[:XY]
        clf.fit(Z, muxyhat_norm, sample_weight=w)
    else:
        clf.fit(Z, muhat_norm, sample_weight=w)
    gamma_est = clf.coef_

    # we compute the variance-covariance of the estimator
    var_allmus_norm = var_muhat_norm.var_allmus
    var_norm = var_allmus_norm[:XY, :XY] if no_singles else var_allmus_norm
    nr, _ = Z.shape
    exp_Zg = np.exp(Z @ gamma_est).reshape(nr)
    # A_hat = sum_i w_i exp(Z_i gamma) Z_i Z_i'
    A_hat = Z.T @ ((w * exp_Zg).reshape((-1, 1)) * Z)
    # B_hat = sum_{i,j} w_i w_j var_norm[i, j] Z_i Z_j'
    Z_w = w_col * Z
    B_hat = Z_w.T @ var_norm @ Z_w

    A_inv = spla.inv(A_hat)
    varcov_gamma = A_inv @ B_hat @ A_inv
    stderrs_gamma = np.sqrt(np.diag(varcov_gamma))

    # the last K coefficients are those of the basis functions
    beta_est = gamma_est[-K:]
    varcov_beta = varcov_gamma[-K:, -K:]
    beta_std = stderrs_gamma[-K:]
    Phi_est = phi_bases @ beta_est

    # we correct for the effect of the normalization
    _, _, _, n, m = muhat.unpack()
    n_norm = n / n_individuals
    m_norm = m / n_individuals
    if no_singles:
        # normalize u_1 = 0
        n_0 = n_norm[0]
        u_est = np.concatenate(
            (np.zeros(1), gamma_est[: (X - 1)] + np.log(n_norm[1:] / n_0))
        )
        # build a new array: an in-place `+=` on the slice would write
        # through the view into `gamma_est` and corrupt the fitted coefficients
        v_est = gamma_est[(X - 1) : -K] + np.log(m_norm * n_0)
    else:
        u_est = gamma_est[:X] + np.log(n_norm)
        v_est = gamma_est[X:-K] + np.log(m_norm)

    # since u and v are translated from gamma we need to adjust the estimated stderrs
    A_inv_Z = A_inv @ Z_unweighted.T
    if no_singles:
        u_std = _stderrs_u_no_singles(
            varcov_gamma, n_norm, var_muhat_norm, A_inv_Z, X, Y
        )
        v_std = _stderrs_v_no_singles(
            varcov_gamma, m_norm, n_norm, var_muhat_norm, A_inv_Z, X, Y
        )
    else:
        u_std = _stderrs_u(varcov_gamma, n_norm, var_muhat_norm, A_inv_Z, X, Y)
        v_std = _stderrs_v(varcov_gamma, m_norm, var_muhat_norm, A_inv_Z, X, Y)

    results = PoissonGLMResults(
        X=X,
        Y=Y,
        K=K,
        number_households=n_households,
        number_individuals=n_individuals,
        estimated_gamma=gamma_est,
        estimated_Phi=Phi_est,
        estimated_beta=beta_est,
        estimated_u=u_est,
        estimated_v=v_est,
        varcov_gamma=varcov_gamma,
        varcov_beta=varcov_beta,
        stderrs_gamma=stderrs_gamma,
        stderrs_beta=beta_std,
        stderrs_u=u_std,
        stderrs_v=v_std,
    )

    return results

poisson_glm module¶

choo_siow_poisson_glm(muhat, phi_bases, no_singles=False, tol=1e-12, max_iter=10000, verbose=1) ¶

`poisson_glm` module¶

`choo_siow_poisson_glm(muhat, phi_bases, no_singles=False, tol=1e-12, max_iter=10000, verbose=1)` ¶