Skip to content

sklearn_utils module

Contains Lasso scikit-learn utility programs:

  • skl_npreg_lasso: Lasso regression on polynomial interactions of the covariates
  • plot_lasso_path: plots the Lasso coefficient paths.

plot_lasso_path(y, X, eps=0.001, *, standardize=True, ax=None)

Compute and plot the Lasso regularization path.

Parameters:

Name Type Description Default
y ndarray

Response vector of shape (n_obs,) (flattened automatically if necessary).

required
X ndarray

Feature matrix of shape (n_obs, n_features); 1-D inputs are reshaped.

required
eps float

Path length parameter passed to sklearn.linear_model.lasso_path.

0.001
standardize bool

When True (default) the predictors are standardized and the response is mean-centered before computing the path.

True
ax Axes | None

Optional Matplotlib Axes on which to draw; when None a new axes is created.

None

Returns:

Type Description
tuple[ndarray, ndarray, Axes]

A tuple (alphas, coefficients, axes) where alphas are the regularization strengths, coefficients has shape (n_features, n_alphas), and axes is the Matplotlib axes used for plotting.

Source code in bs_python_utils/sklearn_utils.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def plot_lasso_path(
    y: np.ndarray,
    X: np.ndarray,
    eps: float = 1e-3,
    *,
    standardize: bool = True,
    ax: Axes | None = None,
) -> tuple[np.ndarray, np.ndarray, Axes]:
    """Trace the Lasso coefficient paths and draw them against ``-log10(alpha)``.

    Args:
        y: Response array; anything that is not 1-D is flattened with ``reshape(-1)``.
        X: Predictor array; a 1-D input is turned into a single column.
        eps: Forwarded unchanged as the ``eps`` argument of ``lasso_path``.
        standardize: If ``True`` (the default), ``X`` goes through
            ``StandardScaler().fit_transform`` and the mean of ``y`` is subtracted
            before the path is computed. If ``False`` both are used as given.
        ax: Axes to draw on. When ``None``, one is created with ``plt.subplots()``.

    Returns:
        ``(alphas, coefficients, axes)``: the first two values returned by
        ``lasso_path`` followed by the axes that received the plot.
    """
    # Normalise the input shapes: y as a flat vector, X as a 2-D matrix.
    response = y if y.ndim == 1 else y.reshape(-1)
    features = X.reshape(-1, 1) if X.ndim == 1 else X

    if standardize:
        features = StandardScaler().fit_transform(features)
        response = response - np.mean(response)

    alphas, coef_paths, _ = lasso_path(features, response, eps=eps)
    # Horizontal coordinate of every curve: minus the base-10 log of alpha.
    x_values = -np.log10(alphas)

    if ax is None:
        ax = plt.subplots()[1]

    # One curve per row of the coefficient array; colours repeat after seven rows.
    palette = cycle(["b", "r", "g", "c", "k", "m", "y"])
    for path in coef_paths:
        ax.plot(x_values, path, c=next(palette))

    ax.set_title("Lasso Paths")
    ax.set_xlabel("-Log(alpha)")
    ax.set_ylabel("coefficients")
    ax.axis("tight")

    return alphas, coef_paths, ax

skl_npreg_lasso(y, X, alpha, degree=4, *, include_bias=False, return_model=False, lasso_kwargs=None)

Fit a polynomial Lasso regression with standard preprocessing.

Inputs are reshaped if necessary and passed through a StandardScaler followed by PolynomialFeatures and Lasso. Extra keyword arguments can be forwarded to the Lasso estimator, and the fitted pipeline can optionally be returned.

Parameters:

Name Type Description Default
y ndarray

Response vector of shape (n_obs,) (flattened automatically if needed).

required
X ndarray

Feature matrix of shape (n_obs, n_features); 1-D inputs are reshaped.

required
alpha float

Lasso penalty parameter.

required
degree int

Total polynomial degree for PolynomialFeatures.

4
include_bias bool

When True, keep the bias column in the polynomial design matrix.

False
return_model bool

If True, also return the fitted scikit-learn Pipeline.

False
lasso_kwargs dict[str, Any] | None

Extra keyword arguments forwarded to sklearn.linear_model.Lasso.

None

Returns:

Type Description
ndarray | tuple[ndarray, Pipeline]

Either the fitted values E[y | X] (shape (n_obs,)) or a tuple (fitted, pipeline) when return_model is True.

Source code in bs_python_utils/sklearn_utils.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def skl_npreg_lasso(
    y: np.ndarray,
    X: np.ndarray,
    alpha: float,
    degree: int = 4,
    *,
    include_bias: bool = False,
    return_model: bool = False,
    lasso_kwargs: dict[str, Any] | None = None,
) -> np.ndarray | tuple[np.ndarray, Pipeline]:
    """Run a Lasso regression on a polynomial expansion of scaled covariates.

    A three-stage ``Pipeline`` (``StandardScaler`` -> ``PolynomialFeatures`` ->
    ``Lasso``) is fitted on ``(X, y)`` and then used to predict on the same ``X``.

    Args:
        y: Response array; anything that is not 1-D is flattened with ``reshape(-1)``.
        X: Predictor array; a 1-D input is turned into a single column.
        alpha: Passed as ``alpha`` to ``Lasso``.
        degree: Passed as ``degree`` to ``PolynomialFeatures``.
        include_bias: Passed as ``include_bias`` to ``PolynomialFeatures``.
        return_model: When ``True``, the fitted ``Pipeline`` is returned as well.
        lasso_kwargs: Additional keyword arguments for ``Lasso``. ``max_iter`` is
            set to ``10_000`` unless it is supplied here. The caller's mapping is
            not modified.

    Returns:
        The in-sample predictions, or ``(predictions, pipeline)`` when
        ``return_model`` is ``True``.
    """
    # Normalise the input shapes: y as a flat vector, X as a 2-D matrix.
    target = y if y.ndim == 1 else y.reshape(-1)
    design = X.reshape(-1, 1) if X.ndim == 1 else X

    # Start from our defaults and let caller-supplied options override them.
    lasso_options: dict[str, Any] = {"max_iter": 10_000}
    if lasso_kwargs is not None:
        lasso_options.update(lasso_kwargs)

    scaling_step = ("scaler", StandardScaler())
    expansion_step = (
        "poly",
        PolynomialFeatures(degree=degree, include_bias=include_bias),
    )
    regression_step = ("lasso", Lasso(alpha=alpha, **lasso_options))
    pipeline = Pipeline(steps=[scaling_step, expansion_step, regression_step])

    pipeline.fit(design, target)
    predictions = cast(np.ndarray, pipeline.predict(design))

    return (predictions, pipeline) if return_model else predictions