Skip to content

sklearn_utils module

Contains Lasso scikit-learn utility programs:

  • skl_npreg_lasso: Lasso regression on polynomial interactions of the covariates
  • plot_lasso_path: plots the Lasso coefficient paths.

plot_lasso_path(y, X, eps=0.001)

plot Lasso coefficient paths

Parameters:

Name Type Description Default
y ndarray

shape (nobs)

required
X ndarray

shape (nobs, nfeatures)

required
eps float

length of the path: the ratio alpha_min / alpha_max between the smallest and the largest penalty on the grid

0.001

Returns:

Type Description
None

nothing is returned; the coefficient paths are drawn and displayed.

Source code in bs_python_utils/sklearn_utils.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def plot_lasso_path(y: np.ndarray, X: np.ndarray, eps: float = 1e-3) -> None:
    """
    plot Lasso coefficient paths

    Computes the Lasso regularization path of `y` on `X` with `lasso_path`,
    then draws one curve per row of the returned coefficient array against
    `-log10(alpha)` and displays the figure.

    Args:
        y:  shape `(nobs)`
        X: shape  `(nobs, nfeatures)`
        eps: length of path; forwarded to `lasso_path` as its `eps` argument

    Returns:
        nothing; plots the paths.
    """
    # Compute paths
    print("Computing regularization path using the lasso...")
    # `eps` is passed by keyword: it is a keyword-only argument of
    # `lasso_path` in current scikit-learn, so a positional value fails.
    alphas_lasso, coefs_lasso, _ = lasso_path(X, y, eps=eps)

    plt.clf()
    # Display results
    plt.figure(1)
    colors = cycle(["b", "r", "g", "c", "k"])
    neg_log_alphas_lasso = -np.log10(alphas_lasso)
    # `colors` never ends, so the zip must stop at the last coefficient row;
    # `strict=True` would raise ValueError once `coefs_lasso` is exhausted.
    for coef_l, c in zip(coefs_lasso, colors, strict=False):
        plt.plot(neg_log_alphas_lasso, coef_l, c=c)

    plt.xlabel("-Log(alpha)")
    plt.ylabel("coefficients")
    plt.title("Lasso Paths")
    plt.axis("tight")

    plt.show()

skl_npreg_lasso(y, X, alpha, degree=4)

Lasso nonparametric regression of y over polynomials of X

Parameters:

Name Type Description Default
y ndarray

shape (nobs)

required
X ndarray

shape (nobs, nfeatures)

required
alpha float

Lasso penalty parameter

required
degree int

highest total degree

4

Returns:

Type Description
ndarray

the (nobs) array of fitted values E(y | X) over the sample

Source code in bs_python_utils/sklearn_utils.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def skl_npreg_lasso(
    y: np.ndarray, X: np.ndarray, alpha: float, degree: int = 4
) -> np.ndarray:
    """
    Fit a Lasso regression of `y` on polynomial terms of the standardized `X`
    and return the in-sample predictions.

    Args:
        y:  shape `(nobs)`
        X: shape  `(nobs, nfeatures)`
        alpha:  Lasso penalty parameter
        degree: highest total degree of the polynomial terms

    Returns:
        the `(nobs)` array `E(y\\vert X)` over the sample
    """
    # standardize the covariates before expanding them
    standardized = StandardScaler().fit(X).transform(X)

    # expand into polynomial terms up to the requested degree
    poly_terms = PolynomialFeatures(degree).fit_transform(standardized)

    # fit the penalized regression and predict on the same sample
    lasso_model = Lasso(alpha=alpha)
    lasso_model.fit(poly_terms, y)
    fitted = lasso_model.predict(poly_terms)
    return cast(np.ndarray, fitted)