diff --git a/src/glum/_glm.py b/src/glum/_glm.py index 20c5b3054..66196684d 100644 --- a/src/glum/_glm.py +++ b/src/glum/_glm.py @@ -285,6 +285,9 @@ def _convert_from_pandas( return X + # expose the method as public so predictions can be created outside of the class + convert_from_pandas = _convert_from_pandas + def _set_up_for_fit(self, y: np.ndarray) -> None: ####################################################################### # 1. input validation # diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py index f4e15f8ce..fa542b838 100644 --- a/src/glum/_glm_cv.py +++ b/src/glum/_glm_cv.py @@ -543,7 +543,7 @@ def fit( _stype = ["csc"] else: _stype = ["csc", "csr"] - + def _fit_path( self, train_idx, @@ -571,6 +571,8 @@ def _fit_path( y[test_idx], sample_weight[test_idx], ) + # test weights need to sum to 1 too, else deviance is not properly scaled + w_test /= w_test.sum() if offset is not None: offset_train = offset[train_idx] @@ -667,8 +669,8 @@ def _get_deviance(coef): ) deviance_path_ = [_get_deviance(_coef) for _coef in coef_path_] - return intercept_path_, coef_path_, deviance_path_ - + return intercept_path_, coef_path_, deviance_path_, train_idx + jobs = ( joblib.delayed(_fit_path)( self, @@ -706,6 +708,8 @@ def _get_deviance(coef): (cv.get_n_splits(), len(l1_ratio), len(alphas[0])), ) + self.train_indices_ = [elmt[3] for elmt in paths_data] + avg_deviance = self.deviance_path_.mean(axis=0) # type: ignore best_l1, best_alpha = np.unravel_index(