12
12
13
13
14
14
class PropensityModel (metaclass = ABCMeta ):
15
- def __init__ (self , clip_bounds = (1e-3 , 1 - 1e-3 ), ** model_kwargs ):
15
+ def __init__ (self , clip_bounds = (1e-3 , 1 - 1e-3 ), calibrate = True , ** model_kwargs ):
16
16
"""
17
17
Args:
18
18
clip_bounds (tuple): lower and upper bounds for clipping propensity scores. Bounds should be implemented
19
19
such that: 0 < lower < upper < 1, to avoid division by zero in BaseRLearner.fit_predict() step.
20
+ calibrate (bool): whether to calibrate the propensity score
20
21
model_kwargs: Keyword arguments to be passed to the underlying classification model.
21
22
"""
22
23
self .clip_bounds = clip_bounds
24
+ self .calibrate = calibrate
23
25
self .model_kwargs = model_kwargs
24
26
self .model = self ._model
27
+ self .calibrator = None
25
28
26
29
@property
27
30
@abstractmethod
@@ -40,6 +43,15 @@ def fit(self, X, y):
40
43
y (numpy.ndarray): a binary target vector
41
44
"""
42
45
self .model .fit (X , y )
46
+ if self .calibrate :
47
+ # Fit a calibrator to the propensity scores with IsotonicRegression.
48
+ # Ref: https://scikit-learn.org/stable/modules/isotonic.html
49
+ self .calibrator = IsotonicRegression (
50
+ out_of_bounds = "clip" ,
51
+ y_min = self .clip_bounds [0 ],
52
+ y_max = self .clip_bounds [1 ],
53
+ )
54
+ self .calibrator .fit (self .model .predict_proba (X )[:, 1 ], y )
43
55
44
56
def predict (self , X ):
45
57
"""
@@ -51,7 +63,11 @@ def predict(self, X):
51
63
Returns:
52
64
(numpy.ndarray): Propensity scores between 0 and 1.
53
65
"""
54
- return np .clip (self .model .predict_proba (X )[:, 1 ], * self .clip_bounds )
66
+ p = self .model .predict_proba (X )[:, 1 ]
67
+ if self .calibrate :
68
+ p = self .calibrator .transform (p )
69
+
70
+ return np .clip (p , * self .clip_bounds )
55
71
56
72
def fit_predict (self , X , y ):
57
73
"""
@@ -66,7 +82,6 @@ def fit_predict(self, X, y):
66
82
"""
67
83
self .fit (X , y )
68
84
propensity_scores = self .predict (X )
69
- logger .info ("AUC score: {:.6f}" .format (auc (y , propensity_scores )))
70
85
return propensity_scores
71
86
72
87
@@ -112,12 +127,15 @@ class GradientBoostedPropensityModel(PropensityModel):
112
127
https://xgboost.readthedocs.io/en/latest/python/python_api.html
113
128
"""
114
129
115
- def __init__ (self , early_stop = False , clip_bounds = (1e-3 , 1 - 1e-3 ), ** model_kwargs ):
130
+ def __init__ (
131
+ self ,
132
+ early_stop = False ,
133
+ clip_bounds = (1e-3 , 1 - 1e-3 ),
134
+ calibrate = True ,
135
+ ** model_kwargs ,
136
+ ):
116
137
self .early_stop = early_stop
117
-
118
- super (GradientBoostedPropensityModel , self ).__init__ (
119
- clip_bounds , ** model_kwargs
120
- )
138
+ super ().__init__ (clip_bounds , calibrate , ** model_kwargs )
121
139
122
140
@property
123
141
def _model (self ):
@@ -156,50 +174,25 @@ def fit(self, X, y, stop_val_size=0.2):
156
174
y_train ,
157
175
eval_set = [(X_val , y_val )],
158
176
)
177
+ if self .calibrate :
178
+ self .calibrator = IsotonicRegression (
179
+ out_of_bounds = "clip" ,
180
+ y_min = self .clip_bounds [0 ],
181
+ y_max = self .clip_bounds [1 ],
182
+ )
183
+ self .calibrator .fit (self .model .predict_proba (X )[:, 1 ], y )
159
184
else :
160
- super (GradientBoostedPropensityModel , self ).fit (X , y )
161
-
162
- def predict (self , X ):
163
- """
164
- Predict propensity scores.
165
-
166
- Args:
167
- X (numpy.ndarray): a feature matrix
168
-
169
- Returns:
170
- (numpy.ndarray): Propensity scores between 0 and 1.
171
- """
172
- if self .early_stop :
173
- return np .clip (
174
- self .model .predict_proba (X )[:, 1 ],
175
- * self .clip_bounds ,
176
- )
177
- else :
178
- return super (GradientBoostedPropensityModel , self ).predict (X )
179
-
180
-
181
- def calibrate (ps , treatment ):
182
- """Calibrate propensity scores with IsotonicRegression.
183
-
184
- Ref: https://scikit-learn.org/stable/modules/isotonic.html
185
-
186
- Args:
187
- ps (numpy.array): a propensity score vector
188
- treatment (numpy.array): a binary treatment vector (0: control, 1: treated)
189
-
190
- Returns:
191
- (numpy.array): a calibrated propensity score vector
192
- """
193
-
194
- two_eps = 2.0 * np .finfo (float ).eps
195
- pm_ir = IsotonicRegression (out_of_bounds = "clip" , y_min = two_eps , y_max = 1.0 - two_eps )
196
- ps_ir = pm_ir .fit_transform (ps , treatment )
197
-
198
- return ps_ir
185
+ super ().fit (X , y )
199
186
200
187
201
188
def compute_propensity_score (
202
- X , treatment , p_model = None , X_pred = None , treatment_pred = None , calibrate_p = True
189
+ X ,
190
+ treatment ,
191
+ p_model = None ,
192
+ X_pred = None ,
193
+ treatment_pred = None ,
194
+ calibrate_p = True ,
195
+ clip_bounds = (1e-3 , 1 - 1e-3 ),
203
196
):
204
197
"""Generate propensity score if user didn't provide and optionally calibrate.
205
198
@@ -210,16 +203,20 @@ def compute_propensity_score(
210
203
X_pred (np.matrix, optional): features for prediction
211
204
treatment_pred (np.array or pd.Series, optional): a treatment vector for prediction
212
205
calibrate_p (bool, optional): whether to calibrate the propensity score
206
+ clip_bounds (tuple, optional): lower and upper bounds for clipping propensity scores. Bounds should be implemented
207
+ such that: 0 < lower < upper < 1, to avoid division by zero in BaseRLearner.fit_predict() step.
213
208
214
209
Returns:
215
210
(tuple)
216
211
- p (numpy.ndarray): propensity score
217
- - p_model (PropensityModel): either the original p_model, a trained ElasticNetPropensityModel, or None if calibrate_p=True
212
+ - p_model (PropensityModel): either the original p_model or a trained ElasticNetPropensityModel
218
213
"""
219
214
if treatment_pred is None :
220
215
treatment_pred = treatment .copy ()
221
216
if p_model is None :
222
- p_model = ElasticNetPropensityModel ()
217
+ p_model = ElasticNetPropensityModel (
218
+ clip_bounds = clip_bounds , calibrate = calibrate_p
219
+ )
223
220
224
221
p_model .fit (X , treatment )
225
222
@@ -231,14 +228,4 @@ def compute_propensity_score(
231
228
logger .info ("predict_proba not available, using predict instead" )
232
229
p = p_model .predict (X_pred )
233
230
234
- if calibrate_p :
235
- logger .info ("Calibrating propensity scores. Returning p_model=None." )
236
- p = calibrate (p , treatment_pred )
237
- p_model = None
238
-
239
- # force the p values within the range
240
- eps = np .finfo (float ).eps
241
- p = np .where (p < 0 + eps , 0 + eps * 1.001 , p )
242
- p = np .where (p > 1 - eps , 1 - eps * 1.001 , p )
243
-
244
231
return p , p_model
0 commit comments