
[WIP] Feature/111/pipeline functions #157

Merged · 58 commits · Jun 4, 2019
Commits (58)
9cba46d
Migration of file used in tallamjr/plasticc repo
tallamjr May 8, 2019
dd8277d
Minor linting improvements + comments
tallamjr May 8, 2019
520150d
Changing mode of file
tallamjr May 8, 2019
2acd69a
Renaming functions to be in line with code style
tallamjr May 10, 2019
c898f96
Tidying up file and renaming function names
tallamjr May 14, 2019
660519a
Change mode of run_plasticc_pipeline file to 744
tallamjr May 14, 2019
906d95e
Updating create_folder_structure function
tallamjr May 14, 2019
e40a785
Removing options in config to be in script instead
tallamjr May 14, 2019
cc84318
Moving old utils files to an archival folder
tallamjr May 14, 2019
edfd84d
Tidy up import block
tallamjr May 14, 2019
98e8800
[WIP] Updating functions in pipeline script
tallamjr May 14, 2019
2992782
[WIP] Further updates to pipeline script
tallamjr May 14, 2019
8bd050f
Modifying file structure inside utils directory
tallamjr May 14, 2019
3e85f5b
Updating configuration file
tallamjr May 14, 2019
4179b2c
Append git hash to analysis name
tallamjr May 15, 2019
32eb2eb
Updating variable names to be consistent
tallamjr May 17, 2019
cdf659d
Updating new var name to be consistent with gps.py
tallamjr May 17, 2019
5fb499e
Reducing number of PCA components
tallamjr May 18, 2019
8bb380c
Adding None return if key-value not found
tallamjr May 18, 2019
429355b
Removing unnecessary print statements
tallamjr May 18, 2019
f84d22b
Adding timestamp helper function
tallamjr May 18, 2019
be45b59
Fixes Type error: can't concat str to bytes
tallamjr May 18, 2019
fa0373e
Updating path to features directory for wavelets
tallamjr May 20, 2019
d16bc3e
Fixing spelling error for 'Principal' in PCA
tallamjr May 20, 2019
699b902
Converting wavelet features to pandas dataframe
tallamjr May 20, 2019
27a0f6e
Updating confusion matrix functions
tallamjr May 20, 2019
2b19bab
Updates made to 'create_classifier' functions
tallamjr May 20, 2019
a267363
Save SHA and timestamp inside copy of config file
tallamjr May 21, 2019
3ad79b4
Remove unused function argument
tallamjr May 21, 2019
9c8d870
Updating docstrings
tallamjr May 21, 2019
d2dd843
Adding _to_pandas() helper functions
tallamjr May 21, 2019
0e4fe56
Adding roc/auc metrics to create_classifier()
tallamjr May 21, 2019
83f91f4
Fixing error of no new folder being created
tallamjr May 21, 2019
c5593d5
Updating gitignore
tallamjr May 21, 2019
382a251
Updating save_configuration_file function
tallamjr May 22, 2019
8ad52aa
Adding option to save wavelet features to disk
tallamjr May 22, 2019
98867b9
Adding option to restart from saved wavelets
tallamjr May 22, 2019
80971f2
Moving restart option to its own function call
tallamjr May 22, 2019
42486c8
Return wavelet_components as a pandas DataFrame
tallamjr May 22, 2019
d50ec44
Rearrange imports to be PEP8 compliant
tallamjr May 22, 2019
ae8d50e
Updating variable name
tallamjr May 22, 2019
89e3bf5
Changing file that logs parameters to append mode
tallamjr May 22, 2019
91d84a6
This will open file for reading/writing (updating)
tallamjr May 22, 2019
7e281e2
Fixing typo in saving and reading pickled df
tallamjr May 22, 2019
e32f65e
Including 'imbalanced-learn' package as dependency
tallamjr May 29, 2019
47f6125
Return figure as well as confusion matrix from func
tallamjr May 29, 2019
3242bf3
Adding functionality to rebalance classes
tallamjr May 29, 2019
cecb4ac
Fix a path bug
Catarina-Alves May 29, 2019
080434b
Fix a method call
Catarina-Alves May 29, 2019
2c86dc7
Updating variable name, ncomp --> number_comp
tallamjr Jun 2, 2019
0eb5572
[FIXUP] Updating variable name, ncomp
tallamjr Jun 2, 2019
d07bbdc
Adding 'get_directories()' function
tallamjr Jun 2, 2019
854ebc2
[FIXUP] Adding debug print statement
tallamjr Jun 3, 2019
d32ca95
Updating docstrings
tallamjr Jun 3, 2019
89caf3c
Updating variable name, dirs --> directories
tallamjr Jun 3, 2019
33cffea
Fixing version of sncosmo for debug checks
tallamjr Jun 4, 2019
13fb8b6
Save the balancing method and the number of PCA components used for t…
Catarina-Alves Jun 4, 2019
c251337
Bump version 1.3.2 --> 1.4.0
tallamjr Jun 4, 2019
3 changes: 3 additions & 0 deletions .gitignore
@@ -2,6 +2,9 @@
test/*
!test/*.py

# Do not track log files in utils
utils/*stdout.txt

## Python.gitignore from Github.
##
# Byte-compiled / optimized / DLL files
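The new `.gitignore` rule keeps per-run log files under `utils/` out of version control. Its wildcard behaves much like shell-style matching, which can be sketched with Python's `fnmatch` (note this is only an approximation: git's `*` does not cross `/` separators, while `fnmatch`'s does):

```python
from fnmatch import fnmatch

# The new rule: any file under utils/ whose name ends in "stdout.txt"
pattern = "utils/*stdout.txt"

print(fnmatch("utils/run_stdout.txt", pattern))  # matched, so git would ignore it
print(fnmatch("utils/helpers.py", pattern))      # not matched, so still tracked
```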
5 changes: 3 additions & 2 deletions environment.yml
@@ -11,17 +11,18 @@ dependencies:
- jupyter>=1.0.0
- matplotlib>=1.5.1
- numpy=1.12.0
- scikit-learn=0.18.1
- scikit-learn>=0.20
- scipy>=0.17.0
- george>=0.3.0
- iminuit>=1.2
- pandas>=0.23.0
- extinction>=0.3.0
- imbalanced-learn>=0.4.3

- pip:
- emcee>=2.1.0
- numpydoc>=0.6.0
- pywavelets>=0.4.0
- sncosmo>=1.3.0
- sncosmo==1.7.1
- nose>=1.3.7
- future>=0.16
6 changes: 3 additions & 3 deletions snmachine/gps.py
@@ -56,7 +56,7 @@ def compute_gps(dataset, number_gp, t_min, t_max, kernel_param=[500., 20.], outp
output_root : {None, str}, optional
If None, don't save anything. If str, it is the output directory, so save the flux and error estimates and used kernels there.
number_processes : int, optional
Number of processors to use for parallelisation (shared memory only). By default `nprocesses` = 1.
Number of processors to use for parallelisation (shared memory only). By default `number_processes` = 1.
gp_algo : str, optional
which gp package is used for the Gaussian Process Regression, GaPP or george
"""
@@ -148,7 +148,7 @@ def _compute_gps_parallel(dataset, number_gp, t_min, t_max, kernel_param, output
output_root : {None, str}, optional
If None, don't save anything. If str, it is the output directory, so save the flux and error estimates and used kernels there.
number_processes : int, optional
Number of processors to use for parallelisation (shared memory only). By default `nprocesses` = 1.
Number of processors to use for parallelisation (shared memory only). By default `number_processes` = 1.
gp_algo : str, optional
which gp package is used for the Gaussian Process Regression, GaPP or george
"""
@@ -413,4 +413,4 @@ def get_kernel(kernel_name, kernel_param):
elif kernel_name == 'ExpSquared+ExpSine2':
kExpSine2 = kernel_param[4]*george.kernels.ExpSine2Kernel(gamma=kernel_param[5],log_period=kernel_param[6])
kernel = kExpSquared + kExpSine2
return kernel
return kernel
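`get_kernel` builds the composite `'ExpSquared+ExpSine2'` covariance by adding two george kernels. A minimal numerical sketch of that combination, assuming the standard forms of the squared-exponential and exp-sine-squared kernels (pure Python, not george's actual API):

```python
import math

def exp_squared(r, amp, metric):
    # k(r) = amp * exp(-r^2 / (2 * metric)): standard squared-exponential form
    return amp * math.exp(-r**2 / (2.0 * metric))

def exp_sine2(r, amp, gamma, log_period):
    # k(r) = amp * exp(-gamma * sin^2(pi * r / period)): periodic term,
    # matching the (gamma, log_period) parameterisation used in get_kernel
    period = math.exp(log_period)
    return amp * math.exp(-gamma * math.sin(math.pi * r / period) ** 2)

def composite(r, p):
    # Mirrors `kernel = kExpSquared + kExpSine2` with the same index layout
    # as kernel_param in get_kernel (indices 0-1 and 4-6)
    return exp_squared(r, p[0], p[1]) + exp_sine2(r, p[4], p[5], p[6])

params = [1.0, 500.0, 0.0, 0.0, 1.0, 2.0, math.log(20.0)]
print(composite(0.0, params))  # at r = 0 each kernel equals its amplitude -> 2.0
```

The parameter names and functional forms here are illustrative; the real implementation delegates entirely to `george.kernels`.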
2 changes: 1 addition & 1 deletion snmachine/snaugment.py
@@ -123,7 +123,7 @@ def extract_proxy_features(self,peak_filter='desr',nproc=1,fit_salt2=False,salt2
#tf=snfeatures.TemplateFeatures(sampler='leastsq')
tf=snfeatures.TemplateFeatures(sampler=sampler)
if salt2feats is None:
salt2feats=tf.extract_features(self.dataset,nprocesses=nproc,use_redshift=fix_redshift)
salt2feats=tf.extract_features(self.dataset,number_processes=nproc,use_redshift=fix_redshift)

#fit models and extract r-peakmags
peaklogflux=[]
10 changes: 5 additions & 5 deletions snmachine/snclassifier.py
100755 → 100644
@@ -608,7 +608,7 @@ def __call_classifier(classifier, X_train, y_train, X_test, param_dict, return_c


def run_pipeline(features, types, output_name='', columns=[], classifiers=['nb', 'knn', 'svm', 'neural_network', 'boost_dt'],
training_set=0.3, param_dict={}, nprocesses=1, scale=True,
training_set=0.3, param_dict={}, number_processes=1, scale=True,
plot_roc_curve=True, return_classifier=False, classifiers_for_cm_plots=[],
type_dict=None, seed=1234):
"""
@@ -632,7 +632,7 @@ def run_pipeline(features, types, output_name='', columns=[], classifiers=['nb',
the ID's of the objects to be used
param_dict : dict, optional
Use to run different ranges of hyperparameters for the classifiers when optimising
nprocesses : int, optional
number_processes : int, optional
Number of processors for multiprocessing (shared memory only). Each classifier will then be run in parallel.
scale : bool, optional
Rescale features using sklearn's preprocessing Scalar class (highly recommended this is True)
@@ -707,15 +707,15 @@ def run_pipeline(features, types, output_name='', columns=[], classifiers=['nb',
probabilities = {}
classifier_objects = {}

if nprocesses > 1 and return_classifier:
if number_processes > 1 and return_classifier:
print("Due to limitations with python's multiprocessing module, classifier objects cannot be returned if " \
"multiple processors are used. Continuing serially...")
print()

if nprocesses > 1 and not return_classifier:
if number_processes > 1 and not return_classifier:
partial_func=partial(__call_classifier, X_train=X_train, y_train=y_train, X_test=X_test,
param_dict=param_dict, return_classifier=False)
p = Pool(nprocesses, maxtasksperchild=1)
p = Pool(number_processes, maxtasksperchild=1)
result = p.map(partial_func, classifiers)

for i in range(len(result)):
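When `number_processes > 1`, `run_pipeline` fixes the shared training data with `functools.partial` and maps the remaining classifier names over a process pool. A minimal sketch of that dispatch pattern with a stand-in worker (a thread-backed pool is used here so the sketch runs anywhere; the worker and its return value are illustrative, not snmachine's API):

```python
from functools import partial
from multiprocessing.dummy import Pool  # thread-backed Pool with the same map API

def call_classifier(name, X_train, y_train):
    # Stand-in for snclassifier's __call_classifier: "train" classifier `name`
    return (name, len(X_train), len(y_train))

X_train, y_train = [[0.1, 0.2], [0.3, 0.4]], [0, 1]
classifiers = ["nb", "knn", "svm"]

# Bind the shared arguments once, then map over classifier names,
# mirroring the partial_func / p.map calls in run_pipeline
partial_func = partial(call_classifier, X_train=X_train, y_train=y_train)
with Pool(2) as p:
    results = p.map(partial_func, classifiers)

print(results)  # [('nb', 2, 2), ('knn', 2, 2), ('svm', 2, 2)]
```

The real pipeline uses `multiprocessing.Pool(number_processes, maxtasksperchild=1)`, which is why classifier objects cannot be returned in parallel mode: they would have to be pickled back from worker processes.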