Updated LR and QR classifiers. Added sklearn import to avoid a static TLS ImportError. Improved code style of SpectralHomogenizer.interpolate_cube() and SpectralHomogenizer.predict(). Fixed a bug that predicted spectral information also for pixels containing nodata in any band (which caused faulty predictions). Fixed a bug that selected only 25 spectra during classifier creation when the maximum angle threshold is automatically set to 0 because there are many well-matching spectra. Updated version info and HISTORY.rst.

Signed-off-by: Daniel Scheffler <[email protected]>
danschef committed Nov 2, 2020
1 parent 633b1b7 commit 27a1613
Showing 6 changed files with 50 additions and 20 deletions.
HISTORY.rst (11 changes: 9 additions, 2 deletions)
@@ -2,13 +2,20 @@
History
=======

-0.8.3 (coming soon)
--------------------
+0.9.0 (2020-11-02)
+------------------

* Replaced deprecated 'source activate' by 'conda activate'.
* Updated installation instructions.
* Revised requirements.
* Added doc, test, lint and dev requirements to optional requirements in setup.py.
+* Updated LR and QR classifiers.
+* Added sklearn import to avoid a static TLS ImportError.
+* Improved code style of SpectralHomogenizer.interpolate_cube() and SpectralHomogenizer.predict().
+* Fixed a bug that predicted spectral information also for pixels containing nodata in any band
+  (which caused faulty predictions).
+* Fixed a bug that selected only 25 spectra during classifier creation when the maximum angle threshold
+  is automatically set to 0 because there are many well-matching spectra.


0.8.2 (2020-10-12)
spechomo/classifier_creation.py (8 changes: 4 additions, 4 deletions)
@@ -728,19 +728,19 @@ def _extract_best_spectra_from_cluster(self, clusterlabel, df_src_spectra_allclu
            max_angle = np.percentile(df_src_spectra.spectral_angle,
                                      int(max_angle.split('%')[0].strip()))

-            tmp = df_src_spectra[df_src_spectra.spectral_angle < max_angle]
+            tmp = df_src_spectra[df_src_spectra.spectral_angle <= max_angle]
            if len(tmp.index) > 10:
                df_src_spectra = tmp
            else:
-                df_src_spectra = df_src_spectra.sort_values(by='spectral_angle').head(25)
-                self.logger.warning('Had to choose spectra with SA up to %.2f degrees for cluster #%s.'
+                df_src_spectra = df_src_spectra.sort_values(by='spectral_angle').head(10)
+                self.logger.warning('Had to choose spectra with SA up to %.1f degrees for cluster #%s.'
                                    % (np.max(df_src_spectra['spectral_angle']), clusterlabel))

        if isinstance(max_distance, str):
            max_distance = np.percentile(df_src_spectra.spectral_distance,
                                         int(max_distance.split('%')[0].strip()))

-            tmp = df_src_spectra[df_src_spectra.spectral_distance < max_distance]
+            tmp = df_src_spectra[df_src_spectra.spectral_distance <= max_distance]
            if len(tmp.index) > 10:
                df_src_spectra = tmp
            else:
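The change from '<' to '<=' above matters when the percentile-derived angle threshold collapses to 0, which happens when many spectra match the cluster center exactly. A minimal sketch of that situation with made-up values (plain numpy/pandas, not the spechomo API; the percentile string parsing is omitted):

    import numpy as np
    import pandas as pd

    # 95 of 100 source spectra have a spectral angle of exactly 0 degrees to the cluster center.
    df_src_spectra = pd.DataFrame({'spectral_angle': [0.0] * 95 + [5.0] * 5})

    max_angle = np.percentile(df_src_spectra.spectral_angle, 90)  # evaluates to 0.0

    strictly_below = df_src_spectra[df_src_spectra.spectral_angle < max_angle]   # empty -> fallback branch
    at_or_below = df_src_spectra[df_src_spectra.spectral_angle <= max_angle]     # 95 spectra -> kept
    print(len(strictly_below), len(at_or_below))  # 0 95

With the strict comparison the selection came back empty, so the fallback branch kept only the 25 (now 10) best-ranked spectra even though 95 of them match perfectly; the inclusive comparison keeps the full set.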
spechomo/prediction.py (39 changes: 29 additions, 10 deletions)
Expand Up @@ -78,15 +78,23 @@ def interpolate_cube(self, arrcube, source_CWLs, target_CWLs, kind='linear'):
assert arrcube is not None,\
'L2B_obj.interpolate_cube_linear expects a numpy array as input. Got %s.' % type(arrcube)

orig_CWLs, target_CWLs = np.array(source_CWLs), np.array(target_CWLs)
orig_CWLs = np.array(source_CWLs)
target_CWLs = np.array(target_CWLs)

self.logger.info(
'Performing spectral homogenization (%s interpolation) with target wavelength positions at %s nm.'
% (kind, ', '.join(np.round(np.array(target_CWLs[:-1]), 1).astype(str)) +
' and %s' % np.round(target_CWLs[-1], 1)))
outarr = interp1d(np.array(orig_CWLs), arrcube, axis=2, kind=kind, fill_value='extrapolate')(target_CWLs)
outarr = \
interp1d(np.array(orig_CWLs),
arrcube,
axis=2,
kind=kind,
fill_value='extrapolate')(target_CWLs)

if np.min(outarr) >= np.iinfo(np.int16).min and \
np.max(outarr) <= np.iinfo(np.int16).max:

if np.min(outarr) >= np.iinfo(np.int16).min and np.max(outarr) <= np.iinfo(np.int16).max:
outarr = outarr.astype(np.int16)
elif np.min(outarr) >= np.iinfo(np.int32).min and np.max(outarr) <= np.iinfo(np.int32).max:
outarr = outarr.astype(np.int32)
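The band-wise interpolation that interpolate_cube() wraps can be pictured with a tiny standalone example (hypothetical wavelengths and a random cube; scipy and numpy only):

    import numpy as np
    from scipy.interpolate import interp1d

    arrcube = np.random.randint(0, 100, (2, 2, 4)).astype(float)  # 2 x 2 pixels, 4 bands (axis 2)
    source_CWLs = np.array([480., 560., 660., 840.])   # assumed source band centers [nm]
    target_CWLs = np.array([490., 665., 842., 2190.])  # assumed target band positions [nm]

    # Linear interpolation along the spectral axis; positions outside the source
    # range (here 2190 nm) are linearly extrapolated.
    outarr = interp1d(source_CWLs, arrcube, axis=2, kind='linear',
                      fill_value='extrapolate')(target_CWLs)

    # Downcast as above if the result fits into the int16 value range.
    if np.min(outarr) >= np.iinfo(np.int16).min and np.max(outarr) <= np.iinfo(np.int16).max:
        outarr = outarr.astype(np.int16)
    print(outarr.shape)  # (2, 2, 4)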
@@ -442,9 +450,13 @@ def predict(self, image, classifier, in_nodataVal=None, out_nodataVal=None, cmap
            geotransform=image.gt, projection=image.prj, nodata=out_nodataVal,
            bandnames=['B%s' % i if len(i) == 2 else 'B0%s' % i for i in classifier.tgt_LBA])

-        if classifier.n_clusters > 1 and self.classif_map.ndim > 2:
-            dist_min, dist_max = self.distance_metrics.min(), self.distance_metrics.max()
-            dist_norm = (self.distance_metrics - dist_min) / (dist_max - dist_min)
+        if classifier.n_clusters > 1 and\
+           self.classif_map.ndim > 2:
+
+            dist_min, dist_max = np.min(self.distance_metrics),\
+                                 np.max(self.distance_metrics)
+            dist_norm = (self.distance_metrics - dist_min) /\
+                        (dist_max - dist_min)
            weights = 1 - dist_norm
        else:
            weights = None
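The weighting above turns the per-pixel distance metrics into weights by min-max scaling over the whole array and inverting, so the best-matching cluster gets the highest weight. A small sketch with synthetic distances (numpy only):

    import numpy as np

    # Synthetic distances of each pixel to its 3 best-matching cluster centers (2 x 2 image).
    distance_metrics = np.array([[[0.1, 0.4, 0.9],
                                  [0.2, 0.2, 0.8]],
                                 [[0.0, 0.5, 1.0],
                                  [0.3, 0.6, 0.6]]])

    dist_min, dist_max = np.min(distance_metrics), np.max(distance_metrics)
    dist_norm = (distance_metrics - dist_min) / (dist_max - dist_min)  # scaled to [0, 1]
    weights = 1 - dist_norm  # small distance -> weight close to 1

    print(weights[0, 0])  # [0.9 0.6 0.1]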
@@ -481,9 +493,15 @@ def predict(self, image, classifier, in_nodataVal=None, out_nodataVal=None, cmap

                # set saturated pixels (exceeding the output data range with respect to the data type) to no-data
                if isinstance(image_predicted.dtype, np.integer):
-                    out_dTMin, out_dTMax = np.iinfo(image_predicted.dtype).min, np.iinfo(image_predicted.dtype).max
-                    if im_tile_pred.min() < out_dTMin or im_tile_pred.max() > out_dTMax:
-                        mask_saturated = np.any(im_tile_pred > out_dTMax | im_tile_pred < out_dTMin, axis=2)
+                    out_dTMin, out_dTMax = np.iinfo(image_predicted.dtype).min,\
+                                           np.iinfo(image_predicted.dtype).max
+
+                    if np.min(im_tile_pred) < out_dTMin or\
+                       np.max(im_tile_pred) > out_dTMax:
+
+                        mask_saturated = np.any(im_tile_pred > out_dTMax |
+                                                im_tile_pred < out_dTMin,
+                                                axis=2)
                        n_saturated_px += np.sum(mask_saturated)
                        im_tile_pred[mask_saturated] = out_nodataVal
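One detail worth noting about the saturation mask: in Python, '|' binds more tightly than the comparison operators, so 'im_tile_pred > out_dTMax | im_tile_pred < out_dTMin' is parsed as a chained comparison against 'out_dTMax | im_tile_pred' rather than as an OR of two boolean masks. A hedged sketch of the presumably intended check, with explicit parentheses (synthetic tile, not the spechomo API):

    import numpy as np

    out_dTMin, out_dTMax = np.iinfo(np.int16).min, np.iinfo(np.int16).max
    im_tile_pred = np.array([[[100., 40000.],
                              [-50000., 0.]]])  # 1 x 2 pixels, 2 bands of float predictions

    # Explicit parentheses keep the two comparisons as independent boolean masks.
    mask_saturated = np.any((im_tile_pred > out_dTMax) | (im_tile_pred < out_dTMin), axis=2)
    print(mask_saturated)  # [[ True  True]]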

@@ -517,7 +535,8 @@ def predict(self, image, classifier, in_nodataVal=None, out_nodataVal=None, cmap

        # re-apply nodata values to predicted result
        if image.nodata is not None:
-            image_predicted[image.mask_nodata[:] == 0] = out_nodataVal
+            mask_nodata = image.calc_mask_nodata(overwrite=True, flag='any')
+            image_predicted[~mask_nodata] = out_nodataVal

        # copy mask_nodata
        image_predicted.mask_nodata = image.mask_nodata
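The replacement above is what fixes the nodata bug from the commit message: the mask is recalculated so that a pixel counts as nodata as soon as any of its bands holds the nodata value, instead of reusing the previously stored mask. The underlying idea, sketched with plain numpy (hypothetical arrays, not the GeoArray API):

    import numpy as np

    in_nodataVal = out_nodataVal = -9999
    image = np.array([[[120, 80], [-9999, 75]],
                      [[90, -9999], [-9999, -9999]]])  # 2 x 2 pixels, 2 bands

    # True where all bands are valid, False as soon as any band is nodata.
    mask_nodata = np.all(image != in_nodataVal, axis=2)
    print(mask_nodata)
    # [[ True False]
    #  [False False]]

    image_predicted = np.zeros_like(image)
    image_predicted[~mask_nodata] = out_nodataVal  # reset every affected pixel in all bands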
spechomo/utils.py (6 changes: 4 additions, 2 deletions)
@@ -220,8 +220,10 @@ def export_classifiers_as_JSON(export_rootDir,
def download_pretrained_classifiers(method, tgt_dir=options['classifiers']['rootdir']):
    remote_filespecs = {
        '100k_conservrsp_SCA_SD100percSA90perc_without_aviris__SCADist90pSAM40p': {
-            'LR': 'https://nextcloud.gfz-potsdam.de/s/Rzb75kckBreFfNE/download',
-            'QR': 'https://nextcloud.gfz-potsdam.de/s/Kk4zoCXxAEkAFZL/download',
+            # 'LR': 'https://nextcloud.gfz-potsdam.de/s/Rzb75kckBreFfNE/download',  # 20201008
+            'LR': 'https://nextcloud.gfz-potsdam.de/s/mZEnS5g7AGWyRHB/download',
+            # 'QR': 'https://nextcloud.gfz-potsdam.de/s/Kk4zoCXxAEkAFZL/download',  # 20201008
+            'QR': 'https://nextcloud.gfz-potsdam.de/s/JcQDbZBtTiw9NYi/download',
        }
    }
    clf_name = options['classifiers']['name']
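Each entry in remote_filespecs maps a regressor type ('LR', 'QR') to a Nextcloud download link for the corresponding pretrained classifier collection. A minimal, hedged sketch of how such a mapping could be fetched into a target directory (standard library only; spechomo's actual download and unpacking logic may differ, and the output file name is made up):

    import os
    from urllib.request import urlretrieve

    remote_filespecs = {
        '100k_conservrsp_SCA_SD100percSA90perc_without_aviris__SCADist90pSAM40p': {
            'LR': 'https://nextcloud.gfz-potsdam.de/s/mZEnS5g7AGWyRHB/download',
            'QR': 'https://nextcloud.gfz-potsdam.de/s/JcQDbZBtTiw9NYi/download',
        }
    }

    def fetch_classifiers(method, clf_name, tgt_dir):
        """Download the pretrained classifier archive for 'LR' or 'QR' to tgt_dir."""
        os.makedirs(tgt_dir, exist_ok=True)
        outpath = os.path.join(tgt_dir, '%s_%s.dill' % (clf_name, method))  # hypothetical file name
        urlretrieve(remote_filespecs[clf_name][method], outpath)
        return outpath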
spechomo/version.py (4 changes: 2 additions, 2 deletions)
@@ -24,5 +24,5 @@
# You should have received a copy of the GNU Lesser General Public License along
# with this program. If not, see <http://www.gnu.org/licenses/>.

-__version__ = '0.8.2'
-__versionalias__ = '20201012.01'
+__version__ = '0.9.0'
+__versionalias__ = '20201102.01'
tests/__init__.py (2 changes: 2 additions, 0 deletions)
@@ -25,3 +25,5 @@
# with this program. If not, see <http://www.gnu.org/licenses/>.

"""Unit test package for spechomo."""
+
+import sklearn  # noqa  # avoids a static TLS ImportError during runtime of SICOR (when importing sklearn there)
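The early sklearn import is a known workaround for a glibc limitation: if many shared libraries that use static TLS are loaded first, a later scikit-learn import can fail with an error like 'dlopen: cannot load any more object with static TLS'. Importing sklearn when the test package is imported reserves its TLS slot before other compiled extensions are loaded. An illustrative sketch of the intended import order (the other imports are only placeholders):

    # illustrative only: import order inside a test module
    import sklearn  # noqa: F401  # imported first, before other compiled extensions
    import numpy as np            # later imports of compiled libraries then load cleanly

    def test_imports_work():
        assert sklearn.__version__ and np.__version__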
