diff --git a/pipelines/smap_convert_l3.py b/pipelines/smap_convert_l3.py index 390dc0866..de133475a 100644 --- a/pipelines/smap_convert_l3.py +++ b/pipelines/smap_convert_l3.py @@ -40,96 +40,103 @@ # written by Qing Liu, Arthur Endsley and Karyn Tabor # last edited November 30, 2022 by Karyn Tabor - -from absl import app -from osgeo import gdal +import copy import glob import os import re + +from absl import app import numpy as np -from pyl4c.lib.modis import dec2bin_unpack -from pyl4c.spatial import array_to_raster +from osgeo import gdal from pyl4c.data.fixtures import EASE2_GRID_PARAMS from pyl4c.epsg import EPSG +from pyl4c.lib.modis import dec2bin_unpack +from pyl4c.spatial import array_to_raster from google3.third_party.earthengine_catalog.pipelines import smap_lib # For the L3 data, the var list is very specific; the order matters # for retrieving specific datasets -# 1:4 are data sets; 4 is SM QA; 5:6 is TB QA +# 1:4 are data sets; 4 is SM QC; 5:6 is TB QC VAR_LIST = [ 'soil_moisture', 'tb_h_corrected', 'tb_v_corrected', 'vegetation_water_content', 'retrieval_qual_flag', 'tb_qual_flag_h', 'tb_qual_flag_v' ] +# Nodata value to use in uint8 QC bands. The raw values of these bands may +# differ (e.g., uint16), so when that happens we change the nodata value to one +# that will fit in a byte. +QC_NODATA = 255 -def SMAP_retrievalQC_fail(x): - """Returns pass/fail for QC flags. - Returns pass/fail for QC flags based on the SMAP l3 QC protocol for the +def SMAP_retrievalQC_pass_fail(raw_qc, raw_nodata): + """Returns pass/fail for QC flags. - `retrieval_qual_flag` band: Bad pixels have either `1` in the first bit - ("Soil - moisture retrieval doesn't have recommended quality") or 1` in the third bit - (Soil moisture retrieval was not successful"). - Output array is True wherever the array fails QC criteria. Compare to: + Checks are based on the SMAP L3 QC protocol for the `retrieval_qual_flag` + band: Bad pixels have either `1` in the first bit (Soil moisture retrieval + doesn't have recommended quality) or in the third bit (Soil moisture + retrieval was not successful). - np.vectorize(lambda v: v[0] == 1 or v[3:5] != '00') + Output array is True wherever the array fails QC criteria or has no data. Parameters ---------- - x : numpy.ndarray - Array where the last axis enumerates the unpacked bits - (ones and zeros) - example: c = dec2bin_unpack(np.array([2], dtype = np.uint8)) + raw_qc : numpy.ndarray : The raw QC flags taken from the source file. + Values will be cast to uint8 before checking individual bits. + raw_nodata : int or float : The numeric nodata value for `raw_qc`. We will + change the values to QC_NODATA before casting to uint8. + Returns ------- - numpy.ndarray - Boolean array with True wherever QC criteria are failed - example Returns: array([[0, 0, 0, 0, 0, 0, 1, 0]], dtype=uint8) + numpy.ndarray : Boolean array with True wherever QC criteria are failed. """ - y = dec2bin_unpack(x.astype(np.uint8)) + # We need to change nodata values from the original array so they'll fit in a + # uint8. We don't simply check for qc > QC_NODATA because then we would + # incorrectly allow undefined bits to contribute to the QC value. + qc = np.array(raw_qc) + qc[qc == raw_nodata] = QC_NODATA + bits = dec2bin_unpack(qc.astype(np.uint8)) # Emit 1 = FAIL if 1st bit == 1 # ("Soil moisture retrieval doesn't have recommended quality") - c1 = y[...,7] - # + check1 = bits[...,7] # Third bit is ==1 "Soil moisture retrieval was not successful" - c2 = y[...,5] - # + check2 = bits[...,5] # Intermediate arrays are 1 = FAIL, 0 = PASS - return (c1 + c2) > 0 + return (check1 + check2) > 0 -def SMAP_TB_QC_fail(x): +def SMAP_TB_QC_pass_fail(raw_qc, raw_nodata): """Returns pass/fail for QC flags. - Returns pass/fail for QC flags based on the SMAPL3 QC protocol for the - - `tb_qual_flag_v'` and tb_qual_flag_h' layers: Bad pixels have either `1` in - the first bit - ("Observation does not have acceptable quality") - Output array is True wherever the array fails QC criteria. Compare to: + Checks are based on the SMAP L3 QC protocol for the `tb_qual_flag_v` and + `tb_qual_flag_h` layers: Bad pixels have a `1` in the first bit ("Observation + does not have acceptable quality"). - np.vectorize(lambda v: v[0] == 1 or v[3:5] != '00') + Output array is True wherever the array fails QC criteria or has no data. Parameters ---------- - x : numpy.ndarray - Array where the last axis enumerates the unpacked bits - (ones and zeros) + raw_qc : numpy.ndarray : The raw QC flags taken from the source file. + Values will be cast to uint8 before checking individual bits. + raw_nodata : int or float : The numeric nodata value for `raw_qc`. We will + change the values to QC_NODATA before casting to uint8. Returns - ------- - numpy.ndarray - Boolean array with True wherever QC criteria are failed + ------- + numpy.ndarray : Boolean array with True wherever QC criteria are failed. """ - y = dec2bin_unpack(x.astype(np.uint8)) + # We need to change nodata values from the original array so they'll fit in a + # uint8. We don't simply check for qc > QC_NODATA because then we would + # incorrectly allow undefined bits to contribute to the QC value. + qc = np.array(raw_qc) + qc[qc == raw_nodata] = QC_NODATA + bits = dec2bin_unpack(qc.astype(np.uint8)) # Emit 1 = FAIL if 1st bit == 1 ("Data has acceptable quality") - c1 = y[...,7] + check1 = bits[...,7] # Intermediate arrays are 1 = FAIL, 0 = PASS - return (c1) > 0 + return check1 > 0 def convert(source_h5, target_tif): @@ -142,94 +149,86 @@ def convert(source_h5, target_tif): outputBounds=[-17367530.45, 7314540.11, 17367530.45, -7314540.11], ) + # Base options for gdal.Warp. The destination nodata value will change for + # each band. + creation_options = ['COMPRESS=LZW'] + warp_options_kwargs = { + 'creationOptions': creation_options, + 'srcSRS': 'EPSG:6933', + 'dstSRS': 'EPSG:4326', + 'xRes': 0.08, + 'yRes': 0.08, + } + # array_to_raster params gt = EASE2_GRID_PARAMS['M09']['geotransform'] wkt = EPSG[6933] - tif_list =[] - - SMAP_opass = ['AM','PM'] - for i in range(0,2): - print('SMAP overpass is %s' % (SMAP_opass[i])) - if i == 1: - # add '_pm' to all the variables in the list - pass_var_list = [s + '_pm' for s in VAR_LIST] - bnum=8 - else: - pass_var_list = VAR_LIST - bnum = 1 - - # 'bnum' is the number to add to band to number the temp tiff - # band 1 for am and 7 for pm - - # convert individual variables to separate GeoTiff files - for iband in range(0,4): - var = pass_var_list[iband] - sds = smap_lib.gdal_safe_open( - 'HDF5:'+source_h5+'://Soil_Moisture_Retrieval_Data_'+SMAP_opass[i]+ - '/'+var) - sds_array = sds.ReadAsArray() - dst_tmp = '/tmp/tmp'+str(iband+bnum)+'.tif' - sds_gdal = array_to_raster(sds_array,gt,wkt) - ds = gdal.Translate(dst_tmp,sds_gdal,options=translate_options) - ds = None - tif_list.append(dst_tmp) + translated_tif_list = [] + warped_tif_list = [] - # convert individual QA vars to separate GeoTiff files for Soil moisture - iband=4 - var = pass_var_list[iband] - - sds = smap_lib.gdal_safe_open( - 'HDF5:'+source_h5+'://Soil_Moisture_Retrieval_Data_'+SMAP_opass[i]+ - '/'+var) - sds_array = sds.ReadAsArray() - dst_tmp = '/tmp/tmp'+str(iband+bnum)+'.tif' - - # Call to QA function here - qa = SMAP_retrievalQC_fail(sds_array).astype(np.uint8) - qa_gdal = array_to_raster(qa,gt,wkt) - ds = gdal.Translate(dst_tmp,qa_gdal,options=translate_options) - ds = None - tif_list.append(dst_tmp) - - # convert individual QA vars to separate GeoTiff files for TB - - for iband in range(5,7): - var = pass_var_list[iband] + for smap_opass in ('AM', 'PM'): + pass_var_list = [ + s + ('_pm' if smap_opass == 'PM' else '') for s in VAR_LIST + ] + # Convert individual variables to separate GeoTiff files + for band_index, var in enumerate(pass_var_list): sds = smap_lib.gdal_safe_open( - 'HDF5:'+source_h5+'://Soil_Moisture_Retrieval_Data_'+SMAP_opass[i]+ - '/'+var) - sds_array = sds.ReadAsArray() - dst_tmp = '/tmp/tmp'+str(iband+bnum)+'.tif' - - # Call to QA function here - qa = SMAP_TB_QC_fail(sds_array).astype(np.uint8) - qa_gdal = array_to_raster(qa,gt,wkt) - ds = gdal.Translate(dst_tmp,qa_gdal,options=translate_options) + f'HDF5:{source_h5}://Soil_Moisture_Retrieval_Data_{smap_opass}/{var}' + ) + sds_band = sds.GetRasterBand(1) + sds_array = sds_band.ReadAsArray() + nodata_value = sds_band.GetNoDataValue() + + if band_index < 4: + # The first four bands are soil moisture observations. + sds_gdal = array_to_raster(sds_array, gt, wkt, nodata=nodata_value) + else: + # The last three are QC bands. + qa = sds_array + if band_index == 4: + qa = SMAP_retrievalQC_pass_fail(sds_array, nodata_value) + else: + qa = SMAP_TB_QC_pass_fail(sds_array, nodata_value) + # We get back boolean arrays that we cast to uint8. We must override the + # default nodata value or this method will incorrectly clamp it to 0. + sds_gdal = array_to_raster( + qa, gt, wkt, dtype=np.uint8, nodata=QC_NODATA + ) + nodata_value = QC_NODATA + + translated_tmp = f'/tmp/tmp_translated{len(translated_tif_list)}.tif' + ds = gdal.Translate(translated_tmp, sds_gdal, options=translate_options) + ds = None + translated_tif_list.append(translated_tmp) + + warped_tmp = f'/tmp/tmp_warped{len(warped_tif_list)}.tif' + this_warp_options_kwargs = copy.deepcopy(warp_options_kwargs) + this_warp_options_kwargs['dstNodata'] = nodata_value + ds = gdal.Warp( + warped_tmp, + translated_tmp, + options=gdal.WarpOptions(**this_warp_options_kwargs), + ) ds = None - tif_list.append(dst_tmp) + warped_tif_list.append(warped_tmp) - # build a VRT(Virtual Dataset) that includes the list of input tif files + # Build a VRT (Virtual Dataset) that includes the list of input tif files and + # use that to produce a single tif. It is safe to ignore the warning: "Unable + # to export GeoTIFF file with different datatypes per different bands. All + # bands should have the same types in TIFF." It actually exports just fine. + # TODO(simonf): Remove this step and instead return the individual tifs. tmp_vrt = '/tmp/tmp.vrt' - gdal.BuildVRT(tmp_vrt, tif_list, options='-separate') - - warp_options = gdal.WarpOptions( - creationOptions=['COMPRESS=LZW'], - srcSRS='EPSG:6933', - dstSRS='EPSG:4326', - xRes=0.08, - yRes=0.08, - srcNodata=-9999, - dstNodata=-9999 - ) - - # run gdal_Warp to reproject the VRT data and save in the target tif file with - # compression - ds = gdal.Warp(target_tif, tmp_vrt, options=warp_options) + gdal.BuildVRT(tmp_vrt, warped_tif_list, options='-separate') + ds = gdal.Translate( + target_tif, + tmp_vrt, + options=gdal.TranslateOptions(creationOptions=['COMPRESS=LZW']), + ) ds = None # remove temporary files - for f in [tmp_vrt] + tif_list: + for f in [tmp_vrt] + translated_tif_list + warped_tif_list: if os.path.exists(f): os.remove(f) diff --git a/pipelines/smap_convert_l4.py b/pipelines/smap_convert_l4.py index 7ccc291cb..efaf6918f 100644 --- a/pipelines/smap_convert_l4.py +++ b/pipelines/smap_convert_l4.py @@ -145,7 +145,6 @@ def convert(source_h5, target_tif): creationOptions=['COMPRESS=LZW'], srcSRS='EPSG:6933', dstSRS='EPSG:4326', - srcNodata=-9999, dstNodata=-9999 )