Skip to content

Commit

Permalink
Update gdal warp options for SMAP geotiff conversion.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 720601911
  • Loading branch information
Dan Pencoske authored and copybara-github committed Jan 28, 2025
1 parent 4318d2a commit 0c9f806
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 122 deletions.
241 changes: 120 additions & 121 deletions pipelines/smap_convert_l3.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,96 +40,103 @@
# written by Qing Liu, Arthur Endsley and Karyn Tabor
# last edited November 30, 2022 by Karyn Tabor


from absl import app
from osgeo import gdal
import copy
import glob
import os
import re

from absl import app
import numpy as np
from pyl4c.lib.modis import dec2bin_unpack
from pyl4c.spatial import array_to_raster
from osgeo import gdal
from pyl4c.data.fixtures import EASE2_GRID_PARAMS
from pyl4c.epsg import EPSG
from pyl4c.lib.modis import dec2bin_unpack
from pyl4c.spatial import array_to_raster

from google3.third_party.earthengine_catalog.pipelines import smap_lib


# For the L3 data, the var list is very specific; the order matters
# for retrieving specific datasets
# 1:4 are data sets; 4 is SM QA; 5:6 is TB QA
# 1:4 are data sets; 4 is SM QC; 5:6 is TB QC
VAR_LIST = [
'soil_moisture', 'tb_h_corrected', 'tb_v_corrected',
'vegetation_water_content', 'retrieval_qual_flag', 'tb_qual_flag_h',
'tb_qual_flag_v'
]

# Nodata value to use in uint8 QC bands. The raw values of these bands may
# differ (e.g., uint16), so when that happens we change the nodata value to one
# that will fit in a byte.
QC_NODATA = 255

def SMAP_retrievalQC_fail(x):
"""Returns pass/fail for QC flags.

Returns pass/fail for QC flags based on the SMAP l3 QC protocol for the
def SMAP_retrievalQC_pass_fail(raw_qc, raw_nodata):
"""Returns pass/fail for QC flags.
`retrieval_qual_flag` band: Bad pixels have either `1` in the first bit
("Soil
moisture retrieval doesn't have recommended quality") or 1` in the third bit
(Soil moisture retrieval was not successful").
Output array is True wherever the array fails QC criteria. Compare to:
Checks are based on the SMAP L3 QC protocol for the `retrieval_qual_flag`
band: Bad pixels have either `1` in the first bit (Soil moisture retrieval
doesn't have recommended quality) or in the third bit (Soil moisture
retrieval was not successful).
np.vectorize(lambda v: v[0] == 1 or v[3:5] != '00')
Output array is True wherever the array fails QC criteria or has no data.
Parameters
----------
x : numpy.ndarray
Array where the last axis enumerates the unpacked bits
(ones and zeros)
example: c = dec2bin_unpack(np.array([2], dtype = np.uint8))
raw_qc : numpy.ndarray : The raw QC flags taken from the source file.
Values will be cast to uint8 before checking individual bits.
raw_nodata : int or float : The numeric nodata value for `raw_qc`. We will
change the values to QC_NODATA before casting to uint8.
Returns
-------
numpy.ndarray
Boolean array with True wherever QC criteria are failed
example Returns: array([[0, 0, 0, 0, 0, 0, 1, 0]], dtype=uint8)
numpy.ndarray : Boolean array with True wherever QC criteria are failed.
"""
y = dec2bin_unpack(x.astype(np.uint8))
# We need to change nodata values from the original array so they'll fit in a
# uint8. We don't simply check for qc > QC_NODATA because then we would
# incorrectly allow undefined bits to contribute to the QC value.
qc = np.array(raw_qc)
qc[qc == raw_nodata] = QC_NODATA
bits = dec2bin_unpack(qc.astype(np.uint8))
# Emit 1 = FAIL if 1st bit == 1
# ("Soil moisture retrieval doesn't have recommended quality")
c1 = y[...,7]
#
check1 = bits[...,7]
# Third bit is ==1 "Soil moisture retrieval was not successful"
c2 = y[...,5]
#
check2 = bits[...,5]
# Intermediate arrays are 1 = FAIL, 0 = PASS
return (c1 + c2) > 0
return (check1 + check2) > 0


def SMAP_TB_QC_fail(x):
def SMAP_TB_QC_pass_fail(raw_qc, raw_nodata):
"""Returns pass/fail for QC flags.
Returns pass/fail for QC flags based on the SMAPL3 QC protocol for the
`tb_qual_flag_v'` and tb_qual_flag_h' layers: Bad pixels have either `1` in
the first bit
("Observation does not have acceptable quality")
Output array is True wherever the array fails QC criteria. Compare to:
Checks are based on the SMAP L3 QC protocol for the `tb_qual_flag_v` and
`tb_qual_flag_h` layers: Bad pixels have a `1` in the first bit ("Observation
does not have acceptable quality").
np.vectorize(lambda v: v[0] == 1 or v[3:5] != '00')
Output array is True wherever the array fails QC criteria or has no data.
Parameters
----------
x : numpy.ndarray
Array where the last axis enumerates the unpacked bits
(ones and zeros)
raw_qc : numpy.ndarray : The raw QC flags taken from the source file.
Values will be cast to uint8 before checking individual bits.
raw_nodata : int or float : The numeric nodata value for `raw_qc`. We will
change the values to QC_NODATA before casting to uint8.
Returns
-------
numpy.ndarray
Boolean array with True wherever QC criteria are failed
-------
numpy.ndarray : Boolean array with True wherever QC criteria are failed.
"""
y = dec2bin_unpack(x.astype(np.uint8))
# We need to change nodata values from the original array so they'll fit in a
# uint8. We don't simply check for qc > QC_NODATA because then we would
# incorrectly allow undefined bits to contribute to the QC value.
qc = np.array(raw_qc)
qc[qc == raw_nodata] = QC_NODATA
bits = dec2bin_unpack(qc.astype(np.uint8))
# Emit 1 = FAIL if 1st bit == 1 ("Data has acceptable quality")
c1 = y[...,7]
check1 = bits[...,7]
# Intermediate arrays are 1 = FAIL, 0 = PASS
return (c1) > 0
return check1 > 0


def convert(source_h5, target_tif):
Expand All @@ -142,94 +149,86 @@ def convert(source_h5, target_tif):
outputBounds=[-17367530.45, 7314540.11, 17367530.45, -7314540.11],
)

# Base options for gdal.Warp. The destination nodata value will change for
# each band.
creation_options = ['COMPRESS=LZW']
warp_options_kwargs = {
'creationOptions': creation_options,
'srcSRS': 'EPSG:6933',
'dstSRS': 'EPSG:4326',
'xRes': 0.08,
'yRes': 0.08,
}

# array_to_raster params
gt = EASE2_GRID_PARAMS['M09']['geotransform']
wkt = EPSG[6933]
tif_list =[]

SMAP_opass = ['AM','PM']
for i in range(0,2):
print('SMAP overpass is %s' % (SMAP_opass[i]))
if i == 1:
# add '_pm' to all the variables in the list
pass_var_list = [s + '_pm' for s in VAR_LIST]
bnum=8
else:
pass_var_list = VAR_LIST
bnum = 1

# 'bnum' is the number to add to band to number the temp tiff
# band 1 for am and 7 for pm

# convert individual variables to separate GeoTiff files
for iband in range(0,4):
var = pass_var_list[iband]
sds = smap_lib.gdal_safe_open(
'HDF5:'+source_h5+'://Soil_Moisture_Retrieval_Data_'+SMAP_opass[i]+
'/'+var)
sds_array = sds.ReadAsArray()
dst_tmp = '/tmp/tmp'+str(iband+bnum)+'.tif'
sds_gdal = array_to_raster(sds_array,gt,wkt)
ds = gdal.Translate(dst_tmp,sds_gdal,options=translate_options)
ds = None
tif_list.append(dst_tmp)
translated_tif_list = []
warped_tif_list = []

# convert individual QA vars to separate GeoTiff files for Soil moisture
iband=4
var = pass_var_list[iband]

sds = smap_lib.gdal_safe_open(
'HDF5:'+source_h5+'://Soil_Moisture_Retrieval_Data_'+SMAP_opass[i]+
'/'+var)
sds_array = sds.ReadAsArray()
dst_tmp = '/tmp/tmp'+str(iband+bnum)+'.tif'

# Call to QA function here
qa = SMAP_retrievalQC_fail(sds_array).astype(np.uint8)
qa_gdal = array_to_raster(qa,gt,wkt)
ds = gdal.Translate(dst_tmp,qa_gdal,options=translate_options)
ds = None
tif_list.append(dst_tmp)

# convert individual QA vars to separate GeoTiff files for TB

for iband in range(5,7):
var = pass_var_list[iband]
for smap_opass in ('AM', 'PM'):
pass_var_list = [
s + ('_pm' if smap_opass == 'PM' else '') for s in VAR_LIST
]

# Convert individual variables to separate GeoTiff files
for band_index, var in enumerate(pass_var_list):
sds = smap_lib.gdal_safe_open(
'HDF5:'+source_h5+'://Soil_Moisture_Retrieval_Data_'+SMAP_opass[i]+
'/'+var)
sds_array = sds.ReadAsArray()
dst_tmp = '/tmp/tmp'+str(iband+bnum)+'.tif'

# Call to QA function here
qa = SMAP_TB_QC_fail(sds_array).astype(np.uint8)
qa_gdal = array_to_raster(qa,gt,wkt)
ds = gdal.Translate(dst_tmp,qa_gdal,options=translate_options)
f'HDF5:{source_h5}://Soil_Moisture_Retrieval_Data_{smap_opass}/{var}'
)
sds_band = sds.GetRasterBand(1)
sds_array = sds_band.ReadAsArray()
nodata_value = sds_band.GetNoDataValue()

if band_index < 4:
# The first four bands are soil moisture observations.
sds_gdal = array_to_raster(sds_array, gt, wkt, nodata=nodata_value)
else:
# The last three are QC bands.
qa = sds_array
if band_index == 4:
qa = SMAP_retrievalQC_pass_fail(sds_array, nodata_value)
else:
qa = SMAP_TB_QC_pass_fail(sds_array, nodata_value)
# We get back boolean arrays that we cast to uint8. We must override the
# default nodata value or this method will incorrectly clamp it to 0.
sds_gdal = array_to_raster(
qa, gt, wkt, dtype=np.uint8, nodata=QC_NODATA
)
nodata_value = QC_NODATA

translated_tmp = f'/tmp/tmp_translated{len(translated_tif_list)}.tif'
ds = gdal.Translate(translated_tmp, sds_gdal, options=translate_options)
ds = None
translated_tif_list.append(translated_tmp)

warped_tmp = f'/tmp/tmp_warped{len(warped_tif_list)}.tif'
this_warp_options_kwargs = copy.deepcopy(warp_options_kwargs)
this_warp_options_kwargs['dstNodata'] = nodata_value
ds = gdal.Warp(
warped_tmp,
translated_tmp,
options=gdal.WarpOptions(**this_warp_options_kwargs),
)
ds = None
tif_list.append(dst_tmp)
warped_tif_list.append(warped_tmp)

# build a VRT(Virtual Dataset) that includes the list of input tif files
# Build a VRT (Virtual Dataset) that includes the list of input tif files and
# use that to produce a single tif. It is safe to ignore the warning: "Unable
# to export GeoTIFF file with different datatypes per different bands. All
# bands should have the same types in TIFF." It actually exports just fine.
# TODO(simonf): Remove this step and instead return the individual tifs.
tmp_vrt = '/tmp/tmp.vrt'
gdal.BuildVRT(tmp_vrt, tif_list, options='-separate')

warp_options = gdal.WarpOptions(
creationOptions=['COMPRESS=LZW'],
srcSRS='EPSG:6933',
dstSRS='EPSG:4326',
xRes=0.08,
yRes=0.08,
srcNodata=-9999,
dstNodata=-9999
)

# run gdal_Warp to reproject the VRT data and save in the target tif file with
# compression
ds = gdal.Warp(target_tif, tmp_vrt, options=warp_options)
gdal.BuildVRT(tmp_vrt, warped_tif_list, options='-separate')
ds = gdal.Translate(
target_tif,
tmp_vrt,
options=gdal.TranslateOptions(creationOptions=['COMPRESS=LZW']),
)
ds = None

# remove temporary files
for f in [tmp_vrt] + tif_list:
for f in [tmp_vrt] + translated_tif_list + warped_tif_list:
if os.path.exists(f):
os.remove(f)

Expand Down
1 change: 0 additions & 1 deletion pipelines/smap_convert_l4.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,6 @@ def convert(source_h5, target_tif):
creationOptions=['COMPRESS=LZW'],
srcSRS='EPSG:6933',
dstSRS='EPSG:4326',
srcNodata=-9999,
dstNodata=-9999
)

Expand Down

0 comments on commit 0c9f806

Please sign in to comment.