Skip to content

Add ops.image.scale_and_translate. #21577

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions keras/api/_tf_keras/keras/ops/image/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
from keras.src.ops.image import resize as resize
from keras.src.ops.image import rgb_to_grayscale as rgb_to_grayscale
from keras.src.ops.image import rgb_to_hsv as rgb_to_hsv
from keras.src.ops.image import scale_and_translate as scale_and_translate
1 change: 1 addition & 0 deletions keras/api/ops/image/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@
from keras.src.ops.image import resize as resize
from keras.src.ops.image import rgb_to_grayscale as rgb_to_grayscale
from keras.src.ops.image import rgb_to_hsv as rgb_to_hsv
from keras.src.ops.image import scale_and_translate as scale_and_translate
80 changes: 57 additions & 23 deletions keras/src/backend/jax/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,34 @@
"lanczos5",
"bicubic",
)
AFFINE_TRANSFORM_INTERPOLATIONS = { # map to order
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, this is clearer.

"nearest": 0,
"bilinear": 1,
}
# Fill-mode names for the affine-transform code path.
# NOTE(review): the consumer is outside this view -- presumably
# `affine_transform`; confirm.
AFFINE_TRANSFORM_FILL_MODES = {
"constant",
"nearest",
"wrap",
"mirror",
"reflect",
}
# Holds the same five names as AFFINE_TRANSFORM_FILL_MODES.
# NOTE(review): presumably consumed by `map_coordinates`; confirm.
MAP_COORDINATES_FILL_MODES = {
"constant",
"nearest",
"wrap",
"mirror",
"reflect",
}
# Method names accepted by `scale_and_translate`, which raises `ValueError`
# for any `method` that is not a member of this set.
SCALE_AND_TRANSLATE_METHODS = {
"linear",
"bilinear",
"trilinear",
"cubic",
"bicubic",
"tricubic",
"lanczos3",
"lanczos5",
}


def rgb_to_grayscale(images, data_format=None):
Expand Down Expand Up @@ -372,19 +400,6 @@ def resize(
)


AFFINE_TRANSFORM_INTERPOLATIONS = { # map to order
"nearest": 0,
"bilinear": 1,
}
AFFINE_TRANSFORM_FILL_MODES = {
"constant",
"nearest",
"wrap",
"mirror",
"reflect",
}


def affine_transform(
images,
transform,
Expand Down Expand Up @@ -483,15 +498,6 @@ def affine_transform(
return affined


MAP_COORDINATES_FILL_MODES = {
"constant",
"nearest",
"wrap",
"mirror",
"reflect",
}


def perspective_transform(
images,
start_points,
Expand Down Expand Up @@ -545,7 +551,7 @@ def perspective_transform(
if data_format == "channels_first":
images = jnp.transpose(images, (0, 2, 3, 1))

batch_size, height, width, channels = images.shape
_, height, width, _ = images.shape
transforms = compute_homography_matrix(
jnp.asarray(start_points, dtype="float32"),
jnp.asarray(end_points, dtype="float32"),
Expand Down Expand Up @@ -859,3 +865,31 @@ def elastic_transform(
transformed_images = transformed_images.astype(input_dtype)

return transformed_images


def scale_and_translate(
    images,
    output_shape,
    scale,
    translation,
    spatial_dims,
    method,
    antialias=True,
):
    """Scale and translate `images` via `jax.image.scale_and_translate`.

    `images`, `scale` and `translation` are converted with
    `convert_to_tensor`; every other argument is forwarded as given. Note
    that this signature takes `scale, translation, spatial_dims` while the
    JAX function takes `spatial_dims, scale, translation`, hence the
    keyword arguments below.

    Args:
        images: Input to resample.
        output_shape: Passed to JAX as `shape`.
        scale: Scale factors.
        translation: Translation offsets.
        spatial_dims: Axes of `images` to resample.
        method: One of `SCALE_AND_TRANSLATE_METHODS`.
        antialias: Forwarded to JAX unchanged.

    Returns:
        The result of `jax.image.scale_and_translate`.

    Raises:
        ValueError: If `method` is not in `SCALE_AND_TRANSLATE_METHODS`.
    """
    if method not in SCALE_AND_TRANSLATE_METHODS:
        raise ValueError(
            "Invalid value for argument `method`. Expected of one "
            f"{SCALE_AND_TRANSLATE_METHODS}. Received: method={method}"
        )

    images, scale, translation = (
        convert_to_tensor(value) for value in (images, scale, translation)
    )
    return jax.image.scale_and_translate(
        images,
        shape=output_shape,
        spatial_dims=spatial_dims,
        scale=scale,
        translation=translation,
        method=method,
        antialias=antialias,
    )
146 changes: 98 additions & 48 deletions keras/src/backend/numpy/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,34 @@
"lanczos5",
"bicubic",
)
# Interpolation name -> integer order.
# NOTE(review): the code that reads this mapping is outside this view --
# presumably `affine_transform`; confirm.
AFFINE_TRANSFORM_INTERPOLATIONS = { # map to order
"nearest": 0,
"bilinear": 1,
}
# Fill-mode names for the affine-transform code path.
# NOTE(review): consumer not visible here; confirm.
AFFINE_TRANSFORM_FILL_MODES = {
"constant",
"nearest",
"wrap",
"mirror",
"reflect",
}
# Holds the same five names as AFFINE_TRANSFORM_FILL_MODES.
# NOTE(review): presumably consumed by `map_coordinates`; confirm.
MAP_COORDINATES_FILL_MODES = {
"constant",
"nearest",
"wrap",
"mirror",
"reflect",
}
# Method names accepted by `scale_and_translate`. That function rejects
# anything outside this set with `ValueError`, then folds the bi-/tri-
# spellings into "linear" / "cubic" before looking up a kernel.
SCALE_AND_TRANSLATE_METHODS = {
"linear",
"bilinear",
"trilinear",
"cubic",
"bicubic",
"tricubic",
"lanczos3",
"lanczos5",
}


def rgb_to_grayscale(images, data_format=None):
Expand Down Expand Up @@ -367,7 +395,7 @@ def resize(
return _resize(images, size, method=interpolation, antialias=antialias)


def compute_weight_mat(
def _compute_weight_mat(
input_size, output_size, scale, translation, kernel, antialias
):
dtype = np.result_type(scale, translation)
Expand Down Expand Up @@ -410,32 +438,11 @@ def compute_weight_mat(


def _resize(image, shape, method, antialias):
def _fill_triangle_kernel(x):
return np.maximum(0, 1 - np.abs(x))

def _fill_keys_cubic_kernel(x):
out = ((1.5 * x - 2.5) * x) * x + 1.0
out = np.where(x >= 1.0, ((-0.5 * x + 2.5) * x - 4.0) * x + 2.0, out)
return np.where(x >= 2.0, 0.0, out)

def _fill_lanczos_kernel(radius, x):
y = radius * np.sin(np.pi * x) * np.sin(np.pi * x / radius)
out = np.where(
x > 1e-3, np.divide(y, np.where(x != 0, np.pi**2 * x**2, 1)), 1
)
return np.where(x > radius, 0.0, out)

if method == "nearest":
return _resize_nearest(image, shape)
elif method == "bilinear":
kernel = _fill_triangle_kernel
elif method == "lanczos3":
kernel = lambda x: _fill_lanczos_kernel(3.0, x)
elif method == "lanczos5":
kernel = lambda x: _fill_lanczos_kernel(5.0, x)
elif method == "bicubic":
kernel = _fill_keys_cubic_kernel
else:
kernel = _kernels.get(method, None)
if kernel is None:
raise ValueError("Unknown resize method")

spatial_dims = tuple(
Expand Down Expand Up @@ -473,6 +480,34 @@ def _resize_nearest(x, output_shape):
return x


def _fill_triangle_kernel(x):
    """Triangle (tent) kernel: elementwise `max(0, 1 - |x|)`."""
    distance = np.abs(x)
    return np.maximum(0, 1 - distance)


def _fill_keys_cubic_kernel(x):
    """Piecewise cubic kernel, evaluated elementwise.

    Uses one cubic polynomial where `x < 1`, a second one where
    `1 <= x < 2`, and zero where `x >= 2`. The comparisons are made on `x`
    as given; no absolute value is taken here.
    """
    inner_piece = ((1.5 * x - 2.5) * x) * x + 1.0
    outer_piece = ((-0.5 * x + 2.5) * x - 4.0) * x + 2.0
    within_support = np.where(x >= 1.0, outer_piece, inner_piece)
    return np.where(x >= 2.0, 0.0, within_support)


def _fill_lanczos_kernel(radius, x):
    """Lanczos kernel of the given `radius`, evaluated elementwise.

    Computes `radius * sin(pi x) * sin(pi x / radius) / (pi^2 x^2)` where
    `x > 1e-3`, returns 1 for smaller `x`, and returns 0 where `x > radius`.
    """
    numerator = radius * np.sin(np.pi * x) * np.sin(np.pi * x / radius)
    # Swap in 1 for the denominator at x == 0 so the division is never 0/0;
    # those entries are overwritten with 1 by the `x > 1e-3` mask anyway.
    safe_denominator = np.where(x != 0, np.pi**2 * x**2, 1)
    ratio = np.divide(numerator, safe_denominator)
    windowed = np.where(x > 1e-3, ratio, 1)
    return np.where(x > radius, 0.0, windowed)


# Interpolation kernels keyed by method name. `_resize` looks its `method`
# up here, and `scale_and_translate` indexes this table after folding the
# bi-/tri- aliases into "linear" / "cubic". The two lanczos entries bind the
# radius (3.0 or 5.0) so every value is a one-argument callable of `x`.
_kernels = {
"linear": _fill_triangle_kernel,
"bilinear": _fill_triangle_kernel, # For `resize`.
"cubic": _fill_keys_cubic_kernel,
"bicubic": _fill_keys_cubic_kernel, # For `resize`.
"lanczos3": lambda x: _fill_lanczos_kernel(3.0, x),
"lanczos5": lambda x: _fill_lanczos_kernel(5.0, x),
}


def _scale_and_translate(
x, output_shape, spatial_dims, scale, translation, kernel, antialias
):
Expand All @@ -492,9 +527,9 @@ def _scale_and_translate(
d = d % x.ndim
m, n = input_shape[d], output_shape[d]

w = compute_weight_mat(
w = _compute_weight_mat(
m, n, scale[i], translation[i], kernel, antialias
).astype(np.float32)
).astype(output.dtype)
output = np.tensordot(output, w, axes=(d, 0))
output = np.moveaxis(output, -1, d)

Expand All @@ -504,19 +539,6 @@ def _scale_and_translate(
return output


AFFINE_TRANSFORM_INTERPOLATIONS = { # map to order
"nearest": 0,
"bilinear": 1,
}
AFFINE_TRANSFORM_FILL_MODES = {
"constant",
"nearest",
"wrap",
"mirror",
"reflect",
}


def affine_transform(
images,
transform,
Expand Down Expand Up @@ -877,15 +899,6 @@ def compute_homography_matrix(start_points, end_points):
return homography_matrix


MAP_COORDINATES_FILL_MODES = {
"constant",
"nearest",
"wrap",
"mirror",
"reflect",
}


def map_coordinates(
inputs, coordinates, order, fill_mode="constant", fill_value=0.0
):
Expand Down Expand Up @@ -1135,3 +1148,40 @@ def elastic_transform(
transformed_images = transformed_images.astype(input_dtype)

return transformed_images


def scale_and_translate(
    images,
    output_shape,
    scale,
    translation,
    spatial_dims,
    method,
    antialias=True,
):
    """Scale and translate `images` with the NumPy resampling kernels.

    Validates `method`, folds its bi-/tri- aliases into the canonical kernel
    name, casts `scale` and `translation` to their common result dtype and
    delegates to `_scale_and_translate`.

    Args:
        images: Input to resample; converted with `convert_to_tensor`.
        output_shape: Target shape, forwarded unchanged.
        scale: Scale factors; converted with `convert_to_tensor`.
        translation: Translation offsets; converted with
            `convert_to_tensor`.
        spatial_dims: Axes of `images` to resample, forwarded unchanged.
        method: One of `SCALE_AND_TRANSLATE_METHODS`.
        antialias: Forwarded unchanged to `_scale_and_translate`.

    Returns:
        The result of `_scale_and_translate` for the prepared arguments.

    Raises:
        ValueError: If `method` is not in `SCALE_AND_TRANSLATE_METHODS`.
    """
    if method not in SCALE_AND_TRANSLATE_METHODS:
        raise ValueError(
            "Invalid value for argument `method`. Expected of one "
            f"{SCALE_AND_TRANSLATE_METHODS}. Received: method={method}"
        )
    # `_kernels` only has entries for the canonical names, so collapse the
    # dimensional aliases first. Only names that pass the validation above
    # are listed ("triangle" was previously listed here but could never
    # reach this point).
    if method in ("linear", "bilinear", "trilinear"):
        method = "linear"
    elif method in ("cubic", "bicubic", "tricubic"):
        method = "cubic"

    images = convert_to_tensor(images)
    scale = convert_to_tensor(scale)
    translation = convert_to_tensor(translation)
    kernel = _kernels[method]
    # Bring `scale` and `translation` to one shared dtype before resampling.
    dtype = backend.result_type(scale.dtype, translation.dtype)
    scale = scale.astype(dtype)
    translation = translation.astype(dtype)
    return _scale_and_translate(
        images,
        output_shape,
        spatial_dims,
        scale,
        translation,
        kernel,
        antialias,
    )
50 changes: 50 additions & 0 deletions keras/src/backend/openvino/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,59 @@ def affine_transform(
)


def perspective_transform(
    images,
    start_points,
    end_points,
    interpolation="bilinear",
    fill_value=0,
    data_format=None,
):
    """Placeholder for the openvino backend; always raises.

    Raises:
        NotImplementedError: Unconditionally; the arguments are ignored.
    """
    message = "`perspective_transform` is not supported with openvino backend"
    raise NotImplementedError(message)


def map_coordinates(
    inputs, coordinates, order, fill_mode="constant", fill_value=0
):
    """Placeholder for the openvino backend; always raises.

    Raises:
        NotImplementedError: Unconditionally; the arguments are ignored.
    """
    message = "`map_coordinates` is not supported with openvino backend"
    raise NotImplementedError(message)


def gaussian_blur(
    images, kernel_size=(3, 3), sigma=(1.0, 1.0), data_format=None
):
    """Placeholder for the openvino backend; always raises.

    Raises:
        NotImplementedError: Unconditionally; the arguments are ignored.
    """
    message = "`gaussian_blur` is not supported with openvino backend"
    raise NotImplementedError(message)


def elastic_transform(
    images,
    alpha=20.0,
    sigma=5.0,
    interpolation="bilinear",
    fill_mode="reflect",
    fill_value=0.0,
    seed=None,
    data_format=None,
):
    """Placeholder for the openvino backend; always raises.

    Raises:
        NotImplementedError: Unconditionally; the arguments are ignored.
    """
    message = "`elastic_transform` is not supported with openvino backend"
    raise NotImplementedError(message)


def scale_and_translate(
    images,
    output_shape,
    scale,
    translation,
    spatial_dims,
    method,
    antialias=True,
):
    """Placeholder for the openvino backend; always raises.

    Raises:
        NotImplementedError: Unconditionally; the arguments are ignored.
    """
    message = "`scale_and_translate` is not supported with openvino backend"
    raise NotImplementedError(message)
Loading