feat: Update Tile Pre-Processor to support more modes #6558

Open · wants to merge 4 commits into base: main
90 changes: 71 additions & 19 deletions invokeai/app/invocations/controlnet_image_processors.py
@@ -1,11 +1,13 @@
 # Invocations for ControlNet image preprocessors
 # initial implementation by Gregg Helt, 2023
 # heavily leverages controlnet_aux package: https://github.com/patrickvonplaten/controlnet_aux
+import random
 from builtins import bool, float
 from pathlib import Path
-from typing import Dict, List, Literal, Union
+from typing import Any, Dict, List, Literal, Union

 import cv2
+import cv2.ximgproc
 import numpy as np
 from controlnet_aux import (
     ContentShuffleDetector,
@@ -39,6 +41,7 @@
 from invokeai.backend.image_util.canny import get_canny_edges
 from invokeai.backend.image_util.depth_anything import DEPTH_ANYTHING_MODELS, DepthAnythingDetector
 from invokeai.backend.image_util.dw_openpose import DWPOSE_MODELS, DWOpenposeDetector
+from invokeai.backend.image_util.fast_guided_filter.fast_guided_filter import FastGuidedFilter
 from invokeai.backend.image_util.hed import HEDProcessor
 from invokeai.backend.image_util.lineart import LineartProcessor
 from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor
@@ -476,37 +479,86 @@ def run_processor(self, image: Image.Image) -> Image.Image:
title="Tile Resample Processor",
tags=["controlnet", "tile"],
category="controlnet",
version="1.2.3",
version="1.3.0",
)
class TileResamplerProcessorInvocation(ImageProcessorInvocation):
"""Tile resampler processor"""

# res: int = InputField(default=512, ge=0, le=1024, description="The pixel resolution for each tile")
down_sampling_rate: float = InputField(default=1.0, ge=1.0, le=8.0, description="Down sampling rate")
mode: Literal["regular", "blur", "super"] = InputField(
default="regular", description="The Tile ControlNet pre-processing mode to use."
)

# tile_resample copied from sd-webui-controlnet/scripts/processor.py
def tile_resample(
self,
np_img: np.ndarray,
res=512, # never used?
down_sampling_rate=1.0,
):
# referenced from
# https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/37f1c4575b543fb2036e39f5763d082fdd135318/TTP_tile_preprocessor_v5.py
def _apply_gaussian_blur(self, image_np: np.ndarray[Any, Any], ksize: int = 5, sigma_x: float = 1.0):
if ksize % 2 == 0:
ksize += 1 # ksize must be odd
blurred_image = cv2.GaussianBlur(image_np, (ksize, ksize), sigmaX=sigma_x)
return blurred_image

# referenced from
# https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/37f1c4575b543fb2036e39f5763d082fdd135318/TTP_tile_preprocessor_v5.py
def _apply_guided_filter(self, image_np: np.ndarray[Any, Any], radius: int, eps: float, scale: int):
filter = FastGuidedFilter(image_np, radius, eps, scale)
return filter.filter(image_np)

def _regular_resample(self, np_img: np.ndarray[Any, Any]):
height, width, _ = np_img.shape
np_img = HWC3(np_img)
if down_sampling_rate < 1.1:
if self.down_sampling_rate < 1.1:
return np_img
H, W, C = np_img.shape
H = int(float(H) / float(down_sampling_rate))
W = int(float(W) / float(down_sampling_rate))
np_img = cv2.resize(np_img, (W, H), interpolation=cv2.INTER_AREA)

new_height = int(float(height) / float(self.down_sampling_rate))
new_width = int(float(width) / float(self.down_sampling_rate))
np_img = cv2.resize(np_img, (new_width, new_height), interpolation=cv2.INTER_AREA)
return np_img

# referenced from
# https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/37f1c4575b543fb2036e39f5763d082fdd135318/TTP_tile_preprocessor_v5.py
def _blur_resample(self, np_img: np.ndarray[Any, Any]):
Comment on lines +518 to +520 (Collaborator):

This function does not seem to match anything in the linked file. Is the link wrong?
+        height, width, _ = np_img.shape
+        ratio = np.sqrt(1024.0 * 1024.0 / (width * height))
+        resize_w, resize_h = int(width * ratio), int(height * ratio)
+        np_img = cv2.resize(np_img, (resize_w, resize_h))
Comment on lines +521 to +524 (Collaborator):

It looks like the first step is to resize to match the area 1024x1024. Why? Is this SDXL-specific? Does this interact in some important way with one of the later hard-coded values?

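For what it's worth, the ratio here rescales the image so its total pixel count is approximately 1024 x 1024 = 1,048,576 while preserving the aspect ratio; since the linked TTPlanet model is an SDXL tile model, this plausibly targets SDXL's 1024x1024 base training area, though the PR does not say. A minimal sketch of the arithmetic (the helper name normalize_area and the sample values are illustrative, not from this PR):

    import numpy as np

    def normalize_area(width: int, height: int, target_side: int = 1024) -> tuple[int, int]:
        # Scale (width, height) so the output area is ~target_side**2,
        # preserving the aspect ratio.
        ratio = np.sqrt(target_side * target_side / (width * height))
        return int(width * ratio), int(height * ratio)

    # A 2000x1000 (2.0 MP) input maps to 1448x724 (~1.05 MP), still 2:1.
    print(normalize_area(2000, 1000))  # (1448, 724)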

+        blur_strength = random.sample([i / 10.0 for i in range(10, 201, 2)], k=1)[0]
+        radius = random.sample([i for i in range(1, 40, 2)], k=1)[0]  # noqa: C416
+        eps = random.sample([i / 1000.0 for i in range(1, 101, 2)], k=1)[0]
+        scale_factor = random.sample([i / 10.0 for i in range(10, 181, 5)], k=1)[0]
+
+        if random.random() > 0.5:
+            np_img = self._apply_gaussian_blur(np_img, ksize=int(blur_strength), sigma_x=blur_strength / 2)
+
+        if random.random() > 0.5:
+            np_img = self._apply_guided_filter(np_img, radius, eps, int(scale_factor))
Comment on lines +526 to +535 (Collaborator):

All of the random sampling in this block makes it look like experimental code. Nodes should be deterministic, so we need to either:

  • Hard-code these values to good defaults.
  • Expose them as parameters. (See the sketch below.)
  • Add a seed. (This seems like a bad idea. I can't think of a good reason for a preprocessor to need a seed.)


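As a sketch of the "expose them as parameters" option, the sampled values could become node inputs, with the two coin flips becoming booleans. The field names and defaults below are hypothetical, not part of this PR; the bounds simply mirror the ranges the code currently samples from:

    # Hypothetical deterministic InputFields replacing the random.sample() calls
    # (defaults are guesses; bounds mirror the sampled ranges above):
    blur_strength: float = InputField(default=10.0, ge=1.0, le=20.0, description="Gaussian blur strength")
    radius: int = InputField(default=7, ge=1, le=39, description="Guided filter radius")
    eps: float = InputField(default=0.01, ge=0.001, le=0.101, description="Guided filter regularization (eps)")
    scale_factor: float = InputField(default=4.0, ge=1.0, le=18.0, description="Guided filter scale factor")
    apply_blur: bool = InputField(default=True, description="Apply Gaussian blur")
    apply_guided_filter: bool = InputField(default=True, description="Apply guided filter")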
+        np_img = cv2.resize(
+            np_img, (int(resize_w / scale_factor), int(resize_h / scale_factor)), interpolation=cv2.INTER_AREA
+        )
+        np_img = cv2.resize(np_img, (resize_w, resize_h), interpolation=cv2.INTER_CUBIC)
+        return np_img
+
+    def _super_resample(self, np_img: np.ndarray[Any, Any]):
+        height, width, _ = np_img.shape
+        ratio = np.sqrt(1024.0 * 1024.0 / (width * height))
+        resize_w, resize_h = int(width * ratio) // 48 * 48, int(height * ratio) // 48 * 48
+        np_img = cv2.resize(np_img, (resize_w, resize_h))
Comment on lines +544 to +547 (Collaborator):

Same question as above re: why 1024x1024?

And why force it to be a multiple of 48?

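For reference, // 48 * 48 floors each dimension to the nearest lower multiple of 48, so both output dimensions divide evenly by 48; the PR does not explain the choice of 48. A quick check of the arithmetic:

    # Floor each dimension to a multiple of 48, as _super_resample does:
    for dim in (724, 1024, 1448):
        print(dim, "->", dim // 48 * 48)
    # 724 -> 720, 1024 -> 1008, 1448 -> 1440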
+        return np_img

     def run_processor(self, image: Image.Image) -> Image.Image:
         np_img = np.array(image, dtype=np.uint8)
-        processed_np_image = self.tile_resample(
-            np_img,
-            # res=self.tile_size,
-            down_sampling_rate=self.down_sampling_rate,
-        )
+
+        if self.mode == "regular":
+            processed_np_image = self._regular_resample(np_img)
+        elif self.mode == "blur":
+            processed_np_image = self._blur_resample(np_img)
+        elif self.mode == "super":
+            processed_np_image = self._super_resample(np_img)
+        else:
+            raise ValueError(f"Invalid mode: {self.mode}")
+
         processed_image = Image.fromarray(processed_np_image)
         return processed_image

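For local testing, a rough smoke test of the three modes might look like the following. It assumes the invocation can be constructed standalone with an explicit id (normally the graph executor supplies one), which may not match how the app actually instantiates nodes:

    from PIL import Image

    image = Image.open("input.png").convert("RGB")
    for mode in ("regular", "blur", "super"):
        node = TileResamplerProcessorInvocation(id="smoke-test", mode=mode, down_sampling_rate=2.0)
        out = node.run_processor(image)
        print(mode, image.size, "->", out.size)

Note that "blur" mode is currently non-deterministic (see the review comment above), so its output varies run to run.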