-
Notifications
You must be signed in to change notification settings - Fork 2.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Update Tile Pre-Processor to support more modes #6558
base: main
Are you sure you want to change the base?
Changes from all commits
cc3dbf6
182e2fd
a21f7d7
296a860
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,13 @@ | ||
# Invocations for ControlNet image preprocessors | ||
# initial implementation by Gregg Helt, 2023 | ||
# heavily leverages controlnet_aux package: https://github.com/patrickvonplaten/controlnet_aux | ||
import random | ||
from builtins import bool, float | ||
from pathlib import Path | ||
from typing import Dict, List, Literal, Union | ||
from typing import Any, Dict, List, Literal, Union | ||
|
||
import cv2 | ||
import cv2.ximgproc | ||
import numpy as np | ||
from controlnet_aux import ( | ||
ContentShuffleDetector, | ||
|
@@ -39,6 +41,7 @@ | |
from invokeai.backend.image_util.canny import get_canny_edges | ||
from invokeai.backend.image_util.depth_anything import DEPTH_ANYTHING_MODELS, DepthAnythingDetector | ||
from invokeai.backend.image_util.dw_openpose import DWPOSE_MODELS, DWOpenposeDetector | ||
from invokeai.backend.image_util.fast_guided_filter.fast_guided_filter import FastGuidedFilter | ||
from invokeai.backend.image_util.hed import HEDProcessor | ||
from invokeai.backend.image_util.lineart import LineartProcessor | ||
from invokeai.backend.image_util.lineart_anime import LineartAnimeProcessor | ||
|
@@ -476,37 +479,86 @@ def run_processor(self, image: Image.Image) -> Image.Image: | |
title="Tile Resample Processor", | ||
tags=["controlnet", "tile"], | ||
category="controlnet", | ||
version="1.2.3", | ||
version="1.3.0", | ||
) | ||
class TileResamplerProcessorInvocation(ImageProcessorInvocation): | ||
"""Tile resampler processor""" | ||
|
||
# res: int = InputField(default=512, ge=0, le=1024, description="The pixel resolution for each tile") | ||
down_sampling_rate: float = InputField(default=1.0, ge=1.0, le=8.0, description="Down sampling rate") | ||
mode: Literal["regular", "blur", "super"] = InputField( | ||
default="regular", description="The Tile ControlNet pre-processing mode to use." | ||
) | ||
|
||
# tile_resample copied from sd-webui-controlnet/scripts/processor.py | ||
def tile_resample( | ||
self, | ||
np_img: np.ndarray, | ||
res=512, # never used? | ||
down_sampling_rate=1.0, | ||
): | ||
# referenced from | ||
# https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/37f1c4575b543fb2036e39f5763d082fdd135318/TTP_tile_preprocessor_v5.py | ||
def _apply_gaussian_blur(self, image_np: np.ndarray[Any, Any], ksize: int = 5, sigma_x: float = 1.0):
    """Return the result of ``cv2.GaussianBlur`` applied to ``image_np``.

    Args:
        image_np: Image array handed straight to ``cv2.GaussianBlur``.
        ksize: Side length of the square kernel; an even value is bumped up by one.
        sigma_x: Value forwarded as ``sigmaX``.
    """
    # ksize must be odd, so an even request is rounded up to the next odd size.
    odd_ksize = ksize if ksize % 2 else ksize + 1
    return cv2.GaussianBlur(image_np, (odd_ksize, odd_ksize), sigmaX=sigma_x)
|
||
# referenced from | ||
# https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/37f1c4575b543fb2036e39f5763d082fdd135318/TTP_tile_preprocessor_v5.py | ||
def _apply_guided_filter(self, image_np: np.ndarray[Any, Any], radius: int, eps: float, scale: int):
    """Run ``FastGuidedFilter`` over ``image_np`` and return its output.

    The same array is handed both to the ``FastGuidedFilter`` constructor and to
    its ``filter()`` call.

    Args:
        image_np: Image array to filter.
        radius: Forwarded unchanged to ``FastGuidedFilter``.
        eps: Forwarded unchanged to ``FastGuidedFilter``.
        scale: Forwarded unchanged to ``FastGuidedFilter``.
    """
    # Named `guided_filter` rather than `filter` so the builtin is not shadowed.
    guided_filter = FastGuidedFilter(image_np, radius, eps, scale)
    return guided_filter.filter(image_np)
|
||
def _regular_resample(self, np_img: np.ndarray[Any, Any]):
    """Downscale ``np_img`` by ``self.down_sampling_rate`` using area interpolation.

    The array is first passed through ``HWC3``. When the rate is below 1.1 that
    array is returned as-is, without any resizing.

    Args:
        np_img: Image array to resample.

    Returns:
        The ``HWC3`` output, resized with ``cv2.INTER_AREA`` when the rate is 1.1 or more.
    """
    np_img = HWC3(np_img)
    if self.down_sampling_rate < 1.1:
        return np_img

    # Read the size after HWC3 and only from the first two axes, so the lookup
    # does not depend on the input already having a third (channel) axis.
    height, width = np_img.shape[:2]
    new_height = int(float(height) / float(self.down_sampling_rate))
    new_width = int(float(width) / float(self.down_sampling_rate))
    return cv2.resize(np_img, (new_width, new_height), interpolation=cv2.INTER_AREA)
|
||
# referenced from | ||
# https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/37f1c4575b543fb2036e39f5763d082fdd135318/TTP_tile_preprocessor_v5.py | ||
def _blur_resample(self, np_img: np.ndarray[Any, Any]):
    """Resize ``np_img`` to roughly a 1024x1024 pixel area and soften it with randomly chosen settings.

    Steps, in order:
      1. Scale both sides by the same ratio so that width * height is about 1024 * 1024.
      2. Independently, each with a ``random.random() > 0.5`` check, apply a Gaussian
         blur and/or a guided filter using randomly picked parameters.
      3. Shrink by a random factor with ``cv2.INTER_AREA``, then scale back to the
         step-1 size with ``cv2.INTER_CUBIC``.

    Because the parameters come from the ``random`` module, the output differs from
    call to call unless that module is seeded.

    Args:
        np_img: Image array to process.

    Returns:
        The processed array at the step-1 size.
    """
    # Only the first two axes are needed for the size, so the lookup does not
    # require a channel axis to be present.
    height, width = np_img.shape[:2]

    # NOTE(review): raised in review - why resize to match a 1024x1024 area? Is this
    # SDXL-specific, and does it interact with the hard-coded ranges below?
    ratio = np.sqrt(1024.0 * 1024.0 / (width * height))
    resize_w, resize_h = int(width * ratio), int(height * ratio)
    np_img = cv2.resize(np_img, (resize_w, resize_h))

    # NOTE(review): raised in review - all of the random sampling in this block makes
    # it look like experimental code.
    blur_strength = random.choice([i / 10.0 for i in range(10, 201, 2)])  # 1.0 .. 20.0
    radius = random.choice(range(1, 40, 2))  # odd values 1 .. 39
    eps = random.choice([i / 1000.0 for i in range(1, 101, 2)])  # 0.001 .. 0.099
    scale_factor = random.choice([i / 10.0 for i in range(10, 181, 5)])  # 1.0 .. 18.0

    if random.random() > 0.5:
        np_img = self._apply_gaussian_blur(np_img, ksize=int(blur_strength), sigma_x=blur_strength / 2)

    if random.random() > 0.5:
        np_img = self._apply_guided_filter(np_img, radius, eps, int(scale_factor))

    # Throw away detail by shrinking, then return to the working size.
    shrunk_size = (int(resize_w / scale_factor), int(resize_h / scale_factor))
    np_img = cv2.resize(np_img, shrunk_size, interpolation=cv2.INTER_AREA)
    np_img = cv2.resize(np_img, (resize_w, resize_h), interpolation=cv2.INTER_CUBIC)
    return np_img
|
||
def _super_resample(self, np_img: np.ndarray[Any, Any]):
    """Resize ``np_img`` to roughly a 1024x1024 pixel area, with each side a multiple of 48.

    Both sides are scaled by the same ratio and then rounded down to the nearest
    multiple of 48 before the resize.

    Args:
        np_img: Image array to resize.

    Returns:
        The resized array.
    """
    # Only the first two axes are needed for the size, so the lookup does not
    # require a channel axis to be present.
    height, width = np_img.shape[:2]

    # NOTE(review): raised in review - why 1024x1024, and why force each side to be
    # a multiple of 48?
    ratio = np.sqrt(1024.0 * 1024.0 / (width * height))
    resize_w = int(width * ratio) // 48 * 48
    resize_h = int(height * ratio) // 48 * 48
    return cv2.resize(np_img, (resize_w, resize_h))
|
||
def run_processor(self, image: Image.Image) -> Image.Image:
    """Apply the pre-processing selected by ``self.mode`` to ``image``.

    Args:
        image: Input image; converted to a ``uint8`` array before processing.

    Returns:
        A new image built from the processed array.

    Raises:
        ValueError: If ``self.mode`` is not "regular", "blur" or "super".
    """
    source_pixels = np.array(image, dtype=np.uint8)

    # One handler per supported mode.
    resamplers = {
        "regular": self._regular_resample,
        "blur": self._blur_resample,
        "super": self._super_resample,
    }
    resample = resamplers.get(self.mode)
    if resample is None:
        raise ValueError(f"Invalid mode: {self.mode}")

    return Image.fromarray(resample(source_pixels))
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function does not seem to match anything in the linked file. Is the link wrong?