Merge branch 'release/0.3.6'
ControlNet committed Jan 9, 2024
2 parents 1c058ee + d1e9e31 · commit 4575398
Showing 22 changed files with 191 additions and 70 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/release.yml
@@ -55,13 +55,18 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.8", "3.9", "3.10", "3.11"]
-        torch-version: ["1.9.*", "1.10.*", "1.11.*", "1.12.*", "1.13.*", "2.0.*"]
-        lightning-version: ["2.0.*"]
+        torch-version: ["1.9.*", "1.10.*", "1.11.*", "1.12.*", "1.13.*", "2.0.*", "2.1.*"]
+        lightning-version: ["2.0.*", "2.1.*"]
         exclude:
           - python-version: "3.10"
             torch-version: "1.8.*"
           - python-version: "3.10"
             torch-version: "1.9.*"
+          - python-version: "3.10"
+            torch-version: "1.10.*"
+
+          - python-version: "3.11"
+            torch-version: "1.8.*"
           - python-version: "3.11"
             torch-version: "1.9.*"
           - python-version: "3.11"
21 changes: 13 additions & 8 deletions src/tensorneko/arch/binary_classifier.py
@@ -1,18 +1,17 @@
-from abc import ABC
-from typing import Optional, Union, Sequence, Dict
+from typing import Optional, Union, Sequence, Dict, Any
 
 from torch import Tensor
-from torch.nn import BCEWithLogitsLoss
+from torch.nn import BCEWithLogitsLoss, Module
 from torch.optim import Adam
 from torchmetrics import Accuracy, F1Score, AUROC
 
 from ..neko_model import NekoModel
 
 
-class BinaryClassifier(NekoModel, ABC):
+class BinaryClassifier(NekoModel):
 
-    def __init__(self, model=None, learning_rate: float = 1e-4, distributed: bool = False):
-        super().__init__()
+    def __init__(self, name, model: Module, learning_rate: float = 1e-4, distributed: bool = False):
+        super().__init__(name)
         self.save_hyperparameters()
         self.model = model
         self.learning_rate = learning_rate
@@ -23,8 +22,10 @@ def __init__(self, model=None, learning_rate: float = 1e-4, distributed: bool =
         self.auc_fn = AUROC(task="binary")
 
     @classmethod
-    def from_module(cls, model, learning_rate: float = 1e-4, distributed=False):
-        return cls(model, learning_rate, distributed)
+    def from_module(cls, model: Module, learning_rate: float = 1e-4, name: str = "binary_classifier",
+                    distributed: bool = False
+                    ):
+        return cls(name, model, learning_rate, distributed)
 
     def forward(self, x):
         return self.model(x)
@@ -49,6 +50,10 @@ def validation_step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]] = Non
             ) -> Dict[str, Tensor]:
         return self.step(batch)
 
+    def predict_step(self, batch: Tensor, batch_idx: int, dataloader_idx: Optional[int] = None) -> Any:
+        x, y = batch
+        return self(x)
+
     def configure_optimizers(self):
         optimizer = Adam(self.parameters(), lr=self.learning_rate)
         return [optimizer]
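The constructor now takes a model name first and a required torch Module. A minimal usage sketch under those assumptions; the Linear model and the "demo_classifier" name are hypothetical, not part of this commit:

    import torch
    from torch.nn import Linear
    from tensorneko.arch.binary_classifier import BinaryClassifier

    # Hypothetical sketch: `from_module` keeps the old call shape but now
    # threads a `name` through to NekoModel.
    clf = BinaryClassifier.from_module(Linear(16, 1), learning_rate=1e-3, name="demo_classifier")

    x = torch.randn(4, 16)
    logits = clf(x)  # forward() delegates to the wrapped module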
61 changes: 49 additions & 12 deletions src/tensorneko/callback/gpu_stats_logger.py
@@ -1,31 +1,68 @@
+from typing import Any
+
 from lightning.pytorch import Callback, Trainer, LightningModule
+from lightning.pytorch.utilities.types import STEP_OUTPUT
 
 
 class GpuStatsLogger(Callback):
     """Log GPU stats for each training epoch"""
 
-    def __init__(self, delay: float = 0.5):
+    def __init__(self, delay: float = 0.5, on_epoch: bool = True, on_step: bool = False):
         try:
             from gpumonitor.monitor import GPUStatMonitor
         except ImportError:
             raise ImportError("gpumonitor is required to use GPUStatsLogger")
 
-        self.monitor = GPUStatMonitor(delay=delay)
+        self.monitor_epoch = GPUStatMonitor(delay=delay) if on_epoch else None
+        self.monitor_step = GPUStatMonitor(delay=delay) if on_step else None
+        self.on_epoch = on_epoch
+        self.on_step = on_step
+        assert self.on_epoch or self.on_step, "on_epoch and on_step cannot be both False"
 
     def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
-        self.monitor.reset()
+        if not self.on_epoch:
+            return
+        self.monitor_epoch.reset()
 
     def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
-        for gpu in self.monitor.average_stats.gpus:
+        if not self.on_epoch:
+            return
+        for gpu in self.monitor_epoch.average_stats.gpus:
+            logged_info = {
+                f"gpu{gpu.index}_memory_used_epoch": gpu.memory_used / 1024,
+                f"gpu{gpu.index}_memory_total_epoch": gpu.memory_total / 1024,
+                f"gpu{gpu.index}_memory_util_epoch": gpu.memory_used / gpu.memory_total,
+                f"gpu{gpu.index}_temperature_epoch": float(gpu.temperature),
+                f"gpu{gpu.index}_utilization_epoch": gpu.utilization / 100,
+                f"gpu{gpu.index}_power_draw_epoch": float(gpu.power_draw),
+                f"gpu{gpu.index}_power_percentage_epoch": gpu.power_draw / gpu.power_limit,
+                f"gpu{gpu.index}_fan_speed_epoch": float(gpu.fan_speed) if gpu.fan_speed is not None else 0.,
+            }
+            pl_module.logger.log_metrics(logged_info, step=trainer.global_step)
+            pl_module.log_dict(logged_info, logger=False, sync_dist=pl_module.distributed)
+
+    def on_train_batch_start(
+            self, trainer: Trainer, pl_module: LightningModule, batch: Any, batch_idx: int
+    ) -> None:
+        if not self.on_step:
+            return
+        self.monitor_step.reset()
+
+    def on_train_batch_end(
+            self, trainer: Trainer, pl_module: LightningModule, outputs: STEP_OUTPUT, batch: Any, batch_idx: int
+    ) -> None:
+        if not self.on_step:
+            return
+        for gpu in self.monitor_step.average_stats.gpus:
             logged_info = {
-                f"gpu{gpu.index}_memory_used": gpu.memory_used / 1024,
-                f"gpu{gpu.index}_memory_total": gpu.memory_total / 1024,
-                f"gpu{gpu.index}_memory_util": gpu.memory_used / gpu.memory_total,
-                f"gpu{gpu.index}_temperature": float(gpu.temperature),
-                f"gpu{gpu.index}_utilization": gpu.utilization / 100,
-                f"gpu{gpu.index}_power_draw": float(gpu.power_draw),
-                f"gpu{gpu.index}_power_percentage": gpu.power_draw / gpu.power_limit,
-                f"gpu{gpu.index}_fan_speed": float(gpu.fan_speed) if gpu.fan_speed is not None else 0.,
+                f"gpu{gpu.index}_memory_used_step": gpu.memory_used / 1024,
+                f"gpu{gpu.index}_memory_total_step": gpu.memory_total / 1024,
+                f"gpu{gpu.index}_memory_util_step": gpu.memory_used / gpu.memory_total,
+                f"gpu{gpu.index}_temperature_step": float(gpu.temperature),
+                f"gpu{gpu.index}_utilization_step": gpu.utilization / 100,
+                f"gpu{gpu.index}_power_draw_step": float(gpu.power_draw),
+                f"gpu{gpu.index}_power_percentage_step": gpu.power_draw / gpu.power_limit,
+                f"gpu{gpu.index}_fan_speed_step": float(gpu.fan_speed) if gpu.fan_speed is not None else 0.,
             }
             pl_module.logger.log_metrics(logged_info, step=trainer.global_step)
            pl_module.log_dict(logged_info, logger=False, sync_dist=pl_module.distributed)
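With the new flags, per-step monitoring uses a second GPUStatMonitor so epoch averages are not reset by batch hooks. A minimal sketch of attaching the callback, assuming gpumonitor is installed:

    from lightning.pytorch import Trainer
    from tensorneko.callback import GpuStatsLogger

    # At least one of on_epoch/on_step must be True, or the constructor's assert fires.
    # Step metrics are suffixed `_step`, epoch metrics `_epoch`.
    trainer = Trainer(callbacks=[GpuStatsLogger(delay=0.5, on_epoch=True, on_step=True)])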
28 changes: 25 additions & 3 deletions src/tensorneko/callback/system_stats_logger.py
@@ -1,22 +1,44 @@
+from typing import Any
+
 from lightning.pytorch import Callback, Trainer, LightningModule
+from lightning.pytorch.utilities.types import STEP_OUTPUT
 
 
 class SystemStatsLogger(Callback):
     """Log system stats for each training epoch"""
 
-    def __init__(self):
+    def __init__(self, on_epoch: bool = True, on_step: bool = False):
         try:
             import psutil
         except ImportError:
             raise ImportError("psutil is required to use SystemStatsLogger")
         self.psutil = psutil
+        self.on_epoch = on_epoch
+        self.on_step = on_step
+        assert self.on_epoch or self.on_step, "on_epoch and on_step cannot be both False"
 
     def on_train_epoch_end(self, trainer: Trainer, pl_module: LightningModule) -> None:
+        if not self.on_epoch:
+            return
+        cpu_usage = self.psutil.cpu_percent()
+        memory_usage = self.psutil.virtual_memory().percent
+        logged_info = {
+            "cpu_usage_epoch": cpu_usage,
+            "memory_usage_epoch": memory_usage
+        }
+        pl_module.logger.log_metrics(logged_info, step=trainer.global_step)
+        pl_module.log_dict(logged_info, logger=False, sync_dist=pl_module.distributed)
+
+    def on_train_batch_end(
+            self, trainer: Trainer, pl_module: LightningModule, outputs: STEP_OUTPUT, batch: Any, batch_idx: int
+    ) -> None:
+        if not self.on_step:
+            return
         cpu_usage = self.psutil.cpu_percent()
         memory_usage = self.psutil.virtual_memory().percent
         logged_info = {
-            "cpu_usage": cpu_usage,
-            "memory_usage": memory_usage
+            "cpu_usage_step": cpu_usage,
+            "memory_usage_step": memory_usage
         }
         pl_module.logger.log_metrics(logged_info, step=trainer.global_step)
         pl_module.log_dict(logged_info, logger=False, sync_dist=pl_module.distributed)
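The same on_epoch/on_step split applies here, with psutil as the backend. A minimal sketch, assuming psutil is installed:

    from lightning.pytorch import Trainer
    from tensorneko.callback import SystemStatsLogger

    # Logs {"cpu_usage_epoch", "memory_usage_epoch"} at epoch end and
    # {"cpu_usage_step", "memory_usage_step"} at batch end when enabled.
    trainer = Trainer(callbacks=[SystemStatsLogger(on_epoch=True, on_step=True)])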
4 changes: 2 additions & 2 deletions src/tensorneko/evaluation/fid.py
@@ -90,8 +90,8 @@ def compute(self, batch_size=128, num_workers=0, progress_bar: bool = False) ->
 
         if progress_bar:
             tqdm = import_tqdm_auto().tqdm
-            pred = tqdm(total=len(pred), desc="Forward predicted features")
-            true = tqdm(total=len(true), desc="Forward ground truth features")
+            pred = tqdm(pred, total=len(pred), desc="Forward predicted features")
+            true = tqdm(true, total=len(true), desc="Forward ground truth features")
 
         for batch in pred:
             self.fid.update(batch.to(self.device), real=False)
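This fix matters because tqdm(total=n) alone builds a bare progress bar with no underlying iterable, so the following for-loops would fail; passing the iterable as the first argument wraps it and yields its items. A standalone sketch of the behaviour, assuming tqdm is installed:

    from tqdm.auto import tqdm

    data = [1, 2, 3]
    # tqdm wraps the iterable and yields its items while updating the bar;
    # a tqdm built from total= only has nothing to yield.
    bar = tqdm(data, total=len(data), desc="Forward features")
    assert list(bar) == [1, 2, 3]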
10 changes: 7 additions & 3 deletions src/tensorneko/neko_trainer.py
@@ -3,16 +3,20 @@
 from time import time
 from typing import Optional, Union, List, Dict
 
+from lightning.fabric.plugins.precision.precision import _PRECISION_INPUT
+from lightning.fabric.utilities.types import _PATH
 from lightning.pytorch import Trainer, Callback
 from lightning.pytorch.accelerators import Accelerator
 from lightning.pytorch.callbacks import ModelCheckpoint, Checkpoint
 from lightning.pytorch.loggers import Logger, TensorBoardLogger
-from lightning.pytorch.plugins import PLUGIN_INPUT
 from lightning.pytorch.profilers import Profiler
 from lightning.pytorch.strategies import Strategy
 from lightning.pytorch.trainer.connectors.accelerator_connector import _LITERAL_WARN
-from lightning.fabric.plugins.precision.precision import _PRECISION_INPUT
-from lightning.fabric.utilities.types import _PATH
 
+try:
+    from lightning.pytorch.plugins import PLUGIN_INPUT
+except ImportError:
+    from lightning.pytorch.plugins import _PLUGIN_INPUT as PLUGIN_INPUT
+
 from .callback import NilCallback, LrLogger, EpochNumLogger, EpochTimeLogger, GpuStatsLogger, SystemStatsLogger
 
3 changes: 2 additions & 1 deletion src/tensorneko/util/__init__.py
@@ -9,7 +9,7 @@
 from .configuration import Configuration
 from .misc import reduce_dict_by, summarize_dict_by, with_printed_shape, is_bad_num, count_parameters, compose, \
     generate_inf_seq, listdir, with_printed, ifelse, dict_add, as_list, identity, list_to_dict, circular_pad, \
-    load_py, try_until_success
+    load_py, try_until_success, sample_indexes
 from .misc import get_tensorneko_path
 from .dispatched_misc import sparse2binary, binary2sparse
 from .reproducibility import Seed
@@ -71,6 +71,7 @@
     "circular_pad",
     "load_py",
     "try_until_success",
+    "sample_indexes",
     "download_file",
     "WindowMerger",
 ]
8 changes: 5 additions & 3 deletions src/tensorneko/util/configuration.py
@@ -1,10 +1,12 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import Any
+from typing import Generic
 
+from tensorneko_util.util.type import T
+
 
-class Configuration(ABC):
+class Configuration(ABC, Generic[T]):
     """
     Configuration base abstract class.
@@ -55,7 +57,7 @@ def __iter__(self):
         return iter((*self.args, *self.kwargs.values()))
 
     @abstractmethod
-    def build(self) -> Any:
+    def build(self) -> T:
         """
         A method to build an object.
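With Configuration generic in T, subclasses can declare what build() returns. A hypothetical subclass sketch, assuming the base class stores positional and keyword arguments in self.args/self.kwargs as its __iter__ suggests; LinearConfig is illustrative, not part of the library:

    from torch.nn import Linear
    from tensorneko.util import Configuration

    class LinearConfig(Configuration[Linear]):
        def build(self) -> Linear:  # type checkers now see the concrete return type
            return Linear(*self.args, **self.kwargs)

    layer = LinearConfig(16, 1).build()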
3 changes: 2 additions & 1 deletion src/tensorneko/util/misc.py
@@ -9,7 +9,7 @@
 from torch.nn import Module
 
 from tensorneko_util.util.misc import generate_inf_seq, listdir, with_printed, ifelse, dict_add, as_list, \
-    identity, list_to_dict, compose, circular_pad, load_py, try_until_success
+    identity, list_to_dict, compose, circular_pad, load_py, try_until_success, sample_indexes
 from .type import T, A
 
 
@@ -165,3 +165,4 @@ def get_tensorneko_path() -> str:
 circular_pad = circular_pad
 load_py = load_py
 try_until_success = try_until_success
+sample_indexes = sample_indexes
1 change: 0 additions & 1 deletion src/tensorneko/util/type.py
@@ -1,4 +1,3 @@
-from enum import Enum
 from typing import Callable, Union, List, Tuple, TypeVar
 
 import numpy as np
19 changes: 19 additions & 0 deletions src/tensorneko_util/backend/_tqdm.py
@@ -1,4 +1,5 @@
 _is_tqdm_available = None
+_is_mita_tqdm_available = None
 
 
 def import_tqdm():
@@ -35,3 +36,21 @@ def import_tqdm_auto():
             return auto
     else:
         raise ImportError("tqdm is not installed. Please install it by `pip install tqdm`")
+
+
+def import_mita_tqdm():
+    global _is_mita_tqdm_available
+    if _is_mita_tqdm_available is None:
+        try:
+            from mita_client import mita_tqdm
+            _is_mita_tqdm_available = True
+            return mita_tqdm
+        except ImportError:
+            _is_mita_tqdm_available = False
+            raise ImportError("mita_client is not installed. Please install it by `pip install mita_client`")
+    else:
+        if _is_mita_tqdm_available:
+            from mita_client import mita_tqdm
+            return mita_tqdm
+        else:
+            raise ImportError("mita_client is not installed. Please install it by `pip install mita_client`")
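The guard caches availability in the module-level flag, so repeated calls skip the try/except. A hypothetical caller sketch:

    from tensorneko_util.backend._tqdm import import_mita_tqdm

    try:
        mita_tqdm = import_mita_tqdm()  # a second call takes the cached branch
    except ImportError:
        mita_tqdm = None  # mita_client not installed; degrade gracefully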
2 changes: 1 addition & 1 deletion src/tensorneko_util/preprocess/crop.py
@@ -60,7 +60,7 @@ def crop_with_padding(image: ndarray, x1: int, x2: int, y1: int, y2: int, pad_va
 
 
 @dispatch
-def crop_with_padding(image: ndarray, x1: np.int32, x2: np.int32, y1: np.int32, y2: np.int32,
+def crop_with_padding(image: ndarray, x1: ndarray, x2: ndarray, y1: ndarray, y2: ndarray,
                       pad_value: Union[int, float] = 0., batch: bool = False
                       ) -> ndarray:
     return crop_with_padding(image, int(x1), int(x2), int(y1), int(y2), pad_value, batch)
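The overload now accepts numpy arrays for the coordinates and simply casts them and re-dispatches to the int implementation. A hypothetical call sketch using 0-d arrays, assuming the dispatcher resolves on the runtime ndarray type:

    import numpy as np
    from tensorneko_util.preprocess.crop import crop_with_padding

    image = np.zeros((32, 32, 3))
    # Coordinates may fall outside the image; out-of-bounds regions get pad_value.
    x1, x2, y1, y2 = (np.asarray(v) for v in (-4, 20, -4, 20))
    cropped = crop_with_padding(image, x1, x2, y1, y2, pad_value=0)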
6 changes: 6 additions & 0 deletions src/tensorneko_util/preprocess/crop.pyi
@@ -7,3 +7,9 @@ from numpy import ndarray
 def crop_with_padding(image: ndarray, x1: int, x2: int, y1: int, y2: int, pad_value: Union[int, float] = 0.,
                       batch: bool = False
                       ) -> ndarray: ...
+
+
+@overload
+def crop_with_padding(image: ndarray, x1: ndarray, x2: ndarray, y1: ndarray, y2: ndarray,
+                      pad_value: Union[int, float] = 0., batch: bool = False
+                      ) -> ndarray: ...
3 changes: 2 additions & 1 deletion src/tensorneko_util/util/__init__.py
@@ -3,7 +3,7 @@
 from .dispatcher import dispatch
 from .fp import __, F, _, Stream, return_option, Option, Monad, Eval, Seq, AbstractSeq, curry
 from .misc import generate_inf_seq, compose, listdir, with_printed, ifelse, dict_add, as_list, identity, list_to_dict, \
-    get_tensorneko_util_path, circular_pad, load_py, try_until_success
+    get_tensorneko_util_path, circular_pad, load_py, try_until_success, sample_indexes
 from .dispatched_misc import sparse2binary, binary2sparse
 from .ref import ref
 from .timer import Timer
@@ -50,6 +50,7 @@
     "circular_pad",
     "load_py",
     "try_until_success",
+    "sample_indexes",
     "download_file",
     "WindowMerger",
 ]
6 changes: 5 additions & 1 deletion src/tensorneko_util/util/dispatcher.py
@@ -2,7 +2,7 @@
 
 import inspect
 import warnings
-from typing import Callable, Dict, List, Generic, Sequence, Optional
+from typing import Callable, Dict, List, Generic, Sequence, Optional, TYPE_CHECKING, overload
 
 from .type import T
 
@@ -223,3 +223,7 @@ def add(x: str, y: str) -> str:
 
 
 dispatch = DispatcherDecorator()
+
+# used for type hint, only `typing.overload` can be used for type hint
+if TYPE_CHECKING:
+    dispatch = overload
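At runtime, dispatch stays the dispatcher instance, while static checkers see typing.overload and accept multiple annotated signatures for the same name. A sketch of the pattern; the double functions are illustrative, not from the library:

    from tensorneko_util.util import dispatch

    @dispatch
    def double(x: int) -> int:
        return x * 2

    @dispatch
    def double(x: str) -> str:
        return x + x

    # The dispatcher picks the implementation by argument type at runtime.
    assert double(2) == 4 and double("ab") == "abab"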
9 changes: 7 additions & 2 deletions src/tensorneko_util/util/eventbus/event.py
@@ -6,9 +6,10 @@
 
 class EventMeta(type):
 
-    def __call__(cls, *args, bus=EventBus.default, **kwargs):
+    def __call__(cls, *args, **kwargs):
         event = super().__call__(*args, **kwargs)
-        bus.emit(event, blocking=_blocking_flag)
+        event.bus = kwargs.get("bus", EventBus.default)
+        event.bus.emit(event, blocking=_blocking_flag)
         return event
 
 
@@ -29,5 +30,9 @@ def no_blocking():
 class Event(metaclass=EventMeta):
     bus: EventBus
 
+    def __init_subclass__(cls, **kwargs):
+        if "bus" in cls.__init__.__annotations__:
+            raise TypeError("`bus` parameter is preserved. It should not be annotated in the __init__ method")
+
 
 E = TypeVar("E", bound=Event)
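Events now resolve their bus from an optional bus keyword at construction time and emit themselves as a side effect of instantiation. A hypothetical event sketch; TrainingFinished is illustrative, not from the library:

    from tensorneko_util.util.eventbus.event import Event

    class TrainingFinished(Event):
        def __init__(self, step: int):
            self.step = step  # annotating a `bus` parameter here would raise TypeError

    event = TrainingFinished(3)  # emitted on EventBus.default when constructed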