Commit

Merge branch 'release/0.3.5'

ControlNet committed Nov 4, 2023
2 parents 3b41c28 + a18379d commit 0c81bb8
Showing 22 changed files with 361 additions and 13 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/unittest.yml
@@ -87,13 +87,18 @@ jobs:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]
-        torch-version: ["1.9.*", "1.10.*", "1.11.*", "1.12.*", "1.13.*", "2.0.*"]
-        lightning-version: ["2.0.*"]
+        torch-version: ["1.9.*", "1.10.*", "1.11.*", "1.12.*", "1.13.*", "2.0.*", "2.1.*"]
+        lightning-version: ["2.0.*", "2.1.*"]
        exclude:
          - python-version: "3.10"
            torch-version: "1.8.*"
          - python-version: "3.10"
            torch-version: "1.9.*"
          - python-version: "3.10"
            torch-version: "1.10.*"
          - python-version: "3.11"
            torch-version: "1.8.*"
          - python-version: "3.11"
            torch-version: "1.9.*"
          - python-version: "3.11"
2 changes: 1 addition & 1 deletion README.md
@@ -12,7 +12,7 @@
<div align="center">
<a href="https://www.python.org/"><img src="https://img.shields.io/pypi/pyversions/tensorneko?style=flat-square"></a>
<a href="https://pytorch.org/"><img src="https://img.shields.io/badge/PyTorch-%3E%3D1.9.0-EE4C2C?style=flat-square&logo=pytorch"></a>
<a href="https://www.pytorchlightning.ai/"><img src="https://img.shields.io/badge/Lightning-2.0.*-792EE5?style=flat-square&logo=lightning"></a>
<a href="https://www.pytorchlightning.ai/"><img src="https://img.shields.io/badge/Lightning-2.0.*/2.1.*-792EE5?style=flat-square&logo=lightning"></a>
</div>

<div align="center">
2 changes: 1 addition & 1 deletion requirements.txt
@@ -3,7 +3,7 @@ torchaudio >= 0.9.0
torchvision >= 0.10.0
torchmetrics >= 0.7.3
tensorboard >= 2.0.0
-lightning == 2.0.*
+lightning >= 2.0, < 2.2
pillow >= 8.1
av >= 8.0.3
pysoundfile >= 0.9.0; platform_system == "Windows"
2 changes: 1 addition & 1 deletion requirements_test.txt
@@ -2,7 +2,7 @@ torch >= 1.9.0
torchaudio >= 0.9.0
torchvision >= 0.10.0
torchmetrics >= 0.7.3, < 0.11.0
-lightning == 2.0.*
+lightning >= 2.0, < 2.2
tensorboard >= 2.0.0
pillow >= 8.1
av >= 8.0.3
4 changes: 3 additions & 1 deletion src/tensorneko/arch/__init__.py
@@ -2,10 +2,12 @@
from .vqvae import VQVAE
from .wgan import WGAN
from .auto_encoder import AutoEncoder
+from .binary_classifier import BinaryClassifier

__all__ = [
    "GAN",
    "VQVAE",
    "WGAN",
-    "AutoEncoder"
+    "AutoEncoder",
+    "BinaryClassifier",
]
54 changes: 54 additions & 0 deletions src/tensorneko/arch/binary_classifier.py
@@ -0,0 +1,54 @@
from abc import ABC
from typing import Optional, Union, Sequence, Dict

from torch import Tensor
from torch.nn import BCEWithLogitsLoss
from torch.optim import Adam
from torchmetrics import Accuracy, F1Score, AUROC

from ..neko_model import NekoModel


class BinaryClassifier(NekoModel, ABC):

    def __init__(self, model=None, learning_rate: float = 1e-4, distributed: bool = False):
        super().__init__()
        self.save_hyperparameters()
        self.model = model
        self.learning_rate = learning_rate
        self.distributed = distributed
        self.loss_fn = BCEWithLogitsLoss()
        self.acc_fn = Accuracy(task="binary")
        self.f1_fn = F1Score(task="binary")
        self.auc_fn = AUROC(task="binary")

    @classmethod
    def from_module(cls, model, learning_rate: float = 1e-4, distributed=False):
        return cls(model, learning_rate, distributed)

    def forward(self, x):
        return self.model(x)

    def step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]]) -> Dict[str, Tensor]:
        x, y = batch
        y_hat = self(x).squeeze(1)
        loss = self.loss_fn(y_hat, y)
        prob = y_hat.sigmoid()
        acc = self.acc_fn(prob, y)
        f1 = self.f1_fn(prob, y)
        auc = self.auc_fn(prob, y)
        return {"loss": loss, "acc": acc, "f1": f1, "auc": auc}

    def training_step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]] = None, batch_idx: Optional[int] = None,
                      optimizer_idx: Optional[int] = None, hiddens: Optional[Tensor] = None
                      ) -> Dict[str, Tensor]:
        return self.step(batch)

    def validation_step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]] = None, batch_idx: Optional[int] = None,
                        dataloader_idx: Optional[int] = None
                        ) -> Dict[str, Tensor]:
        return self.step(batch)

    def configure_optimizers(self):
        optimizer = Adam(self.parameters(), lr=self.learning_rate)
        return [optimizer]
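
For context, a minimal sketch of how the new BinaryClassifier is meant to be wired up. The backbone, feature sizes, and data below are illustrative assumptions, not part of the commit:

import torch
from torch import nn
from tensorneko.arch import BinaryClassifier

# hypothetical backbone: any module mapping a batch of features to one logit per sample
backbone = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 1))
clf = BinaryClassifier.from_module(backbone, learning_rate=1e-4)

logits = clf(torch.randn(8, 16))  # forward pass -> shape (8, 1)
# training_step / validation_step expect (x, y) batches and return
# {"loss": ..., "acc": ..., "f1": ..., "auc": ...} computed in step()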
6 changes: 6 additions & 0 deletions src/tensorneko/dataset/__init__.py
@@ -1,5 +1,11 @@
from .round_robin_dataset import RoundRobinDataset
from .nested_dataset import NestedDataset
from .list_dataset import ListDataset
from . import sampler

__all__ = [
    "RoundRobinDataset",
    "NestedDataset",
    "ListDataset",
    "sampler"
]
19 changes: 19 additions & 0 deletions src/tensorneko/dataset/list_dataset.py
@@ -0,0 +1,19 @@
from typing import List

from torch.utils.data.dataset import Dataset, T_co


class ListDataset(Dataset[T_co]):
    """
    A dataset wrapping a list of data.
    """

    def __init__(self, data: List[T_co]):
        super().__init__()
        self.data = data

    def __getitem__(self, index: int) -> T_co:
        return self.data[index]

    def __len__(self):
        return len(self.data)
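
A quick usage sketch (the sample data is made up for illustration):

from torch.utils.data import DataLoader
from tensorneko.dataset import ListDataset

ds = ListDataset(["a", "b", "c", "d"])
print(len(ds), ds[0])      # 4 a
loader = DataLoader(ds, batch_size=2)
print(next(iter(loader)))  # ['a', 'b']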
2 changes: 1 addition & 1 deletion src/tensorneko/dataset/round_robin_dataset.py
@@ -4,7 +4,7 @@
from torch.utils.data import Dataset
from torch.utils.data.dataset import T_co

-from tensorneko.util import circular_pad
+from ..util import circular_pad


class RoundRobinDataset(Dataset[T_co]):
5 changes: 5 additions & 0 deletions src/tensorneko/dataset/sampler/__init__.py
@@ -0,0 +1,5 @@
from .sequential_iter_sampler import SequentialIterSampler

__all__ = [
    "SequentialIterSampler"
]
26 changes: 26 additions & 0 deletions src/tensorneko/dataset/sampler/sequential_iter_sampler.py
@@ -0,0 +1,26 @@
from typing import Sized

from torch.utils.data.sampler import Sampler, T_co


class SequentialIterSampler(Sampler[T_co]):
    """
    Splits large-scale data into small subsets, using one subset per epoch.
    For example, if the dataset size is 1M and num_samples = 1000, each epoch will use only 1000 samples, and
    the next epoch will use the next 1000 samples.
    """

    def __init__(self, data_source: Sized, num_samples: int):
        super().__init__(data_source)
        self.data_source = data_source
        self.num_samples = num_samples
        self.total_size = len(data_source)
        self.current_position = 0

    def __iter__(self):
        yield from map(lambda x: x % self.total_size,
                       range(self.current_position, self.current_position + self.num_samples))
        self.current_position = (self.current_position + self.num_samples) % self.total_size

    def __len__(self):
        return self.num_samples
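
A sketch of the behaviour described in the docstring, using toy sizes (a real use case would pass a much larger dataset):

from torch.utils.data import DataLoader
from tensorneko.dataset import ListDataset
from tensorneko.dataset.sampler import SequentialIterSampler

ds = ListDataset(list(range(10)))  # stand-in for a large dataset
loader = DataLoader(ds, sampler=SequentialIterSampler(ds, num_samples=4), batch_size=2)

for epoch in range(3):
    print([batch.tolist() for batch in loader])
# epoch 0: [[0, 1], [2, 3]]
# epoch 1: [[4, 5], [6, 7]]
# epoch 2: [[8, 9], [0, 1]]  (wraps around via the modulo)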
2 changes: 2 additions & 0 deletions src/tensorneko/evaluation/__init__.py
@@ -1,6 +1,7 @@
from .iou import iou_1d, iou_2d
from .psnr import psnr_video, psnr_image
from .ssim import ssim_video, ssim_image
+from .secs import secs
from .fid import FID

__all__ = [
@@ -10,5 +11,6 @@
    "psnr_image",
    "ssim_video",
    "ssim_image",
+    "secs",
    "FID",
]
42 changes: 39 additions & 3 deletions src/tensorneko/evaluation/fid.py
@@ -22,6 +22,34 @@


class FID:
    """
    Calculate Fréchet inception distance based on torchmetrics. Requires the library "torch-fidelity".

    Args:
        device (``str`` | :class:`~torch.device`, optional): Device to run the metric. Default: ``"cpu"``.

    Example::

        from tensorneko.evaluation import FID

        fid = FID("cuda")

        # add predicted and real images
        fid.add_pred_image("path/to/pred/image1.png")
        fid.add_pred_image("path/to/pred/image2.png")
        fid.add_true_image("path/to/true/image1.png")
        fid.add_true_image("path/to/true/image2.png")

        # add predicted and real videos
        fid.add_pred_video("path/to/pred/video1.mp4")
        fid.add_pred_video("path/to/pred/video2.mp4")
        fid.add_true_video("path/to/true/video1.mp4")
        fid.add_true_video("path/to/true/video2.mp4")

        # compute FID
        fid_score = fid.compute(batch_size=128, num_workers=8, progress_bar=True)
        print(fid_score)
    """

    def __init__(self, device: Union[str, Device] = "cpu"):
        self.device = torch.device(device)
@@ -56,14 +84,14 @@ def cpu(self) -> FID:
    def cuda(self) -> FID:
        return self.to("cuda")

-    def compute(self, batch_size=128, num_workers=8, progress_bar: bool = True) -> float:
+    def compute(self, batch_size=128, num_workers=0, progress_bar: bool = False) -> float:
        pred = torch.utils.data.DataLoader(self.pred_data, batch_size=batch_size, num_workers=num_workers)
        true = torch.utils.data.DataLoader(self.true_data, batch_size=batch_size, num_workers=num_workers)

        if progress_bar:
            tqdm = import_tqdm_auto().tqdm
-            pred = tqdm(pred, desc="Forward predicted features")
-            true = tqdm(true, desc="Forward ground truth features")
+            pred = tqdm(total=len(pred), desc="Forward predicted features")
+            true = tqdm(total=len(true), desc="Forward ground truth features")

        for batch in pred:
            self.fid.update(batch.to(self.device), real=False)
@@ -72,6 +100,11 @@ def compute(self, batch_size=128, num_workers=8, progress_bar: bool = True) -> float:

        return self.fid.compute().item()

+    def reset(self):
+        self.pred_data = _FIDDataset()
+        self.true_data = _FIDDataset()
+        self.fid.reset()


@dataclass
class _FIDEntry:
Expand Down Expand Up @@ -104,6 +137,7 @@ def add_video(self, path: str):
            raise RuntimeError("Cannot open video file.")
        n_frames = int(cap.get(self.cv2.CAP_PROP_FRAME_COUNT))
        self.length += n_frames
+        cap.release()

    @staticmethod
    def _preprocess_image(image: Tensor) -> Tensor:
@@ -130,6 +164,8 @@ def _read_video(self, path: str) -> torch.Tensor:
            frame = self._preprocess_image(frame)
            yield frame
+
+        cap.release()

    def __iter__(self):
        for entry in self.content:
            if entry.type == "image":
2 changes: 1 addition & 1 deletion src/tensorneko/evaluation/iou.py
@@ -70,7 +70,7 @@ def iou_2d(proposal: Union[Tensor, ndarray], target: Union[Tensor, ndarray]) ->

    inner_x1 = torch.maximum(proposal_x1, target_x1)
    inner_y1 = torch.maximum(proposal_y1, target_y1)
-    inner_x2 = torch.minimum(proposal_x2, target_y2)
+    inner_x2 = torch.minimum(proposal_x2, target_x2)
    inner_y2 = torch.minimum(proposal_y2, target_y2)

    area_proposal = (proposal_x2 - proposal_x1) * (proposal_y2 - proposal_y1)
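The one-character fix above corrects the right edge of the intersection box, which previously used the target's y2 instead of its x2. A toy example of the effect (numbers made up): with proposal (x1, y1, x2, y2) = (0, 0, 10, 4) and target (2, 0, 8, 4), the fixed code gives inner_x2 = min(10, 8) = 8, so the intersection is 6 * 4 = 24 and IoU = 24 / 40 = 0.6; the old code gave inner_x2 = min(10, 4) = 4, shrinking the intersection to 2 * 4 = 8 and the IoU to 8 / 56 ≈ 0.14.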
58 changes: 58 additions & 0 deletions src/tensorneko/evaluation/secs.py
@@ -0,0 +1,58 @@
from numpy import ndarray
from torch import Tensor

from tensorneko_util.util import dispatch, Eval

from tensorneko_util.io import read


@Eval.later
def _secs_encoder():
    from resemblyzer import VoiceEncoder
    return VoiceEncoder()


@dispatch
def secs(pred: str, real: str) -> float:
    from resemblyzer import VoiceEncoder, preprocess_wav
    pred_audio = preprocess_wav(read.audio(pred).audio[0].numpy())
    real_audio = preprocess_wav(read.audio(real).audio[0].numpy())
    return _secs_compute(pred_audio, real_audio)


@dispatch
def secs(pred: Tensor, real: Tensor) -> float:
    return secs(pred.numpy(), real.numpy())


@dispatch
def secs(pred: ndarray, real: ndarray) -> float:
    from resemblyzer import VoiceEncoder, preprocess_wav
    if len(pred.shape) == 2:
        if pred.shape[0] == 1:
            pred = pred.squeeze(0)
        elif pred.shape[1] == 1:
            pred = pred.squeeze(1)
        else:
            raise ValueError("The input audio must be mono.")

    if len(real.shape) == 2:
        if real.shape[0] == 1:
            real = real.squeeze(0)
        elif real.shape[1] == 1:
            real = real.squeeze(1)
        else:
            raise ValueError("The input audio must be mono.")

    pred_audio = preprocess_wav(pred)
    real_audio = preprocess_wav(real)

    return _secs_compute(pred_audio, real_audio)


def _secs_compute(pred_audio: ndarray, real_audio: ndarray) -> float:
    encoder = _secs_encoder.value
    real_embed = encoder.embed_utterance(real_audio)
    pred_embed = encoder.embed_utterance(pred_audio)

    return float((real_embed * pred_embed).sum())
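
A usage sketch for the new secs metric (speaker-embedding cosine similarity computed with resemblyzer). The file paths are placeholders, and resemblyzer must be installed:

from tensorneko.evaluation import secs

# compare the speaker identity of a generated utterance with a reference recording
score = secs("pred_speech.wav", "real_speech.wav")
print(score)  # closer to 1.0 means more similar speakers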