Commit

Merge branch 'release/0.3.5'

ControlNet committed Nov 4, 2023
2 parents 3b41c28 + a18379d commit 0c81bb8
Showing 22 changed files with 361 additions and 13 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/unittest.yml
@@ -87,13 +87,18 @@ jobs:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11"]
-        torch-version: ["1.9.*", "1.10.*", "1.11.*", "1.12.*", "1.13.*", "2.0.*"]
-        lightning-version: ["2.0.*"]
+        torch-version: ["1.9.*", "1.10.*", "1.11.*", "1.12.*", "1.13.*", "2.0.*", "2.1.*"]
+        lightning-version: ["2.0.*", "2.1.*"]
        exclude:
          - python-version: "3.10"
            torch-version: "1.8.*"
          - python-version: "3.10"
            torch-version: "1.9.*"
          - python-version: "3.10"
            torch-version: "1.10.*"
          - python-version: "3.11"
            torch-version: "1.8.*"
          - python-version: "3.11"
            torch-version: "1.9.*"
          - python-version: "3.11"
2 changes: 1 addition & 1 deletion README.md
@@ -12,7 +12,7 @@
<div align="center">
<a href="https://www.python.org/"><img src="https://img.shields.io/pypi/pyversions/tensorneko?style=flat-square"></a>
<a href="https://pytorch.org/"><img src="https://img.shields.io/badge/PyTorch-%3E%3D1.9.0-EE4C2C?style=flat-square&logo=pytorch"></a>
<a href="https://www.pytorchlightning.ai/"><img src="https://img.shields.io/badge/Lightning-2.0.*-792EE5?style=flat-square&logo=lightning"></a>
<a href="https://www.pytorchlightning.ai/"><img src="https://img.shields.io/badge/Lightning-2.0.*/2.1.*-792EE5?style=flat-square&logo=lightning"></a>
</div>

<div align="center">
2 changes: 1 addition & 1 deletion requirements.txt
@@ -3,7 +3,7 @@ torchaudio >= 0.9.0
torchvision >= 0.10.0
torchmetrics >= 0.7.3
tensorboard >= 2.0.0
-lightning == 2.0.*
+lightning >= 2.0, < 2.2
pillow >= 8.1
av >= 8.0.3
pysoundfile >= 0.9.0; platform_system == "Windows"
2 changes: 1 addition & 1 deletion requirements_test.txt
@@ -2,7 +2,7 @@ torch >= 1.9.0
torchaudio >= 0.9.0
torchvision >= 0.10.0
torchmetrics >= 0.7.3, < 0.11.0
-lightning == 2.0.*
+lightning >= 2.0, < 2.2
tensorboard >= 2.0.0
pillow >= 8.1
av >= 8.0.3
4 changes: 3 additions & 1 deletion src/tensorneko/arch/__init__.py
@@ -2,10 +2,12 @@
from .vqvae import VQVAE
from .wgan import WGAN
from .auto_encoder import AutoEncoder
+from .binary_classifier import BinaryClassifier

__all__ = [
    "GAN",
    "VQVAE",
    "WGAN",
-    "AutoEncoder"
+    "AutoEncoder",
+    "BinaryClassifier",
]
54 changes: 54 additions & 0 deletions src/tensorneko/arch/binary_classifier.py
@@ -0,0 +1,54 @@
from abc import ABC
from typing import Optional, Union, Sequence, Dict

from torch import Tensor
from torch.nn import BCEWithLogitsLoss
from torch.optim import Adam
from torchmetrics import Accuracy, F1Score, AUROC

from ..neko_model import NekoModel


class BinaryClassifier(NekoModel, ABC):

    def __init__(self, model=None, learning_rate: float = 1e-4, distributed: bool = False):
        super().__init__()
        self.save_hyperparameters()
        self.model = model
        self.learning_rate = learning_rate
        self.distributed = distributed
        self.loss_fn = BCEWithLogitsLoss()
        self.acc_fn = Accuracy(task="binary")
        self.f1_fn = F1Score(task="binary")
        self.auc_fn = AUROC(task="binary")

    @classmethod
    def from_module(cls, model, learning_rate: float = 1e-4, distributed=False):
        return cls(model, learning_rate, distributed)

    def forward(self, x):
        return self.model(x)

    def step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]]) -> Dict[str, Tensor]:
        x, y = batch
        y_hat = self(x).squeeze(1)
        loss = self.loss_fn(y_hat, y)
        prob = y_hat.sigmoid()
        acc = self.acc_fn(prob, y)
        f1 = self.f1_fn(prob, y)
        auc = self.auc_fn(prob, y)
        return {"loss": loss, "acc": acc, "f1": f1, "auc": auc}

    def training_step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]] = None, batch_idx: Optional[int] = None,
                      optimizer_idx: Optional[int] = None, hiddens: Optional[Tensor] = None
                      ) -> Dict[str, Tensor]:
        return self.step(batch)

    def validation_step(self, batch: Optional[Union[Tensor, Sequence[Tensor]]] = None, batch_idx: Optional[int] = None,
                        dataloader_idx: Optional[int] = None
                        ) -> Dict[str, Tensor]:
        return self.step(batch)

    def configure_optimizers(self):
        optimizer = Adam(self.parameters(), lr=self.learning_rate)
        return [optimizer]
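
For context, a minimal sketch of how the new BinaryClassifier is meant to be wired up. The backbone, feature sizes, and data below are illustrative assumptions, not part of the commit:

import torch
from torch import nn
from tensorneko.arch import BinaryClassifier

# hypothetical backbone: any module mapping a batch of features to one logit per sample
backbone = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 1))
clf = BinaryClassifier.from_module(backbone, learning_rate=1e-4)

logits = clf(torch.randn(8, 16))  # forward pass -> shape (8, 1)
# training_step / validation_step expect (x, y) batches and return
# {"loss": ..., "acc": ..., "f1": ..., "auc": ...} computed in step()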
6 changes: 6 additions & 0 deletions src/tensorneko/dataset/__init__.py
@@ -1,5 +1,11 @@
from .round_robin_dataset import RoundRobinDataset
from .nested_dataset import NestedDataset
from .list_dataset import ListDataset
from . import sampler

__all__ = [
    "RoundRobinDataset",
    "NestedDataset",
    "ListDataset",
    "sampler"
]
19 changes: 19 additions & 0 deletions src/tensorneko/dataset/list_dataset.py
@@ -0,0 +1,19 @@
from typing import List

from torch.utils.data.dataset import Dataset, T_co


class ListDataset(Dataset[T_co]):
    """
    A dataset wrapping a list of data.
    """

    def __init__(self, data: List[T_co]):
        super().__init__()
        self.data = data

    def __getitem__(self, index: int) -> T_co:
        return self.data[index]

    def __len__(self):
        return len(self.data)
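
A quick usage sketch (the sample data is made up for illustration):

from torch.utils.data import DataLoader
from tensorneko.dataset import ListDataset

ds = ListDataset(["a", "b", "c", "d"])
print(len(ds), ds[0])      # 4 a
loader = DataLoader(ds, batch_size=2)
print(next(iter(loader)))  # ['a', 'b']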
2 changes: 1 addition & 1 deletion src/tensorneko/dataset/round_robin_dataset.py
@@ -4,7 +4,7 @@
from torch.utils.data import Dataset
from torch.utils.data.dataset import T_co

-from tensorneko.util import circular_pad
+from ..util import circular_pad


class RoundRobinDataset(Dataset[T_co]):
5 changes: 5 additions & 0 deletions src/tensorneko/dataset/sampler/__init__.py
@@ -0,0 +1,5 @@
from .sequential_iter_sampler import SequentialIterSampler

__all__ = [
    "SequentialIterSampler"
]
26 changes: 26 additions & 0 deletions src/tensorneko/dataset/sampler/sequential_iter_sampler.py
@@ -0,0 +1,26 @@
from typing import Sized

from torch.utils.data.sampler import Sampler, T_co


class SequentialIterSampler(Sampler[T_co]):
    """
    Splits large-scale data into small subsets, using one subset per epoch.
    For example, if the dataset size is 1M and num_samples = 1000, each epoch will use only 1000 samples, and
    the next epoch will use the next 1000 samples.
    """

    def __init__(self, data_source: Sized, num_samples: int):
        super().__init__(data_source)
        self.data_source = data_source
        self.num_samples = num_samples
        self.total_size = len(data_source)
        self.current_position = 0

    def __iter__(self):
        yield from map(lambda x: x % self.total_size,
                       range(self.current_position, self.current_position + self.num_samples))
        self.current_position = (self.current_position + self.num_samples) % self.total_size

    def __len__(self):
        return self.num_samples
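
A sketch of the behaviour described in the docstring, using toy sizes (a real use case would pass a much larger dataset):

from torch.utils.data import DataLoader
from tensorneko.dataset import ListDataset
from tensorneko.dataset.sampler import SequentialIterSampler

ds = ListDataset(list(range(10)))  # stand-in for a large dataset
loader = DataLoader(ds, sampler=SequentialIterSampler(ds, num_samples=4), batch_size=2)

for epoch in range(3):
    print([batch.tolist() for batch in loader])
# epoch 0: [[0, 1], [2, 3]]
# epoch 1: [[4, 5], [6, 7]]
# epoch 2: [[8, 9], [0, 1]]  (wraps around via the modulo)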
2 changes: 2 additions & 0 deletions src/tensorneko/evaluation/__init__.py
@@ -1,6 +1,7 @@
from .iou import iou_1d, iou_2d
from .psnr import psnr_video, psnr_image
from .ssim import ssim_video, ssim_image
+from .secs import secs
from .fid import FID

__all__ = [
@@ -10,5 +11,6 @@
    "psnr_image",
    "ssim_video",
    "ssim_image",
+    "secs",
    "FID",
]
42 changes: 39 additions & 3 deletions src/tensorneko/evaluation/fid.py
@@ -22,6 +22,34 @@


class FID:
    """
    Calculate Fréchet inception distance based on torchmetrics. Requires the library "torch-fidelity".

    Args:
        device (``str`` | :class:`~torch.device`, optional): Device to run the metric. Default: ``"cpu"``.

    Example::

        from tensorneko.evaluation import FID

        fid = FID("cuda")

        # add predicted and real images
        fid.add_pred_image("path/to/pred/image1.png")
        fid.add_pred_image("path/to/pred/image2.png")
        fid.add_true_image("path/to/true/image1.png")
        fid.add_true_image("path/to/true/image2.png")

        # add predicted and real videos
        fid.add_pred_video("path/to/pred/video1.mp4")
        fid.add_pred_video("path/to/pred/video2.mp4")
        fid.add_true_video("path/to/true/video1.mp4")
        fid.add_true_video("path/to/true/video2.mp4")

        # compute FID
        fid_score = fid.compute(batch_size=128, num_workers=8, progress_bar=True)
        print(fid_score)
    """

    def __init__(self, device: Union[str, Device] = "cpu"):
        self.device = torch.device(device)
@@ -56,14 +84,14 @@ def cpu(self) -> FID:
    def cuda(self) -> FID:
        return self.to("cuda")

-    def compute(self, batch_size=128, num_workers=8, progress_bar: bool = True) -> float:
+    def compute(self, batch_size=128, num_workers=0, progress_bar: bool = False) -> float:
        pred = torch.utils.data.DataLoader(self.pred_data, batch_size=batch_size, num_workers=num_workers)
        true = torch.utils.data.DataLoader(self.true_data, batch_size=batch_size, num_workers=num_workers)

        if progress_bar:
            tqdm = import_tqdm_auto().tqdm
-            pred = tqdm(pred, desc="Forward predicted features")
-            true = tqdm(true, desc="Forward ground truth features")
+            pred = tqdm(total=len(pred), desc="Forward predicted features")
+            true = tqdm(total=len(true), desc="Forward ground truth features")

        for batch in pred:
            self.fid.update(batch.to(self.device), real=False)
@@ -72,6 +100,11 @@ def compute(self, batch_size=128, num_workers=8, progress_bar: bool = True) -> float:

        return self.fid.compute().item()

+    def reset(self):
+        self.pred_data = _FIDDataset()
+        self.true_data = _FIDDataset()
+        self.fid.reset()


@dataclass
class _FIDEntry:
Expand Down Expand Up @@ -104,6 +137,7 @@ def add_video(self, path: str):
            raise RuntimeError("Cannot open video file.")
        n_frames = int(cap.get(self.cv2.CAP_PROP_FRAME_COUNT))
        self.length += n_frames
+        cap.release()

    @staticmethod
    def _preprocess_image(image: Tensor) -> Tensor:
@@ -130,6 +164,8 @@ def _read_video(self, path: str) -> torch.Tensor:
            frame = self._preprocess_image(frame)
            yield frame
+
+        cap.release()

    def __iter__(self):
        for entry in self.content:
            if entry.type == "image":
2 changes: 1 addition & 1 deletion src/tensorneko/evaluation/iou.py
@@ -70,7 +70,7 @@ def iou_2d(proposal: Union[Tensor, ndarray], target: Union[Tensor, ndarray]) ->

    inner_x1 = torch.maximum(proposal_x1, target_x1)
    inner_y1 = torch.maximum(proposal_y1, target_y1)
-    inner_x2 = torch.minimum(proposal_x2, target_y2)
+    inner_x2 = torch.minimum(proposal_x2, target_x2)
    inner_y2 = torch.minimum(proposal_y2, target_y2)

    area_proposal = (proposal_x2 - proposal_x1) * (proposal_y2 - proposal_y1)
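The one-character fix above corrects the right edge of the intersection box, which previously used the target's y2 instead of its x2. A toy example of the effect (numbers made up): with proposal (x1, y1, x2, y2) = (0, 0, 10, 4) and target (2, 0, 8, 4), the fixed code gives inner_x2 = min(10, 8) = 8, so the intersection is 6 * 4 = 24 and IoU = 24 / 40 = 0.6; the old code gave inner_x2 = min(10, 4) = 4, shrinking the intersection to 2 * 4 = 8 and the IoU to 8 / 56 ≈ 0.14.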
58 changes: 58 additions & 0 deletions src/tensorneko/evaluation/secs.py
@@ -0,0 +1,58 @@
from numpy import ndarray
from torch import Tensor

from tensorneko_util.util import dispatch, Eval

from tensorneko_util.io import read


@Eval.later
def _secs_encoder():
    from resemblyzer import VoiceEncoder
    return VoiceEncoder()


@dispatch
def secs(pred: str, real: str) -> float:
    from resemblyzer import VoiceEncoder, preprocess_wav
    pred_audio = preprocess_wav(read.audio(pred).audio[0].numpy())
    real_audio = preprocess_wav(read.audio(real).audio[0].numpy())
    return _secs_compute(pred_audio, real_audio)


@dispatch
def secs(pred: Tensor, real: Tensor) -> float:
    return secs(pred.numpy(), real.numpy())


@dispatch
def secs(pred: ndarray, real: ndarray) -> float:
    from resemblyzer import VoiceEncoder, preprocess_wav
    if len(pred.shape) == 2:
        if pred.shape[0] == 1:
            pred = pred.squeeze(0)
        elif pred.shape[1] == 1:
            pred = pred.squeeze(1)
        else:
            raise ValueError("The input audio must be mono.")

    if len(real.shape) == 2:
        if real.shape[0] == 1:
            real = real.squeeze(0)
        elif real.shape[1] == 1:
            real = real.squeeze(1)
        else:
            raise ValueError("The input audio must be mono.")

    pred_audio = preprocess_wav(pred)
    real_audio = preprocess_wav(real)

    return _secs_compute(pred_audio, real_audio)


def _secs_compute(pred_audio: ndarray, real_audio: ndarray) -> float:
    encoder = _secs_encoder.value
    real_embed = encoder.embed_utterance(real_audio)
    pred_embed = encoder.embed_utterance(pred_audio)

    return float((real_embed * pred_embed).sum())
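
A usage sketch for the new secs metric (speaker-embedding cosine similarity computed with resemblyzer). The file paths are placeholders, and resemblyzer must be installed:

from tensorneko.evaluation import secs

# compare the speaker identity of a generated utterance with a reference recording
score = secs("pred_speech.wav", "real_speech.wav")
print(score)  # closer to 1.0 means more similar speakers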