Add model and dataset analytics to PyTorchVideo

Summary: For all PTV models, datasets and transforms, add some analytics to aggregate user metrics.

Reviewed By: tullie

Differential Revision: D28388721

fbshipit-source-id: 9bc45b5a7bda0e3f9d79ad71c79f9eb44d4f3a8f
facebookresearch · May 17, 2021 · 124e132 · 124e132
1 parent 6539ed6
commit 124e132
Show file tree

Hide file tree

Showing 15 changed files with 162 additions and 4 deletions.
diff --git a/pytorchvideo/data/charades.py b/pytorchvideo/data/charades.py
@@ -59,6 +59,9 @@ def __init__(
 
             frames_per_clip (Optional[int]): The number of frames per clip to sample.
         """
+
+        torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.Charades.__init__")
+
         self._transform = transform
         self._clip_sampler = clip_sampler
         (

diff --git a/pytorchvideo/data/epic_kitchen/epic_kitchen_dataset.py b/pytorchvideo/data/epic_kitchen/epic_kitchen_dataset.py
@@ -130,6 +130,9 @@ def __init__(
                 multiple threads.
 
         """
+
+        torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.EpicKitchenDataset.__init__")
+
         assert video_info_file_path
         assert actions_file_path
         assert video_data_manifest_file_path

diff --git a/pytorchvideo/data/hmdb51.py b/pytorchvideo/data/hmdb51.py
@@ -7,6 +7,7 @@
 import pathlib
 from typing import Any, Callable, List, Optional, Tuple, Type, Union
 
+import torch
 import torch.utils.data
 from iopath.common.file_io import g_pathmgr
 
@@ -211,6 +212,9 @@ def Hmdb51(
 
         decoder (str): Defines which backend should be used to decode videos.
     """
+
+    torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.Hmdb51")
+
     labeled_video_paths = Hmdb51LabeledVideoPaths.from_dir(
         data_path, split_id=split_id, split_type=split_type
     )

diff --git a/pytorchvideo/data/json_dataset.py b/pytorchvideo/data/json_dataset.py
@@ -62,6 +62,8 @@ def video_only_dataset(
             frame videos.
     """
 
+    torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.json_dataset.video_only_dataset")
+
     if g_pathmgr.isfile(data_path):
         try:
             with g_pathmgr.open(data_path, "r") as f:

diff --git a/pytorchvideo/data/kinetics.py b/pytorchvideo/data/kinetics.py
@@ -1,10 +1,70 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 
-from .labeled_video_dataset import labeled_video_dataset
+from typing import Any, Callable, Dict, Optional, Type
+
+import torch
+from pytorchvideo.data.clip_sampling import ClipSampler
+
+from .labeled_video_dataset import labeled_video_dataset, LabeledVideoDataset
 
 
 """
     Action recognition video dataset for Kinetics-{400,600,700}
     <https://deepmind.com/research/open-source/open-source-datasets/kinetics/>
 """
-Kinetics = labeled_video_dataset
+
+
+def Kinetics(
+    data_path: str,
+    clip_sampler: ClipSampler,
+    video_sampler: Type[torch.utils.data.Sampler] = torch.utils.data.RandomSampler,
+    transform: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
+    video_path_prefix: str = "",
+    decode_audio: bool = True,
+    decoder: str = "pyav",
+) -> LabeledVideoDataset:
+    """
+    Create a ``LabeledVideoDataset`` object for the Kinetics dataset.
+
+    Args:
+        data_path (str): Path to the data. The path type defines how the data
+            should be read:
+
+            * For a file path, the file is read and each line is parsed into a
+              video path and label.
+            * For a directory, the directory structure defines the classes
+              (i.e. each subdirectory is a class).
+
+        clip_sampler (ClipSampler): Defines how clips should be sampled from each
+                video. See the clip sampling documentation for more information.
+
+        video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal
+                video container. This defines the order videos are decoded and,
+                if necessary, the distributed split.
+
+        transform (Optional[Callable]): This callable is evaluated on the clip
+                output before the clip is returned. It can be used for user defined
+                preprocessing and augmentations to the clips. See the
+                ``LabeledVideoDataset`` class for clip output format.
+
+        video_path_prefix (str): Path to root directory with the videos that are
+                loaded in ``LabeledVideoDataset``. All the video paths before loading
+                are prefixed with this path.
+
+        decode_audio (bool): If True, also decode audio from video.
+
+        decoder (str): Defines which type of decoder is used to decode a video.
+
+    """
+    # Analytics hook: report this API key via torch's log-once usage logger.
+    torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.Kinetics")
+
+    return labeled_video_dataset(
+        data_path,
+        clip_sampler,
+        video_sampler,
+        transform,
+        video_path_prefix,
+        decode_audio,
+        decoder,
+    )
diff --git a/pytorchvideo/data/ssv2.py b/pytorchvideo/data/ssv2.py
@@ -70,6 +70,9 @@ def __init__(
 
             rand_sample_frames (bool): If True, randomly sampling frames for each clip.
         """
+
+        torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.SSv2.__init__")
+
         self._transform = transform
         self._clip_sampler = clip_sampler
         self._path_to_videos, self._labels = _read_video_paths_and_labels(

diff --git a/pytorchvideo/data/ucf101.py b/pytorchvideo/data/ucf101.py
@@ -1,10 +1,70 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 
-from .labeled_video_dataset import labeled_video_dataset
+from typing import Any, Callable, Dict, Optional, Type
+
+import torch
+from pytorchvideo.data.clip_sampling import ClipSampler
+
+from .labeled_video_dataset import labeled_video_dataset, LabeledVideoDataset
 
 
 """
     Action recognition video dataset for UCF101
     <https://www.crcv.ucf.edu/data/UCF101.php>
 """
-Ucf101 = labeled_video_dataset
+
+
+def Ucf101(
+    data_path: str,
+    clip_sampler: ClipSampler,
+    video_sampler: Type[torch.utils.data.Sampler] = torch.utils.data.RandomSampler,
+    transform: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
+    video_path_prefix: str = "",
+    decode_audio: bool = True,
+    decoder: str = "pyav",
+) -> LabeledVideoDataset:
+    """
+    Create a ``LabeledVideoDataset`` object for the Ucf101 dataset.
+
+    Args:
+        data_path (str): Path to the data. The path type defines how the data
+            should be read:
+
+            * For a file path, the file is read and each line is parsed into a
+              video path and label.
+            * For a directory, the directory structure defines the classes
+              (i.e. each subdirectory is a class).
+
+        clip_sampler (ClipSampler): Defines how clips should be sampled from each
+                video. See the clip sampling documentation for more information.
+
+        video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal
+                video container. This defines the order videos are decoded and,
+                if necessary, the distributed split.
+
+        transform (Optional[Callable]): This callable is evaluated on the clip
+                output before the clip is returned. It can be used for user defined
+                preprocessing and augmentations to the clips. See the
+                ``LabeledVideoDataset`` class for clip output format.
+
+        video_path_prefix (str): Path to root directory with the videos that are
+                loaded in ``LabeledVideoDataset``. All the video paths before loading
+                are prefixed with this path.
+
+        decode_audio (bool): If True, also decode audio from video.
+
+        decoder (str): Defines which type of decoder is used to decode a video.
+
+    """
+    # Analytics hook: report this API key via torch's log-once usage logger.
+    torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.Ucf101")
+
+    return labeled_video_dataset(
+        data_path,
+        clip_sampler,
+        video_sampler,
+        transform,
+        video_path_prefix,
+        decode_audio,
+        decoder,
+    )
diff --git a/pytorchvideo/models/byol.py b/pytorchvideo/models/byol.py
@@ -41,6 +41,9 @@ def __init__(
                 synchronized batchnorm.
         """
         super().__init__()
+
+        torch._C._log_api_usage_once("PYTORCHVIDEO.model.BYOL.__init__")
+
         self.mmt = mmt
         self.feature_dim = feature_dim
         if projector is not None:

diff --git a/pytorchvideo/models/csn.py b/pytorchvideo/models/csn.py
@@ -2,6 +2,7 @@
 
 from typing import Callable, Tuple
 
+import torch
 import torch.nn as nn
 from pytorchvideo.models.head import create_res_basic_head
 from pytorchvideo.models.resnet import Net, create_bottleneck_block, create_res_stage
@@ -110,6 +111,9 @@ def create_csn(
     Returns:
         (nn.Module): the csn model.
     """
+
+    torch._C._log_api_usage_once("PYTORCHVIDEO.model.create_csn")
+
     # Number of blocks for different stages given the model depth.
     _MODEL_STAGE_DEPTH = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3), 152: (3, 8, 36, 3)}
 

diff --git a/pytorchvideo/models/head.py b/pytorchvideo/models/head.py
@@ -72,6 +72,7 @@ def create_res_basic_head(
         output_with_global_average (bool): if True, perform global averaging on temporal
             and spatial dimensions and reshape output to batch_size x out_features.
     """
+
     if activation is None:
         activation_model = None
     elif activation == nn.Softmax:

diff --git a/pytorchvideo/models/r2plus1d.py b/pytorchvideo/models/r2plus1d.py
@@ -2,6 +2,7 @@
 from functools import partial
 from typing import Callable, Tuple
 
+import torch
 import torch.nn as nn
 from pytorchvideo.layers.convolutions import create_conv_2plus1d
 from pytorchvideo.models.head import create_res_basic_head
@@ -235,6 +236,9 @@ def create_r2plus1d(
     Returns:
         (nn.Module): basic resnet.
     """
+
+    torch._C._log_api_usage_once("PYTORCHVIDEO.model.create_r2plus1d")
+
     # Number of blocks for different stages given the model depth.
     _MODEL_STAGE_DEPTH = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3), 152: (3, 8, 36, 3)}
 

diff --git a/pytorchvideo/models/resnet.py b/pytorchvideo/models/resnet.py
@@ -705,6 +705,9 @@ def create_resnet(
     Returns:
         (nn.Module): basic resnet.
     """
+
+    torch._C._log_api_usage_once("PYTORCHVIDEO.model.create_resnet")
+
     # Number of blocks for different stages given the model depth.
     _MODEL_STAGE_DEPTH = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3), 152: (3, 8, 36, 3)}
 

diff --git a/pytorchvideo/models/simclr.py b/pytorchvideo/models/simclr.py
@@ -24,6 +24,9 @@ def __init__(
         temperature: float = 0.07,
     ) -> None:
         super().__init__()
+
+        torch._C._log_api_usage_once("PYTORCHVIDEO.model.SimCLR.__init__")
+
         set_attributes(self, locals())
 
     def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:

diff --git a/pytorchvideo/models/slowfast.py b/pytorchvideo/models/slowfast.py
@@ -166,6 +166,8 @@ def create_slowfast(
         (nn.Module): SlowFast model.
     """
 
+    torch._C._log_api_usage_once("PYTORCHVIDEO.model.create_slowfast")
+
     # Number of blocks for different stages given the model depth.
     _num_pathway = len(input_channels)
     _MODEL_STAGE_DEPTH = {

diff --git a/pytorchvideo/models/x3d.py b/pytorchvideo/models/x3d.py
@@ -642,6 +642,9 @@ def create_x3d(
     Returns:
         (nn.Module): the X3D network.
     """
+
+    torch._C._log_api_usage_once("PYTORCHVIDEO.model.create_x3d")
+
     blocks = []
     # Create stem for X3D.
     stem_dim_out = round_width(stem_dim_in, width_factor)