quic · corneliusboehm · May 17, 2021 · May 17, 2021 · May 19, 2021 · May 19, 2021
diff --git a/sense/backbone_networks/efficientnet.py b/sense/backbone_networks/efficientnet.py
@@ -7,9 +7,9 @@
 
 class StridedInflatedEfficientNet(StridedInflatedMobileNetV2):
 
-    def __init__(self):
+    def __init__(self, **kwargs):
 
-        super().__init__()
+        super().__init__(**kwargs)
 
         self.cnn = nn.Sequential(
             ConvReLU(3, 32, 3, stride=2),

diff --git a/sense/backbone_networks/mobilenet.py b/sense/backbone_networks/mobilenet.py
@@ -150,13 +150,10 @@ def realign(self, input_, output_):  # noqa: D102
 
 class StridedInflatedMobileNetV2(RealtimeNeuralNet):
 
-    expected_frame_size = (256, 256)
-    fps = 16
-    step_size = 4
     feature_dim = 1280
 
-    def __init__(self):
-        super().__init__()
+    def __init__(self, fps=16):
+        super().__init__(step_size=4, fps=fps, expected_frame_size=(256, 256))
 
         self.cnn = nn.Sequential(
             ConvReLU(3, 32, 3, stride=2),

diff --git a/sense/downstream_tasks/nn_utils.py b/sense/downstream_tasks/nn_utils.py
@@ -7,43 +7,36 @@ class RealtimeNeuralNet(nn.Module):
     """
     RealtimeNeuralNet is the abstract class for all neural networks used in InferenceEngine.
 
-    Subclasses should overwrite the methods in RealtimeNeuralNet.
+    Subclasses should overwrite the preprocess method.
     """
-    def __init__(self):
+    def __init__(self, step_size: int, fps: int, expected_frame_size: Tuple[int, int]):
+        """
+        :param step_size:
+            The temporal step size of the neural network, i.e. how many frames should be consumed before outputting
+            the next prediction.
+        :param fps:
+            The input frame per second rate of the neural network.
+        :param expected_frame_size:
+            The expected frame size of the neural network.
+        """
         super().__init__()
+        self.step_size = step_size
+        self.fps = fps
+        self.expected_frame_size = expected_frame_size
 
     def preprocess(self, clip: np.ndarray):
         """
         Pre-process a clip from a video source.
         """
         raise NotImplementedError
 
-    @property
-    def step_size(self) -> int:
-        """
-        Return the step size of the neural network.
-        """
-        raise NotImplementedError
-
-    @property
-    def fps(self) -> int:
-        """
-        Return the frame per second rate of the neural network.
-        """
-        raise NotImplementedError
-
-    @property
-    def expected_frame_size(self) -> Tuple[int, int]:
-        """
-        Return the expected frame size of the neural network.
-        """
-        raise NotImplementedError
-
 
 class Pipe(RealtimeNeuralNet):
 
     def __init__(self, feature_extractor, feature_converter):
-        super().__init__()
+        super().__init__(step_size=feature_extractor.step_size,
+                         fps=feature_extractor.fps,  # read the argument: self.feature_extractor is only set below
+                         expected_frame_size=feature_extractor.expected_frame_size)
         self.feature_extractor = feature_extractor
         self.feature_converter = feature_converter
 
@@ -53,18 +46,6 @@ def forward(self, input_tensor):
             return [convert(feature) for convert in self.feature_converter]
         return self.feature_converter(feature)
 
-    @property
-    def expected_frame_size(self) -> Tuple[int, int]:
-        return self.feature_extractor.expected_frame_size
-
-    @property
-    def fps(self) -> int:
-        return self.feature_extractor.fps
-
-    @property
-    def step_size(self) -> int:
-        return self.feature_extractor.step_size
-
     def preprocess(self, clip: np.ndarray):
         return self.feature_extractor.preprocess(clip)
 

diff --git a/sense/loading.py b/sense/loading.py
@@ -150,7 +150,7 @@ def get_relevant_weights(
     raise Exception(msg)
 
 
-def load_backbone_model_from_config(checkpoint_path: str) -> Tuple[ModelConfig, dict]:
+def load_backbone_model_from_config(checkpoint_path: str) -> Tuple[ModelConfig, dict, dict]:
     """
     Load the backbone model that was used in training for the given model checkpoint as indicated in the 'config.json'
     file. If there is no config file, StridedInflatedEfficientNet-pro will be used per default.
@@ -159,12 +159,16 @@ def load_backbone_model_from_config(checkpoint_path: str) -> Tuple[ModelConfig,
     if os.path.exists(config_file):
         with open(config_file, 'r') as cf:
             config = json.load(cf)
-            backbone_model_config = ModelConfig(config['backbone_name'], config['backbone_version'], [])
+        backbone_model_config = ModelConfig(config['backbone_name'], config['backbone_version'], [])
+        model_kwargs = {
+            'fps': config.get('model_fps', 16),  # older config files have no 'model_fps'; fall back to 16
+        }
     else:
         # Assume StridedInflatedEfficientNet-pro was used
         backbone_model_config = ModelConfig('StridedInflatedEfficientNet', 'pro', [])
+        model_kwargs = {}
 
-    return backbone_model_config, backbone_model_config.load_weights()['backbone']
+    return backbone_model_config, backbone_model_config.load_weights()['backbone'], model_kwargs
 
 
 def prepend_resources_path(checkpoint_path):
@@ -233,8 +237,10 @@ def update_backbone_weights(backbone_weights: dict, checkpoint: dict):
         backbone_weights[key] = checkpoint.pop(key)
 
 
-def build_backbone_network(selected_config: ModelConfig, weights: dict,
-                           weights_finetuned: dict = None):
+def build_backbone_network(selected_config: ModelConfig,
+                           weights: dict,
+                           weights_finetuned: dict = None,
+                           **model_kwargs):
     """
     Creates a backbone network and load provided weights, unless Travis is used.
 
@@ -244,10 +250,12 @@ def build_backbone_network(selected_config: ModelConfig, weights: dict,
         A model state dict.
     :param  weights_finetuned:
         A state dict that contains the finetuned weights of a subset of the model layers.
+    :param model_kwargs:
+        Additional keyword arguments for the model
     :return:
         A backbone network, with pre-trained weights.
     """
-    backbone_network = getattr(backbone_networks, selected_config.model_name)()
+    backbone_network = getattr(backbone_networks, selected_config.model_name)(**model_kwargs)
     if not running_on_travis():
         if weights_finetuned:
             update_backbone_weights(weights, weights_finetuned)

diff --git a/tools/conversion/weights_loader.py b/tools/conversion/weights_loader.py
@@ -6,7 +6,7 @@
 
 def load_custom_classifier_weights(path_in):
     # Load backbone network according to config file
-    backbone_model_config, backbone_weights = load_backbone_model_from_config(path_in)
+    backbone_model_config, backbone_weights, _ = load_backbone_model_from_config(path_in)
 
     # Load custom classifier checkpoint
     weights_file = os.path.join(path_in, 'best_classifier.checkpoint')

diff --git a/tools/run_custom_classifier.py b/tools/run_custom_classifier.py
@@ -36,7 +36,7 @@ def run_custom_classifier(custom_classifier, camera_id=0, path_in=None, path_out
                           display_fn=None, stop_event=None):
 
     # Load backbone network according to config file
-    backbone_model_config, backbone_weights = load_backbone_model_from_config(custom_classifier)
+    backbone_model_config, backbone_weights, model_kwargs = load_backbone_model_from_config(custom_classifier)
 
     try:
         # Load custom classifier
@@ -51,8 +51,10 @@ def run_custom_classifier(custom_classifier, camera_id=0, path_in=None, path_out
         return None
 
     # Create backbone network
-    backbone_network = build_backbone_network(backbone_model_config, backbone_weights,
-                                              weights_finetuned=checkpoint_classifier)
+    backbone_network = build_backbone_network(backbone_model_config,
+                                              backbone_weights,
+                                              weights_finetuned=checkpoint_classifier,
+                                              **model_kwargs)
 
     with open(os.path.join(custom_classifier, 'label2int.json')) as file:
         class2int = json.load(file)

diff --git a/tools/sense_studio/project_utils.py b/tools/sense_studio/project_utils.py
@@ -53,6 +53,10 @@ def _backwards_compatibility_update(path, config):
         }
         updated = True
 
+    if 'model_fps' not in config:
+        config['model_fps'] = 16
+        updated = True
+
     if 'tags' not in config:
         # Collect class-wise tags
         old_classes = config['classes']

diff --git a/tools/sense_studio/static/main.js b/tools/sense_studio/static/main.js
@@ -352,6 +352,21 @@ async function toggleAssistedTagging(path) {
     window.location.reload();
 }
 
+
+function changeFps(element, originalModelFps) {
+    UIkit.modal.confirm('Are you sure you want to change the model fps? ' +
+                        'Your existing annotations will be converted to the new frame rate, ' +
+                        'but some information might get lost or be inaccurate. ' +
+                        'Please make sure to check the annotations after this operation.').then(function() {
+        // Confirmed -> TODO: change the model fps and convert annotations (for now this only logs to the console)
+        console.log('Confirmed.')
+    }, function () {
+        // Cancelled -> reset the select field to the fps value passed in as originalModelFps
+        element.value = originalModelFps;
+    });
+}
+
+
 async function addSelectedTagToClass(classIdx, className, path)  {
     let selectedTagsList = document.getElementById(`selectedTagsList${classIdx}`);
     let selectTagDropdown = document.getElementById(`selectTag${classIdx}`);

diff --git a/tools/sense_studio/templates/project_details.html b/tools/sense_studio/templates/project_details.html
@@ -40,6 +40,22 @@ <h3 class="uk-card-title">Settings</h3>
                         Use GPU
                     </label>
                 </div>
+
+                <form class="uk-margin-small-top uk-grid-match uk-grid-small" autocomplete="off"
+                     uk-tooltip="Input frame rate for the model. Output frequency will be 4 times smaller. Also affects resolution of temporal annotations."
+                     uk-grid>
+                    <div class="uk-width-auto" >
+                        <select class="uk-select" onchange="changeFps(this, {{ config.model_fps }});">
+                            <option value="8" {{ 'selected' if config.model_fps == 8 }}>8 fps</option>
+                            <option value="16" {{ 'selected' if config.model_fps == 16 }}>16 fps</option>
+                            <option value="30" {{ 'selected' if config.model_fps == 30 }}>30 fps</option>
+                        </select>
+                    </div>
+
+                    <div class="uk-width-expand uk-flex uk-flex-middle line-height-normal">
+                        Model Input Frame Rate
+                    </div>
+                </form>
             </div>
         </div>
     </div>
@@ -246,14 +262,14 @@ <h4>Tags</h4>
                         <select class="uk-select" id="selectTag{{ class_idx }}"
                                 onchange="addSelectedTagToClass('{{ class_idx }}', '{{ class }}', '{{ path }}');">
                             {% if tags %}
-                                <option value="">Select tags...</option>
+                                <option value="" hidden>Select tags...</option>
                                 {% for tag_index, tag_name in tags.items()|sort(attribute='1') %}
                                     {% if tag_index not in selected_tags %}
                                         <option value="{{ tag_index }}">{{ tag_name }}</option>
                                     {% endif %}
                                 {% endfor %}
                             {% else %}
-                                <option value="">No tags available</option>
+                                <option value="" hidden>No tags available</option>
                             {% endif %}
                         </select>
                         <ul id="selectedTagsList{{ class_idx }}" class="uk-list uk-margin-top">

diff --git a/tools/sense_studio/templates/testing.html b/tools/sense_studio/templates/testing.html
@@ -27,7 +27,7 @@ <h1 class="uk-heading-medium uk-margin-large-top uk-margin-remove-bottom">Testin
                                 <option>{{ classifier }}</option>
                             {% endfor %}
                         {% else %}
-                            <option value="">No classifiers to select from</option>
+                            <option value="" hidden>No classifiers to select from</option>
                         {% endif %}
                     </select>
                 </div>

diff --git a/tools/sense_studio/templates/training.html b/tools/sense_studio/templates/training.html
@@ -28,7 +28,7 @@ <h1 class="uk-heading-medium uk-margin-large-top uk-margin-remove-bottom">Traini
                                 <option>{{ model }}</option>
                             {% endfor %}
                         {% else %}
-                            <option value="">No models available</option>
+                            <option value="" hidden>No models available</option>
                         {% endif %}
                     </select>
                     {% if not models %}

diff --git a/tools/sense_studio/training.py b/tools/sense_studio/training.py
@@ -61,6 +61,7 @@ def start_training():
         'path_out': path_out,
         'model_version': model_version,
         'model_name': model_name,
+        'model_fps': config['model_fps'],
         'epochs': int(epochs),
         'use_gpu': config['use_gpu'],
         'temporal_training': config['temporal'],

diff --git a/tools/sense_studio/utils.py b/tools/sense_studio/utils.py
@@ -26,7 +26,8 @@ def load_feature_extractor(project_path):
     model_config, weights = get_relevant_weights(SUPPORTED_MODEL_CONFIGURATIONS)
 
     # Setup backbone network
-    backbone_network = build_backbone_network(model_config, weights['backbone'])
+    model_fps = get_project_setting(project_path, 'model_fps')
+    backbone_network = build_backbone_network(model_config, weights['backbone'], fps=model_fps)
 
     # Create Inference Engine
     use_gpu = get_project_setting(project_path, 'use_gpu')

diff --git a/tools/train_classifier.py b/tools/train_classifier.py
@@ -6,6 +6,7 @@
   train_classifier.py  --path_in=PATH
                        [--model_name=NAME]
                        [--model_version=VERSION]
+                       [--model_fps=NUM]
                        [--num_layers_to_finetune=NUM]
                        [--epochs=NUM]
                        [--use_gpu]
@@ -20,6 +21,7 @@
                                  Important: this folder should follow the structure described in the README.
   --model_name=NAME              Name of the backbone model to be used.
   --model_version=VERSION        Version of the backbone model to be used.
+  --model_fps=NUM                Input frame per second rate for the model [default: 16].
   --num_layers_to_finetune=NUM   Number of layers to finetune in addition to the final layer [default: 9].
   --epochs=NUM                   Number of epochs to run [default: 80].
   --path_out=PATH                Where to save results. Will default to `path_in` if not provided.
@@ -60,7 +62,7 @@
 ]
 
 
-def train_model(path_in, path_out, model_name, model_version, num_layers_to_finetune, epochs,
+def train_model(path_in, path_out, model_name, model_version, model_fps=16, num_layers_to_finetune=9, epochs=80,
                 use_gpu=True, overwrite=True, temporal_training=None, resume=False, log_fn=print,
                 confmat_event=None):
     os.makedirs(path_out, exist_ok=True)
@@ -97,8 +99,10 @@ def train_model(path_in, path_out, model_name, model_version, num_layers_to_fine
         checkpoint_classifier = None
 
     # Load backbone network
-    backbone_network = build_backbone_network(selected_config, backbone_weights,
-                                              weights_finetuned=checkpoint_classifier)
+    backbone_network = build_backbone_network(selected_config,
+                                              backbone_weights,
+                                              weights_finetuned=checkpoint_classifier,
+                                              fps=model_fps)
 
     # Get the required temporal dimension of feature tensors in order to
     # finetune the provided number of layers
@@ -218,6 +222,7 @@ def train_model(path_in, path_out, model_name, model_version, num_layers_to_fine
     config = {
         'backbone_name': selected_config.model_name,
         'backbone_version': selected_config.version,
+        'model_fps': model_fps,
         'num_layers_to_finetune': num_layers_to_finetune,
         'classifier': str(gesture_classifier),
         'temporal_training': temporal_training,
@@ -259,6 +264,7 @@ def train_model(path_in, path_out, model_name, model_version, num_layers_to_fine
     _model_version = args['--model_version'] or None
     _num_layers_to_finetune = int(args['--num_layers_to_finetune'])
     _epochs = int(args['--epochs'])
+    _model_fps = int(args['--model_fps'])
     _temporal_training = args['--temporal_training']
     _resume = args['--resume']
     _overwrite = args['--overwrite']
@@ -268,6 +274,7 @@ def train_model(path_in, path_out, model_name, model_version, num_layers_to_fine
         path_out=_path_out,
         model_name=_model_name,
         model_version=_model_version,
+        model_fps=_model_fps,
         num_layers_to_finetune=_num_layers_to_finetune,
         epochs=_epochs,
         use_gpu=_use_gpu,