diff --git a/README.md b/README.md index acef89c..674ac69 100755 --- a/README.md +++ b/README.md @@ -86,16 +86,14 @@ A few sample predictions on VOC 2012 dataset made with a trained model are shown ``` Dataset used: VOC Pascal 2012 Confidence threshold used: 0.5 -Recall: 0.441 -Precision: 0.764 +Recall: 0.458 +Precision: 0.723 +F1 score: 0.561 ``` #### Good prediction ![alt text](./images/good_prediction.png) -#### Typical prediction - many objects are correctly detected, but a few are off -![alt text](./images/typical_prediction.png) - #### Bad prediction ![alt text](./images/bad_prediction.png) diff --git a/config.yaml b/config.yaml index 0a7c1ae..e000840 100755 --- a/config.yaml +++ b/config.yaml @@ -90,3 +90,13 @@ train: model_checkpoint_path: "/data/voc_ssd_models/current_model.weights.h5" best_model_checkpoint_path: "/data/voc_ssd_models/current_model/" + +post_processing: + + non_maximum_suppression: + method: "greedy" + iou_threshold: 0.3 + + # method: "soft" + # score_threshold: 0.7 + # sigma: 0.5 diff --git a/images/bad_prediction.png b/images/bad_prediction.png index 5fb840b..3f6e91a 100644 Binary files a/images/bad_prediction.png and b/images/bad_prediction.png differ diff --git a/images/good_prediction.png b/images/good_prediction.png index 07b6207..ddfd3b1 100644 Binary files a/images/good_prediction.png and b/images/good_prediction.png differ diff --git a/images/typical_prediction.png b/images/typical_prediction.png deleted file mode 100644 index 288b96d..0000000 Binary files a/images/typical_prediction.png and /dev/null differ diff --git a/net/analysis.py b/net/analysis.py index 1510e4a..7fc2d8b 100644 --- a/net/analysis.py +++ b/net/analysis.py @@ -7,6 +7,7 @@ import queue import threading +import box import matplotlib.pyplot as plt import numpy as np import seaborn @@ -92,7 +93,9 @@ class MatchingDataComputer: Utility for computing matched and unmatched annotations and predictions at different thresholds """ - def __init__(self, samples_loader, 
model, default_boxes_factory, thresholds, categories): + def __init__( + self, samples_loader, model, default_boxes_factory, + confidence_thresholds, categories, post_processing_config: box.Box): """ Constructor :param samples_loader: net.data.VOCSamplesDataLoader instance @@ -101,13 +104,15 @@ def __init__(self, samples_loader, model, default_boxes_factory, thresholds, cat :param thresholds: list of floats, for each threshold, only predictions with confidence above it will be used to compute matching data :param categories: list of strings, labels for categories + :param post_processing_config: box.Box with post processing configuration options """ self.samples_loader = samples_loader self.model = model self.default_boxes_factory = default_boxes_factory - self.thresholds = thresholds + self.confidence_thresholds = confidence_thresholds self.categories = categories + self.post_processing_config = post_processing_config def get_thresholds_matched_data_map(self): """ @@ -119,7 +124,9 @@ def get_thresholds_matched_data_map(self): iterator = iter(self.samples_loader) - thresholds_matched_data_map = {threshold: collections.defaultdict(list) for threshold in self.thresholds} + thresholds_matched_data_map = { + threshold: collections.defaultdict(list) for threshold in self.confidence_thresholds + } samples_count = len(self.samples_loader) samples_queue = queue.Queue(maxsize=250) @@ -163,12 +170,12 @@ def _matching_computations(self, thresholds_matched_data_map, samples_data_queue default_boxes_matrix = self.default_boxes_factory.get_default_boxes_matrix(sample_data_map["image_shape"]) # Compute matching data for sample at each threshold - for threshold in self.thresholds: + for threshold in self.confidence_thresholds: predictions = net.ssd.PredictionsComputer( categories=self.categories, - threshold=threshold, - use_non_maximum_suppression=True).get_predictions( + confidence_threshold=threshold, + post_processing_config=self.post_processing_config).get_predictions( 
bounding_boxes_matrix=default_boxes_matrix + sample_data_map["offsets_predictions_matrix"], softmax_predictions_matrix=sample_data_map["softmax_predictions_matrix"]) @@ -199,16 +206,11 @@ def _get_matches_data(ground_truth_annotations, predictions): matches_data["unmatched_annotations"].append(ground_truth_annotation) - # For each prediction, check if it was matched by any ground truth annotation - for prediction in predictions: - - if is_annotation_matched(prediction, ground_truth_annotations): + matched_predictions = get_unique_prediction_matches(ground_truth_annotations, predictions) + unmatched_predictions = set(predictions).difference(matched_predictions) - matches_data["matched_predictions"].append(prediction) - - else: - - matches_data["unmatched_predictions"].append(prediction) + matches_data["matched_predictions"].extend(matched_predictions) + matches_data["unmatched_predictions"].extend(unmatched_predictions) matches_data["mean_average_precision_data"] = get_predictions_matches( ground_truth_annotations=ground_truth_annotations, predictions=predictions) @@ -244,6 +246,11 @@ def get_precision_recall_analysis_report( message = "Precision is {:.3f}
".format(precision) messages.append(message) + f1_score = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0 + + message = "F1 score is {:.3f}
def get_unique_prediction_matches(ground_truth_annotations, predictions):
    """
    Select predictions that can be credited with detecting a ground truth annotation.

    Predictions are visited from the most to the least confident one. A prediction is kept if it has
    the same label as, and a sufficiently overlapping box with, at least one annotation that wasn't
    claimed by an earlier (more confident) prediction. Annotations claimed by a kept prediction are not
    available to later predictions, so out of many predictions hitting the same annotation only the most
    confident one is returned.

    Args:
        ground_truth_annotations (list[net.utilities.Annotation]): ground truth annotations
        predictions (list[net.utilities.Prediction]): predictions

    Returns:
        list[net.utilities.Prediction]: kept predictions, ordered by descending confidence
    """

    # Most confident predictions get the first pick of annotations
    predictions_by_confidence = sorted(predictions, key=lambda x: x.confidence, reverse=True)

    # Annotations that no prediction has claimed so far
    remaining_annotations = set(ground_truth_annotations)

    unique_predictions = []

    for prediction in predictions_by_confidence:

        # Freeze current set into a list, so that numpy indices computed below map back to annotations
        candidates = list(remaining_annotations)

        # Nothing left to match against
        if len(candidates) == 0:
            continue

        # One flag per candidate: does it have the same label as prediction
        same_label_flags = [candidate.label == prediction.label for candidate in candidates]

        candidates_boxes = np.array([candidate.bounding_box for candidate in candidates])

        # Indices of candidates whose boxes are reported as matching prediction's box
        # at intersection over union threshold of 0.5
        overlapping_indices = net.utilities.get_matched_boxes_indices(
            prediction.bounding_box, candidates_boxes, threshold=0.5)

        # One flag per candidate: does its box overlap prediction's box
        overlap_flags = np.zeros_like(same_label_flags)
        overlap_flags[overlapping_indices] = True

        # A candidate is matched only if both its label and its box agree with prediction
        matches_flags = np.logical_and(same_label_flags, overlap_flags)

        if not np.any(matches_flags):
            continue

        # NOTE: every matched candidate is removed, so a single prediction
        # may claim more than one annotation
        remaining_annotations = remaining_annotations.difference(
            np.array(candidates)[matches_flags])

        unique_predictions.append(prediction)

    return unique_predictions
yaml.safe_load(file) + config = box.Box(yaml.safe_load(file)) logger = net.utilities.get_logger(config["log_path"]) diff --git a/net/logging.py b/net/logging.py index 97c256a..c436d31 100644 --- a/net/logging.py +++ b/net/logging.py @@ -76,8 +76,8 @@ def log_single_prediction(logger, model, default_boxes_factory, samples_iterator predictions_with_nms = net.ssd.PredictionsComputer( categories=config["categories"], - threshold=0.5, - use_non_maximum_suppression=True).get_predictions( + confidence_threshold=0.5, + post_processing_config=config.post_processing).get_predictions( bounding_boxes_matrix=default_boxes_matrix + offsets_predictions_matrix, softmax_predictions_matrix=softmax_predictions_matrix) @@ -163,8 +163,8 @@ def log_single_sample_debugging_info( # Get annotations boxes and labels from predictions matrix and default boxes matrix predictions = net.ssd.PredictionsComputer( categories=config["categories"], - threshold=0.5, - use_non_maximum_suppression=False).get_predictions( + confidence_threshold=0.5, + post_processing_config=config.post_processing).get_predictions( bounding_boxes_matrix=default_boxes_matrix + offsets_predictions_matrix, softmax_predictions_matrix=softmax_predictions_matrix) diff --git a/net/ml.py b/net/ml.py index d9bf736..431b7fc 100755 --- a/net/ml.py +++ b/net/ml.py @@ -163,7 +163,7 @@ def predict(self, image): """ images_batch_op = tf.constant(np.array([image])) - outputs = self.model.predict(images_batch_op) + outputs = self.model.predict(images_batch_op, verbose=False) return outputs["categories_predictions_head"][0], outputs["offsets_predictions_head"][0] diff --git a/net/ssd.py b/net/ssd.py index 57b70f1..837782a 100755 --- a/net/ssd.py +++ b/net/ssd.py @@ -2,6 +2,7 @@ Module with SSD-specific computations """ +import box import numpy as np import tensorflow as tf @@ -99,8 +100,14 @@ def get_boxes_at_location(y_center, x_center, configuration): half_width = width / 2 half_height = height / 2 - box = [x_center - half_width, 
y_center - half_height, x_center + half_width, y_center + half_height] - boxes.append(box) + corner_box = [ + x_center - half_width, + y_center - half_height, + x_center + half_width, + y_center + half_height + ] + + boxes.append(corner_box) # Horizontal boxes for aspect_ratio in configuration["aspect_ratios"]: @@ -111,8 +118,14 @@ def get_boxes_at_location(y_center, x_center, configuration): half_width = width / 2 half_height = height / 2 - box = [x_center - half_width, y_center - half_height, x_center + half_width, y_center + half_height] - boxes.append(box) + corner_box = [ + x_center - half_width, + y_center - half_height, + x_center + half_width, + y_center + half_height + ] + + boxes.append(corner_box) return np.array(boxes) @@ -240,18 +253,18 @@ class PredictionsComputer: Class for computing objects predictions from predictions matrix and default boxes matrix """ - def __init__(self, categories, threshold, use_non_maximum_suppression): + def __init__(self, categories, confidence_threshold, post_processing_config: box.Box): """ Constructor :param categories: list of strings :param threshold: float, only non-background predictions above this threshold will be returned - :param use_non_maximum_suppression: bool, specifies if non maximum suppression should be used. - soft-nms algorithm is used for non maximum suppression. + :param post_processing_config: box.Box with post-processing options. Should contain key + "method". Other keys depend on method chosen. 
""" self.categories = categories - self.threshold = threshold - self.use_non_maximum_suppression = use_non_maximum_suppression + self.confidence_threshold = confidence_threshold + self.post_processing_config = post_processing_config def get_predictions(self, bounding_boxes_matrix, softmax_predictions_matrix): """ @@ -262,20 +275,20 @@ def get_predictions(self, bounding_boxes_matrix, softmax_predictions_matrix): :return: list of net.utilities.Prediction instances """ - if self.use_non_maximum_suppression is True: + if self.post_processing_config.non_maximum_suppression.method is None: - return self._get_soft_nms_predictions(bounding_boxes_matrix, softmax_predictions_matrix) + return self._get_raw_predictions(bounding_boxes_matrix, softmax_predictions_matrix) else: - return self._get_raw_predictions(bounding_boxes_matrix, softmax_predictions_matrix) + return self._get_nms_predictions(bounding_boxes_matrix, softmax_predictions_matrix) def _get_raw_predictions(self, bounding_boxes_matrix, softmax_predictions_matrix): # Get a selector for non-background predictions over threshold predictions_selector = \ (np.argmax(softmax_predictions_matrix, axis=1) > 0) & \ - (np.max(softmax_predictions_matrix, axis=1) > self.threshold) + (np.max(softmax_predictions_matrix, axis=1) > self.confidence_threshold) predictions_boxes = bounding_boxes_matrix[predictions_selector] predictions_categories_indices = np.argmax(softmax_predictions_matrix[predictions_selector], axis=1) @@ -283,11 +296,11 @@ def _get_raw_predictions(self, bounding_boxes_matrix, softmax_predictions_matrix predictions = [] - for box, category_id, confidence in \ + for prediction_box, category_id, confidence in \ zip(predictions_boxes, predictions_categories_indices, predictions_confidences): prediction = net.utilities.Prediction( - bounding_box=[int(x) for x in box], + bounding_box=[int(x) for x in prediction_box], confidence=confidence, label=self.categories[category_id], category_id=category_id) @@ -296,12 +309,12 
@@ def _get_raw_predictions(self, bounding_boxes_matrix, softmax_predictions_matrix return predictions - def _get_soft_nms_predictions(self, default_boxes_matrix, softmax_predictions_matrix): + def _get_nms_predictions(self, default_boxes_matrix, softmax_predictions_matrix): # Get a selector for non-background predictions over threshold predictions_selector = \ (np.argmax(softmax_predictions_matrix, axis=1) > 0) & \ - (np.max(softmax_predictions_matrix, axis=1) > self.threshold) + (np.max(softmax_predictions_matrix, axis=1) > self.confidence_threshold) predictions_boxes = default_boxes_matrix[predictions_selector] predictions_categories_indices = np.argmax(softmax_predictions_matrix[predictions_selector], axis=1) @@ -317,11 +330,27 @@ def _get_soft_nms_predictions(self, default_boxes_matrix, softmax_predictions_ma # soft nms works on each category separately for category_id in range(1, len(self.categories)): - # Perform soft-nms on detections for current category - retained_detections_at_current_category = net.utilities.get_detections_after_soft_non_maximum_suppression( - detections=detections[predictions_categories_indices == category_id], - sigma=0.5, - score_threshold=0.5) + if self.post_processing_config.non_maximum_suppression.method == "soft": + + # Perform soft-nms on detections for current category + retained_detections_at_current_category = \ + net.utilities.get_detections_after_soft_non_maximum_suppression( + detections=detections[predictions_categories_indices == category_id], + sigma=self.post_processing_config.non_maximum_suppression.sigma, + score_threshold=self.post_processing_config.non_maximum_suppression.score_threshold) + + elif self.post_processing_config.non_maximum_suppression.method == "greedy": + + retained_detections_at_current_category = \ + net.utilities.get_detections_after_greedy_non_maximum_suppression( + detections=detections[predictions_categories_indices == category_id], + 
def get_detections_after_greedy_non_maximum_suppression(detections, iou_threshold):
    """
    Filter detections with greedy non-maximum suppression.

    The detection with the highest score is retained, and every remaining detection whose
    intersection over union (IOU) with it is not below iou_threshold is discarded.
    The procedure is then repeated on detections that are left, until none remain.
    Box coordinates are treated as inclusive, hence +1 when computing widths and heights.
    Input data is not modified.

    Args:
        detections (numpy.array): Detection results with shape `(num, 5)`,
            data in second dimension are [x_min, y_min, x_max, y_max, score] respectively.
            Array-like inputs (e.g. list of lists) and empty inputs of any shape are accepted.
        iou_threshold (float): Boxes that have IOU greater than or equal to this value with
            the box with the highest score will be discarded.
    Returns:
        numpy.array: Retained boxes with shape `(retained_num, 5)`, ordered by descending score.
    """

    detections = np.asarray(detections)

    # Nothing to suppress. Checked up front, because an empty input may come without
    # second dimension (e.g. np.array([])), in which case column indexing below would fail
    if detections.size == 0:
        return np.array([]).reshape(0, 5)

    areas = (detections[:, 2] - detections[:, 0] + 1) * (detections[:, 3] - detections[:, 1] + 1)

    # Expand detections with areas, so that the second dimension is
    # x_min, y_min, x_max, y_max, score, area.
    # Concatenation creates a new array, so in-place swaps below never touch caller's data
    detections = np.concatenate([detections, areas.reshape(-1, 1)], axis=1)

    areas_index = detections.shape[1] - 1

    retained_detections = []

    while detections.size > 0:

        # Get index for detection with max score, then swap that detection with detection at index 0.
        # This way we will get detection with max score at index 0 in detections array
        max_score_index = np.argmax(detections[:, 4], axis=0)
        detections[[0, max_score_index]] = detections[[max_score_index, 0]]

        # Save max score detection to retained detections
        retained_detections.append(detections[0])

        # Compute intersection over union between top score box and all other boxes
        min_x = np.maximum(detections[0, 0], detections[1:, 0])
        min_y = np.maximum(detections[0, 1], detections[1:, 1])
        max_x = np.minimum(detections[0, 2], detections[1:, 2])
        max_y = np.minimum(detections[0, 3], detections[1:, 3])

        # Clip at zero, so that disjoint boxes get an empty intersection instead of a negative one
        intersection_area = np.maximum(max_x - min_x + 1, 0.0) * np.maximum(max_y - min_y + 1, 0.0)
        intersection_over_union = \
            intersection_area / (detections[0, areas_index] + detections[1:, areas_index] - intersection_area)

        # Keep only detections that have IOU with top score detection below threshold.
        # Take care to shift indices by +1 to account for fact
        # we are leaving out top score detection at index 0
        retained_detections_indices = np.where(intersection_over_union < iou_threshold)[0] + 1
        detections = detections[retained_detections_indices]

    # Drop helper areas column before returning
    return np.array(retained_detections)[:, :5]
def test_get_unique_prediction_matches():
    """
    Test that get_unique_prediction_matches returns only the most confident prediction
    for each matched ground truth annotation, ordered by descending confidence
    """

    car_annotation = net.utilities.Annotation(bounding_box=[10, 10, 100, 100], label="car")
    dog_annotation = net.utilities.Annotation(bounding_box=[20, 50, 80, 120], label="dog")

    # No prediction targets this annotation
    airplane_annotation = net.utilities.Annotation(bounding_box=[30, 50, 200, 300], label="airplane")

    ground_truths = [car_annotation, dog_annotation, airplane_annotation]

    # Two predictions compete for the dog annotation, a single one targets the car annotation
    predictions = [
        net.utilities.Prediction(bounding_box=[20, 50, 80, 120], confidence=0.9, label="dog"),
        net.utilities.Prediction(bounding_box=[20, 50, 80, 120], confidence=0.8, label="dog"),
        net.utilities.Prediction(bounding_box=[10, 10, 100, 100], confidence=0.7, label="car")
    ]

    # Less confident dog prediction is expected to be dropped
    expected = [
        net.utilities.Prediction(bounding_box=[20, 50, 80, 120], confidence=0.9, label="dog"),
        net.utilities.Prediction(bounding_box=[10, 10, 100, 100], confidence=0.7, label="car")
    ]

    actual = net.analysis.get_unique_prediction_matches(ground_truths, predictions)

    assert expected == actual
class TestGreedyNonMaximumSuppression:
    """
    Tests for greedy non-maximum suppression logic
    """

    def test_with_small_box_inside_large_box(self):
        """
        Test greedy non-maximum suppression with a small box inside a large box
        """

        detections = np.array([
            [5, 5, 25, 25, 0.6],
            [10, 10, 20, 20, 0.5]
        ])

        # Boxes have small enough IOU to be considered separate.
        # Expectation is a copy of the input, so that an in-place modification of the input
        # by tested function can't silently change the expectation as well
        expected = detections.copy()

        actual = net.utilities.get_detections_after_greedy_non_maximum_suppression(
            detections=detections,
            iou_threshold=0.5)

        # array_equal compares shapes as well as values, so results with
        # missing or extra rows can't pass through broadcasting
        assert np.array_equal(expected, actual)

    def test_with_boxes_with_large_overlap(self):
        """
        Test greedy non-maximum suppression with a set of boxes with large overlap
        """

        detections = np.array([
            # First set of boxes - all have high IOU with each other
            [10, 10, 20, 20, 0.5],
            [11, 11, 21, 21, 0.6],
            [9, 9, 21, 21, 0.7],
            # Second set of boxes - all have high IOU with each other
            [96, 96, 123, 123, 0.5],
            [94, 94, 124, 124, 0.6],
            [95, 95, 125, 125, 0.8],
        ])

        # Only the top scoring box of each set should survive, highest score first
        expected = np.array([
            [95, 95, 125, 125, 0.8],
            [9, 9, 21, 21, 0.7]
        ])

        actual = net.utilities.get_detections_after_greedy_non_maximum_suppression(
            detections=detections,
            iou_threshold=0.5)

        # array_equal compares shapes as well as values, so results with
        # missing or extra rows can't pass through broadcasting
        assert np.array_equal(expected, actual)