
[Validate] add warning for unspecified arguments, and change default values #277


Open · wants to merge 3 commits into master
10 changes: 10 additions & 0 deletions nucleus/metrics/base.py
@@ -11,6 +11,8 @@
)
from nucleus.prediction import PredictionList

EPSILON = 10 ** -4 # 0.0001


class MetricResult(ABC):
"""Base MetricResult class"""
@@ -41,6 +43,14 @@ def aggregate(results: Iterable["ScalarResult"]) -> "ScalarResult":
value = total_value / max(total_weight, sys.float_info.epsilon)
return ScalarResult(value, total_weight)

def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return (
abs(self.value - other.value) < EPSILON
and self.weight == other.weight
)


class Metric(ABC):
"""Abstract class for defining a metric, which takes a list of annotations
60 changes: 47 additions & 13 deletions nucleus/metrics/cuboid_metrics.py
@@ -1,4 +1,5 @@
import sys
import warnings
from abc import abstractmethod
from typing import List, Optional, Union

@@ -10,6 +11,9 @@
from .filtering import ListOfAndFilters, ListOfOrAndFilters
from .filters import confidence_filter

DEFAULT_IOU_THRESHOLD = 0.1
Contributor
Why are we changing the default from 0.0 to 0.1? I'm not against it; it would just be nice to have a comment explaining the choice of the default.

Contributor Author
The default of 0.0 is really confusing in my opinion, in particular because it makes predictions with zero overlap count as true positives. 0.1 seemed reasonable for 3D IoU, though I wouldn't be opposed to 0.5 (to match the 2D default threshold) - what do you think?

DEFAULT_CONFIDENCE_THRESHOLD = 0.0


class CuboidMetric(Metric):
"""Abstract class for metrics of cuboids.
@@ -27,8 +31,9 @@ class CuboidMetric(Metric):

def __init__(
self,
iou_threshold: float,
enforce_label_match: bool = False,
confidence_threshold: float = 0.0,
confidence_threshold: Optional[float] = None,
Contributor
Doesn't it make sense to also add the IoU threshold to the base class? How would a metric make sense without computing the IoU for the matches?

Contributor Author
I also noticed that the iou_threshold wasn't used in the base class; I can add it.

annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
] = None,
@@ -54,6 +59,11 @@ def __init__(
(AND), forming a more selective and multiple column predicate. Finally, the most outer list combines
these filters as a disjunction (OR).
"""
if confidence_threshold is None:
confidence_threshold = DEFAULT_CONFIDENCE_THRESHOLD
warnings.warn(
f"Got confidence_threshold value of `None`. In this case, we set the confidence_threshold to {confidence_threshold} (include all predictions, regardless of confidence). Consider specifying this value explicitly during metric initialization"
)
self.enforce_label_match = enforce_label_match
assert 0 <= confidence_threshold <= 1
self.confidence_threshold = confidence_threshold
@@ -99,8 +109,8 @@ class CuboidIOU(CuboidMetric):
def __init__(
self,
enforce_label_match: bool = True,
iou_threshold: float = 0.0,
confidence_threshold: float = 0.0,
iou_threshold: Optional[float] = None,
confidence_threshold: Optional[float] = None,
iou_2d: bool = False,
annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
@@ -127,13 +137,19 @@ def __init__(
interpreted as a conjunction (AND), forming a more selective and multiple column predicate.
Finally, the most outer list combines these filters as a disjunction (OR).
"""
if iou_threshold is None:
iou_threshold = DEFAULT_IOU_THRESHOLD
warnings.warn(
f"The IoU threshold used for matching was initialized to `None`. In this case, the value of iou_threshold defaults to {iou_threshold}. If this value produces unexpected behavior, consider specifying the iou_threshold argument during metric initialization"
)
assert (
0 <= iou_threshold <= 1
), "IoU threshold must be between 0 and 1."
self.iou_threshold = iou_threshold
self.iou_2d = iou_2d
super().__init__(
enforce_label_match=enforce_label_match,
iou_threshold=iou_threshold,
confidence_threshold=confidence_threshold,
annotation_filters=annotation_filters,
prediction_filters=prediction_filters,
@@ -147,14 +163,16 @@ def eval(
iou_3d_metric, iou_2d_metric = detection_iou(
predictions,
annotations,
threshold_in_overlap_ratio=self.iou_threshold,
self.iou_threshold,
)

weight = max(len(annotations), len(predictions))
# If there are zero IoU matches, avg_iou defaults to value 0
if self.iou_2d:
avg_iou = iou_2d_metric.sum() / max(weight, sys.float_info.epsilon)
weight = len(iou_2d_metric)
avg_iou = iou_2d_metric.sum() / weight if weight > 0 else 0.0
else:
avg_iou = iou_3d_metric.sum() / max(weight, sys.float_info.epsilon)
weight = len(iou_3d_metric)
avg_iou = iou_3d_metric.sum() / weight if weight > 0 else 0.0

return ScalarResult(avg_iou, weight)

@@ -166,8 +184,8 @@ class CuboidPrecision(CuboidMetric):
def __init__(
self,
enforce_label_match: bool = True,
iou_threshold: float = 0.0,
confidence_threshold: float = 0.0,
iou_threshold: Optional[float] = None,
confidence_threshold: Optional[float] = None,
annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
] = None,
@@ -192,12 +210,18 @@ def __init__(
interpreted as a conjunction (AND), forming a more selective and multiple column predicate.
Finally, the most outer list combines these filters as a disjunction (OR).
"""
if iou_threshold is None:
iou_threshold = DEFAULT_IOU_THRESHOLD
warnings.warn(
f"The IoU threshold used for matching was initialized to `None`. In this case, the value of iou_threshold defaults to {iou_threshold}. If this value produces unexpected behavior, consider specifying the iou_threshold argument during metric initialization"
)
assert (
0 <= iou_threshold <= 1
), "IoU threshold must be between 0 and 1."
self.iou_threshold = iou_threshold
super().__init__(
enforce_label_match=enforce_label_match,
iou_threshold=iou_threshold,
confidence_threshold=confidence_threshold,
annotation_filters=annotation_filters,
prediction_filters=prediction_filters,
@@ -211,7 +235,9 @@ def eval(
stats = recall_precision(
predictions,
annotations,
threshold_in_overlap_ratio=self.iou_threshold,
self.iou_threshold,
self.confidence_threshold,
self.enforce_label_match,
)
weight = stats["tp_sum"] + stats["fp_sum"]
precision = stats["tp_sum"] / max(weight, sys.float_info.epsilon)
@@ -225,8 +251,8 @@ class CuboidRecall(CuboidMetric):
def __init__(
self,
enforce_label_match: bool = True,
iou_threshold: float = 0.0,
confidence_threshold: float = 0.0,
iou_threshold: Optional[float] = None,
confidence_threshold: Optional[float] = None,
Comment on lines +254 to +255
Contributor
Changes to default params need to be reflected in nucleus/validate/eval_functions/available_eval_functions.py configurations as well, since that is the client-facing code. The defaults there override the defaults provided here if not kept in sync.

annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
] = None,
@@ -241,12 +267,18 @@ def __init__(
iou_threshold: IOU threshold to consider detection as valid. Must be in [0, 1]. Default 0.0
confidence_threshold: minimum confidence threshold for predictions. Must be in [0, 1]. Default 0.0
"""
if iou_threshold is None:
iou_threshold = DEFAULT_IOU_THRESHOLD
warnings.warn(
f"The IoU threshold used for matching was initialized to `None`. In this case, the value of iou_threshold defaults to {iou_threshold}. If this value produces unexpected behavior, consider specifying the iou_threshold argument during metric initialization"
)
assert (
0 <= iou_threshold <= 1
), "IoU threshold must be between 0 and 1."
self.iou_threshold = iou_threshold
super().__init__(
enforce_label_match=enforce_label_match,
iou_threshold=iou_threshold,
confidence_threshold=confidence_threshold,
annotation_filters=annotation_filters,
prediction_filters=prediction_filters,
@@ -260,7 +292,9 @@ def eval(
stats = recall_precision(
predictions,
annotations,
threshold_in_overlap_ratio=self.iou_threshold,
self.iou_threshold,
self.confidence_threshold,
self.enforce_label_match,
)
weight = stats["tp_sum"] + stats["fn_sum"]
recall = stats["tp_sum"] / max(weight, sys.float_info.epsilon)
80 changes: 48 additions & 32 deletions nucleus/metrics/cuboid_utils.py
@@ -1,4 +1,5 @@
from functools import wraps
from dataclasses import dataclass
from typing import Dict, List, Tuple

import numpy as np
@@ -35,6 +36,14 @@ def __init__(self, *args, **kwargs):
from .base import ScalarResult


@dataclass
class ProcessedCuboids:
xyz: np.ndarray
wlh: np.ndarray
yaw: np.ndarray
labels: List[str]


def group_cuboids_by_label(
annotations: List[CuboidAnnotation],
predictions: List[CuboidPrediction],
@@ -101,19 +110,25 @@ def wrapper(
return wrapper


def process_dataitem(dataitem):
processed_item = {}
processed_item["xyz"] = np.array(
[[ann.position.x, ann.position.y, ann.position.z] for ann in dataitem]
def process_cuboids(item_list, confidence_threshold=None):
if confidence_threshold:
item_list = [
item
for item in item_list
if item.confidence >= confidence_threshold
]
xyz = np.array(
[[ann.position.x, ann.position.y, ann.position.z] for ann in item_list]
)
processed_item["wlh"] = np.array(
wlh = np.array(
[
[ann.dimensions.x, ann.dimensions.y, ann.dimensions.z]
for ann in dataitem
for ann in item_list
]
)
processed_item["yaw"] = np.array([ann.yaw for ann in dataitem])
return processed_item
yaw = np.array([ann.yaw for ann in item_list])
labels = [ann.label for ann in item_list]
return ProcessedCuboids(xyz, wlh, yaw, labels)


def compute_outer_iou(
@@ -178,7 +193,6 @@ def compute_outer_iou(
.intersection(polygon_1)
.area
)

intersection = height_intersection * area_intersection
area_0 = wlh_0[:, 0] * wlh_0[:, 1]
area_1 = wlh_1[:, 0] * wlh_1[:, 1]
@@ -278,6 +292,8 @@ def recall_precision(
prediction: List[CuboidPrediction],
groundtruth: List[CuboidAnnotation],
threshold_in_overlap_ratio: float,
confidence_threshold: float,
enforce_label_match: bool,
) -> Dict[str, float]:
"""
Calculates the precision and recall of each lidar frame.
@@ -294,23 +310,23 @@ def recall_precision(
num_predicted = 0
num_instances = 0

gt_items = process_dataitem(groundtruth)
pred_items = process_dataitem(prediction)
gt_items = process_cuboids(groundtruth)
pred_items = process_cuboids(prediction, confidence_threshold)

num_predicted += pred_items["xyz"].shape[0]
num_instances += gt_items["xyz"].shape[0]
num_predicted += pred_items.xyz.shape[0]
num_instances += gt_items.xyz.shape[0]

tp = np.zeros(pred_items["xyz"].shape[0])
fp = np.ones(pred_items["xyz"].shape[0])
fn = np.ones(gt_items["xyz"].shape[0])
tp = np.zeros(pred_items.xyz.shape[0])
fp = np.ones(pred_items.xyz.shape[0])
fn = np.ones(gt_items.xyz.shape[0])

mapping = associate_cuboids_on_iou(
pred_items["xyz"],
pred_items["wlh"],
pred_items["yaw"] + np.pi / 2,
gt_items["xyz"],
gt_items["wlh"],
gt_items["yaw"] + np.pi / 2,
pred_items.xyz,
pred_items.wlh,
pred_items.yaw + np.pi / 2,
gt_items.xyz,
gt_items.wlh,
gt_items.yaw + np.pi / 2,
threshold_in_overlap_ratio=threshold_in_overlap_ratio,
)

@@ -351,27 +367,27 @@ def detection_iou(
:param threshold: IOU threshold to consider detection as valid. Must be in [0, 1].
"""

gt_items = process_dataitem(groundtruth)
pred_items = process_dataitem(prediction)
gt_items = process_cuboids(groundtruth)
pred_items = process_cuboids(prediction)

meter_2d = []
meter_3d = []

if gt_items["xyz"].shape[0] == 0 or pred_items["xyz"].shape[0] == 0:
if gt_items.xyz.shape[0] == 0 or pred_items.xyz.shape[0] == 0:
return np.array([0.0]), np.array([0.0])

iou_3d, iou_2d = compute_outer_iou(
gt_items["xyz"],
gt_items["wlh"],
gt_items["yaw"],
pred_items["xyz"],
pred_items["wlh"],
pred_items["yaw"],
gt_items.xyz,
gt_items.wlh,
gt_items.yaw,
pred_items.xyz,
pred_items.wlh,
pred_items.yaw,
)

for i, m in enumerate(iou_3d.max(axis=1)):
j = iou_3d[i].argmax()
if m >= threshold_in_overlap_ratio:
j = iou_3d[i].argmax()
meter_3d.append(iou_3d[i, j])
meter_2d.append(iou_2d[i, j])
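
The ProcessedCuboids dataclass replaces the old dict returned by process_dataitem, so downstream code reads pred_items.xyz instead of pred_items["xyz"], and prediction lists can now be pre-filtered by confidence. A small sketch of the new helper; the confidence keyword on CuboidPrediction is an assumption here, mirroring the item.confidence attribute the filter relies on:

from nucleus.annotation import Point3D
from nucleus.metrics.cuboid_utils import process_cuboids
from nucleus.prediction import CuboidPrediction

preds = [
    CuboidPrediction(label="car", position=Point3D(0, 0, 0), dimensions=Point3D(10, 10, 5),
                     yaw=0.0, reference_id="item_A", confidence=0.9),
    CuboidPrediction(label="car", position=Point3D(5, 5, 5), dimensions=Point3D(2, 2, 2),
                     yaw=0.0, reference_id="item_A", confidence=0.2),
]
items = process_cuboids(preds, confidence_threshold=0.5)  # drops the 0.2-confidence box
assert items.xyz.shape == (1, 3) and items.labels == ["car"]
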

19 changes: 11 additions & 8 deletions nucleus/validate/eval_functions/available_eval_functions.py
@@ -10,12 +10,15 @@
from ..data_transfer_objects.eval_function import EvalFunctionEntry
from ..errors import EvalFunctionNotAvailableError

DEFAULT_2D_IOU_THRESHOLD = 0.5
DEFAULT_3D_IOU_THRESHOLD = 0.1


class PolygonIOUConfig(EvalFunctionConfig):
def __call__(
self,
enforce_label_match: bool = False,
iou_threshold: float = 0.0,
iou_threshold: float = DEFAULT_2D_IOU_THRESHOLD,
confidence_threshold: float = 0.0,
annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
@@ -77,7 +80,7 @@ def expected_name(cls) -> str:
class PolygonMAPConfig(EvalFunctionConfig):
def __call__(
self,
iou_threshold: float = 0.5,
iou_threshold: float = DEFAULT_2D_IOU_THRESHOLD,
annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
] = None,
@@ -135,7 +138,7 @@ class PolygonRecallConfig(EvalFunctionConfig):
def __call__(
self,
enforce_label_match: bool = False,
iou_threshold: float = 0.5,
iou_threshold: float = DEFAULT_2D_IOU_THRESHOLD,
confidence_threshold: float = 0.0,
annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
@@ -198,7 +201,7 @@ class PolygonPrecisionConfig(EvalFunctionConfig):
def __call__(
self,
enforce_label_match: bool = False,
iou_threshold: float = 0.5,
iou_threshold: float = DEFAULT_2D_IOU_THRESHOLD,
confidence_threshold: float = 0.0,
annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
@@ -261,7 +264,7 @@ class CuboidIOU2DConfig(EvalFunctionConfig):
def __call__(
self,
enforce_label_match: bool = True,
iou_threshold: float = 0.0,
iou_threshold: float = DEFAULT_2D_IOU_THRESHOLD,
confidence_threshold: float = 0.0,
annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
@@ -315,7 +318,7 @@ class CuboidIOU3DConfig(EvalFunctionConfig):
def __call__(
self,
enforce_label_match: bool = True,
iou_threshold: float = 0.0,
iou_threshold: float = DEFAULT_3D_IOU_THRESHOLD,
confidence_threshold: float = 0.0,
annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
@@ -370,7 +373,7 @@ class CuboidPrecisionConfig(EvalFunctionConfig):
def __call__(
self,
enforce_label_match: bool = True,
iou_threshold: float = 0.0,
iou_threshold: float = DEFAULT_3D_IOU_THRESHOLD,
confidence_threshold: float = 0.0,
annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
@@ -424,7 +427,7 @@ class CuboidRecallConfig(EvalFunctionConfig):
def __call__(
self,
enforce_label_match: bool = True,
iou_threshold: float = 0.0,
iou_threshold: float = DEFAULT_3D_IOU_THRESHOLD,
confidence_threshold: float = 0.0,
annotation_filters: Optional[
Union[ListOfOrAndFilters, ListOfAndFilters]
375 changes: 374 additions & 1 deletion tests/metrics/test_cuboid_metrics.py
@@ -1,5 +1,14 @@
import pytest

from nucleus.annotation import CuboidAnnotation, Point3D, AnnotationList
from nucleus.metrics.base import ScalarResult
from nucleus.metrics.cuboid_metrics import (
CuboidIOU,
CuboidPrecision,
CuboidRecall,
)
from nucleus.prediction import CuboidPrediction, PredictionList

try:
import shapely
except ModuleNotFoundError:
@@ -8,4 +17,368 @@
allow_module_level=True,
)

# TODO(gunnar): Add Cuboid tests!
CAR_LABEL = "car"
PEDESTRIAN_LABEL = "pedestrian"
DEFAULT_90_DEGREE_ROTATION = 1.57079


def test_cuboid_metrics_simple():
# single item, perfect predictions
annotations = AnnotationList(
cuboid_annotations=[
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(0, 0, 0),
dimensions=Point3D(10, 10, 10),
yaw=0.0,
reference_id="item_A",
),
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(1000, 1000, 1000),
dimensions=Point3D(10, 10, 10),
yaw=0.0,
reference_id="item_A",
),
]
)
predictions = PredictionList(
cuboid_predictions=[
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(0, 0, 0),
dimensions=Point3D(10, 10, 10),
yaw=0.0,
reference_id="item_A",
),
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(1000, 1000, 1000),
dimensions=Point3D(10, 10, 10),
yaw=0.0,
reference_id="item_A",
),
]
)
assert CuboidIOU()(annotations, predictions) == ScalarResult(
1.0, len(annotations)
), "Unexpected Cuboid IoU result"
assert CuboidPrecision()(annotations, predictions) == ScalarResult(
1.0, len(annotations)
), "Unexpected Cuboid Precision result"
assert CuboidRecall()(annotations, predictions) == ScalarResult(
1.0, len(annotations)
), "Unexpected Cuboid Recall result"


def test_cuboid_metrics_numerical_check():
# single item, realistic predictions w/ matches and non-matches
annotations = AnnotationList(
cuboid_annotations=[
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(0, 0, 0),
dimensions=Point3D(10, 10, 5),
yaw=0.0,
reference_id="item_A",
),
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(1000, 1000, 1000),
dimensions=Point3D(10, 10, 10),
yaw=0.0,
reference_id="item_A",
),
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(-100, -100, -100),
dimensions=Point3D(10, 10, 10),
yaw=0.0,
reference_id="item_A",
), # false negative
]
)
predictions = PredictionList(
cuboid_predictions=[
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(1.0, 1.0, 1.0),
dimensions=Point3D(10, 10, 5),
yaw=0.0,
reference_id="item_A",
),
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(999, 999, 999),
dimensions=Point3D(8, 8, 6),
yaw=0.0,
reference_id="item_A",
),
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(250, 250, 250),
dimensions=Point3D(2, 2, 2),
yaw=0.0,
reference_id="item_A",
), # false positive
]
)
cuboid_iou_result = CuboidIOU()(annotations, predictions)
cuboid_precision_result = CuboidPrecision()(annotations, predictions)
cuboid_recall_result = CuboidRecall()(annotations, predictions)
assert cuboid_iou_result == ScalarResult(
0.4316, 2
), f"Unexpected Cuboid IoU result: {cuboid_iou_result}"
assert cuboid_precision_result == ScalarResult(
2.0 / 3.0, len(predictions)
), f"Unexpected Cuboid Precision result {cuboid_precision_result}"
assert cuboid_recall_result == ScalarResult(
2.0 / 3.0, len(annotations)
), f"Unexpected Cuboid Recall result {cuboid_recall_result}"


def test_cuboid_metrics_numerical_check_rotation():
annotations = AnnotationList(
cuboid_annotations=[
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(0, 0, 0),
dimensions=Point3D(10, 5, 5),
yaw=0.0,
reference_id="item_A",
),
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(1000, 1000, 1000),
dimensions=Point3D(8, 4, 6),
yaw=DEFAULT_90_DEGREE_ROTATION,
reference_id="item_A",
),
]
)
predictions = PredictionList(
cuboid_predictions=[
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(0, 0, 0),
dimensions=Point3D(10, 5, 5),
yaw=DEFAULT_90_DEGREE_ROTATION,
reference_id="item_A",
),
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(1000, 1000, 1000),
dimensions=Point3D(8, 4, 6),
yaw=0.0,
reference_id="item_A",
),
]
)
cuboid_iou_result = CuboidIOU()(annotations, predictions)
cuboid_precision_result = CuboidPrecision()(annotations, predictions)
cuboid_recall_result = CuboidRecall()(annotations, predictions)
assert cuboid_iou_result == ScalarResult(
1.0 / 3.0, 2
), f"Unexpected Cuboid IoU result: {cuboid_iou_result}"


def test_cuboid_metrics_class_labels():
annotations = AnnotationList(
cuboid_annotations=[
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(0, 0, 0),
dimensions=Point3D(10, 10, 5),
yaw=0.0,
reference_id="item_A",
),
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(1000, 1000, 1000),
dimensions=Point3D(10, 10, 10),
yaw=0.0,
reference_id="item_A",
),
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(-100, -100, -100),
dimensions=Point3D(10, 10, 10),
yaw=0.0,
reference_id="item_A",
), # false negative
]
)
predictions = PredictionList(
cuboid_predictions=[
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(1.0, 1.0, 1.0),
dimensions=Point3D(10, 10, 5),
yaw=0.0,
reference_id="item_A",
),
CuboidPrediction(
label=PEDESTRIAN_LABEL,
position=Point3D(999, 999, 999),
dimensions=Point3D(8, 8, 6),
yaw=0.0,
reference_id="item_A",
),
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(250, 250, 250),
dimensions=Point3D(2, 2, 2),
yaw=0.0,
reference_id="item_A",
), # false positive
]
)

cuboid_iou_result1 = CuboidIOU(enforce_label_match=True)(
annotations, predictions
)
cuboid_precision_result1 = CuboidPrecision(enforce_label_match=True)(
annotations, predictions
)
cuboid_recall_result1 = CuboidRecall(enforce_label_match=True)(
annotations, predictions
)
assert cuboid_iou_result1 == ScalarResult(
0.47928, 1
), f"Unexpected Cuboid IoU result: {cuboid_iou_result1}"
assert cuboid_precision_result1 == ScalarResult(
1.0 / 3.0, len(predictions)
), f"Unexpected Cuboid Precision result {cuboid_precision_result1}"
assert cuboid_recall_result1 == ScalarResult(
1.0 / 3.0, len(annotations)
), f"Unexpected Cuboid Recall result {cuboid_recall_result1}"

cuboid_iou_result2 = CuboidIOU(enforce_label_match=False)(
annotations, predictions
)
cuboid_precision_result2 = CuboidPrecision(enforce_label_match=False)(
annotations, predictions
)
cuboid_recall_result2 = CuboidRecall(enforce_label_match=False)(
annotations, predictions
)
assert cuboid_iou_result2 == ScalarResult(
0.4316, 2
), f"Unexpected Cuboid IoU result: {cuboid_iou_result2}"
assert cuboid_precision_result2 == ScalarResult(
2.0 / 3.0, len(predictions)
), f"Unexpected Cuboid Precision result {cuboid_precision_result2}"
assert cuboid_recall_result2 == ScalarResult(
2.0 / 3.0, len(annotations)
), f"Unexpected Cuboid Recall result {cuboid_recall_result2}"


def test_cuboid_metrics_multi_item():
# two items, mixed matches across items
annotations = AnnotationList(
cuboid_annotations=[
# first item
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(0, 0, 0),
dimensions=Point3D(10, 10, 5),
yaw=0.0,
reference_id="item_A",
),
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(1000, 1000, 1000),
dimensions=Point3D(10, 10, 10),
yaw=0.0,
reference_id="item_A",
),
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(-100, -100, -100),
dimensions=Point3D(10, 10, 10),
yaw=0.0,
reference_id="item_A",
), # false negative
# second item
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(0, 0, 0),
dimensions=Point3D(10, 10, 5),
yaw=0.0,
reference_id="item_B",
),
CuboidAnnotation(
label=CAR_LABEL,
position=Point3D(30, 50, 120),
dimensions=Point3D(1, 2.5, 3),
yaw=0.0,
reference_id="item_B",
),
]
)
predictions = PredictionList(
cuboid_predictions=[
# first item
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(1.0, 1.0, 1.0),
dimensions=Point3D(10, 10, 5),
yaw=0.0,
reference_id="item_A",
),
CuboidPrediction(
label=PEDESTRIAN_LABEL,
position=Point3D(999, 999, 999),
dimensions=Point3D(8, 8, 6),
yaw=0.0,
reference_id="item_A",
),
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(250, 250, 250),
dimensions=Point3D(2, 2, 2),
yaw=0.0,
reference_id="item_A",
), # false positive
# second item
CuboidPrediction(
label=CAR_LABEL,
position=Point3D(250, 250, 250),
dimensions=Point3D(2, 2, 2),
yaw=0.0,
reference_id="item_B",
), # false positive
]
)

cuboid_iou_result1 = CuboidIOU()(annotations, predictions)
cuboid_precision_result1 = CuboidPrecision()(annotations, predictions)
cuboid_recall_result1 = CuboidRecall()(annotations, predictions)
assert cuboid_iou_result1 == ScalarResult(
0.47928, 1
), f"Unexpected Cuboid IoU result: {cuboid_iou_result1}"
assert cuboid_precision_result1 == ScalarResult(
1.0 / len(predictions), len(predictions)
), f"Unexpected Cuboid Precision result {cuboid_precision_result1}"
assert cuboid_recall_result1 == ScalarResult(
1.0 / len(annotations), len(annotations)
), f"Unexpected Cuboid Recall result {cuboid_recall_result1}"

cuboid_iou_result2 = CuboidIOU(enforce_label_match=False)(
annotations, predictions
)
cuboid_precision_result2 = CuboidPrecision(enforce_label_match=False)(
annotations, predictions
)
cuboid_recall_result2 = CuboidRecall(enforce_label_match=False)(
annotations, predictions
)
assert cuboid_iou_result2 == ScalarResult(
0.4316, 2
), f"Unexpected Cuboid IoU result: {cuboid_iou_result2}"
assert cuboid_precision_result2 == ScalarResult(
2.0 / len(predictions), len(predictions)
), f"Unexpected Cuboid Precision result {cuboid_precision_result2}"
assert cuboid_recall_result2 == ScalarResult(
2.0 / len(annotations), len(annotations)
), f"Unexpected Cuboid Recall result {cuboid_recall_result2}"