|
| 1 | +"""Converts dynamic object annotations to KITTI format.""" |
| 2 | +import argparse |
| 3 | +import glob |
| 4 | +import json |
| 5 | +import os |
| 6 | +from datetime import datetime |
| 7 | +from os.path import join, basename |
| 8 | +from typing import List, Callable |
| 9 | + |
| 10 | +import numpy as np |
| 11 | +from pyquaternion import Quaternion |
| 12 | +from tqdm import tqdm |
| 13 | + |
| 14 | +from calibration import ( |
| 15 | + load_calib_from_json, |
| 16 | + get_3d_transform_camera_lidar, |
| 17 | + rigid_transform_3d, |
| 18 | + transform_rotation, |
| 19 | +) |
| 20 | +from constants import TIME_FORMAT, SIZE, LOCATION, ROTATION |
| 21 | +from plot_objects_on_image import ObjectAnnotationHandler |
| 22 | + |
# Sensor image size in pixels; used to clip 2D boxes to the visible frame.
IMAGE_DIMS = np.array([3848, 2168])  # width, height

# Maps the source occlusion label onto a coarse KITTI-style integer level.
OCCLUSION_MAP = {
    "None": 0,
    "Light": 1,
    "Medium": 1,
    "Heavy": 2,
    "VeryHeavy": 2,
    "Undefined": 2,  # If undefined we assume the worst
}
| 33 | + |
| 34 | + |
def _parse_class(obj_properties):
    """Map raw annotation properties onto a KITTI class name.

    Returns the KITTI class string, "DontCare" for objects that should be
    kept only as ignore regions, or None for objects to drop entirely.
    """
    label = obj_properties["class"]
    # Drop Animals, Debris, Movers and any other unwanted classes.
    if label not in ("VulnerableVehicle", "Vehicle", "Pedestrian"):
        return None
    # Unclear or inconclusive objects become ignore regions.
    if obj_properties["unclear"] or obj_properties["object_type"] == "Inconclusive":
        return "DontCare"
    if label == "VulnerableVehicle":
        # Two-wheelers without a rider are removed altogether.
        if obj_properties.get("with_rider", "True") == "False":
            return None
        # Only bicyclists and motorbicyclists map to the KITTI Cyclist class;
        # every other vulnerable-vehicle type is ignored.
        if obj_properties["object_type"] in ("Bicycle", "Motorcycle"):
            return "Cyclist"
        return "DontCare"
    if label == "Vehicle":
        # Ignore more exotic vehicle classes (HeavyEquip, TramTrain, Other).
        if obj_properties["object_type"] not in ("Car", "Van", "Truck", "Trailer", "Bus"):
            return "DontCare"
    # Pedestrians and ordinary vehicles pass through unchanged.
    return label
| 60 | + |
| 61 | + |
def _convert_to_kitti(
    objects: List[ObjectAnnotationHandler], yaw_func: Callable[[Quaternion], float]
) -> List[str]:
    """Render each kept object as one space-separated KITTI label line.

    Objects whose class maps to None are dropped; objects without a 3D
    marking get zeroed 3D fields. ``yaw_func`` extracts the yaw angle from
    the object's rotation quaternion.
    """
    kitti_lines = []
    for obj in objects:
        class_name = _parse_class(obj.properties)
        if class_name is None:
            # Class is not exported at all.
            continue
        truncation, xmax, xmin, ymax, ymin = _parse_bbox_2d(obj.outer_points)
        if obj.marking3d is None:
            size = [0, 0, 0]
            location = [0, 0, 0]
            yaw = 0
            alpha = 0
        else:
            size = obj.marking3d[SIZE][::-1]  # H,W,L not L,W,H
            location = obj.marking3d[LOCATION]  # x,y,z
            yaw = yaw_func(obj.marking3d[ROTATION])
            alpha = 0  # TODO: calculate this!
        # Real objects are expected to carry an occlusion label; warn if not.
        if class_name != "DontCare" and "occlusion_ratio" not in obj.properties:
            print("Missing occlusion for obj: ", obj)
        occlusion = OCCLUSION_MAP[obj.properties.get("occlusion_ratio", "Undefined")]
        fields = [class_name, truncation, occlusion, alpha, xmin, ymin, xmax, ymax]
        fields.extend(size)
        fields.extend(location)
        fields.append(yaw)
        kitti_lines.append(" ".join(str(field) for field in fields))
    return kitti_lines
| 100 | + |
| 101 | + |
def _parse_bbox_2d(outer_points):
    """Compute an image-clipped 2D box and its truncation ratio.

    ``outer_points`` is an (N, 2) array of x,y pixel coordinates. Returns
    ``(truncation, xmax, xmin, ymax, ymin)`` where truncation is the
    fraction of the raw box area that falls outside the image.
    """
    raw_min = np.min(outer_points, axis=0)
    raw_max = np.max(outer_points, axis=0)
    xmin, ymin = np.clip(raw_min, a_min=0, a_max=IMAGE_DIMS)
    xmax, ymax = np.clip(raw_max, a_min=0, a_max=IMAGE_DIMS)
    clipped_area = (xmax - xmin) * (ymax - ymin)
    full_area = (raw_max[0] - raw_min[0]) * (raw_max[1] - raw_min[1])
    # Guard against degenerate boxes to avoid dividing by (near) zero.
    truncation = 1 - clipped_area / full_area if full_area > 0.1 else 0
    return truncation, xmax, xmin, ymax, ymin
| 111 | + |
| 112 | + |
def _lidar_to_camera(objects, calib):
    """Transform each object's 3D marking from the LIDAR to the camera frame.

    Objects without a 3D marking are left untouched. Mutates the objects in
    place and also returns the list for convenience.
    """
    # The calibration transform is identical for every object in the frame;
    # compute it once instead of once per object (was inside the loop).
    transform = get_3d_transform_camera_lidar(calib)
    for obj in objects:
        if obj.marking3d is None:
            continue
        obj.marking3d[ROTATION] = transform_rotation(obj.marking3d[ROTATION], transform)
        obj.marking3d[LOCATION] = rigid_transform_3d(obj.marking3d[LOCATION], transform)
    return objects
| 121 | + |
| 122 | + |
def convert_annotation(calib_path, src_anno_pth, target_path):
    """Convert one source annotation JSON file to a KITTI label file.

    The source filename must look like ``<vehicle>_<camera>_<time>_<id>.json``;
    the result is written to ``<target_path>/<id>.txt`` (id zero-padded to
    six digits).
    """
    with open(src_anno_pth) as anno_file:
        src_anno = json.load(anno_file)
    # BUG FIX: the previous `src_anno_pth.strip(".json")` removed any
    # leading/trailing characters from the set {. j s o n} rather than the
    # ".json" suffix, mangling names that start/end with those characters.
    stem = os.path.splitext(basename(src_anno_pth))[0]
    vehicle, camera_name, time_str, id_ = stem.split("_")
    id_ = int(id_)
    objects = ObjectAnnotationHandler.from_annotations(src_anno)
    objects = [obj[2] for obj in objects]

    # Convert objects from LIDAR to camera using calibration information
    frame_time = datetime.strptime(time_str, TIME_FORMAT)
    calib = load_calib_from_json(calib_path, vehicle, frame_time, camera_name)
    objects = _lidar_to_camera(objects, calib)
    # Write a KITTI-style annotation with objects in the camera frame.
    # Negated yaw: presumably compensates for the camera-frame axis
    # convention — TODO confirm against the calibration code.
    target_anno = _convert_to_kitti(objects, yaw_func=lambda rot: -rot.yaw_pitch_roll[0])
    with open(join(target_path, f"{id_:06d}.txt"), "w") as target_file:
        target_file.write("\n".join(target_anno))
| 139 | + |
| 140 | + |
def _parse_args():
    """Build and evaluate the command-line interface for the converter."""
    parser = argparse.ArgumentParser(description="Convert annotations to KITTI format")
    for flag, help_text in (
        ("--dataset-dir", "Root dataset directory"),
        ("--target-dir", "Output directory"),
    ):
        parser.add_argument(flag, required=True, help=help_text)
    return parser.parse_args()
| 146 | + |
| 147 | + |
def main():
    """Convert every dynamic-object annotation in the dataset to KITTI format."""
    args = _parse_args()
    calib_path = join(args.dataset_dir, "calibration")
    source_path = join(args.dataset_dir, "annotations", "dynamic_objects")
    # Explicit validation instead of `assert`, which is stripped under -O.
    # NOTE(review): substring containment is a weak guard against writing
    # into the dataset — path normalization would be stricter.
    if args.dataset_dir in args.target_dir:
        raise ValueError("Do not write to the dataset")

    print("Looking up all source annotations...")
    source_anno_paths = glob.glob(f"{source_path}/*/*.json")

    # Create target directories
    os.makedirs(args.target_dir, exist_ok=True)

    for src_anno_pth in tqdm(source_anno_paths, desc="Converting annotations..."):
        try:
            convert_annotation(calib_path, src_anno_pth, args.target_dir)
        except Exception as err:
            # Name the offending file before re-raising with full traceback.
            print("Failed converting annotation: ", src_anno_pth, "with error:", str(err))
            raise
| 166 | + |
| 167 | + |
# Script entry point: run the full conversion when executed directly.
if __name__ == "__main__":
    main()
0 commit comments