Skip to content
2 changes: 1 addition & 1 deletion roboflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from roboflow.models import CLIPModel, GazeModel # noqa: F401
from roboflow.util.general import write_line

__version__ = "1.2.10"
__version__ = "1.2.11"


def check_key(api_key, model, notebook, num_retries=0):
Expand Down
121 changes: 96 additions & 25 deletions roboflow/util/folderparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,45 +111,123 @@ def _map_annotations_to_images_1to1(images, annotations):


def _map_annotations_to_images_1tomany(images, annotationFiles):
    """Attach to each image the annotations extracted from a shared annotation file.

    Candidate annotation files for an image are looked up in an index, trying the
    image's relative path (``image["file"]`` without leading slashes), then
    ``image["name"]``, then ``image["key"]``. The first candidate for which
    ``_filterIndividualAnnotations`` returns a truthy value is stored under
    ``image["annotationfile"]``. Images without any match are left untouched.

    Matching no longer depends on the image and the annotation file living in the
    same directory.

    Args:
        images: list of image dicts; each is read for "file", "name" and "key" and
            may be mutated in place.
        annotationFiles: list of annotation-file dicts; each is read for "parsedType".

    Returns:
        None. ``images`` is updated in place.
    """
    image_path_to_annotation_files = _build_image_to_annotationfile_index(annotationFiles)
    imgRefMap, annotationMap = _build_image_and_annotation_maps(annotationFiles)

    for image in tqdm(images):
        # Get candidate annotation files for this image
        rel_path = image["file"].lstrip("/")
        candidate_annotations = (
            image_path_to_annotation_files.get(rel_path, [])
            or image_path_to_annotation_files.get(image["name"], [])
            or image_path_to_annotation_files.get(image["key"], [])
            # Fallback to all files for formats the index does not cover.
            # NOTE(review): when the index yields nothing, every annotation file is
            # tried for this image, which can be slow on large datasets and may
            # pair the image with an unrelated file - confirm this is intended.
            or annotationFiles
        )

        for annotationFile in candidate_annotations:
            # Named parsed_type (not "format") to avoid shadowing the builtin.
            parsed_type = annotationFile["parsedType"]
            filtered_annotations = _filterIndividualAnnotations(
                image, annotationFile, parsed_type, imgRefMap, annotationMap
            )
            if filtered_annotations:
                image["annotationfile"] = filtered_annotations
                break


def _build_image_to_annotationfile_index(annotationFiles):
"""Create an index mapping possible image path keys to annotation files that reference them.

Keys include full relative path, basename, and stem to improve robustness across
different dataset layouts. Supports coco, createml, csv, multilabel_csv, jsonl.
"""
index = defaultdict(list)
for annotationFile in annotationFiles:
parsedType = annotationFile.get("parsedType")
parsed = annotationFile.get("parsed")
if not parsedType or parsed is None:
continue

if parsedType == "coco":
for imageRef in parsed.get("images", []):
file_name = _patch_sep(imageRef.get("file_name", "")).lstrip("/")
if not file_name:
continue
basename = os.path.basename(file_name)
stem = os.path.splitext(basename)[0]
index[file_name].append(annotationFile)
index[basename].append(annotationFile)
index[stem].append(annotationFile)

elif parsedType == "createml":
for entry in parsed:
image_name = entry.get("image")
if not image_name:
continue
index[image_name].append(annotationFile)

elif parsedType == "csv":
for ld in parsed.get("lines", []):
image_name = ld.get("file_name")
if not image_name:
continue
index[image_name].append(annotationFile)

elif parsedType == "multilabel_csv":
for row in parsed.get("rows", []):
image_name = row.get("file_name")
if not image_name:
continue
index[image_name].append(annotationFile)

elif parsedType == "jsonl":
for entry in parsed:
image_name = entry.get("image")
if not image_name:
continue
index[image_name].append(annotationFile)

return index


def _build_image_and_annotation_maps(annotationFiles):
imgRefMap = {}
annotationMap = defaultdict(list)
for annFile in annotationFiles:
filename, dirname, parsed, parsedType = (
filename, parsed, parsedType = (
annFile["file"],
annFile["dirname"],
annFile["parsed"],
annFile["parsedType"],
)
if parsedType == "coco":
for imageRef in parsed["images"]:
imgRefMap[f"{filename}/{imageRef['file_name']}"] = imageRef
# Normalize and index by multiple forms to improve matching robustness
file_name = _patch_sep(imageRef["file_name"]).lstrip("/")
basename = os.path.basename(file_name)
stem = os.path.splitext(basename)[0]

# Prefer full relative path, but also allow basename and stem
imgRefMap.update(
{
f"{filename}/{file_name}": imageRef,
f"{filename}/{basename}": imageRef,
f"{filename}/{stem}": imageRef,
}
)
for annotation in parsed["annotations"]:
annotationMap[f"{dirname}/{annotation['image_id']}"].append(annotation)
annotationMap[f"{filename}/{annotation['image_id']}"].append(annotation)
return imgRefMap, annotationMap


def _filterIndividualAnnotations(image, annotation, format, imgRefMap, annotationMap):
parsed = annotation["parsed"]
if format == "coco":
imgReference = imgRefMap.get(f"{annotation['file']}/{image['name']}")
rel_path = image["file"].lstrip("/")
imgReference = (
# Try matching by full relative path first
imgRefMap.get(f"{annotation['file']}/{rel_path}")
# Fallback: basename with extension
or imgRefMap.get(f"{annotation['file']}/{image['name']}")
# Fallback: stem (no extension)
or imgRefMap.get(f"{annotation['file']}/{image['key']}")
)
if imgReference:
# workaround to make Annotations.js correctly identify this as coco in the backend
fake_annotation = {
Expand All @@ -161,7 +239,7 @@ def _filterIndividualAnnotations(image, annotation, format, imgRefMap, annotatio
"iscrowd": 0,
}
_annotation = {"name": "annotation.coco.json"}
annotations_for_image = annotationMap.get(f"{image['dirname']}/{imgReference['id']}", [])
annotations_for_image = annotationMap.get(f"{annotation['file']}/{imgReference['id']}", [])
_annotation["rawText"] = json.dumps(
{
"info": parsed["info"],
Expand Down Expand Up @@ -314,13 +392,6 @@ def _decide_split(images):
i["split"] = "train"


def _list_map(my_list, key):
d = {}
for i in my_list:
d.setdefault(i[key], []).append(i)
return d


def _infer_classification_labels_from_folders(images):
for image in images:
if image.get("annotationfile"):
Expand Down
110 changes: 110 additions & 0 deletions tests/util/test_folderparser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import json
import os
import tempfile
import unittest
from os.path import abspath, dirname

Expand Down Expand Up @@ -95,6 +97,114 @@ def test_parse_multilabel_classification_csv(self):
self.assertEqual(img1["annotationfile"]["type"], "classification_multilabel")
self.assertEqual(set(img1["annotationfile"]["labels"]), {"Blackheads"})

def test_coco_with_subdir_file_name_should_match_annotations(self):
    """COCO ``file_name`` carries a subdirectory while the image sits at the dataset root.

    The parser is expected to fall back to matching on the file's basename, so the
    image must still end up with an annotation attached.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        image_name = "example_2_100002_02f2f7c6e15f09b401575ae6.jpeg"
        # Path recorded in the COCO file: 2/100002/<image_name> ...
        image_relpath = os.path.join("2", "100002", image_name)
        # ... while the actual file is written directly into the dataset root.
        image_path = os.path.join(tmpdir, image_name)
        # Create an empty image file (content not used by parser)
        with open(image_path, "wb"):
            pass

        # Create COCO annotation JSON at dataset root, referencing the image with subdir in file_name
        coco = {
            "info": {},
            "licenses": [],
            "categories": [{"id": 1, "name": "thing"}],
            "images": [
                {
                    "id": 10000000,
                    "file_name": image_relpath.replace(os.sep, "/"),
                    "width": 800,
                    "height": 533,
                }
            ],
            "annotations": [
                {
                    "id": 1,
                    "image_id": 10000000,
                    "category_id": 1,
                    "bbox": [10, 10, 100, 50],
                    "area": 5000,
                    "segmentation": [],
                    "iscrowd": 0,
                }
            ],
        }
        coco_path = os.path.join(tmpdir, "_annotations.coco.json")
        with open(coco_path, "w") as f:
            json.dump(coco, f)

        parsed = folderparser.parsefolder(tmpdir)
        # Image entries store file with a leading slash relative to root
        expected_file_key = f"/{image_name}"
        img_entries = [i for i in parsed["images"] if i["file"] == expected_file_key]
        # assertEqual reports the actual count on failure, unlike assertTrue(len(...) == 1)
        self.assertEqual(len(img_entries), 1)
        img_entry = img_entries[0]

        # The annotation must be attached even though file_name and the on-disk
        # location differ in their directory part.
        self.assertIsNotNone(img_entry.get("annotationfile"))

def test_coco_root_annotation_matches_images_in_subdirs(self):
    """A COCO file at the dataset root must annotate an image stored in a subdirectory.

    Regression test: the annotation file's dirname (/) differs from the image's
    dirname (/1/100001), which used to prevent the annotations from being found.
    """
    image_name = "image.jpeg"
    image_record = {
        "id": 10000000,
        "file_name": "1/100001/image.jpeg",
        "width": 800,
        "height": 600,
    }
    annotation_record = {
        "id": 1,
        "image_id": 10000000,
        "category_id": 1,
        "bbox": [10, 20, 100, 200],
        "area": 20000,
        "segmentation": [[10, 20, 110, 20, 110, 220, 10, 220]],
        "iscrowd": 0,
    }
    coco = {
        "info": {},
        "licenses": [],
        "categories": [{"id": 1, "name": "object"}],
        "images": [image_record],
        "annotations": [annotation_record],
    }

    with tempfile.TemporaryDirectory() as tmpdir:
        # Lay out the dataset: an empty image two levels deep ...
        subdir = os.path.join(tmpdir, "1", "100001")
        os.makedirs(subdir, exist_ok=True)
        with open(os.path.join(subdir, image_name), "wb"):
            pass

        # ... and the COCO file at the root, pointing at it by relative path.
        with open(os.path.join(tmpdir, "_annotations.coco.json"), "w") as f:
            json.dump(coco, f)

        parsed = folderparser.parsefolder(tmpdir)

        # Locate the parsed entry for our image.
        img_entries = []
        for candidate in parsed["images"]:
            if image_name in candidate["file"]:
                img_entries.append(candidate)
        self.assertEqual(len(img_entries), 1, "Should find exactly one image")
        img_entry = img_entries[0]

        # The annotation must have been attached ...
        self.assertIsNotNone(img_entry.get("annotationfile"), "Image should have annotation")

        # ... and must carry exactly this image's data.
        ann_data = json.loads(img_entry["annotationfile"]["rawText"])
        self.assertEqual(len(ann_data["images"]), 1, "Should have one image reference")
        self.assertEqual(len(ann_data["annotations"]), 1, "Should have one annotation")
        self.assertEqual(ann_data["annotations"][0]["bbox"], [10, 20, 100, 200])


def _assertJsonMatchesFile(actual, filename):
with open(filename) as file:
Expand Down