roboflow · iurisilvio · Oct 5, 2025 · Oct 5, 2025 · Oct 8, 2025 · Oct 8, 2025
diff --git a/roboflow/__init__.py b/roboflow/__init__.py
@@ -15,7 +15,7 @@
 from roboflow.models import CLIPModel, GazeModel  # noqa: F401
 from roboflow.util.general import write_line
 
-__version__ = "1.2.10"
+__version__ = "1.2.11"
 
 
 def check_key(api_key, model, notebook, num_retries=0):

diff --git a/roboflow/util/folderparser.py b/roboflow/util/folderparser.py
@@ -111,45 +111,123 @@ def _map_annotations_to_images_1to1(images, annotations):
 
 
 def _map_annotations_to_images_1tomany(images, annotationFiles):
-    annotationsByDirname = _list_map(annotationFiles, "dirname")
+    image_path_to_annotation_files = _build_image_to_annotationfile_index(annotationFiles)
     imgRefMap, annotationMap = _build_image_and_annotation_maps(annotationFiles)
 
     for image in tqdm(images):
-        dirname = image["dirname"]
-        annotationsInSameDir = annotationsByDirname.get(dirname, [])
-        if annotationsInSameDir:
-            for annotationFile in annotationsInSameDir:
-                format = annotationFile["parsedType"]
-                filtered_annotations = _filterIndividualAnnotations(
-                    image, annotationFile, format, imgRefMap, annotationMap
-                )
-                if filtered_annotations:
-                    image["annotationfile"] = filtered_annotations
-                    break
+        # Get candidate annotation files for this image
+        rel_path = image["file"].lstrip("/")
+        candidate_annotations = (
+            image_path_to_annotation_files.get(rel_path, [])
+            or image_path_to_annotation_files.get(image["name"], [])
+            or image_path_to_annotation_files.get(image["key"], [])
+            or annotationFiles  # Fallback to all files for non-COCO formats
+        )
+
+        for annotationFile in candidate_annotations:
+            format = annotationFile["parsedType"]
+            filtered_annotations = _filterIndividualAnnotations(image, annotationFile, format, imgRefMap, annotationMap)
+            if filtered_annotations:
+                image["annotationfile"] = filtered_annotations
+                break
+
+
+def _build_image_to_annotationfile_index(annotationFiles):
+    """Create an index mapping possible image path keys to annotation files that reference them.
+
+    For coco, keys include the full relative path, basename, and stem to improve robustness across
+    dataset layouts; createml, csv, multilabel_csv and jsonl entries are keyed by their image name as-is.
+    """
+    index = defaultdict(list)
+    for annotationFile in annotationFiles:
+        parsedType = annotationFile.get("parsedType")
+        parsed = annotationFile.get("parsed")
+        if not parsedType or parsed is None:
+            continue
+
+        if parsedType == "coco":
+            for imageRef in parsed.get("images", []):
+                file_name = _patch_sep(imageRef.get("file_name", "")).lstrip("/")
+                if not file_name:
+                    continue
+                basename = os.path.basename(file_name)
+                stem = os.path.splitext(basename)[0]
+                index[file_name].append(annotationFile)
+                index[basename].append(annotationFile)
+                index[stem].append(annotationFile)
+
+        elif parsedType == "createml":
+            for entry in parsed:
+                image_name = entry.get("image")
+                if not image_name:
+                    continue
+                index[image_name].append(annotationFile)
+
+        elif parsedType == "csv":
+            for ld in parsed.get("lines", []):
+                image_name = ld.get("file_name")
+                if not image_name:
+                    continue
+                index[image_name].append(annotationFile)
+
+        elif parsedType == "multilabel_csv":
+            for row in parsed.get("rows", []):
+                image_name = row.get("file_name")
+                if not image_name:
+                    continue
+                index[image_name].append(annotationFile)
+
+        elif parsedType == "jsonl":
+            for entry in parsed:
+                image_name = entry.get("image")
+                if not image_name:
+                    continue
+                index[image_name].append(annotationFile)
+
+    return index
 
 
 def _build_image_and_annotation_maps(annotationFiles):
     imgRefMap = {}
     annotationMap = defaultdict(list)
     for annFile in annotationFiles:
-        filename, dirname, parsed, parsedType = (
+        filename, parsed, parsedType = (
             annFile["file"],
-            annFile["dirname"],
             annFile["parsed"],
             annFile["parsedType"],
         )
         if parsedType == "coco":
             for imageRef in parsed["images"]:
-                imgRefMap[f"{filename}/{imageRef['file_name']}"] = imageRef
+                # Normalize and index by multiple forms to improve matching robustness
+                file_name = _patch_sep(imageRef["file_name"]).lstrip("/")
+                basename = os.path.basename(file_name)
+                stem = os.path.splitext(basename)[0]
+
+                # Index by full relative path, basename and stem; on key collisions the later imageRef wins
+                imgRefMap.update(
+                    {
+                        f"{filename}/{file_name}": imageRef,
+                        f"{filename}/{basename}": imageRef,
+                        f"{filename}/{stem}": imageRef,
+                    }
+                )
             for annotation in parsed["annotations"]:
-                annotationMap[f"{dirname}/{annotation['image_id']}"].append(annotation)
+                annotationMap[f"{filename}/{annotation['image_id']}"].append(annotation)
     return imgRefMap, annotationMap
 
 
 def _filterIndividualAnnotations(image, annotation, format, imgRefMap, annotationMap):
     parsed = annotation["parsed"]
     if format == "coco":
-        imgReference = imgRefMap.get(f"{annotation['file']}/{image['name']}")
+        rel_path = image["file"].lstrip("/")
+        imgReference = (
+            # Try matching by full relative path first
+            imgRefMap.get(f"{annotation['file']}/{rel_path}")
+            # Fallback: basename with extension
+            or imgRefMap.get(f"{annotation['file']}/{image['name']}")
+            # Fallback: stem (no extension)
+            or imgRefMap.get(f"{annotation['file']}/{image['key']}")
+        )
         if imgReference:
             # workaround to make Annotations.js correctly identify this as coco in the backend
             fake_annotation = {
@@ -161,7 +239,7 @@ def _filterIndividualAnnotations(image, annotation, format, imgRefMap, annotatio
                 "iscrowd": 0,
             }
             _annotation = {"name": "annotation.coco.json"}
-            annotations_for_image = annotationMap.get(f"{image['dirname']}/{imgReference['id']}", [])
+            annotations_for_image = annotationMap.get(f"{annotation['file']}/{imgReference['id']}", [])
             _annotation["rawText"] = json.dumps(
                 {
                     "info": parsed["info"],
@@ -314,13 +392,6 @@ def _decide_split(images):
             i["split"] = "train"
 
 
-def _list_map(my_list, key):
-    d = {}
-    for i in my_list:
-        d.setdefault(i[key], []).append(i)
-    return d
-
-
 def _infer_classification_labels_from_folders(images):
     for image in images:
         if image.get("annotationfile"):

diff --git a/tests/util/test_folderparser.py b/tests/util/test_folderparser.py
@@ -1,4 +1,6 @@
 import json
+import os
+import tempfile
 import unittest
 from os.path import abspath, dirname
 
@@ -95,6 +97,114 @@ def test_parse_multilabel_classification_csv(self):
         self.assertEqual(img1["annotationfile"]["type"], "classification_multilabel")
         self.assertEqual(set(img1["annotationfile"]["labels"]), {"Blackheads"})
 
+    def test_coco_with_subdir_file_name_should_match_annotations(self):
+        # COCO file_name includes a subdirectory, but the actual image is at dataset root.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Nested relative path (2/100002/<image>) used only as the COCO file_name; the file itself sits at tmpdir root
+            image_name = "example_2_100002_02f2f7c6e15f09b401575ae6.jpeg"
+            image_relpath = os.path.join("2", "100002", image_name)
+            image_path = os.path.join(tmpdir, image_name)
+            # Create an empty image file (content not used by parser)
+            open(image_path, "wb").close()
+
+            # Create COCO annotation JSON at dataset root, referencing the image with subdir in file_name
+            coco = {
+                "info": {},
+                "licenses": [],
+                "categories": [{"id": 1, "name": "thing"}],
+                "images": [
+                    {
+                        "id": 10000000,
+                        "file_name": image_relpath.replace(os.sep, "/"),
+                        "width": 800,
+                        "height": 533,
+                    }
+                ],
+                "annotations": [
+                    {
+                        "id": 1,
+                        "image_id": 10000000,
+                        "category_id": 1,
+                        "bbox": [10, 10, 100, 50],
+                        "area": 5000,
+                        "segmentation": [],
+                        "iscrowd": 0,
+                    }
+                ],
+            }
+            coco_path = os.path.join(tmpdir, "_annotations.coco.json")
+            with open(coco_path, "w") as f:
+                json.dump(coco, f)
+
+            parsed = folderparser.parsefolder(tmpdir)
+            # Image entries store file with a leading slash relative to root
+            expected_file_key = f"/{image_name}"
+            img_entries = [i for i in parsed["images"] if i["file"] == expected_file_key]
+            self.assertTrue(len(img_entries) == 1)
+            img_entry = img_entries[0]
+
+            # annotationfile must be populated: the basename of the COCO file_name matches the image at the dataset root
+            self.assertIsNotNone(img_entry.get("annotationfile"))
+
+    def test_coco_root_annotation_matches_images_in_subdirs(self):
+        """Test that COCO annotation at root can match images in subdirectories.
+
+        This tests the fix for the bug where annotation file dirname (/) didn't match
+        image dirname (/1/100001), causing annotations to not be found.
+        """
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Create image in subdirectory
+            subdir = os.path.join(tmpdir, "1", "100001")
+            os.makedirs(subdir, exist_ok=True)
+            image_name = "image.jpeg"
+            image_path = os.path.join(subdir, image_name)
+            open(image_path, "wb").close()
+
+            # Create COCO annotation at root referencing image with subdirectory path
+            coco = {
+                "info": {},
+                "licenses": [],
+                "categories": [{"id": 1, "name": "object"}],
+                "images": [
+                    {
+                        "id": 10000000,
+                        "file_name": "1/100001/image.jpeg",
+                        "width": 800,
+                        "height": 600,
+                    }
+                ],
+                "annotations": [
+                    {
+                        "id": 1,
+                        "image_id": 10000000,
+                        "category_id": 1,
+                        "bbox": [10, 20, 100, 200],
+                        "area": 20000,
+                        "segmentation": [[10, 20, 110, 20, 110, 220, 10, 220]],
+                        "iscrowd": 0,
+                    }
+                ],
+            }
+            coco_path = os.path.join(tmpdir, "_annotations.coco.json")
+            with open(coco_path, "w") as f:
+                json.dump(coco, f)
+
+            parsed = folderparser.parsefolder(tmpdir)
+
+            # Find the image
+            img_entries = [i for i in parsed["images"] if image_name in i["file"]]
+            self.assertEqual(len(img_entries), 1, "Should find exactly one image")
+            img_entry = img_entries[0]
+
+            # Verify annotation was matched
+            self.assertIsNotNone(img_entry.get("annotationfile"), "Image should have annotation")
+
+            # Verify annotation content
+            ann_data = json.loads(img_entry["annotationfile"]["rawText"])
+            self.assertEqual(len(ann_data["images"]), 1, "Should have one image reference")
+            self.assertEqual(len(ann_data["annotations"]), 1, "Should have one annotation")
+            self.assertEqual(ann_data["annotations"][0]["bbox"], [10, 20, 100, 200])
+
 
 def _assertJsonMatchesFile(actual, filename):
     with open(filename) as file: