From d50dbe1611a43f60ca4cecb027758878f4b01a9c Mon Sep 17 00:00:00 2001
From: Yi Ren <reny@microsoft.com>
Date: Wed, 3 Jun 2026 15:48:43 +0800
Subject: [PATCH 1/5] fix(export): resolve timm image_size from pretrained_cfg

Optimum's DummyVisionInputGenerator falls back to 64x64 when normalized_config
lacks image_size/input_size. For timm models loaded via TimmWrapperConfig,
input_size is nested inside pretrained_cfg, a plain dict that Optimum's
NormalizedConfig does not traverse, and preprocessor_config.json is absent
on the hub. As a result, the winml config emitted [1, 3, 64, 64] instead of
[1, 3, 224, 224].

Synthesize a preprocessor_config-style dict from pretrained_cfg.input_size
when the hub fetch misses, keeping the existing size-parsing block intact.
Add timm/mobilenetv3_small_100.lamb_in1k and timm/repghostnet_200.in1k to
the e2e registry; both PASS perf on CPU with the correct [1, 3, 224, 224] shape.
---
 scripts/e2e_eval/testsets/models_all.json |  22 ++++
 src/winml/modelkit/export/io.py           | 118 ++++++++++++++++------
 src/winml/modelkit/inspect/resolver.py    |   8 +-
 tests/unit/export/test_io.py              |  93 +++++++++++++++++
 4 files changed, 208 insertions(+), 33 deletions(-)

diff --git a/scripts/e2e_eval/testsets/models_all.json b/scripts/e2e_eval/testsets/models_all.json
index f2cc97f46..d3a4701a0 100644
--- a/scripts/e2e_eval/testsets/models_all.json
+++ b/scripts/e2e_eval/testsets/models_all.json
@@ -4839,6 +4839,28 @@
     "optimum_supported": false,
     "order": 6
   },
+  {
+    "hf_id": "timm/mobilenetv3_small_100.lamb_in1k",
+    "task": "image-classification",
+    "model_type": "timm_wrapper",
+    "group": "Top200",
+    "priority": "P2",
+    "downloads": 0,
+    "last_update_time": "2024-01-01T00:00:00+00:00",
+    "optimum_supported": true,
+    "order": 1
+  },
+  {
+    "hf_id": "timm/repghostnet_200.in1k",
+    "task": "image-classification",
+    "model_type": "timm_wrapper",
+    "group": "Top200",
+    "priority": "P2",
+    "downloads": 0,
+    "last_update_time": "2024-01-01T00:00:00+00:00",
+    "optimum_supported": true,
+    "order": 2
+  },
   {
     "hf_id": "timpal0l/mdeberta-v3-base-squad2",
     "task": "question-answering",
diff --git a/src/winml/modelkit/export/io.py b/src/winml/modelkit/export/io.py
index bc8011660..f9737d141 100644
--- a/src/winml/modelkit/export/io.py
+++ b/src/winml/modelkit/export/io.py
@@ -206,16 +206,22 @@ def _get_onnx_config(
 def _populate_image_size_from_preprocessor(
     model_id: str | None,
     shape_kwargs: dict,
+    hf_config: PretrainedConfig | None = None,
 ) -> None:
-    """Populate height/width in shape_kwargs from preprocessor_config.json.
+    """Populate height/width in shape_kwargs from preprocessor metadata.
 
     Optimum's DummyVisionInputGenerator falls back to 64x64 when model config
-    lacks image_size (e.g., ResNet). This reads the correct size from
-    preprocessor_config.json and injects it into shape_kwargs.
+    lacks image_size (e.g., ResNet, timm). This reads the correct size from
+    a preprocessor_config-style dict obtained via :func:`_get_preprocessor_dict`
+    (which consults the hub's ``preprocessor_config.json`` first and, when that
+    is unavailable, synthesizes one from wrapper-config metadata such as
+    ``TimmWrapperConfig.pretrained_cfg``).
 
     Args:
         model_id: HuggingFace model identifier (e.g., "microsoft/resnet-50")
         shape_kwargs: Mutable dict to update with height/width if found
+        hf_config: HuggingFace PretrainedConfig used to synthesize a
+            preprocessor dict when ``preprocessor_config.json`` is missing.
     """
     if not model_id:
         return
@@ -223,31 +229,83 @@ def _populate_image_size_from_preprocessor(
     if "height" in shape_kwargs or "width" in shape_kwargs:
         return
 
-    try:
-        from transformers.image_processing_utils import ImageProcessingMixin
-
-        config, _ = ImageProcessingMixin.get_image_processor_dict(model_id)
-        size = config.get("size")
-
-        if isinstance(size, int):
-            shape_kwargs["height"] = size
-            shape_kwargs["width"] = size
-        elif isinstance(size, dict):
-            if "height" in size:
-                shape_kwargs["height"] = size["height"]
-                shape_kwargs["width"] = size["width"]
-            elif "shortest_edge" in size:
-                shape_kwargs["height"] = size["shortest_edge"]
-                shape_kwargs["width"] = size["shortest_edge"]
-
-        if "height" in shape_kwargs:
-            logger.debug(
-                "Loaded image size from preprocessor_config.json: %dx%d",
-                shape_kwargs["height"],
-                shape_kwargs["width"],
-            )
-    except (OSError, ValueError, KeyError) as e:
-        logger.debug("Could not load preprocessor_config.json for %s: %s", model_id, e)
+    config = _get_preprocessor_dict(model_id, hf_config)
+    size = config.get("size")
+
+    if isinstance(size, int):
+        shape_kwargs["height"] = size
+        shape_kwargs["width"] = size
+    elif isinstance(size, dict):
+        if "height" in size:
+            shape_kwargs["height"] = size["height"]
+            shape_kwargs["width"] = size["width"]
+        elif "shortest_edge" in size:
+            shape_kwargs["height"] = size["shortest_edge"]
+            shape_kwargs["width"] = size["shortest_edge"]
+
+    if "height" in shape_kwargs:
+        logger.debug(
+            "Loaded image size from preprocessor dict: %dx%d",
+            shape_kwargs["height"],
+            shape_kwargs["width"],
+        )
+
+
+def _get_preprocessor_dict(
+    model_id: str | None,
+    hf_config: PretrainedConfig | None,
+) -> dict:
+    """Return a ``preprocessor_config.json``-style dict for the model.
+
+    Resolution order:
+
+    1. ``preprocessor_config.json`` fetched from the hub (standard HF vision).
+    2. Synthesized from a nested plain-dict attribute on ``hf_config``
+       carrying ``input_size`` or ``image_size`` (e.g.
+       ``TimmWrapperConfig.pretrained_cfg``).
+
+    Returns the dict in the standard preprocessor schema (``{"size": ...}``)
+    so downstream parsing logic does not need to know which source it came
+    from. Returns an empty dict when neither source yields a usable size.
+    """
+    if model_id:
+        try:
+            from transformers.image_processing_utils import ImageProcessingMixin
+
+            config, _ = ImageProcessingMixin.get_image_processor_dict(model_id)
+            return config
+        except (OSError, ValueError, KeyError) as e:
+            logger.debug("Could not load preprocessor_config.json for %s: %s", model_id, e)
+
+    if hf_config is not None:
+        return _synthesize_preprocessor_dict(hf_config)
+    return {}
+
+
+def _synthesize_preprocessor_dict(hf_config: PretrainedConfig) -> dict:
+    """Build a ``preprocessor_config.json``-style dict from ``hf_config.pretrained_cfg``.
+
+    timm wrapper configs (``TimmWrapperConfig``) stash shape metadata in a
+    ``pretrained_cfg`` dict carrying ``input_size = [C, H, W]``. Optimum's
+    NormalizedConfig only walks ``PretrainedConfig`` children, so this
+    dict-wrapped value is invisible to the dummy-input generator and it
+    falls back to 64x64.
+
+    Preprocessing keys (``mean``/``std``/``interpolation``/``crop_pct``)
+    don't affect export tensor shapes and are intentionally ignored.
+    """
+    pretrained_cfg = getattr(hf_config, "pretrained_cfg", None)
+    if not isinstance(pretrained_cfg, dict):
+        return {}
+
+    input_size = pretrained_cfg.get("input_size")
+    if isinstance(input_size, (list, tuple)):
+        if len(input_size) == 3:
+            return {"size": {"height": input_size[1], "width": input_size[2]}}
+        if len(input_size) == 1:
+            return {"size": input_size[0]}
+
+    return {}
 
 
 # Practical cap for export dummy input sequence length.
@@ -339,7 +397,7 @@ def generate_dummy_inputs(
     onnx_config.float_dtype = float_dtype
 
     shape_kwargs["batch_size"] = batch_size
-    _populate_image_size_from_preprocessor(model_id, shape_kwargs)
+    _populate_image_size_from_preprocessor(model_id, shape_kwargs, hf_config)
     _populate_sequence_length_from_config(hf_config, shape_kwargs)
 
     logger.debug(
@@ -402,7 +460,7 @@ def resolve_io_specs(
 
     # Populate shapes from model config / preprocessor
     shape_kwargs["batch_size"] = batch_size
-    _populate_image_size_from_preprocessor(model_id, shape_kwargs)
+    _populate_image_size_from_preprocessor(model_id, shape_kwargs, hf_config)
     _populate_sequence_length_from_config(hf_config, shape_kwargs)
 
     # Generate dummy inputs for concrete shapes and dtypes,
diff --git a/src/winml/modelkit/inspect/resolver.py b/src/winml/modelkit/inspect/resolver.py
index 91a791bb9..49b31aa41 100644
--- a/src/winml/modelkit/inspect/resolver.py
+++ b/src/winml/modelkit/inspect/resolver.py
@@ -812,14 +812,16 @@ def get_config_attr(
         if val is not None:
             extra[attr] = val
 
-    # Step 5: Fallback — read image_size from preprocessor_config.json
-    # for models like ResNet where HF config lacks image_size
+    # Step 5: Fallback — read image_size from a preprocessor-style dict
+    # (preprocessor_config.json on the hub, or synthesized from a nested
+    # dict on hf_config such as TimmWrapperConfig.pretrained_cfg) when the
+    # top-level HF config lacks image_size.
     if image_size is None and model_id is not None:
         try:
             from ..export.io import _populate_image_size_from_preprocessor
 
             shape_kwargs: dict = {}
-            _populate_image_size_from_preprocessor(model_id, shape_kwargs)
+            _populate_image_size_from_preprocessor(model_id, shape_kwargs, config)
             if "height" in shape_kwargs:
                 h, w = shape_kwargs["height"], shape_kwargs["width"]
                 image_size = h if h == w else (h, w)
diff --git a/tests/unit/export/test_io.py b/tests/unit/export/test_io.py
index 36f065f49..8e4dfb04b 100644
--- a/tests/unit/export/test_io.py
+++ b/tests/unit/export/test_io.py
@@ -676,6 +676,99 @@ def test_no_size_key_in_config(self) -> None:
         assert "height" not in shape_kwargs
         assert "width" not in shape_kwargs
 
+    def test_nested_dict_input_size_chw(self) -> None:
+        """``pretrained_cfg.input_size = [C, H, W]`` (timm) synthesizes a size dict."""
+        hf_config = SimpleNamespace(
+            pretrained_cfg={"input_size": [3, 224, 224], "mean": [0.485, 0.456, 0.406]},
+        )
+        shape_kwargs: dict = {}
+
+        # No preprocessor_config.json on the hub -> synthesize from hf_config.
+        with patch(
+            "transformers.image_processing_utils.ImageProcessingMixin.get_image_processor_dict",
+            side_effect=OSError("404"),
+        ):
+            _populate_image_size_from_preprocessor(
+                "timm/some-model",
+                shape_kwargs,
+                hf_config,
+            )
+
+        assert shape_kwargs["height"] == 224
+        assert shape_kwargs["width"] == 224
+
+    def test_preprocessor_takes_precedence_over_nested_dict(self) -> None:
+        """When preprocessor_config.json resolves, nested dict is not consulted."""
+        hf_config = SimpleNamespace(pretrained_cfg={"input_size": [3, 320, 320]})
+        shape_kwargs: dict = {}
+
+        with patch(
+            "transformers.image_processing_utils.ImageProcessingMixin.get_image_processor_dict",
+            return_value=({"size": 384}, {}),
+        ):
+            _populate_image_size_from_preprocessor(
+                "some-model/id",
+                shape_kwargs,
+                hf_config,
+            )
+
+        assert shape_kwargs["height"] == 384
+        assert shape_kwargs["width"] == 384
+
+    def test_nested_dict_input_size_scalar(self) -> None:
+        """``pretrained_cfg.input_size = [side]`` (length-1) maps to a square size."""
+        hf_config = SimpleNamespace(pretrained_cfg={"input_size": [320]})
+        shape_kwargs: dict = {}
+
+        with patch(
+            "transformers.image_processing_utils.ImageProcessingMixin.get_image_processor_dict",
+            side_effect=OSError("404"),
+        ):
+            _populate_image_size_from_preprocessor(
+                "some/model",
+                shape_kwargs,
+                hf_config,
+            )
+
+        assert shape_kwargs["height"] == 320
+        assert shape_kwargs["width"] == 320
+
+    def test_pretrained_cfg_without_input_size_ignored(self) -> None:
+        """``pretrained_cfg`` without ``input_size`` (e.g. only mean/std) is skipped."""
+        hf_config = SimpleNamespace(
+            pretrained_cfg={"mean": [0.5, 0.5, 0.5], "std": [0.5, 0.5, 0.5]},
+        )
+        shape_kwargs: dict = {}
+
+        with patch(
+            "transformers.image_processing_utils.ImageProcessingMixin.get_image_processor_dict",
+            side_effect=OSError("404"),
+        ):
+            _populate_image_size_from_preprocessor(
+                "some/model",
+                shape_kwargs,
+                hf_config,
+            )
+
+        assert shape_kwargs == {}
+
+    def test_existing_height_blocks_nested_dict_too(self) -> None:
+        """If height/width already set, nested-dict path must also be skipped."""
+        hf_config = SimpleNamespace(pretrained_cfg={"input_size": [3, 224, 224]})
+        shape_kwargs = {"height": 128}
+
+        with patch(
+            "transformers.image_processing_utils.ImageProcessingMixin.get_image_processor_dict",
+            side_effect=OSError("404"),
+        ):
+            _populate_image_size_from_preprocessor(
+                "some/model",
+                shape_kwargs,
+                hf_config,
+            )
+
+        assert shape_kwargs == {"height": 128}
+
 
 # =============================================================================
 # PastKeyValueInputGenerator — shared KV cache dummy input generation

From ec8725145ff4d7cc697b67df372e7584a81fe419 Mon Sep 17 00:00:00 2001
From: Yi Ren <reny@microsoft.com>
Date: Wed, 3 Jun 2026 15:53:36 +0800
Subject: [PATCH 2/5] chore(e2e): bump timm models to group=ISV, priority=P1

---
 scripts/e2e_eval/testsets/models_all.json | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/e2e_eval/testsets/models_all.json b/scripts/e2e_eval/testsets/models_all.json
index d3a4701a0..d0710b409 100644
--- a/scripts/e2e_eval/testsets/models_all.json
+++ b/scripts/e2e_eval/testsets/models_all.json
@@ -4843,8 +4843,8 @@
     "hf_id": "timm/mobilenetv3_small_100.lamb_in1k",
     "task": "image-classification",
     "model_type": "timm_wrapper",
-    "group": "Top200",
-    "priority": "P2",
+    "group": "ISV",
+    "priority": "P1",
     "downloads": 0,
     "last_update_time": "2024-01-01T00:00:00+00:00",
     "optimum_supported": true,
@@ -4854,8 +4854,8 @@
     "hf_id": "timm/repghostnet_200.in1k",
     "task": "image-classification",
     "model_type": "timm_wrapper",
-    "group": "Top200",
-    "priority": "P2",
+    "group": "ISV",
+    "priority": "P1",
     "downloads": 0,
     "last_update_time": "2024-01-01T00:00:00+00:00",
     "optimum_supported": true,

From 387a794ec0d383c40b1763a46895ff5a9de6e395 Mon Sep 17 00:00:00 2001
From: Yi Ren <reny@microsoft.com>
Date: Thu, 4 Jun 2026 12:10:48 +0800
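Subject: [PATCH 3/5] refactor(export): drop model_id guard in
 _get_preprocessor_dict

Always attempt the hub lookup instead of checking model_id first, and
run the pretrained_cfg synthesis fallback from the except handler. Per
the inline comment, a None model_id surfaces as OSError from the lookup
and so reaches the same fallback.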

---
 src/winml/modelkit/export/io.py | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/winml/modelkit/export/io.py b/src/winml/modelkit/export/io.py
index f9737d141..f22d0bad9 100644
--- a/src/winml/modelkit/export/io.py
+++ b/src/winml/modelkit/export/io.py
@@ -268,18 +268,17 @@ def _get_preprocessor_dict(
     so downstream parsing logic does not need to know which source it came
     from. Returns an empty dict when neither source yields a usable size.
     """
-    if model_id:
-        try:
-            from transformers.image_processing_utils import ImageProcessingMixin
-
-            config, _ = ImageProcessingMixin.get_image_processor_dict(model_id)
-            return config
-        except (OSError, ValueError, KeyError) as e:
-            logger.debug("Could not load preprocessor_config.json for %s: %s", model_id, e)
-
-    if hf_config is not None:
-        return _synthesize_preprocessor_dict(hf_config)
-    return {}
+    try:
+        from transformers.image_processing_utils import ImageProcessingMixin
+
+        config, _ = ImageProcessingMixin.get_image_processor_dict(model_id)
+        return config
+    except (OSError, ValueError, KeyError) as e:
+        # if model_id is None, OSError is raised
+        logger.debug("Could not load preprocessor_config.json for %s: %s", model_id, e)
+        if hf_config is not None:
+            return _synthesize_preprocessor_dict(hf_config)
+        return {}
 
 
 def _synthesize_preprocessor_dict(hf_config: PretrainedConfig) -> dict:

From 5d9291f0a8cc6de346e59161ab0d2d5985c679f3 Mon Sep 17 00:00:00 2001
From: Yi Ren <reny@microsoft.com>
Date: Thu, 4 Jun 2026 12:38:28 +0800
Subject: [PATCH 4/5] test(export): build T5/Qwen KV fixtures synthetically (no
 network)

The t5_config and qwen_config fixtures fetched configs from the HF Hub at
collection time (AutoConfig.from_pretrained for google-t5/t5-small and
Qwen/Qwen3-0.6B), so the unit suite failed on offline/rate-limited runners
(429 Too Many Requests -> OSError). Build them synthetically with small dims,
matching the existing marian_config/bart_config_* fixtures in the same file.

n_positions (T5) and max_position_embeddings (Qwen) map to max_cache_len via
their NormalizedConfigs, keeping the cache-length assertions (32 / 256) valid.
---
 tests/unit/export/test_io.py | 49 ++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/tests/unit/export/test_io.py b/tests/unit/export/test_io.py
index 8e4dfb04b..c69f4907c 100644
--- a/tests/unit/export/test_io.py
+++ b/tests/unit/export/test_io.py
@@ -19,7 +19,6 @@
 import pytest
 import torch
 from transformers import (
-    AutoConfig,
     CLIPTextConfig,
     CLIPTextModelWithProjection,
     CLIPVisionConfig,
@@ -792,18 +791,42 @@ def _make_normalized_config(
 
 @pytest.fixture(scope="module")
 def t5_config():
-    """T5-small config with n_positions overridden to 32 for fast tests."""
-    cfg = AutoConfig.from_pretrained("google-t5/t5-small")
-    cfg.n_positions = 32
-    return cfg
+    """Synthetic T5Config — small dims, no network.
+
+    ``n_positions`` maps to ``max_cache_len`` (decoder static buffer size) via
+    the T5 NormalizedConfig, so it fixes the KV cache length at 32.
+    """
+    from transformers import T5Config
+
+    return T5Config(
+        d_model=32,
+        num_layers=2,
+        num_heads=2,
+        d_kv=16,
+        vocab_size=100,
+        n_positions=32,
+    )
 
 
 @pytest.fixture(scope="module")
 def qwen_config():
-    """Qwen3-0.6B config with max_position_embeddings overridden to 256."""
-    cfg = AutoConfig.from_pretrained("Qwen/Qwen3-0.6B")
-    cfg.max_position_embeddings = 256
-    return cfg
+    """Synthetic Qwen3Config — small dims, no network.
+
+    ``max_position_embeddings`` maps to ``max_cache_len`` via the Qwen
+    NormalizedConfig, so it fixes the KV cache length at 256.
+    """
+    from transformers import Qwen3Config
+
+    return Qwen3Config(
+        hidden_size=32,
+        num_hidden_layers=2,
+        num_attention_heads=4,
+        num_key_value_heads=2,
+        head_dim=8,
+        vocab_size=100,
+        intermediate_size=64,
+        max_position_embeddings=256,
+    )
 
 
 class TestPastKeyValueInputGenerator:
@@ -869,7 +892,7 @@ class TestT5DecoderKVInputs:
 
     def test_kv_input_names(self, t5_config) -> None:
         inputs = generate_dummy_inputs("t5", "text2text-generation", t5_config)
-        num_layers = t5_config.num_layers  # 6
+        num_layers = t5_config.num_layers  # 2 (synthetic)
         for i in range(num_layers):
             assert f"past_{i}_key" in inputs
             assert f"past_{i}_value" in inputs
@@ -877,7 +900,7 @@ def test_kv_input_names(self, t5_config) -> None:
     def test_kv_shape(self, t5_config) -> None:
         inputs = generate_dummy_inputs("t5", "text2text-generation", t5_config)
         kv = inputs["past_0_key"]
-        # [batch=1, heads=8, max_cache_len=32, d_kv=64]
+        # [batch=1, heads=num_heads, max_cache_len=32 (n_positions), d_kv]
         assert kv.shape == (1, t5_config.num_heads, 32, t5_config.d_kv)
 
     def test_decoder_attention_mask_matches_cache_len(self, t5_config) -> None:
@@ -895,7 +918,7 @@ class TestQwenPrefillKVInputs:
 
     def test_kv_input_names(self, qwen_config) -> None:
         inputs = generate_dummy_inputs("qwen3", "feature-extraction", qwen_config)
-        num_layers = qwen_config.num_hidden_layers  # 28
+        num_layers = qwen_config.num_hidden_layers  # 2 (synthetic)
         for i in range(num_layers):
             assert f"past_{i}_key" in inputs
             assert f"past_{i}_value" in inputs
@@ -903,7 +926,7 @@ def test_kv_input_names(self, qwen_config) -> None:
     def test_kv_shape(self, qwen_config) -> None:
         inputs = generate_dummy_inputs("qwen3", "feature-extraction", qwen_config)
         kv = inputs["past_0_key"]
-        # [batch=1, kv_heads=8, max_cache_len=256, head_dim=128]
+        # [batch=1, kv_heads, max_cache_len=256 (max_position_embeddings), head_dim]
         assert kv.shape == (1, qwen_config.num_key_value_heads, 256, qwen_config.head_dim)
 
     def test_attention_mask_matches_cache_len(self, qwen_config) -> None:

From dd2601b1b744e5878e1af20fcc2e01da2d15f4fb Mon Sep 17 00:00:00 2001
From: Yi Ren <reny@microsoft.com>
Date: Thu, 4 Jun 2026 16:56:02 +0800
Subject: [PATCH 5/5] fix(export): synthesize image size when
 preprocessor_config lacks "size"

_get_preprocessor_dict only fell through to synthesis when the hub fetch
raised. A partial preprocessor_config.json (e.g. only mean/std) returned a
dict without a "size" key, so size stayed unresolved and Optimum defaulted
to 64x64 -- the same bug via a different path. Return the hub dict only when
it carries "size"; otherwise fall through to _synthesize_preprocessor_dict.
---
 src/winml/modelkit/export/io.py | 18 ++++++++++++------
 tests/unit/export/test_io.py    | 23 +++++++++++++++++++++++
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/src/winml/modelkit/export/io.py b/src/winml/modelkit/export/io.py
index f22d0bad9..a2111a86b 100644
--- a/src/winml/modelkit/export/io.py
+++ b/src/winml/modelkit/export/io.py
@@ -259,10 +259,12 @@ def _get_preprocessor_dict(
 
     Resolution order:
 
-    1. ``preprocessor_config.json`` fetched from the hub (standard HF vision).
+    1. ``preprocessor_config.json`` fetched from the hub (standard HF vision),
+       used only when it carries a ``size`` key.
     2. Synthesized from a nested plain-dict attribute on ``hf_config``
        carrying ``input_size`` or ``image_size`` (e.g.
-       ``TimmWrapperConfig.pretrained_cfg``).
+       ``TimmWrapperConfig.pretrained_cfg``). Reached when the hub file is
+       unavailable *or* present but missing ``size`` (a partial config).
 
     Returns the dict in the standard preprocessor schema (``{"size": ...}``)
     so downstream parsing logic does not need to know which source it came
@@ -272,13 +274,17 @@ def _get_preprocessor_dict(
         from transformers.image_processing_utils import ImageProcessingMixin
 
         config, _ = ImageProcessingMixin.get_image_processor_dict(model_id)
-        return config
+        if "size" in config:
+            return config
+        # Partial preprocessor_config.json without a "size" key: fall through
+        # to synthesis so we don't silently use Optimum's 64x64 default.
     except (OSError, ValueError, KeyError) as e:
         # if model_id is None, OSError is raised
         logger.debug("Could not load preprocessor_config.json for %s: %s", model_id, e)
-        if hf_config is not None:
-            return _synthesize_preprocessor_dict(hf_config)
-        return {}
+
+    if hf_config is not None:
+        return _synthesize_preprocessor_dict(hf_config)
+    return {}
 
 
 def _synthesize_preprocessor_dict(hf_config: PretrainedConfig) -> dict:
diff --git a/tests/unit/export/test_io.py b/tests/unit/export/test_io.py
index c69f4907c..d030b5e7a 100644
--- a/tests/unit/export/test_io.py
+++ b/tests/unit/export/test_io.py
@@ -675,6 +675,29 @@ def test_no_size_key_in_config(self) -> None:
         assert "height" not in shape_kwargs
         assert "width" not in shape_kwargs
 
+    def test_partial_preprocessor_without_size_falls_back_to_synthesis(self) -> None:
+        """A partial preprocessor_config.json (no ``size``) synthesizes from hf_config.
+
+        Without the fall-through, a hub dict carrying only mean/std would leave
+        ``size`` unresolved and Optimum would default to 64x64.
+        """
+        mock_config = {"mean": [0.5, 0.5, 0.5], "std": [0.5, 0.5, 0.5]}  # no "size"
+        hf_config = SimpleNamespace(pretrained_cfg={"input_size": [3, 224, 224]})
+        shape_kwargs: dict = {}
+
+        with patch(
+            "transformers.image_processing_utils.ImageProcessingMixin.get_image_processor_dict",
+            return_value=(mock_config, {}),
+        ):
+            _populate_image_size_from_preprocessor(
+                "timm/some-model",
+                shape_kwargs,
+                hf_config,
+            )
+
+        assert shape_kwargs["height"] == 224
+        assert shape_kwargs["width"] == 224
+
     def test_nested_dict_input_size_chw(self) -> None:
         """``pretrained_cfg.input_size = [C, H, W]`` (timm) synthesizes a size dict."""
         hf_config = SimpleNamespace(