diff --git a/.gitignore b/.gitignore
index 2379f36..8efb67f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
.idea/
.claude/
cmake-build-debug*/
+.claude/
+CLAUDE.md
*.onnx
diff --git a/README.md b/README.md
index b02a1f8..c3a0b22 100644
--- a/README.md
+++ b/README.md
@@ -150,7 +150,7 @@ sudo cmake --install build --prefix /usr/local/onnxruntime-server
| `--workers` | `ONNX_SERVER_WORKERS` | Worker thread pool size.
Default: `4` |
| `--request-payload-limit` | `ONNX_SERVER_REQUEST_PAYLOAD_LIMIT` | HTTP/HTTPS request payload size limit.
Default: `1024 * 1024 * 10(10MB)` |
| `--model-dir` | `ONNX_SERVER_MODEL_DIR` | Model directory path
The onnx model files must be located in the following path:
`${model_dir}/${model_name}/${model_version}/model.onnx` or
`${model_dir}/${model_name}/${model_version}.onnx`
Default: `models` |
-| `--prepare-model` | `ONNX_SERVER_PREPARE_MODEL` | Pre-create some model sessions at server startup.
Format as a space-separated list of `model_name:model_version` or `model_name:model_version(session_options, ...)`.
Available session_options are
- cuda=device_id`[ or true or false]`
eg) `model1:v1 model2:v9`
`model1:v1(cuda=true) model2:v9(cuda=1)` |
+| `--prepare-model` | `ONNX_SERVER_PREPARE_MODEL` | Pre-create some model sessions at server startup.
Format as a space-separated list of `model_name:model_version` or `model_name:model_version(opt1=val1, opt2=val2, ...)`. Option keys may use dotted notation to address nested groups (e.g. `cuda.device_id`, `session_options.intra_op_num_threads`). Repeating the `extensions` key accumulates a deduplicated array. Option entries that do not match the grammar are skipped silently rather than failing the whole list.
Examples:
- `model1:v1 model2:v9`
- `model1:v1(cuda=true) model2:v9(cuda=1)`
- `bert:v1(cuda.device_id=0, cuda.gpu_mem_limit=2147483648)`
- `bert:v1(session_options.intra_op_num_threads=4, session_options.graph_optimization_level=all)`
- `bert:v1(extensions=/usr/local/lib/libortextensions.so)` |
### Backend options
@@ -223,8 +223,9 @@ docker run --name onnxruntime_server_container -d --rm --gpus all \
## ONNXRuntime Extensions Support
-To use the [onnxruntime-extensions](https://github.com/microsoft/onnxruntime-extensions)(Custom Ops Library), set the
-options as follows when creating a session.
+To use the [onnxruntime-extensions](https://github.com/microsoft/onnxruntime-extensions) (Custom Ops Library), supply
+one or more library paths through the `extensions` array. The server registers each path with ORT in order and
+deduplicates entries.
```json
{
@@ -232,11 +233,82 @@ options as follows when creating a session.
"version": "string",
"option": {
"cuda": ...,
- "ortextensions_path": "/absolute/path/to/libonnxruntime_extensions.so"
+ "extensions": [
+ "/absolute/path/to/libonnxruntime_extensions.so"
+ ]
}
}
```
+The legacy `ortextensions_path` (single string) is still accepted for backward compatibility; it is normalized into the
+`extensions` array on the server side and the response always echoes the normalized form.
+
+## Session-level options
+
+The optional `session_options` object on a session-create request forwards the listed keys to the underlying
+onnxruntime `SessionOptions`. Only the JSON shape (types and our enum-string mapping) is validated on the server side;
+the actual value validation is delegated to ORT, and the response echoes only the values ORT accepted.
+
+```json
+{
+ "model": "string",
+ "version": "string",
+ "option": {
+ "session_options": {
+ "intra_op_num_threads": 4,
+ "inter_op_num_threads": 1,
+ "execution_mode": "sequential",
+ "graph_optimization_level": "all",
+ "enable_cpu_mem_arena": true,
+ "enable_mem_pattern": true,
+ "log_severity_level": 2,
+ "logid": "my-model",
+ "enable_profiling": false,
+ "profile_file_prefix": "/var/log/onnx/profile-",
+ "optimized_model_filepath": "/cache/optimized.onnx",
+ "free_dimension_overrides": { "batch": 1 },
+ "config_entries": {
+ "session.disable_prepacking": "1"
+ }
+ }
+ }
+}
+```
+
+`config_entries` is round-tripped through `GetSessionConfigEntry`, so the response shows what ORT actually stored
+(string values; `true`/`42` become `"1"`/`"42"`).
+
+## CUDA execution provider options
+
+When CUDA is enabled, the `cuda` field accepts either a boolean / integer (legacy shorthand) or an object that maps to
+[CUDA Execution Provider options](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html). The
+server forwards the object to ORT via `UpdateCUDAProviderOptions` in a single batched call (per-key calls trigger a
+sibling-reset quirk in ORT V2). If any key is rejected by ORT, session creation fails with the ORT error message
+identifying the offending key. The response is built from `GetCUDAProviderOptionsAsString` readback, so it reflects
+exactly what ORT stored.
+
+```json
+{
+ "model": "string",
+ "version": "string",
+ "option": {
+ "cuda": {
+ "device_id": 0,
+ "gpu_mem_limit": 2147483648,
+ "arena_extend_strategy": "kNextPowerOfTwo",
+ "cudnn_conv_algo_search": "EXHAUSTIVE",
+ "cudnn_conv_use_max_workspace": true,
+ "do_copy_in_default_stream": true,
+ "enable_cuda_graph": false
+ }
+ }
+}
+```
+
+Backward-compatible shortcuts:
+- `"cuda": true` — enable CUDA with all defaults (`device_id=0`).
+- `"cuda": 1` — enable CUDA on `device_id=1`.
+
For more details on the session creation request, please refer to
the [API documentation](https://kibae.github.io/onnxruntime-server/swagger/#/ONNX%20Runtime%20Session/createSession).
diff --git a/docs/swagger/openapi.yaml b/docs/swagger/openapi.yaml
index 686e2f4..04ed05c 100644
--- a/docs/swagger/openapi.yaml
+++ b/docs/swagger/openapi.yaml
@@ -269,6 +269,11 @@ components:
$ref: '#/components/schemas/ONNXSessionOption'
ONNXSessionOption:
type: object
+ description: |
+ Normalized echo of the options applied to the session. The server only includes
+ keys whose corresponding ORT calls succeeded; values reflect what ORT actually
+ stored (read back via GetCUDAProviderOptionsAsString and GetSessionConfigEntry
+ where applicable).
nullable: true
properties:
cuda:
@@ -276,8 +281,18 @@ components:
required: false
oneOf:
- type: boolean
- description: Use CUDA
+ description: CUDA disabled (false) — present for backward compatibility.
- $ref: '#/components/schemas/ONNXSessionOptionCUDA'
+ extensions:
+ type: array
+ description: Registered onnxruntime-extensions library paths in registration order, deduplicated.
+ required: false
+ items:
+ type: string
+ example:
+ - /absolute/path/to/libonnxruntime_extensions.so
+ session_options:
+ $ref: '#/components/schemas/ONNXSessionOptionsGroup'
ONNXSessionOptionRequest:
type: object
nullable: true
@@ -287,11 +302,13 @@ components:
required: false
oneOf:
- type: boolean
- description: Use CUDA
+ description: Enable CUDA with all defaults (device_id=0).
+ - type: integer
+ description: Enable CUDA on the given device_id.
- $ref: '#/components/schemas/ONNXSessionOptionCUDA'
input_shape:
type: object
- description: Input shape
+ description: Input shape overrides keyed by input name.
nullable: false
required: false
example: {
@@ -301,25 +318,157 @@ components:
}
output_shape:
type: object
- description: Output shape
+ description: Output shape overrides keyed by output name.
nullable: false
required: false
example: {
"output": [ 1, 1 ]
}
+ extensions:
+ type: array
+ description: |
+ One or more absolute paths to onnxruntime-extensions custom-ops libraries.
+ Each path is registered with ORT in array order; duplicate paths are deduplicated.
+ nullable: false
+ required: false
+ items:
+ type: string
+ example:
+ - /absolute/path/to/libonnxruntime_extensions.so
ortextensions_path:
type: string
- description: To use the ONNXRuntime Extension (Custom Ops Library), you must provide the library path.
+ description: |
+ Deprecated alias for `extensions`. A single library path. The server normalizes
+ it into the `extensions` array on input and the response always echoes the
+ normalized form.
+ deprecated: true
nullable: false
required: false
- example: /absolute/path/to/libonnxruntime_extensions
+ example: /absolute/path/to/libonnxruntime_extensions.so
+ session_options:
+ $ref: '#/components/schemas/ONNXSessionOptionsGroup'
ONNXSessionOptionCUDA:
type: object
+ description: |
+ CUDA Execution Provider V2 options. The server forwards every supplied key to
+ UpdateCUDAProviderOptions in a single batched call; if ORT rejects any key the
+ whole session creation fails with the ORT error message. The response is built
+ from GetCUDAProviderOptionsAsString readback, so it shows exactly what ORT
+ stored (which may differ from the requested value if ORT normalized it).
properties:
device_id:
type: integer
description: CUDA device ID
nullable: false
+ gpu_mem_limit:
+ type: integer
+ description: Per-session GPU memory limit, in bytes.
+ nullable: false
+ arena_extend_strategy:
+ type: string
+ description: Arena extension strategy, e.g. "kNextPowerOfTwo" or "kSameAsRequested".
+ nullable: false
+ cudnn_conv_algo_search:
+ type: string
+ description: cuDNN convolution algorithm search policy. Accepted values are ORT-defined enum names.
+ nullable: false
+ cudnn_conv_use_max_workspace:
+ type: boolean
+ nullable: false
+ do_copy_in_default_stream:
+ type: boolean
+ nullable: false
+ enable_cuda_graph:
+ type: boolean
+ description: Capture and replay a CUDA graph (requires static input shapes).
+ nullable: false
+ tunable_op_enable:
+ type: boolean
+ nullable: false
+ tunable_op_tuning_enable:
+ type: boolean
+ nullable: false
+ cudnn_conv1d_pad_to_nc1d:
+ type: boolean
+ nullable: false
+ additionalProperties:
+ description: |
+ Any additional CUDA Execution Provider V2 key understood by your ORT build is
+ forwarded as-is. Refer to the ORT CUDA EP documentation for the full list of
+ accepted keys.
+ ONNXSessionOptionsGroup:
+ type: object
+ description: |
+ Session-level options forwarded to onnxruntime SessionOptions. The server only
+ validates JSON shape (types and our enum-string mapping); ORT decides whether the
+ value itself is acceptable. Keys whose ORT setter throws are silently dropped from
+ the echoed response. The `config_entries` object is round-tripped through
+ GetSessionConfigEntry so the echo shows what ORT actually stored (always strings).
+ nullable: false
+ required: false
+ properties:
+ intra_op_num_threads:
+ type: integer
+ description: Number of threads used for parallelizing operators. 0 means ORT default.
+ nullable: false
+ inter_op_num_threads:
+ type: integer
+ description: Number of threads used for parallelizing the graph. 0 means ORT default.
+ nullable: false
+ execution_mode:
+ type: string
+ enum: [sequential, parallel]
+ nullable: false
+ graph_optimization_level:
+ type: string
+ enum: [disable, basic, extended, all]
+ nullable: false
+ enable_cpu_mem_arena:
+ type: boolean
+ nullable: false
+ enable_mem_pattern:
+ type: boolean
+ nullable: false
+ log_severity_level:
+ type: integer
+ description: ORT log severity level (0=verbose ... 4=fatal).
+ nullable: false
+ logid:
+ type: string
+ nullable: false
+ enable_profiling:
+ type: boolean
+ description: Enable profiling. When true, profile_file_prefix should also be supplied; if it is omitted the server passes an empty prefix to ORT.
+ nullable: false
+ profile_file_prefix:
+ type: string
+ nullable: false
+ optimized_model_filepath:
+ type: string
+ description: Filepath where ORT writes the optimized model after graph transformations.
+ nullable: false
+ free_dimension_overrides:
+ type: object
+ description: Map of free dimension name to a fixed integer size.
+ additionalProperties:
+ type: integer
+ nullable: false
+ example:
+ batch: 1
+ config_entries:
+ type: object
+ description: |
+ Generic passthrough to AddSessionConfigEntry (e.g. "session.disable_prepacking").
+ Booleans and integers are stringified before being passed to ORT; values in the
+ response are always strings (round-tripped through GetSessionConfigEntry).
+ additionalProperties:
+ oneOf:
+ - type: string
+ - type: boolean
+ - type: integer
+ nullable: false
+ example:
+ session.disable_prepacking: "1"
ONNXSessionCreateRequest:
type: object
properties:
diff --git a/src/onnx/cuda/session_options.cpp b/src/onnx/cuda/session_options.cpp
index 5cb0603..ef371f0 100644
--- a/src/onnx/cuda/session_options.cpp
+++ b/src/onnx/cuda/session_options.cpp
@@ -3,18 +3,185 @@
//
#include "session_options.hpp"
+#include
+
+namespace {
+
+// Convert a JSON scalar into the string form handed to UpdateCUDAProviderOptions.
+//
+// Booleans become "1"/"0", integers are printed in decimal, strings are passed through
+// unchanged, and any other JSON value is serialized with dump().
+std::string to_provider_string(const json &v) {
+    if (v.is_boolean())
+        return v.get<bool>() ? "1" : "0";
+    // is_number_integer() is also true for unsigned values, so the unsigned check has to come
+    // first; otherwise values above INT64_MAX would be read through the signed getter.
+    if (v.is_number_unsigned())
+        return std::to_string(v.get<unsigned long long>());
+    if (v.is_number_integer())
+        return std::to_string(v.get<long long>());
+    if (v.is_string())
+        return v.get<std::string>();
+    return v.dump();
+}
+
+// Apply all caller-supplied CUDA provider options in a single UpdateCUDAProviderOptions call.
+// keys[i] pairs with values[i]; an empty key list is a no-op. A non-null ORT status is released
+// and reported as onnxruntime_server::runtime_error carrying ORT's message.
+// Why a single call: ORT V2's UpdateCUDAProviderOptions silently resets sibling keys that share
+// an internal options group (e.g. updating arena_extend_strategy alone reverts gpu_mem_limit to
+// its default). Calling it once with the full key/value set is the only way to apply multiple
+// keys safely. The trade-off is that any single invalid key aborts the whole batch; ORT's error
+// message identifies the offending key, so the caller can see exactly what was rejected.
+void update_all(OrtCUDAProviderOptionsV2 *cuda_options, const std::vector &keys,
+ const std::vector &values) {
+ if (keys.empty())
+ return;
+ std::vector ck; // C-string views of keys, in the same order
+ std::vector cv; // C-string views of values, in the same order
+ ck.reserve(keys.size());
+ cv.reserve(values.size());
+ for (size_t i = 0; i < keys.size(); ++i) { // NOTE(review): values[i] is not bounds-checked; assumes values.size() >= keys.size()
+ ck.push_back(keys[i].c_str()); // pointer borrows from keys, which outlives the ORT call below
+ cv.push_back(values[i].c_str());
+ }
+ OrtStatus *st = Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, ck.data(), cv.data(), ck.size());
+ if (st != nullptr) {
+ const char *err = Ort::GetApi().GetErrorMessage(st);
+ std::string msg = err ? err : "unknown error"; // copy the message before the status is released
+ Ort::GetApi().ReleaseStatus(st);
+ throw onnxruntime_server::runtime_error(std::string("Failed to update CUDA provider options: ") + msg);
+ }
+}
+
+// Map an ORT readback string onto the JSON type callers most likely sent: the literals
+// "true"/"false" become booleans, an optional '-' followed only by decimal digits becomes an
+// integer, and everything else (including digit runs std::stoll cannot convert) stays a string.
+json infer_readback_value(const std::string &raw) {
+    if (raw == "true")
+        return true;
+    if (raw == "false")
+        return false;
+    if (raw.empty())
+        return raw;
+
+    const char lead = raw.front();
+    const bool lead_ok = (lead == '-') || (lead >= '0' && lead <= '9');
+    const bool tail_ok = raw.find_first_not_of("0123456789", 1) == std::string::npos;
+    if (lead_ok && tail_ok) {
+        try {
+            return json(std::stoll(raw));
+        } catch (...) {
+            // not convertible (e.g. a lone "-" or an out-of-range value): keep the text
+        }
+    }
+    return raw;
+}
+
+// Split the "key1=value1;key2=value2" text produced by GetCUDAProviderOptionsAsString into a
+// JSON object. Each value runs from its '=' to the next ';' (or the end of the text) and is
+// typed by infer_readback_value(); entries with an empty key are dropped, and parsing stops at
+// the first position from which no further '=' can be found.
+json parse_options_string(const std::string &s) {
+    json parsed = json::object();
+    for (size_t cursor = 0; cursor < s.size();) {
+        const size_t eq_at = s.find('=', cursor);
+        if (eq_at == std::string::npos)
+            break;
+        const size_t semi_at = s.find(';', eq_at);
+        const size_t end_at = (semi_at == std::string::npos) ? s.size() : semi_at;
+
+        const std::string name = s.substr(cursor, eq_at - cursor);
+        if (!name.empty())
+            parsed[name] = infer_readback_value(s.substr(eq_at + 1, end_at - eq_at - 1));
+
+        cursor = end_at + 1;
+    }
+    return parsed;
+}
+
+} // namespace
+
+// Apply CUDA provider options and append the CUDA execution provider to session_options.
+//
+// Validation policy mirrors apply_session_options(): every caller-supplied entry is forwarded to
+// ORT and ORT decides whether to accept it. All keys go to ORT in ONE batched
+// UpdateCUDAProviderOptions call (see update_all), so a single rejected key fails the whole
+// request with ORT's error message. The echoed object is built from
+// GetCUDAProviderOptionsAsString readback (the ground truth of what ORT stored), filtered to the
+// keys the caller actually supplied (plus device_id, which is always meaningful).
+//
+// option["cuda"] may be an object (key/value options), an integer (shorthand for device_id) or
+// a boolean (defaults only). Throws if ORT cannot create or update the provider options or
+// cannot append the provider. A failed readback is not an error: the echo is returned without
+// the keys that could not be read back.
json append_cuda_session_options(OrtSessionOptions *session_options, const json &option) {
auto cuda = option["cuda"];
- json result = json::object();
+    // Scope guard for the provider-options handle: it is released on every exit path, including
+    // exceptions thrown while the key/value lists below are being built.
+    struct provider_options_guard {
+        OrtCUDAProviderOptionsV2 *handle = nullptr;
+        provider_options_guard() = default;
+        provider_options_guard(const provider_options_guard &) = delete;
+        provider_options_guard &operator=(const provider_options_guard &) = delete;
+        ~provider_options_guard() {
+            if (handle != nullptr)
+                Ort::GetApi().ReleaseCUDAProviderOptions(handle);
+        }
+    } guard;
+    Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&guard.handle));
+    OrtCUDAProviderOptionsV2 *cuda_options = guard.handle;
+
+    // Track which keys the caller asked about — these are the keys we will echo from readback.
+    // device_id is always included for backward compatibility with the previous response shape.
+    std::set<std::string> requested_keys;
+    requested_keys.insert("device_id");
- // device_id
- int device_id = 0;
- if (cuda.is_object() && cuda.contains("device_id"))
- device_id = cuda["device_id"].get();
- result["device_id"] = device_id;
+    std::vector<std::string> keys;
+    std::vector<std::string> values;
+    if (cuda.is_object()) {
+        for (auto it = cuda.begin(); it != cuda.end(); ++it) {
+            keys.push_back(it.key());
+            values.push_back(to_provider_string(it.value()));
+            requested_keys.insert(it.key());
+        }
+    } else if (cuda.is_number_integer()) {
+        keys.push_back("device_id");
+        values.push_back(std::to_string(cuda.get<long long>()));
+    }
+    // cuda == true or false: nothing to update; default V2 options are used.
+
+    update_all(cuda_options, keys, values);
+
+    OrtStatus *append_status =
+        Ort::GetApi().SessionOptionsAppendExecutionProvider_CUDA_V2(session_options, cuda_options);
+    if (append_status != nullptr) {
+        const char *err = Ort::GetApi().GetErrorMessage(append_status);
+        std::string msg = err ? err : "unknown error";
+        Ort::GetApi().ReleaseStatus(append_status);
+        throw onnxruntime_server::runtime_error(std::string("Failed to append CUDA EP: ") + msg);
+    }
+
+    // Readback the full options string and echo only the keys the caller cared about.
+    // The ORT-allocated text is owned by a scope guard so an exception while parsing or building
+    // the JSON (std::bad_alloc, json construction) cannot leak it.
+    json result = json::object();
+    OrtAllocator *allocator = nullptr;
+    OrtStatus *alloc_st = Ort::GetApi().GetAllocatorWithDefaultOptions(&allocator);
+    if (alloc_st != nullptr) {
+        Ort::GetApi().ReleaseStatus(alloc_st);
+    } else {
+        struct readback_buffer {
+            OrtAllocator *owner;
+            char *text;
+            ~readback_buffer() {
+                if (text != nullptr)
+                    owner->Free(owner, text);
+            }
+        } readback{allocator, nullptr};
+
+        OrtStatus *st = Ort::GetApi().GetCUDAProviderOptionsAsString(cuda_options, allocator, &readback.text);
+        if (st != nullptr) {
+            Ort::GetApi().ReleaseStatus(st);
+        } else if (readback.text != nullptr) {
+            const json all = parse_options_string(std::string(readback.text));
+            for (auto it = all.begin(); it != all.end(); ++it) {
+                if (requested_keys.count(it.key()))
+                    result[it.key()] = it.value();
+            }
+        }
+    }
- Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, device_id));
return result;
}
diff --git a/src/onnx/session.cpp b/src/onnx/session.cpp
index 66a8ac5..4c9cc29 100644
--- a/src/onnx/session.cpp
+++ b/src/onnx/session.cpp
@@ -10,30 +10,282 @@
#include "cuda/session_options.hpp"
#endif
-Orts::onnx::session::session(session_key key, const json &option)
- : session_options(), created_at(std::chrono::system_clock::now()), allocator(), key(std::move(key)) {
- _option["cuda"] = false;
+namespace {
+
+#ifdef _WIN32 // Windows only: helper that widens a narrow string for wide-character ORT path parameters
+std::wstring to_wide(const std::string &s) {
+ int size_needed = MultiByteToWideChar(CP_ACP, 0, s.c_str(), -1, NULL, 0); // sizing call: no output buffer; NOTE(review): CP_ACP is the ANSI code page — confirm callers never pass UTF-8 paths
+ std::wstring wstr(size_needed, 0); // presumably size_needed counts the terminating NUL because the input length is -1 — TODO confirm
+ MultiByteToWideChar(CP_ACP, 0, s.c_str(), -1, &wstr[0], size_needed); // second call performs the conversion into wstr
+ return wstr;
+}
+#endif
+
+// Register one custom-ops library with the session options via RegisterCustomOpsLibrary_V2.
+// On Windows the path is widened with to_wide() first. A non-null ORT status is released and
+// turned into onnxruntime_server::runtime_error naming the path and ORT's message.
+void register_extension(Ort::SessionOptions &session_options, const std::string &path) {
+#ifdef _WIN32
+    const std::wstring wide_path = to_wide(path);
+    const wchar_t *native_path = wide_path.c_str();
+#else
+    const char *native_path = path.c_str();
+#endif
+    OrtStatus *failure = Ort::GetApi().RegisterCustomOpsLibrary_V2(session_options, native_path);
+    if (failure == nullptr)
+        return;
+
+    // Copy the message out before the status object that owns it is released.
+    const char *raw_message = Ort::GetApi().GetErrorMessage(failure);
+    const std::string detail = (raw_message != nullptr) ? raw_message : "unknown error";
+    Ort::GetApi().ReleaseStatus(failure);
+    throw onnxruntime_server::runtime_error("Failed to register ORT extensions (" + path + "): " + detail);
+}
+
+// Translate a graph-optimization level name into the ORT enum. `valid` reports whether the
+// name was recognised; for an unknown name the return value is ORT_ENABLE_ALL and `valid` is
+// false. "disable", "disabled" and "off" all map to ORT_DISABLE_ALL.
+GraphOptimizationLevel parse_graph_opt_level(const std::string &v, bool &valid) {
+    struct level_name {
+        const char *text;
+        GraphOptimizationLevel level;
+    };
+    static const level_name known[] = {
+        {"disable", ORT_DISABLE_ALL},
+        {"disabled", ORT_DISABLE_ALL},
+        {"off", ORT_DISABLE_ALL},
+        {"basic", ORT_ENABLE_BASIC},
+        {"extended", ORT_ENABLE_EXTENDED},
+        {"all", ORT_ENABLE_ALL},
+    };
+    for (const auto &candidate : known) {
+        if (v == candidate.text) {
+            valid = true;
+            return candidate.level;
+        }
+    }
+    valid = false;
+    return ORT_ENABLE_ALL;
+}
+
+// Translate "parallel"/"sequential" into the ORT execution mode. `valid` reports whether the
+// name was recognised; an unknown name yields ORT_SEQUENTIAL with `valid` set to false.
+ExecutionMode parse_execution_mode(const std::string &v, bool &valid) {
+    const bool wants_parallel = (v == "parallel");
+    valid = wants_parallel || (v == "sequential");
+    return wants_parallel ? ORT_PARALLEL : ORT_SEQUENTIAL;
+}
+
+// try_apply: run one ORT setter and report whether it completed without throwing.
+//
+// Validation policy for session-level options: we only check JSON shape (types, our enum string
+// mapping). We do NOT re-implement ORT's own value validation (allowed ranges, defaults, etc.) —
+// that knowledge belongs to ORT and would force us to track every ORT version's rules. Instead,
+// every shape-valid value is forwarded to ORT, and the setter's outcome decides the echo:
+// - setter succeeds -> echo the value (or the readback value where an API exists)
+// - setter throws -> skip silently, do not echo (the option was rejected by ORT)
+// Where ORT exposes a readback (currently config_entries via GetSessionConfigEntry), the echo
+// uses the readback value so it reflects what ORT actually stored, not what we sent.
+template
+bool try_apply(F &&f) {
+ try {
+ f(); // invoke the wrapped setter
+ return true;
+ } catch (const Ort::Exception &) { // ORT rejected the value
+ return false;
+ } catch (const std::exception &) { // any other standard exception is also treated as "not applied"
+ return false;
+ }
+}
+
+json apply_session_options(Ort::SessionOptions &session_options, const json &input) { // applies each recognised key of `input` and returns the subset that was accepted
+ json applied = json::object();
+ if (!input.is_object())
+ return applied; // non-object input: nothing applied, empty echo
+
+ if (input.contains("intra_op_num_threads") && input["intra_op_num_threads"].is_number_integer()) {
+ auto v = input["intra_op_num_threads"].get();
+ if (try_apply([&] { session_options.SetIntraOpNumThreads(v); }))
+ applied["intra_op_num_threads"] = v;
+ }
+
+ if (input.contains("inter_op_num_threads") && input["inter_op_num_threads"].is_number_integer()) {
+ auto v = input["inter_op_num_threads"].get();
+ if (try_apply([&] { session_options.SetInterOpNumThreads(v); }))
+ applied["inter_op_num_threads"] = v;
+ }
+
+ if (input.contains("execution_mode") && input["execution_mode"].is_string()) {
+ bool valid = false;
+ auto s = input["execution_mode"].get();
+ auto mode = parse_execution_mode(s, valid);
+ if (valid && try_apply([&] { session_options.SetExecutionMode(mode); })) // unknown names are skipped without calling ORT
+ applied["execution_mode"] = (mode == ORT_PARALLEL) ? "parallel" : "sequential"; // echo is derived from the parsed enum
+ }
+
+ if (input.contains("graph_optimization_level") && input["graph_optimization_level"].is_string()) {
+ bool valid = false;
+ auto s = input["graph_optimization_level"].get();
+ auto lvl = parse_graph_opt_level(s, valid);
+ if (valid && try_apply([&] { session_options.SetGraphOptimizationLevel(lvl); }))
+ applied["graph_optimization_level"] = s; // NOTE(review): echoes the caller's spelling, so the aliases "disabled"/"off" are returned as-is rather than "disable"
+ }
+
+ if (input.contains("enable_cpu_mem_arena") && input["enable_cpu_mem_arena"].is_boolean()) {
+ auto v = input["enable_cpu_mem_arena"].get();
+ if (try_apply([&] {
+ if (v)
+ session_options.EnableCpuMemArena();
+ else
+ session_options.DisableCpuMemArena();
+ }))
+ applied["enable_cpu_mem_arena"] = v;
+ }
- if (option.contains("ortextensions_path") && option["ortextensions_path"].is_string()) {
- auto ext_path_str = option["ortextensions_path"].get();
+ if (input.contains("enable_mem_pattern") && input["enable_mem_pattern"].is_boolean()) {
+ auto v = input["enable_mem_pattern"].get();
+ if (try_apply([&] {
+ if (v)
+ session_options.EnableMemPattern();
+ else
+ session_options.DisableMemPattern();
+ }))
+ applied["enable_mem_pattern"] = v;
+ }
+
+ if (input.contains("log_severity_level") && input["log_severity_level"].is_number_integer()) {
+ auto v = input["log_severity_level"].get();
+ if (try_apply([&] { session_options.SetLogSeverityLevel(v); }))
+ applied["log_severity_level"] = v;
+ }
+
+ if (input.contains("logid") && input["logid"].is_string()) {
+ auto v = input["logid"].get();
+ if (try_apply([&] { session_options.SetLogId(v.c_str()); }))
+ applied["logid"] = v;
+ }
+
+ if (input.contains("enable_profiling") && input["enable_profiling"].is_boolean() &&
+ input["enable_profiling"].get()) { // enable_profiling=false is neither applied nor echoed
+ std::string prefix;
+ if (input.contains("profile_file_prefix") && input["profile_file_prefix"].is_string())
+ prefix = input["profile_file_prefix"].get(); // prefix stays empty when the key is missing or not a string
+ bool ok = try_apply([&] {
#ifdef _WIN32
- int size_needed = MultiByteToWideChar(CP_ACP, 0, ext_path_str.c_str(), -1, NULL, 0);
- std::wstring wstr(size_needed, 0);
- MultiByteToWideChar(CP_ACP, 0, ext_path_str.c_str(), -1, &wstr[0], size_needed);
+ auto wprefix = to_wide(prefix);
+ session_options.EnableProfiling(wprefix.c_str());
+#else
+ session_options.EnableProfiling(prefix.c_str());
+#endif
+ });
+ if (ok) {
+ applied["enable_profiling"] = true;
+ applied["profile_file_prefix"] = prefix; // echoed even when it is the empty default
+ }
+ }
- auto ext_path = wstr.c_str();
+ if (input.contains("optimized_model_filepath") && input["optimized_model_filepath"].is_string()) {
+ auto s = input["optimized_model_filepath"].get();
+ bool ok = try_apply([&] {
+#ifdef _WIN32
+ auto ws = to_wide(s);
+ session_options.SetOptimizedModelFilePath(ws.c_str());
#else
- auto ext_path = ext_path_str.c_str();
+ session_options.SetOptimizedModelFilePath(s.c_str());
#endif
- OrtStatus *status = Ort::GetApi().RegisterCustomOpsLibrary_V2(session_options, ext_path);
- if (status != nullptr) {
- const char *err = Ort::GetApi().GetErrorMessage(status);
- std::string msg = err ? err : "unknown error";
- Ort::GetApi().ReleaseStatus(status);
- throw runtime_error(std::string("Failed to register ORT extensions: ") + msg);
+ });
+ if (ok)
+ applied["optimized_model_filepath"] = s;
+ }
+
+ if (input.contains("free_dimension_overrides") && input["free_dimension_overrides"].is_object()) {
+ json normalized = json::object();
+ for (auto it = input["free_dimension_overrides"].begin();
+ it != input["free_dimension_overrides"].end(); ++it) {
+ if (!it.value().is_number_integer())
+ continue; // non-integer sizes are skipped
+ auto dim = it.value().get();
+ auto name = it.key();
+ if (try_apply([&] { session_options.AddFreeDimensionOverrideByName(name.c_str(), dim); }))
+ normalized[name] = dim;
}
+ if (!normalized.empty())
+ applied["free_dimension_overrides"] = normalized; // the group is echoed only when at least one override was accepted
+ }
+
+ // config_entries: AddSessionConfigEntry accepts any string key, so we readback each entry
+ // via GetSessionConfigEntry to ensure the echo reflects what ORT actually stored.
+ if (input.contains("config_entries") && input["config_entries"].is_object()) {
+ json normalized = json::object();
+ for (auto it = input["config_entries"].begin(); it != input["config_entries"].end(); ++it) {
+ std::string sv;
+ if (it.value().is_string())
+ sv = it.value().get();
+ else if (it.value().is_boolean())
+ sv = it.value().get() ? "1" : "0";
+ else if (it.value().is_number_integer())
+ sv = std::to_string(it.value().get());
+ else
+ continue; // any other value type is skipped
+ auto key = it.key();
+ if (!try_apply([&] { session_options.AddConfigEntry(key.c_str(), sv.c_str()); }))
+ continue;
+
+ size_t needed = 0;
+ OrtStatus *st = Ort::GetApi().GetSessionConfigEntry(
+ session_options, key.c_str(), nullptr, &needed
+ ); // sizing call with a null buffer; presumably fills `needed` with the required length — TODO confirm against the ORT API docs
+ if (st != nullptr) {
+ Ort::GetApi().ReleaseStatus(st);
+ continue; // readback failed: the entry was applied but is not echoed
+ }
+ std::string out(needed, '\0');
+ st = Ort::GetApi().GetSessionConfigEntry(
+ session_options, key.c_str(), out.data(), &needed
+ );
+ if (st != nullptr) {
+ Ort::GetApi().ReleaseStatus(st);
+ continue;
+ }
+ if (!out.empty() && out.back() == '\0')
+ out.pop_back(); // drop one trailing NUL if the buffer ends with it
+ normalized[key] = out;
+ }
+ if (!normalized.empty())
+ applied["config_entries"] = normalized;
+ }
+
+ return applied;
+}
+
+} // namespace
+
+json Orts::onnx::session::collect_extensions(const json &option) { // merges "extensions" and legacy "ortextensions_path" into one de-duplicated array
+ json result = json::array();
+ if (!option.is_object())
+ return result; // non-object option: empty array
+ auto add = [&](const std::string &path) { // append `path` unless an equal string is already present
+ for (auto &e : result) {
+ if (e.is_string() && e.get() == path)
+ return;
+ }
+ result.push_back(path);
+ };
+ if (option.contains("extensions") && option["extensions"].is_array()) {
+ for (auto &e : option["extensions"]) {
+ if (e.is_string()) // non-string array elements are ignored
+ add(e.get());
+ }
+ }
+ if (option.contains("ortextensions_path") && option["ortextensions_path"].is_string())
+ add(option["ortextensions_path"].get()); // the legacy single path is appended after the array entries
+ return result;
+}
+
+Orts::onnx::session::session(session_key key, const json &option)
+ : session_options(), created_at(std::chrono::system_clock::now()), allocator(), key(std::move(key)) {
+ _option["cuda"] = false;
+
+ // session-level options (apply before EP/extension registration)
+ if (option.contains("session_options") && option["session_options"].is_object()) {
+ auto applied = apply_session_options(session_options, option["session_options"]);
+ if (!applied.empty())
+ _option["session_options"] = applied;
+ }
- _option["ortextensions_path"] = option["ortextensions_path"];
+ // register custom op libraries: extensions array + legacy ortextensions_path, deduplicated
+ auto extensions = collect_extensions(option);
+ if (!extensions.empty()) {
+ for (auto &e : extensions)
+ register_extension(session_options, e.get());
+ _option["extensions"] = extensions;
}
if (providers::available_providers.has_cuda() && option.contains("cuda") && (
diff --git a/src/onnx/session_key_with_option.cpp b/src/onnx/session_key_with_option.cpp
index 8d2dee6..9cd711b 100644
--- a/src/onnx/session_key_with_option.cpp
+++ b/src/onnx/session_key_with_option.cpp
@@ -6,18 +6,88 @@
#include "../onnxruntime_server.hpp"
+namespace {
+
std::regex space_re(R"(\s+)"); // one or more whitespace characters
std::regex trim_re(R"(^\s*|\s*$)"); // leading or trailing whitespace
std::string key_rule = R"(([-_a-zA-Z0-9]+):([-_/a-zA-Z0-9]+)(\(([^)]+)\))?)"; // name:version with an optional parenthesised option list (capture 4)
std::regex key_re(key_rule);
-std::string option_rule = R"(([_a-zA-Z0-9]+)\s*=\s*([^,\s]+))";
+// option key may contain dots after its first character (e.g. cuda.device_id); the value stops at the first comma or whitespace
+std::string option_rule = R"(([_a-zA-Z0-9][_a-zA-Z0-9.]*)\s*=\s*([^,\s]+))";
std::regex option_re(option_rule);
+std::regex int_re(R"(^-?[0-9]+$)"); // optional minus sign followed by decimal digits only
+
+const std::string EXTENSIONS_KEY = "extensions"; // option key that holds the array of library paths
+const std::string LEGACY_EXTENSION_KEY = "ortextensions_path"; // older single-path key; parse() routes it to append_extension like "extensions"
+
+// Give a raw option value its most natural JSON type: "true"/"false" become booleans, text
+// matching int_re becomes an integer, and anything else (including digit runs that std::stoll
+// cannot convert) is kept as a string.
+json infer_value(const std::string &raw) {
+    if (raw == "true" || raw == "false")
+        return raw == "true";
+    if (!std::regex_match(raw, int_re))
+        return raw;
+    try {
+        return json(std::stoll(raw));
+    } catch (...) {
+        // conversion failed (e.g. out of range): fall back to the original text
+    }
+    return raw;
+}
+
+std::vector split_dot(const std::string &k) { // splits k on '.' and returns the non-empty segments in order
+ std::vector parts;
+ std::string cur; // segment being accumulated
+ for (char c : k) {
+ if (c == '.') {
+ if (!cur.empty()) // empty segments (leading, trailing or doubled dots) are dropped
+ parts.push_back(cur);
+ cur.clear();
+ } else {
+ cur += c;
+ }
+ }
+ if (!cur.empty())
+ parts.push_back(cur); // flush the final segment
+ return parts; // empty when k is empty or consists only of dots
+}
+
+// Store `value` in `option` at the nested location named by `path`, e.g. {"cuda", "device_id"}
+// writes option["cuda"]["device_id"]. Intermediate levels that are missing or are not objects
+// are replaced with empty objects, so an earlier scalar at the same key is overwritten.
+// An empty path addresses nothing and leaves `option` untouched.
+void set_nested(json &option, const std::vector<std::string> &path, const json &value) {
+    if (path.empty())
+        return; // guards path.back() below, which is undefined on an empty vector
+    json *cur = &option;
+    for (size_t i = 0; i + 1 < path.size(); ++i) {
+        if (!cur->is_object())
+            *cur = json::object();
+        // operator[] creates a null member when the key is missing; either way a non-object
+        // child is replaced by an empty object before descending into it.
+        json &child = (*cur)[path[i]];
+        if (!child.is_object())
+            child = json::object();
+        cur = &child;
+    }
+    if (!cur->is_object())
+        *cur = json::object();
+    (*cur)[path.back()] = value;
+}
+
+void append_extension(json &option, const std::string &path) { // adds `path` to option["extensions"] unless it is already listed
+ if (!option.contains(EXTENSIONS_KEY) || !option[EXTENSIONS_KEY].is_array())
+ option[EXTENSIONS_KEY] = json::array(); // a missing or non-array value is replaced by an empty array
+ auto &arr = option[EXTENSIONS_KEY];
+ for (auto &e : arr) {
+ if (e.is_string() && e.get() == path)
+ return; // duplicate: keep the first occurrence only
+ }
+ arr.push_back(path);
+}
+
+} // namespace
+
std::vector
onnxruntime_server::onnx::session_key_with_option::parse(const std::string &model_key_list) {
- // model_key_list is a space separated list of model_name:model_version
+ // model_key_list is a space separated list of model_name:model_version[(opt1=val1, opt2=val2)]
+ // option keys may be dotted (cuda.device_id, session_options.intra_op_num_threads) producing nested objects.
+ // extensions/ortextensions_path keys accumulate into an "extensions" array (deduplicated).
+ // option entries that don't match the grammar are silently skipped.
std::vector models;
std::string list = std::regex_replace(std::regex_replace(model_key_list, space_re, " "), trim_re, "");
if (list.empty())
@@ -27,20 +97,26 @@ onnxruntime_server::onnx::session_key_with_option::parse(const std::string &mode
while (std::regex_search(list, keys, key_re)) {
json option = json::object();
- // parse option
auto option_str = keys[4].str();
if (!option_str.empty()) {
std::smatch options;
while (std::regex_search(option_str, options, option_re)) {
- auto option_key = options[1].str();
- auto option_val = options[2].str();
-
- // cuda option: device_id or true/false
- if (option_key == "cuda") {
- if (option_val == "true" || option_val == "false")
- option[option_key] = option_val == "true";
- else
- option[option_key] = std::stoi(option_val);
+ auto raw_key = options[1].str();
+ auto raw_val = options[2].str();
+ auto value = infer_value(raw_val);
+
+ auto parts = split_dot(raw_key);
+ if (parts.empty()) {
+ option_str = options.suffix().str();
+ continue;
+ }
+
+ if (parts.size() == 1 &&
+ (parts[0] == EXTENSIONS_KEY || parts[0] == LEGACY_EXTENSION_KEY) &&
+ value.is_string()) {
+ append_extension(option, value.get());
+ } else {
+ set_nested(option, parts, value);
}
option_str = options.suffix().str();
diff --git a/src/onnxruntime_server.hpp b/src/onnxruntime_server.hpp
index ba6873b..629507b 100644
--- a/src/onnxruntime_server.hpp
+++ b/src/onnxruntime_server.hpp
@@ -138,6 +138,11 @@ namespace onnxruntime_server {
[[nodiscard]] const std::vector &inputs() const;
[[nodiscard]] const std::vector &outputs() const;
+
+ // Normalize the extensions input (the new "extensions" array and the legacy
+ // "ortextensions_path" string) into a single deduplicated array of paths in the
+ // order they would be registered. Pure function; no file system or onnxruntime calls.
+ static json collect_extensions(const json &option);
};
typedef std::shared_ptr session_ptr;
diff --git a/src/test/unit/unit_test_context_cuda.cpp b/src/test/unit/unit_test_context_cuda.cpp
index 077e401..42bfdfa 100644
--- a/src/test/unit/unit_test_context_cuda.cpp
+++ b/src/test/unit/unit_test_context_cuda.cpp
@@ -4,6 +4,8 @@
#include "../../onnxruntime_server.hpp"
#include "../test_common.hpp"
+// End-to-end smoke test: build a session on the CUDA EP with the simplest "cuda": true input,
+// run the BERT SQuAD sample model, and assert the output tensor shape.
TEST(test_onnxruntime_server_context_cuda, BertSquadModelTest) {
Orts::onnx::session_key key("sample", "2");
auto session = std::make_shared(key, model2_path.string(), json::parse(R"({"cuda": true})"));
@@ -31,3 +33,58 @@ TEST(test_onnxruntime_server_context_cuda, BertSquadModelTest) {
std::cout << json.dump(4) << "\n";
ASSERT_EQ(json["output"].size(), 3);
}
+
+// CUDA EP V2 options passed as a "cuda" object are forwarded to ORT in a single batched
+// UpdateCUDAProviderOptions call (ORT silently resets sibling keys when called per-key, so a
+// single batched call is the only safe way). The echoed value comes from
+// GetCUDAProviderOptionsAsString readback, i.e. what ORT actually stored. Every supplied key
+// that ORT accepted should round-trip; if ORT had rejected any of them the whole session
+// construction would have thrown rather than silently returning a partial echo.
+TEST(test_onnxruntime_server_context_cuda, CudaObjectOptionsEcho) {
+ Orts::onnx::session_key key("sample", "2");
+ auto session = std::make_shared<Orts::onnx::session>(
+ key, model2_path.string(),
+ R"({
+ "cuda": {
+ "device_id": 0,
+ "gpu_mem_limit": 2147483648,
+ "arena_extend_strategy": "kNextPowerOfTwo",
+ "cudnn_conv_algo_search": "HEURISTIC"
+ }
+ })"_json
+ );
+ auto j = session->to_json();
+ ASSERT_TRUE(j["option"]["cuda"].is_object());
+ auto cu = j["option"]["cuda"];
+
+ ASSERT_EQ(cu["device_id"], 0);
+ ASSERT_EQ(cu["gpu_mem_limit"], 2147483648);
+ ASSERT_EQ(cu["arena_extend_strategy"], "kNextPowerOfTwo");
+ ASSERT_EQ(cu["cudnn_conv_algo_search"], "HEURISTIC");
+}
+
+// An unknown CUDA option key (or one ORT cannot parse) must abort session construction with a
+// clear error rather than silently producing a partial echo. This is the natural consequence of
+// the batched-update strategy and is the contract callers can rely on.
+TEST(test_onnxruntime_server_context_cuda, CudaObjectRejectsUnknownKey) {
+ Orts::onnx::session_key key("sample", "2");
+ EXPECT_ANY_THROW(
+ auto session = std::make_shared<Orts::onnx::session>(
+ key, model2_path.string(),
+ R"({"cuda": {"device_id": 0, "totally_not_a_real_cuda_option": "xyz"}})"_json
+ );
+ );
+}
+
+// Backward compatibility: the legacy scalar shortcuts ("cuda": true and "cuda": <device_id>)
+// must keep working under the V2 EP path and still echo as a normalized object with device_id.
+TEST(test_onnxruntime_server_context_cuda, CudaScalarShortcutStillWorks) {
+ // Backward compat: cuda=true (boolean) and cuda=<device_id> must keep working (only the boolean form is asserted below)
+ Orts::onnx::session_key key("sample", "2");
+ auto session_bool = std::make_shared<Orts::onnx::session>(
+ key, model2_path.string(), R"({"cuda": true})"_json
+ );
+ auto j_bool = session_bool->to_json();
+ ASSERT_TRUE(j_bool["option"]["cuda"].is_object());
+ ASSERT_EQ(j_bool["option"]["cuda"]["device_id"], 0);
+}
diff --git a/src/test/unit/unit_test_session.cpp b/src/test/unit/unit_test_session.cpp
index 266ce74..69a39ff 100644
--- a/src/test/unit/unit_test_session.cpp
+++ b/src/test/unit/unit_test_session.cpp
@@ -4,6 +4,8 @@
#include "../../onnxruntime_server.hpp"
#include "../test_common.hpp"
+// input_shape / output_shape options must override the model's dynamic dimensions with the
+// supplied static values, and any shape whose rank does not match the model's must be rejected.
TEST(unit_test_session, SesionWithShapeOption) {
Orts::onnx::session_key key("sample", "1");
const auto session1 = std::make_shared(key, model1_path.string());
@@ -45,6 +47,207 @@ TEST(unit_test_session, SesionWithShapeOption) {
);
}
+// Each key in the session_options group (threads, execution mode, graph optimization level,
+// memory, logging, config_entries) must be applied to onnxruntime's SessionOptions and echoed
+// back in option.session_options in a normalized form.
+TEST(unit_test_session, SessionWithSessionOptions) {
+ Orts::onnx::session_key key("sample", "1");
+ auto session = std::make_shared<Orts::onnx::session>(
+ key, model1_path.string(),
+ R"({
+ "session_options": {
+ "intra_op_num_threads": 2,
+ "inter_op_num_threads": 1,
+ "execution_mode": "sequential",
+ "graph_optimization_level": "all",
+ "enable_cpu_mem_arena": false,
+ "enable_mem_pattern": true,
+ "logid": "test-session",
+ "log_severity_level": 3,
+ "config_entries": {
+ "session.disable_prepacking": "1"
+ }
+ }
+ })"_json
+ );
+ auto j = session->to_json();
+ ASSERT_TRUE(j["option"].contains("session_options"));
+ auto so = j["option"]["session_options"];
+ ASSERT_EQ(so["intra_op_num_threads"], 2);
+ ASSERT_EQ(so["inter_op_num_threads"], 1);
+ ASSERT_EQ(so["execution_mode"], "sequential");
+ ASSERT_EQ(so["graph_optimization_level"], "all");
+ ASSERT_EQ(so["enable_cpu_mem_arena"], false);
+ ASSERT_EQ(so["enable_mem_pattern"], true);
+ ASSERT_EQ(so["logid"], "test-session");
+ ASSERT_EQ(so["log_severity_level"], 3);
+ ASSERT_EQ(so["config_entries"]["session.disable_prepacking"], "1");
+}
+
+// Type-mismatched values (e.g. string for an int field), enum strings outside our mapping, and
+// keys we do not pass to ORT at all must be silently dropped from the echo. Sibling entries that
+// pass our shape check and our enum mapping are still applied and echoed. Note that ORT's own
+// validity checks (e.g. allowed numeric ranges) are intentionally not duplicated here; we only
+// validate JSON shape and our enum string -> ORT enum mapping.
+TEST(unit_test_session, SessionOptionsIgnoresInvalidEntries) {
+ // Bad types or unknown keys under session_options are silently dropped; valid ones still apply.
+ Orts::onnx::session_key key("sample", "1");
+ auto session = std::make_shared<Orts::onnx::session>(
+ key, model1_path.string(),
+ R"({
+ "session_options": {
+ "intra_op_num_threads": "not-a-number",
+ "graph_optimization_level": "absurd-level",
+ "execution_mode": "weird",
+ "logid": "still-applies",
+ "totally_unknown_key": "ignore-me"
+ }
+ })"_json
+ );
+ auto j = session->to_json();
+ ASSERT_TRUE(j["option"].contains("session_options"));
+ auto so = j["option"]["session_options"];
+ ASSERT_FALSE(so.contains("intra_op_num_threads"));
+ ASSERT_FALSE(so.contains("graph_optimization_level"));
+ ASSERT_FALSE(so.contains("execution_mode"));
+ ASSERT_FALSE(so.contains("totally_unknown_key"));
+ ASSERT_EQ(so["logid"], "still-applies");
+}
+
+// AddSessionConfigEntry round-trips through GetSessionConfigEntry. The echo therefore reflects
+// what ORT actually stored, which proves the bool/int -> string conversion the server performs
+// before forwarding to ORT (true -> "1", 42 -> "42") matches what ORT will return on lookup.
+TEST(unit_test_session, SessionOptionsConfigEntriesReadback) {
+ Orts::onnx::session_key key("sample", "1");
+ auto session = std::make_shared<Orts::onnx::session>(
+ key, model1_path.string(),
+ R"({
+ "session_options": {
+ "config_entries": {
+ "key.string": "hello",
+ "key.bool": true,
+ "key.int": 42
+ }
+ }
+ })"_json
+ );
+ auto j = session->to_json();
+ auto ce = j["option"]["session_options"]["config_entries"];
+ ASSERT_EQ(ce["key.string"], "hello");
+ ASSERT_EQ(ce["key.bool"], "1");
+ ASSERT_EQ(ce["key.int"], "42");
+}
+
+// free_dimension_overrides has no readback API; AddFreeDimensionOverrideByName accepts any name
+// without raising, so the echo just confirms what we asked ORT to store. Whether a name actually
+// matches a model dimension is decided later at session creation time and is ORT's concern, not
+// ours. Non-integer values are dropped at our shape-check stage.
+TEST(unit_test_session, SessionOptionsFreeDimensionOverrides) {
+ Orts::onnx::session_key key("sample", "1");
+ auto session = std::make_shared<Orts::onnx::session>(
+ key, model1_path.string(),
+ R"({
+ "session_options": {
+ "free_dimension_overrides": {
+ "batch": 1,
+ "seq": 128,
+ "bad": "not-an-int"
+ }
+ }
+ })"_json
+ );
+ auto j = session->to_json();
+ ASSERT_TRUE(j["option"]["session_options"].contains("free_dimension_overrides"));
+ auto fd = j["option"]["session_options"]["free_dimension_overrides"];
+ ASSERT_EQ(fd["batch"], 1);
+ ASSERT_EQ(fd["seq"], 128);
+ ASSERT_FALSE(fd.contains("bad"));
+}
+
+// session::collect_extensions normalizes both the new "extensions" array and the legacy
+// "ortextensions_path" string into a single ordered, deduplicated array of paths in the order
+// they would be registered. Pure-function checks here cover what session construction would
+// actually attempt to register, without needing a loadable shared library on disk.
+TEST(unit_test_session, CollectExtensionsNormalization) {
+ using S = Orts::onnx::session; // short alias; every assertion below calls the static S::collect_extensions
+
+ // Empty / missing input yields an empty array.
+ ASSERT_EQ(S::collect_extensions(json::object()), json::array());
+ ASSERT_EQ(S::collect_extensions(R"({"extensions": []})"_json), json::array());
+
+ // Bare extensions array, single element.
+ auto only_array = S::collect_extensions(R"({"extensions": ["/lib1.so"]})"_json);
+ ASSERT_EQ(only_array, json::array({"/lib1.so"}));
+
+ // Multiple entries preserve input order.
+ auto ordered = S::collect_extensions(R"({"extensions": ["/lib1.so", "/lib2.so", "/lib3.so"]})"_json);
+ ASSERT_EQ(ordered, json::array({"/lib1.so", "/lib2.so", "/lib3.so"}));
+
+ // Duplicates within the extensions array are dropped, first occurrence wins.
+ auto deduped = S::collect_extensions(R"({"extensions": ["/lib.so", "/lib.so", "/lib.so"]})"_json);
+ ASSERT_EQ(deduped, json::array({"/lib.so"}));
+
+ // Legacy ortextensions_path alone is normalized into the extensions array.
+ auto only_legacy = S::collect_extensions(R"({"ortextensions_path": "/legacy.so"})"_json);
+ ASSERT_EQ(only_legacy, json::array({"/legacy.so"}));
+
+ // Legacy is appended after the extensions array, with dedupe across both sources.
+ auto mixed = S::collect_extensions(
+ R"({"extensions": ["/a.so", "/b.so"], "ortextensions_path": "/a.so"})"_json
+ );
+ ASSERT_EQ(mixed, json::array({"/a.so", "/b.so"}));
+
+ // Legacy path that is not in the extensions array is appended at the end.
+ auto mixed_distinct = S::collect_extensions(
+ R"({"extensions": ["/a.so"], "ortextensions_path": "/b.so"})"_json
+ );
+ ASSERT_EQ(mixed_distinct, json::array({"/a.so", "/b.so"}));
+
+ // Non-string entries inside the extensions array are silently ignored.
+ auto with_garbage = S::collect_extensions(
+ R"({"extensions": ["/a.so", 42, null, {"x":1}, "/b.so"]})"_json
+ );
+ ASSERT_EQ(with_garbage, json::array({"/a.so", "/b.so"}));
+
+ // extensions field with a wrong type is treated as if absent.
+ ASSERT_EQ(S::collect_extensions(R"({"extensions": "/lib.so"})"_json), json::array());
+ ASSERT_EQ(S::collect_extensions(R"({"extensions": 123})"_json), json::array());
+
+ // ortextensions_path with a wrong type is also ignored.
+ ASSERT_EQ(S::collect_extensions(R"({"ortextensions_path": 123})"_json), json::array());
+
+ // Non-object option input does not crash and yields an empty array.
+ ASSERT_EQ(S::collect_extensions(json("/raw-string")), json::array());
+ ASSERT_EQ(S::collect_extensions(json(nullptr)), json::array());
+}
+
+// At session construction time, both the new "extensions" array and the legacy "ortextensions_path"
+// string must reach the registration path: an unloadable library must surface as a clear
+// runtime_error instead of being silently dropped. (The actual successful registration path
+// requires a real onnxruntime_extensions shared library on disk and is not exercised here;
+// the normalization that drives it is fully covered by the CollectExtensionsNormalization test
+// above and by the parse-level tests below.)
+TEST(unit_test_session, ExtensionsRegistrationFailsLoudly) {
+ Orts::onnx::session_key key("sample", "1");
+ EXPECT_ANY_THROW(
+ auto session = std::make_shared<Orts::onnx::session>(
+ key, model1_path.string(),
+ R"({"ortextensions_path": "/nonexistent/path/to/lib.so"})"_json
+ );
+ );
+ EXPECT_ANY_THROW(
+ auto session = std::make_shared<Orts::onnx::session>(
+ key, model1_path.string(),
+ R"({"extensions": ["/nonexistent/path/to/lib.so"]})"_json
+ );
+ );
+}
+
+// Cover the option string grammar end-to-end: empty/whitespace input, malformed model keys that
+// must throw, well-formed lists with various spacing, the legacy scalar cuda shortcut, dotted
+// notation producing nested objects, repeated "extensions" keys accumulating into a deduped
+// array, the legacy ortextensions_path normalization, value type inference (bool/int/string),
+// pass-through of unknown keys, and lenient skipping of malformed option entries.
TEST(unit_test_session_key, Parse) {
// empty cases
std::string empty_cases[] = {"", " ", "\n", "\r\n", "\n \n", " \r \n \r \n "};
@@ -92,4 +295,78 @@ TEST(unit_test_session_key, Parse) {
auto parse_case4 = Orts::onnx::session_key_with_option::parse("model:version(cuda=true)");
ASSERT_TRUE(parse_case4[0].option["cuda"]);
+
+ // dotted notation produces nested objects
+ auto parse_dotted_cuda = Orts::onnx::session_key_with_option::parse("model:version(cuda.device_id=0)");
+ ASSERT_TRUE(parse_dotted_cuda[0].option["cuda"].is_object());
+ ASSERT_EQ(parse_dotted_cuda[0].option["cuda"]["device_id"], 0);
+
+ auto parse_dotted_session = Orts::onnx::session_key_with_option::parse(
+ "model:version(session_options.intra_op_num_threads=4, session_options.graph_optimization_level=all)"
+ );
+ ASSERT_TRUE(parse_dotted_session[0].option["session_options"].is_object());
+ ASSERT_EQ(parse_dotted_session[0].option["session_options"]["intra_op_num_threads"], 4);
+ ASSERT_EQ(parse_dotted_session[0].option["session_options"]["graph_optimization_level"], "all");
+
+ // scalar followed by dotted on the same group: dotted wins (scalar discarded)
+ auto parse_scalar_then_dotted = Orts::onnx::session_key_with_option::parse(
+ "model:version(cuda=true, cuda.device_id=1)"
+ );
+ ASSERT_TRUE(parse_scalar_then_dotted[0].option["cuda"].is_object());
+ ASSERT_EQ(parse_scalar_then_dotted[0].option["cuda"]["device_id"], 1);
+
+ // extensions key accumulates as an array
+ auto parse_extensions = Orts::onnx::session_key_with_option::parse(
+ "model:version(extensions=/lib1.so, extensions=/lib2.so)"
+ );
+ ASSERT_TRUE(parse_extensions[0].option["extensions"].is_array());
+ ASSERT_EQ(parse_extensions[0].option["extensions"].size(), 2);
+ ASSERT_EQ(parse_extensions[0].option["extensions"][0], "/lib1.so");
+ ASSERT_EQ(parse_extensions[0].option["extensions"][1], "/lib2.so");
+
+ // extensions dedupe
+ auto parse_extensions_dedup = Orts::onnx::session_key_with_option::parse(
+ "model:version(extensions=/lib.so, extensions=/lib.so)"
+ );
+ ASSERT_EQ(parse_extensions_dedup[0].option["extensions"].size(), 1);
+
+ // legacy ortextensions_path is normalized into the extensions array
+ auto parse_legacy_ext = Orts::onnx::session_key_with_option::parse(
+ "model:version(ortextensions_path=/usr/local/lib/libortextensions.so)"
+ );
+ ASSERT_FALSE(parse_legacy_ext[0].option.contains("ortextensions_path"));
+ ASSERT_TRUE(parse_legacy_ext[0].option["extensions"].is_array());
+ ASSERT_EQ(parse_legacy_ext[0].option["extensions"].size(), 1);
+ ASSERT_EQ(parse_legacy_ext[0].option["extensions"][0], "/usr/local/lib/libortextensions.so");
+
+ // extensions + legacy ortextensions_path mixed, with dedupe
+ auto parse_mixed_ext = Orts::onnx::session_key_with_option::parse(
+ "model:version(extensions=/a.so, ortextensions_path=/a.so, extensions=/b.so)"
+ );
+ ASSERT_EQ(parse_mixed_ext[0].option["extensions"].size(), 2);
+
+ // value type inference: bool, int, string
+ auto parse_types = Orts::onnx::session_key_with_option::parse(
+ "model:version(session_options.enable_cpu_mem_arena=false, "
+ "session_options.intra_op_num_threads=8, "
+ "session_options.logid=my-model)"
+ );
+ ASSERT_EQ(parse_types[0].option["session_options"]["enable_cpu_mem_arena"], false);
+ ASSERT_EQ(parse_types[0].option["session_options"]["intra_op_num_threads"], 8);
+ ASSERT_EQ(parse_types[0].option["session_options"]["logid"], "my-model");
+
+ // unknown / unrecognized option keys pass through silently (caller decides what to do)
+ auto parse_unknown = Orts::onnx::session_key_with_option::parse(
+ "model:version(some_unknown_key=hello, another.deep.key=42)"
+ );
+ ASSERT_EQ(parse_unknown[0].option["some_unknown_key"], "hello");
+ ASSERT_EQ(parse_unknown[0].option["another"]["deep"]["key"], 42);
+
+ // malformed option entries inside parens are silently skipped, well-formed ones still apply
+ auto parse_malformed_options = Orts::onnx::session_key_with_option::parse(
+ "model:version(=garbage, cuda=1, !!!, session_options.intra_op_num_threads=2)"
+ );
+ ASSERT_EQ(parse_malformed_options.size(), 1);
+ ASSERT_EQ(parse_malformed_options[0].option["cuda"], 1);
+ ASSERT_EQ(parse_malformed_options[0].option["session_options"]["intra_op_num_threads"], 2);
}