diff --git a/.gitignore b/.gitignore index 2379f36..8efb67f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ .idea/ .claude/ cmake-build-debug*/ +.claude/ +CLAUDE.md *.onnx diff --git a/README.md b/README.md index b02a1f8..c3a0b22 100644 --- a/README.md +++ b/README.md @@ -150,7 +150,7 @@ sudo cmake --install build --prefix /usr/local/onnxruntime-server | `--workers` | `ONNX_SERVER_WORKERS` | Worker thread pool size.
Default: `4` | | `--request-payload-limit` | `ONNX_SERVER_REQUEST_PAYLOAD_LIMIT` | HTTP/HTTPS request payload size limit.
Default: `1024 * 1024 * 10` (10MB) | | `--model-dir` | `ONNX_SERVER_MODEL_DIR` | Model directory path
The onnx model files must be located in the following path:
`${model_dir}/${model_name}/${model_version}/model.onnx` or
`${model_dir}/${model_name}/${model_version}.onnx`
Default: `models` | -| `--prepare-model` | `ONNX_SERVER_PREPARE_MODEL` | Pre-create some model sessions at server startup.

Format as a space-separated list of `model_name:model_version` or `model_name:model_version(session_options, ...)`.

Available session_options are
- cuda=device_id`[ or true or false]`

eg) `model1:v1 model2:v9`
`model1:v1(cuda=true) model2:v9(cuda=1)` | +| `--prepare-model` | `ONNX_SERVER_PREPARE_MODEL` | Pre-create some model sessions at server startup.

Format as a space-separated list of `model_name:model_version` or `model_name:model_version(opt1=val1, opt2=val2, ...)`. Option keys may use dotted notation to address nested groups (e.g. `cuda.device_id`, `session_options.intra_op_num_threads`). Repeating the `extensions` key accumulates a deduplicated array. Option entries that do not match the grammar are skipped silently rather than failing the whole list.

Examples:
- `model1:v1 model2:v9`
- `model1:v1(cuda=true) model2:v9(cuda=1)`
- `bert:v1(cuda.device_id=0, cuda.gpu_mem_limit=2147483648)`
- `bert:v1(session_options.intra_op_num_threads=4, session_options.graph_optimization_level=all)`
- `bert:v1(extensions=/usr/local/lib/libortextensions.so)` | ### Backend options @@ -223,8 +223,9 @@ docker run --name onnxruntime_server_container -d --rm --gpus all \ ## ONNXRuntime Extensions Support -To use the [onnxruntime-extensions](https://github.com/microsoft/onnxruntime-extensions)(Custom Ops Library), set the -options as follows when creating a session. +To use the [onnxruntime-extensions](https://github.com/microsoft/onnxruntime-extensions) (Custom Ops Library), supply +one or more library paths through the `extensions` array. The server registers each path with ORT in order and +deduplicates entries. ```json { @@ -232,11 +233,82 @@ options as follows when creating a session. "version": "string", "option": { "cuda": ..., - "ortextensions_path": "/absolute/path/to/libonnxruntime_extensions.so" + "extensions": [ + "/absolute/path/to/libonnxruntime_extensions.so" + ] } } ``` +The legacy `ortextensions_path` (single string) is still accepted for backward compatibility; it is normalized into the +`extensions` array on the server side and the response always echoes the normalized form. + +## Session-level options + +The optional `session_options` object on a session-create request forwards the listed keys to the underlying +onnxruntime `SessionOptions`. Only the JSON shape (types and our enum-string mapping) is validated on the server side; +the actual value validation is delegated to ORT, and the response echoes only the values ORT accepted. 
+ +```json +{ + "model": "string", + "version": "string", + "option": { + "session_options": { + "intra_op_num_threads": 4, + "inter_op_num_threads": 1, + "execution_mode": "sequential", + "graph_optimization_level": "all", + "enable_cpu_mem_arena": true, + "enable_mem_pattern": true, + "log_severity_level": 2, + "logid": "my-model", + "enable_profiling": false, + "profile_file_prefix": "/var/log/onnx/profile-", + "optimized_model_filepath": "/cache/optimized.onnx", + "free_dimension_overrides": { "batch": 1 }, + "config_entries": { + "session.disable_prepacking": "1" + } + } + } +} +``` + +`config_entries` is round-tripped through `GetSessionConfigEntry`, so the response shows what ORT actually stored +(string values; `true`/`42` become `"1"`/`"42"`). + +## CUDA execution provider options + +When CUDA is enabled, the `cuda` field accepts either a boolean / integer (legacy shorthand) or an object that maps to +[CUDA Execution Provider options](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html). The +server forwards the object to ORT via `UpdateCUDAProviderOptions` in a single batched call (per-key calls trigger a +sibling-reset quirk in ORT V2). If any key is rejected by ORT, session creation fails with the ORT error message +identifying the offending key. The response is built from `GetCUDAProviderOptionsAsString` readback, so it reflects +exactly what ORT stored. + +```json +{ + "model": "string", + "version": "string", + "option": { + "cuda": { + "device_id": 0, + "gpu_mem_limit": 2147483648, + "arena_extend_strategy": "kNextPowerOfTwo", + "cudnn_conv_algo_search": "EXHAUSTIVE", + "cudnn_conv_use_max_workspace": true, + "do_copy_in_default_stream": true, + "enable_cuda_graph": false + } + } +} +``` + +Backward-compatible shortcuts: +- `"cuda": true` — enable CUDA with all defaults (`device_id=0`). +- `"cuda": 1` — enable CUDA on `device_id=1`. 
+ For more details on the session creation request, please refer to the [API documentation](https://kibae.github.io/onnxruntime-server/swagger/#/ONNX%20Runtime%20Session/createSession). diff --git a/docs/swagger/openapi.yaml b/docs/swagger/openapi.yaml index 686e2f4..04ed05c 100644 --- a/docs/swagger/openapi.yaml +++ b/docs/swagger/openapi.yaml @@ -269,6 +269,11 @@ components: $ref: '#/components/schemas/ONNXSessionOption' ONNXSessionOption: type: object + description: | + Normalized echo of the options applied to the session. The server only includes + keys whose corresponding ORT calls succeeded; values reflect what ORT actually + stored (read back via GetCUDAProviderOptionsAsString and GetSessionConfigEntry + where applicable). nullable: true properties: cuda: @@ -276,8 +281,18 @@ components: required: false oneOf: - type: boolean - description: Use CUDA + description: CUDA disabled (false) — present for backward compatibility. - $ref: '#/components/schemas/ONNXSessionOptionCUDA' + extensions: + type: array + description: Registered onnxruntime-extensions library paths in registration order, deduplicated. + required: false + items: + type: string + example: + - /absolute/path/to/libonnxruntime_extensions.so + session_options: + $ref: '#/components/schemas/ONNXSessionOptionsGroup' ONNXSessionOptionRequest: type: object nullable: true @@ -287,11 +302,13 @@ components: required: false oneOf: - type: boolean - description: Use CUDA + description: Enable CUDA with all defaults (device_id=0). + - type: integer + description: Enable CUDA on the given device_id. - $ref: '#/components/schemas/ONNXSessionOptionCUDA' input_shape: type: object - description: Input shape + description: Input shape overrides keyed by input name. nullable: false required: false example: { @@ -301,25 +318,157 @@ components: } output_shape: type: object - description: Output shape + description: Output shape overrides keyed by output name. 
nullable: false required: false example: { "output": [ 1, 1 ] } + extensions: + type: array + description: | + One or more absolute paths to onnxruntime-extensions custom-ops libraries. + Each path is registered with ORT in array order; duplicate paths are deduplicated. + nullable: false + required: false + items: + type: string + example: + - /absolute/path/to/libonnxruntime_extensions.so ortextensions_path: type: string - description: To use the ONNXRuntime Extension (Custom Ops Library), you must provide the library path. + description: | + Deprecated alias for `extensions`. A single library path. The server normalizes + it into the `extensions` array on input and the response always echoes the + normalized form. + deprecated: true nullable: false required: false - example: /absolute/path/to/libonnxruntime_extensions + example: /absolute/path/to/libonnxruntime_extensions.so + session_options: + $ref: '#/components/schemas/ONNXSessionOptionsGroup' ONNXSessionOptionCUDA: type: object + description: | + CUDA Execution Provider V2 options. The server forwards every supplied key to + UpdateCUDAProviderOptions in a single batched call; if ORT rejects any key the + whole session creation fails with the ORT error message. The response is built + from GetCUDAProviderOptionsAsString readback, so it shows exactly what ORT + stored (which may differ from the requested value if ORT normalized it). properties: device_id: type: integer description: CUDA device ID nullable: false + gpu_mem_limit: + type: integer + description: Per-session GPU memory limit, in bytes. + nullable: false + arena_extend_strategy: + type: string + description: Arena extension strategy, e.g. "kNextPowerOfTwo" or "kSameAsRequested". + nullable: false + cudnn_conv_algo_search: + type: string + description: cuDNN convolution algorithm search policy. Accepted values are ORT-defined enum names. 
+ nullable: false + cudnn_conv_use_max_workspace: + type: boolean + nullable: false + do_copy_in_default_stream: + type: boolean + nullable: false + enable_cuda_graph: + type: boolean + description: Capture and replay a CUDA graph (requires static input shapes). + nullable: false + tunable_op_enable: + type: boolean + nullable: false + tunable_op_tuning_enable: + type: boolean + nullable: false + cudnn_conv1d_pad_to_nc1d: + type: boolean + nullable: false + additionalProperties: + description: | + Any additional CUDA Execution Provider V2 key understood by your ORT build is + forwarded as-is. Refer to the ORT CUDA EP documentation for the full list of + accepted keys. + ONNXSessionOptionsGroup: + type: object + description: | + Session-level options forwarded to onnxruntime SessionOptions. The server only + validates JSON shape (types and our enum-string mapping); ORT decides whether the + value itself is acceptable. Keys whose ORT setter throws are silently dropped from + the echoed response. The `config_entries` object is round-tripped through + GetSessionConfigEntry so the echo shows what ORT actually stored (always strings). + nullable: false + required: false + properties: + intra_op_num_threads: + type: integer + description: Number of threads used for parallelizing operators. 0 means ORT default. + nullable: false + inter_op_num_threads: + type: integer + description: Number of threads used for parallelizing the graph. 0 means ORT default. + nullable: false + execution_mode: + type: string + enum: [sequential, parallel] + nullable: false + graph_optimization_level: + type: string + enum: [disable, basic, extended, all] + nullable: false + enable_cpu_mem_arena: + type: boolean + nullable: false + enable_mem_pattern: + type: boolean + nullable: false + log_severity_level: + type: integer + description: ORT log severity level (0=verbose ... 4=fatal). 
+ nullable: false + logid: + type: string + nullable: false + enable_profiling: + type: boolean + description: Enable profiling. When true, profile_file_prefix must also be supplied. + nullable: false + profile_file_prefix: + type: string + nullable: false + optimized_model_filepath: + type: string + description: Filepath where ORT writes the optimized model after graph transformations. + nullable: false + free_dimension_overrides: + type: object + description: Map of free dimension name to a fixed integer size. + additionalProperties: + type: integer + nullable: false + example: + batch: 1 + config_entries: + type: object + description: | + Generic passthrough to AddSessionConfigEntry (e.g. "session.disable_prepacking"). + Booleans and integers are stringified before being passed to ORT; values in the + response are always strings (round-tripped through GetSessionConfigEntry). + additionalProperties: + oneOf: + - type: string + - type: boolean + - type: integer + nullable: false + example: + session.disable_prepacking: "1" ONNXSessionCreateRequest: type: object properties: diff --git a/src/onnx/cuda/session_options.cpp b/src/onnx/cuda/session_options.cpp index 5cb0603..ef371f0 100644 --- a/src/onnx/cuda/session_options.cpp +++ b/src/onnx/cuda/session_options.cpp @@ -3,18 +3,185 @@ // #include "session_options.hpp" +#include + +namespace { + +std::string to_provider_string(const json &v) { + if (v.is_boolean()) + return v.get() ? "1" : "0"; + if (v.is_number_integer()) + return std::to_string(v.get()); + if (v.is_number_unsigned()) + return std::to_string(v.get()); + if (v.is_string()) + return v.get(); + return v.dump(); +} + +// Apply all caller-supplied CUDA provider options in a single UpdateCUDAProviderOptions call. +// +// Why a single call: ORT V2's UpdateCUDAProviderOptions silently resets sibling keys that share +// an internal options group (e.g. updating arena_extend_strategy alone reverts gpu_mem_limit to +// its default). 
Calling it once with the full key/value set is the only way to apply multiple +// keys safely. The trade-off is that any single invalid key aborts the whole batch; that is +// acceptable here because ORT's error message identifies the offending key, so the caller can +// see exactly what was rejected. +void update_all(OrtCUDAProviderOptionsV2 *cuda_options, const std::vector &keys, + const std::vector &values) { + if (keys.empty()) + return; + std::vector ck; + std::vector cv; + ck.reserve(keys.size()); + cv.reserve(values.size()); + for (size_t i = 0; i < keys.size(); ++i) { + ck.push_back(keys[i].c_str()); + cv.push_back(values[i].c_str()); + } + OrtStatus *st = Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, ck.data(), cv.data(), ck.size()); + if (st != nullptr) { + const char *err = Ort::GetApi().GetErrorMessage(st); + std::string msg = err ? err : "unknown error"; + Ort::GetApi().ReleaseStatus(st); + throw onnxruntime_server::runtime_error(std::string("Failed to update CUDA provider options: ") + msg); + } +} + +// Convert the readback value (always a string from ORT) back to the most natural JSON type so +// the response shape matches what callers typically send: integers as integers, "true"/"false" +// as booleans, anything else as a string. +json infer_readback_value(const std::string &raw) { + if (raw == "true") + return true; + if (raw == "false") + return false; + if (!raw.empty()) { + bool numeric = (raw[0] == '-' || (raw[0] >= '0' && raw[0] <= '9')); + if (numeric) { + for (size_t i = 1; i < raw.size(); ++i) { + if (raw[i] < '0' || raw[i] > '9') { + numeric = false; + break; + } + } + if (numeric) { + try { + return json(std::stoll(raw)); + } catch (...) { + // fall through to string + } + } + } + } + return raw; +} + +// Parse "key1=value1;key2=value2" produced by GetCUDAProviderOptionsAsString. 
+json parse_options_string(const std::string &s) { + json out = json::object(); + size_t pos = 0; + while (pos < s.size()) { + auto eq = s.find('=', pos); + if (eq == std::string::npos) + break; + auto sc = s.find(';', eq); + if (sc == std::string::npos) + sc = s.size(); + auto k = s.substr(pos, eq - pos); + auto v = s.substr(eq + 1, sc - eq - 1); + if (!k.empty()) + out[k] = infer_readback_value(v); + pos = sc + 1; + } + return out; +} + +} // namespace + +// Apply CUDA provider options. +// +// Validation policy: as in apply_session_options(), values are not re-validated here; every +// caller-supplied entry is forwarded to ORT in a single batched UpdateCUDAProviderOptions call +// (see update_all), and ORT decides whether to accept it. Unlike apply_session_options(), a +// rejected key is not skipped silently: it fails the whole call with ORT's error message. The +// echoed object is built from GetCUDAProviderOptionsAsString readback (the ground truth of what +// ORT stored), filtered to the keys the caller actually supplied (plus device_id, which is +// always meaningful). json append_cuda_session_options(OrtSessionOptions *session_options, const json &option) { auto cuda = option["cuda"]; - json result = json::object(); + OrtCUDAProviderOptionsV2 *cuda_options = nullptr; + Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&cuda_options)); + + // Track which keys the caller asked about — these are the keys we will echo from readback. + // device_id is always included for backward compatibility with the previous response shape. 
+ std::set requested_keys; + requested_keys.insert("device_id"); - // device_id - int device_id = 0; - if (cuda.is_object() && cuda.contains("device_id")) - device_id = cuda["device_id"].get(); - result["device_id"] = device_id; + std::vector keys; + std::vector values; + if (cuda.is_object()) { + for (auto it = cuda.begin(); it != cuda.end(); ++it) { + keys.push_back(it.key()); + values.push_back(to_provider_string(it.value())); + requested_keys.insert(it.key()); + } + } else if (cuda.is_number_integer()) { + keys.push_back("device_id"); + values.push_back(std::to_string(cuda.get())); + } + // cuda == true or false: nothing to update; default V2 options are used. + + try { + update_all(cuda_options, keys, values); + } catch (...) { + Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options); + throw; + } + + OrtStatus *append_status = + Ort::GetApi().SessionOptionsAppendExecutionProvider_CUDA_V2(session_options, cuda_options); + if (append_status != nullptr) { + const char *err = Ort::GetApi().GetErrorMessage(append_status); + std::string msg = err ? err : "unknown error"; + Ort::GetApi().ReleaseStatus(append_status); + Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options); + throw onnxruntime_server::runtime_error(std::string("Failed to append CUDA EP: ") + msg); + } + + // Readback the full options string and echo only the keys the caller cared about. + // The whole readback section is wrapped in a try/catch so that an exception in any of the + // allocations (std::bad_alloc, json construction) cannot leak the ORT allocator buffer or the + // cuda_options handle. 
+ json result = json::object(); + try { + OrtAllocator *allocator = nullptr; + OrtStatus *alloc_st = Ort::GetApi().GetAllocatorWithDefaultOptions(&allocator); + if (alloc_st != nullptr) { + Ort::GetApi().ReleaseStatus(alloc_st); + } else { + char *cstr = nullptr; + OrtStatus *st = Ort::GetApi().GetCUDAProviderOptionsAsString(cuda_options, allocator, &cstr); + if (st != nullptr) { + Ort::GetApi().ReleaseStatus(st); + } else if (cstr != nullptr) { + try { + auto all = parse_options_string(std::string(cstr)); + for (auto it = all.begin(); it != all.end(); ++it) { + if (requested_keys.count(it.key())) + result[it.key()] = it.value(); + } + } catch (...) { + allocator->Free(allocator, cstr); + throw; + } + allocator->Free(allocator, cstr); + } + } + } catch (...) { + Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options); + throw; + } - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, device_id)); + Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options); return result; } diff --git a/src/onnx/session.cpp b/src/onnx/session.cpp index 66a8ac5..4c9cc29 100644 --- a/src/onnx/session.cpp +++ b/src/onnx/session.cpp @@ -10,30 +10,282 @@ #include "cuda/session_options.hpp" #endif -Orts::onnx::session::session(session_key key, const json &option) - : session_options(), created_at(std::chrono::system_clock::now()), allocator(), key(std::move(key)) { - _option["cuda"] = false; +namespace { + +#ifdef _WIN32 +std::wstring to_wide(const std::string &s) { + int size_needed = MultiByteToWideChar(CP_ACP, 0, s.c_str(), -1, NULL, 0); + std::wstring wstr(size_needed, 0); + MultiByteToWideChar(CP_ACP, 0, s.c_str(), -1, &wstr[0], size_needed); + return wstr; +} +#endif + +void register_extension(Ort::SessionOptions &session_options, const std::string &path) { +#ifdef _WIN32 + auto wpath = to_wide(path); + auto p = wpath.c_str(); +#else + auto p = path.c_str(); +#endif + OrtStatus *status = Ort::GetApi().RegisterCustomOpsLibrary_V2(session_options, p); + if 
(status != nullptr) { + const char *err = Ort::GetApi().GetErrorMessage(status); + std::string msg = err ? err : "unknown error"; + Ort::GetApi().ReleaseStatus(status); + throw onnxruntime_server::runtime_error( + std::string("Failed to register ORT extensions (") + path + "): " + msg + ); + } +} + +GraphOptimizationLevel parse_graph_opt_level(const std::string &v, bool &valid) { + valid = true; + if (v == "disable" || v == "disabled" || v == "off") + return ORT_DISABLE_ALL; + if (v == "basic") + return ORT_ENABLE_BASIC; + if (v == "extended") + return ORT_ENABLE_EXTENDED; + if (v == "all") + return ORT_ENABLE_ALL; + valid = false; + return ORT_ENABLE_ALL; +} + +ExecutionMode parse_execution_mode(const std::string &v, bool &valid) { + valid = true; + if (v == "parallel") + return ORT_PARALLEL; + if (v == "sequential") + return ORT_SEQUENTIAL; + valid = false; + return ORT_SEQUENTIAL; +} + +// Apply session-level options. +// +// Validation policy: we only check JSON shape (types, our enum string mapping). We do NOT +// re-implement ORT's own value validation (allowed ranges, defaults, etc.) — that knowledge +// belongs to ORT and would force us to track every ORT version's rules. Instead, every shape- +// valid value is forwarded to ORT, and the setter's outcome decides the echo: +// - setter succeeds -> echo the value (or the readback value where an API exists) +// - setter throws -> skip silently, do not echo (the option was rejected by ORT) +// Where ORT exposes a readback (currently config_entries via GetSessionConfigEntry), the echo +// uses the readback value so it reflects what ORT actually stored, not what we sent. 
+template +bool try_apply(F &&f) { + try { + f(); + return true; + } catch (const Ort::Exception &) { + return false; + } catch (const std::exception &) { + return false; + } +} + +json apply_session_options(Ort::SessionOptions &session_options, const json &input) { + json applied = json::object(); + if (!input.is_object()) + return applied; + + if (input.contains("intra_op_num_threads") && input["intra_op_num_threads"].is_number_integer()) { + auto v = input["intra_op_num_threads"].get(); + if (try_apply([&] { session_options.SetIntraOpNumThreads(v); })) + applied["intra_op_num_threads"] = v; + } + + if (input.contains("inter_op_num_threads") && input["inter_op_num_threads"].is_number_integer()) { + auto v = input["inter_op_num_threads"].get(); + if (try_apply([&] { session_options.SetInterOpNumThreads(v); })) + applied["inter_op_num_threads"] = v; + } + + if (input.contains("execution_mode") && input["execution_mode"].is_string()) { + bool valid = false; + auto s = input["execution_mode"].get(); + auto mode = parse_execution_mode(s, valid); + if (valid && try_apply([&] { session_options.SetExecutionMode(mode); })) + applied["execution_mode"] = (mode == ORT_PARALLEL) ? 
"parallel" : "sequential"; + } + + if (input.contains("graph_optimization_level") && input["graph_optimization_level"].is_string()) { + bool valid = false; + auto s = input["graph_optimization_level"].get(); + auto lvl = parse_graph_opt_level(s, valid); + if (valid && try_apply([&] { session_options.SetGraphOptimizationLevel(lvl); })) + applied["graph_optimization_level"] = s; + } + + if (input.contains("enable_cpu_mem_arena") && input["enable_cpu_mem_arena"].is_boolean()) { + auto v = input["enable_cpu_mem_arena"].get(); + if (try_apply([&] { + if (v) + session_options.EnableCpuMemArena(); + else + session_options.DisableCpuMemArena(); + })) + applied["enable_cpu_mem_arena"] = v; + } - if (option.contains("ortextensions_path") && option["ortextensions_path"].is_string()) { - auto ext_path_str = option["ortextensions_path"].get(); + if (input.contains("enable_mem_pattern") && input["enable_mem_pattern"].is_boolean()) { + auto v = input["enable_mem_pattern"].get(); + if (try_apply([&] { + if (v) + session_options.EnableMemPattern(); + else + session_options.DisableMemPattern(); + })) + applied["enable_mem_pattern"] = v; + } + + if (input.contains("log_severity_level") && input["log_severity_level"].is_number_integer()) { + auto v = input["log_severity_level"].get(); + if (try_apply([&] { session_options.SetLogSeverityLevel(v); })) + applied["log_severity_level"] = v; + } + + if (input.contains("logid") && input["logid"].is_string()) { + auto v = input["logid"].get(); + if (try_apply([&] { session_options.SetLogId(v.c_str()); })) + applied["logid"] = v; + } + + if (input.contains("enable_profiling") && input["enable_profiling"].is_boolean() && + input["enable_profiling"].get()) { + std::string prefix; + if (input.contains("profile_file_prefix") && input["profile_file_prefix"].is_string()) + prefix = input["profile_file_prefix"].get(); + bool ok = try_apply([&] { #ifdef _WIN32 - int size_needed = MultiByteToWideChar(CP_ACP, 0, ext_path_str.c_str(), -1, NULL, 0); - 
std::wstring wstr(size_needed, 0); - MultiByteToWideChar(CP_ACP, 0, ext_path_str.c_str(), -1, &wstr[0], size_needed); + auto wprefix = to_wide(prefix); + session_options.EnableProfiling(wprefix.c_str()); +#else + session_options.EnableProfiling(prefix.c_str()); +#endif + }); + if (ok) { + applied["enable_profiling"] = true; + applied["profile_file_prefix"] = prefix; + } + } - auto ext_path = wstr.c_str(); + if (input.contains("optimized_model_filepath") && input["optimized_model_filepath"].is_string()) { + auto s = input["optimized_model_filepath"].get(); + bool ok = try_apply([&] { +#ifdef _WIN32 + auto ws = to_wide(s); + session_options.SetOptimizedModelFilePath(ws.c_str()); #else - auto ext_path = ext_path_str.c_str(); + session_options.SetOptimizedModelFilePath(s.c_str()); #endif - OrtStatus *status = Ort::GetApi().RegisterCustomOpsLibrary_V2(session_options, ext_path); - if (status != nullptr) { - const char *err = Ort::GetApi().GetErrorMessage(status); - std::string msg = err ? err : "unknown error"; - Ort::GetApi().ReleaseStatus(status); - throw runtime_error(std::string("Failed to register ORT extensions: ") + msg); + }); + if (ok) + applied["optimized_model_filepath"] = s; + } + + if (input.contains("free_dimension_overrides") && input["free_dimension_overrides"].is_object()) { + json normalized = json::object(); + for (auto it = input["free_dimension_overrides"].begin(); + it != input["free_dimension_overrides"].end(); ++it) { + if (!it.value().is_number_integer()) + continue; + auto dim = it.value().get(); + auto name = it.key(); + if (try_apply([&] { session_options.AddFreeDimensionOverrideByName(name.c_str(), dim); })) + normalized[name] = dim; } + if (!normalized.empty()) + applied["free_dimension_overrides"] = normalized; + } + + // config_entries: AddSessionConfigEntry accepts any string key, so we readback each entry + // via GetSessionConfigEntry to ensure the echo reflects what ORT actually stored. 
+ if (input.contains("config_entries") && input["config_entries"].is_object()) { + json normalized = json::object(); + for (auto it = input["config_entries"].begin(); it != input["config_entries"].end(); ++it) { + std::string sv; + if (it.value().is_string()) + sv = it.value().get(); + else if (it.value().is_boolean()) + sv = it.value().get() ? "1" : "0"; + else if (it.value().is_number_integer()) + sv = std::to_string(it.value().get()); + else + continue; + auto key = it.key(); + if (!try_apply([&] { session_options.AddConfigEntry(key.c_str(), sv.c_str()); })) + continue; + + size_t needed = 0; + OrtStatus *st = Ort::GetApi().GetSessionConfigEntry( + session_options, key.c_str(), nullptr, &needed + ); + if (st != nullptr) { + Ort::GetApi().ReleaseStatus(st); + continue; + } + std::string out(needed, '\0'); + st = Ort::GetApi().GetSessionConfigEntry( + session_options, key.c_str(), out.data(), &needed + ); + if (st != nullptr) { + Ort::GetApi().ReleaseStatus(st); + continue; + } + if (!out.empty() && out.back() == '\0') + out.pop_back(); + normalized[key] = out; + } + if (!normalized.empty()) + applied["config_entries"] = normalized; + } + + return applied; +} + +} // namespace + +json Orts::onnx::session::collect_extensions(const json &option) { + json result = json::array(); + if (!option.is_object()) + return result; + auto add = [&](const std::string &path) { + for (auto &e : result) { + if (e.is_string() && e.get() == path) + return; + } + result.push_back(path); + }; + if (option.contains("extensions") && option["extensions"].is_array()) { + for (auto &e : option["extensions"]) { + if (e.is_string()) + add(e.get()); + } + } + if (option.contains("ortextensions_path") && option["ortextensions_path"].is_string()) + add(option["ortextensions_path"].get()); + return result; +} + +Orts::onnx::session::session(session_key key, const json &option) + : session_options(), created_at(std::chrono::system_clock::now()), allocator(), key(std::move(key)) { + 
_option["cuda"] = false; + + // session-level options (apply before EP/extension registration) + if (option.contains("session_options") && option["session_options"].is_object()) { + auto applied = apply_session_options(session_options, option["session_options"]); + if (!applied.empty()) + _option["session_options"] = applied; + } - _option["ortextensions_path"] = option["ortextensions_path"]; + // register custom op libraries: extensions array + legacy ortextensions_path, deduplicated + auto extensions = collect_extensions(option); + if (!extensions.empty()) { + for (auto &e : extensions) + register_extension(session_options, e.get()); + _option["extensions"] = extensions; } if (providers::available_providers.has_cuda() && option.contains("cuda") && ( diff --git a/src/onnx/session_key_with_option.cpp b/src/onnx/session_key_with_option.cpp index 8d2dee6..9cd711b 100644 --- a/src/onnx/session_key_with_option.cpp +++ b/src/onnx/session_key_with_option.cpp @@ -6,18 +6,88 @@ #include "../onnxruntime_server.hpp" +namespace { + std::regex space_re(R"(\s+)"); std::regex trim_re(R"(^\s*|\s*$)"); std::string key_rule = R"(([-_a-zA-Z0-9]+):([-_/a-zA-Z0-9]+)(\(([^)]+)\))?)"; std::regex key_re(key_rule); -std::string option_rule = R"(([_a-zA-Z0-9]+)\s*=\s*([^,\s]+))"; +// option key supports dotted notation (e.g. cuda.device_id, session_options.intra_op_num_threads) +std::string option_rule = R"(([_a-zA-Z0-9][_a-zA-Z0-9.]*)\s*=\s*([^,\s]+))"; std::regex option_re(option_rule); +std::regex int_re(R"(^-?[0-9]+$)"); + +const std::string EXTENSIONS_KEY = "extensions"; +const std::string LEGACY_EXTENSION_KEY = "ortextensions_path"; + +json infer_value(const std::string &raw) { + if (raw == "true") + return true; + if (raw == "false") + return false; + if (std::regex_match(raw, int_re)) { + try { + return json(std::stoll(raw)); + } catch (...) 
{ + return raw; + } + } + return raw; +} + +std::vector split_dot(const std::string &k) { + std::vector parts; + std::string cur; + for (char c : k) { + if (c == '.') { + if (!cur.empty()) + parts.push_back(cur); + cur.clear(); + } else { + cur += c; + } + } + if (!cur.empty()) + parts.push_back(cur); + return parts; +} + +void set_nested(json &option, const std::vector &path, const json &value) { + json *cur = &option; + for (size_t i = 0; i + 1 < path.size(); ++i) { + if (!cur->is_object()) + *cur = json::object(); + if (!cur->contains(path[i]) || !(*cur)[path[i]].is_object()) + (*cur)[path[i]] = json::object(); + cur = &(*cur)[path[i]]; + } + if (!cur->is_object()) + *cur = json::object(); + (*cur)[path.back()] = value; +} + +void append_extension(json &option, const std::string &path) { + if (!option.contains(EXTENSIONS_KEY) || !option[EXTENSIONS_KEY].is_array()) + option[EXTENSIONS_KEY] = json::array(); + auto &arr = option[EXTENSIONS_KEY]; + for (auto &e : arr) { + if (e.is_string() && e.get() == path) + return; + } + arr.push_back(path); +} + +} // namespace + std::vector onnxruntime_server::onnx::session_key_with_option::parse(const std::string &model_key_list) { - // model_key_list is a space separated list of model_name:model_version + // model_key_list is a space separated list of model_name:model_version[(opt1=val1, opt2=val2)] + // option keys may be dotted (cuda.device_id, session_options.intra_op_num_threads) producing nested objects. + // extensions/ortextensions_path keys accumulate into an "extensions" array (deduplicated). + // option entries that don't match the grammar are silently skipped. 
std::vector models; std::string list = std::regex_replace(std::regex_replace(model_key_list, space_re, " "), trim_re, ""); if (list.empty()) @@ -27,20 +97,26 @@ onnxruntime_server::onnx::session_key_with_option::parse(const std::string &mode while (std::regex_search(list, keys, key_re)) { json option = json::object(); - // parse option auto option_str = keys[4].str(); if (!option_str.empty()) { std::smatch options; while (std::regex_search(option_str, options, option_re)) { - auto option_key = options[1].str(); - auto option_val = options[2].str(); - - // cuda option: device_id or true/false - if (option_key == "cuda") { - if (option_val == "true" || option_val == "false") - option[option_key] = option_val == "true"; - else - option[option_key] = std::stoi(option_val); + auto raw_key = options[1].str(); + auto raw_val = options[2].str(); + auto value = infer_value(raw_val); + + auto parts = split_dot(raw_key); + if (parts.empty()) { + option_str = options.suffix().str(); + continue; + } + + if (parts.size() == 1 && + (parts[0] == EXTENSIONS_KEY || parts[0] == LEGACY_EXTENSION_KEY) && + value.is_string()) { + append_extension(option, value.get()); + } else { + set_nested(option, parts, value); } option_str = options.suffix().str(); diff --git a/src/onnxruntime_server.hpp b/src/onnxruntime_server.hpp index ba6873b..629507b 100644 --- a/src/onnxruntime_server.hpp +++ b/src/onnxruntime_server.hpp @@ -138,6 +138,11 @@ namespace onnxruntime_server { [[nodiscard]] const std::vector &inputs() const; [[nodiscard]] const std::vector &outputs() const; + + // Normalize the extensions input (the new "extensions" array and the legacy + // "ortextensions_path" string) into a single deduplicated array of paths in the + // order they would be registered. Pure function; no file system or onnxruntime calls. 
+ static json collect_extensions(const json &option); }; typedef std::shared_ptr session_ptr; diff --git a/src/test/unit/unit_test_context_cuda.cpp b/src/test/unit/unit_test_context_cuda.cpp index 077e401..42bfdfa 100644 --- a/src/test/unit/unit_test_context_cuda.cpp +++ b/src/test/unit/unit_test_context_cuda.cpp @@ -4,6 +4,8 @@ #include "../../onnxruntime_server.hpp" #include "../test_common.hpp" +// End-to-end smoke test: build a session on the CUDA EP with the simplest "cuda": true input, +// run the BERT SQuAD sample model, and assert the output tensor shape. TEST(test_onnxruntime_server_context_cuda, BertSquadModelTest) { Orts::onnx::session_key key("sample", "2"); auto session = std::make_shared(key, model2_path.string(), json::parse(R"({"cuda": true})")); @@ -31,3 +33,58 @@ TEST(test_onnxruntime_server_context_cuda, BertSquadModelTest) { std::cout << json.dump(4) << "\n"; ASSERT_EQ(json["output"].size(), 3); } + +// CUDA EP V2 options passed as a "cuda" object are forwarded to ORT in a single batched +// UpdateCUDAProviderOptions call (ORT silently resets sibling keys when called per-key, so a +// single batched call is the only safe way). The echoed value comes from +// GetCUDAProviderOptionsAsString readback, i.e. what ORT actually stored. Every supplied key +// that ORT accepted should round-trip; if ORT had rejected any of them the whole session +// construction would have thrown rather than silently returning a partial echo. 
+TEST(test_onnxruntime_server_context_cuda, CudaObjectOptionsEcho) { + Orts::onnx::session_key key("sample", "2"); + auto session = std::make_shared( + key, model2_path.string(), + R"({ + "cuda": { + "device_id": 0, + "gpu_mem_limit": 2147483648, + "arena_extend_strategy": "kNextPowerOfTwo", + "cudnn_conv_algo_search": "HEURISTIC" + } + })"_json + ); + auto j = session->to_json(); + ASSERT_TRUE(j["option"]["cuda"].is_object()); + auto cu = j["option"]["cuda"]; + + ASSERT_EQ(cu["device_id"], 0); + ASSERT_EQ(cu["gpu_mem_limit"], 2147483648); + ASSERT_EQ(cu["arena_extend_strategy"], "kNextPowerOfTwo"); + ASSERT_EQ(cu["cudnn_conv_algo_search"], "HEURISTIC"); +} + +// An unknown CUDA option key (or one ORT cannot parse) must abort session construction with a +// clear error rather than silently producing a partial echo. This is the natural consequence of +// the batched-update strategy and is the contract callers can rely on. +TEST(test_onnxruntime_server_context_cuda, CudaObjectRejectsUnknownKey) { + Orts::onnx::session_key key("sample", "2"); + EXPECT_ANY_THROW( + auto session = std::make_shared( + key, model2_path.string(), + R"({"cuda": {"device_id": 0, "totally_not_a_real_cuda_option": "xyz"}})"_json + ); + ); +} + +// Backward compatibility: the legacy scalar shortcuts ("cuda": true and "cuda": <device_id>) +// must keep working under the V2 EP path and still echo as a normalized object with device_id.
+TEST(test_onnxruntime_server_context_cuda, CudaScalarShortcutStillWorks) { + // Backward compat: cuda=true (boolean) and cuda=<device_id> must keep working + Orts::onnx::session_key key("sample", "2"); + auto session_bool = std::make_shared( + key, model2_path.string(), R"({"cuda": true})"_json + ); + auto j_bool = session_bool->to_json(); + ASSERT_TRUE(j_bool["option"]["cuda"].is_object()); + ASSERT_EQ(j_bool["option"]["cuda"]["device_id"], 0); +} diff --git a/src/test/unit/unit_test_session.cpp b/src/test/unit/unit_test_session.cpp index 266ce74..69a39ff 100644 --- a/src/test/unit/unit_test_session.cpp +++ b/src/test/unit/unit_test_session.cpp @@ -4,6 +4,8 @@ #include "../../onnxruntime_server.hpp" #include "../test_common.hpp" +// input_shape / output_shape options must override the model's dynamic dimensions with the +// supplied static values, and any shape whose rank does not match the model's must be rejected. TEST(unit_test_session, SesionWithShapeOption) { Orts::onnx::session_key key("sample", "1"); const auto session1 = std::make_shared(key, model1_path.string()); @@ -45,6 +47,207 @@ TEST(unit_test_session, SesionWithShapeOption) { ); } +// Each key in the session_options group (threads, execution mode, graph optimization level, +// memory, logging, config_entries) must be applied to onnxruntime's SessionOptions and echoed +// back in option.session_options in a normalized form.
+TEST(unit_test_session, SessionWithSessionOptions) { + Orts::onnx::session_key key("sample", "1"); + auto session = std::make_shared( + key, model1_path.string(), + R"({ + "session_options": { + "intra_op_num_threads": 2, + "inter_op_num_threads": 1, + "execution_mode": "sequential", + "graph_optimization_level": "all", + "enable_cpu_mem_arena": false, + "enable_mem_pattern": true, + "logid": "test-session", + "log_severity_level": 3, + "config_entries": { + "session.disable_prepacking": "1" + } + } + })"_json + ); + auto j = session->to_json(); + ASSERT_TRUE(j["option"].contains("session_options")); + auto so = j["option"]["session_options"]; + ASSERT_EQ(so["intra_op_num_threads"], 2); + ASSERT_EQ(so["inter_op_num_threads"], 1); + ASSERT_EQ(so["execution_mode"], "sequential"); + ASSERT_EQ(so["graph_optimization_level"], "all"); + ASSERT_EQ(so["enable_cpu_mem_arena"], false); + ASSERT_EQ(so["enable_mem_pattern"], true); + ASSERT_EQ(so["logid"], "test-session"); + ASSERT_EQ(so["log_severity_level"], 3); + ASSERT_EQ(so["config_entries"]["session.disable_prepacking"], "1"); +} + +// Type-mismatched values (e.g. string for an int field), enum strings outside our mapping, and +// keys we do not pass to ORT at all must be silently dropped from the echo. Sibling entries that +// pass our shape check and our enum mapping are still applied and echoed. Note that ORT's own +// validity checks (e.g. allowed numeric ranges) are intentionally not duplicated here; we only +// validate JSON shape and our enum string -> ORT enum mapping. +TEST(unit_test_session, SessionOptionsIgnoresInvalidEntries) { + // Bad types or unknown keys under session_options are silently dropped; valid ones still apply. 
+ Orts::onnx::session_key key("sample", "1"); + auto session = std::make_shared( + key, model1_path.string(), + R"({ + "session_options": { + "intra_op_num_threads": "not-a-number", + "graph_optimization_level": "absurd-level", + "execution_mode": "weird", + "logid": "still-applies", + "totally_unknown_key": "ignore-me" + } + })"_json + ); + auto j = session->to_json(); + ASSERT_TRUE(j["option"].contains("session_options")); + auto so = j["option"]["session_options"]; + ASSERT_FALSE(so.contains("intra_op_num_threads")); + ASSERT_FALSE(so.contains("graph_optimization_level")); + ASSERT_FALSE(so.contains("execution_mode")); + ASSERT_FALSE(so.contains("totally_unknown_key")); + ASSERT_EQ(so["logid"], "still-applies"); +} + +// AddSessionConfigEntry round-trips through GetSessionConfigEntry. The echo therefore reflects +// what ORT actually stored, which proves the bool/int -> string conversion the server performs +// before forwarding to ORT (true -> "1", 42 -> "42") matches what ORT will return on lookup. +TEST(unit_test_session, SessionOptionsConfigEntriesReadback) { + Orts::onnx::session_key key("sample", "1"); + auto session = std::make_shared( + key, model1_path.string(), + R"({ + "session_options": { + "config_entries": { + "key.string": "hello", + "key.bool": true, + "key.int": 42 + } + } + })"_json + ); + auto j = session->to_json(); + auto ce = j["option"]["session_options"]["config_entries"]; + ASSERT_EQ(ce["key.string"], "hello"); + ASSERT_EQ(ce["key.bool"], "1"); + ASSERT_EQ(ce["key.int"], "42"); +} + +// free_dimension_overrides has no readback API; AddFreeDimensionOverrideByName accepts any name +// without raising, so the echo just confirms what we asked ORT to store. Whether a name actually +// matches a model dimension is decided later at session creation time and is ORT's concern, not +// ours. Non-integer values are dropped at our shape-check stage. 
+TEST(unit_test_session, SessionOptionsFreeDimensionOverrides) { + Orts::onnx::session_key key("sample", "1"); + auto session = std::make_shared( + key, model1_path.string(), + R"({ + "session_options": { + "free_dimension_overrides": { + "batch": 1, + "seq": 128, + "bad": "not-an-int" + } + } + })"_json + ); + auto j = session->to_json(); + ASSERT_TRUE(j["option"]["session_options"].contains("free_dimension_overrides")); + auto fd = j["option"]["session_options"]["free_dimension_overrides"]; + ASSERT_EQ(fd["batch"], 1); + ASSERT_EQ(fd["seq"], 128); + ASSERT_FALSE(fd.contains("bad")); +} + +// session::collect_extensions normalizes both the new "extensions" array and the legacy +// "ortextensions_path" string into a single ordered, deduplicated array of paths in the order +// they would be registered. Pure-function checks here cover what session construction would +// actually attempt to register, without needing a loadable shared library on disk. +TEST(unit_test_session, CollectExtensionsNormalization) { + using S = Orts::onnx::session; + + // Empty / missing input yields an empty array. + ASSERT_EQ(S::collect_extensions(json::object()), json::array()); + ASSERT_EQ(S::collect_extensions(R"({"extensions": []})"_json), json::array()); + + // Bare extensions array, single element. + auto only_array = S::collect_extensions(R"({"extensions": ["/lib1.so"]})"_json); + ASSERT_EQ(only_array, json::array({"/lib1.so"})); + + // Multiple entries preserve input order. + auto ordered = S::collect_extensions(R"({"extensions": ["/lib1.so", "/lib2.so", "/lib3.so"]})"_json); + ASSERT_EQ(ordered, json::array({"/lib1.so", "/lib2.so", "/lib3.so"})); + + // Duplicates within the extensions array are dropped, first occurrence wins. + auto deduped = S::collect_extensions(R"({"extensions": ["/lib.so", "/lib.so", "/lib.so"]})"_json); + ASSERT_EQ(deduped, json::array({"/lib.so"})); + + // Legacy ortextensions_path alone is normalized into the extensions array. 
+ auto only_legacy = S::collect_extensions(R"({"ortextensions_path": "/legacy.so"})"_json); + ASSERT_EQ(only_legacy, json::array({"/legacy.so"})); + + // Legacy is appended after the extensions array, with dedupe across both sources. + auto mixed = S::collect_extensions( + R"({"extensions": ["/a.so", "/b.so"], "ortextensions_path": "/a.so"})"_json + ); + ASSERT_EQ(mixed, json::array({"/a.so", "/b.so"})); + + // Legacy path that is not in the extensions array is appended at the end. + auto mixed_distinct = S::collect_extensions( + R"({"extensions": ["/a.so"], "ortextensions_path": "/b.so"})"_json + ); + ASSERT_EQ(mixed_distinct, json::array({"/a.so", "/b.so"})); + + // Non-string entries inside the extensions array are silently ignored. + auto with_garbage = S::collect_extensions( + R"({"extensions": ["/a.so", 42, null, {"x":1}, "/b.so"]})"_json + ); + ASSERT_EQ(with_garbage, json::array({"/a.so", "/b.so"})); + + // extensions field with a wrong type is treated as if absent. + ASSERT_EQ(S::collect_extensions(R"({"extensions": "/lib.so"})"_json), json::array()); + ASSERT_EQ(S::collect_extensions(R"({"extensions": 123})"_json), json::array()); + + // ortextensions_path with a wrong type is also ignored. + ASSERT_EQ(S::collect_extensions(R"({"ortextensions_path": 123})"_json), json::array()); + + // Non-object option input does not crash and yields an empty array. + ASSERT_EQ(S::collect_extensions(json("/raw-string")), json::array()); + ASSERT_EQ(S::collect_extensions(json(nullptr)), json::array()); +} + +// At session construction time, both the new "extensions" array and the legacy "ortextensions_path" +// string must reach the registration path: an unloadable library must surface as a clear +// runtime_error instead of being silently dropped. 
(The actual successful registration path +// requires a real onnxruntime_extensions shared library on disk and is not exercised here; +// the normalization that drives it is fully covered by the CollectExtensionsNormalization test +// above and by the parse-level tests below.) +TEST(unit_test_session, ExtensionsRegistrationFailsLoudly) { + Orts::onnx::session_key key("sample", "1"); + EXPECT_ANY_THROW( + auto session = std::make_shared( + key, model1_path.string(), + R"({"ortextensions_path": "/nonexistent/path/to/lib.so"})"_json + ); + ); + EXPECT_ANY_THROW( + auto session = std::make_shared( + key, model1_path.string(), + R"({"extensions": ["/nonexistent/path/to/lib.so"]})"_json + ); + ); +} + +// Cover the option string grammar end-to-end: empty/whitespace input, malformed model keys that +// must throw, well-formed lists with various spacing, the legacy scalar cuda shortcut, dotted +// notation producing nested objects, repeated "extensions" keys accumulating into a deduped +// array, the legacy ortextensions_path normalization, value type inference (bool/int/string), +// pass-through of unknown keys, and lenient skipping of malformed option entries. 
TEST(unit_test_session_key, Parse) { // empty cases std::string empty_cases[] = {"", " ", "\n", "\r\n", "\n \n", " \r \n \r \n "}; @@ -92,4 +295,78 @@ TEST(unit_test_session_key, Parse) { auto parse_case4 = Orts::onnx::session_key_with_option::parse("model:version(cuda=true)"); ASSERT_TRUE(parse_case4[0].option["cuda"]); + + // dotted notation produces nested objects + auto parse_dotted_cuda = Orts::onnx::session_key_with_option::parse("model:version(cuda.device_id=0)"); + ASSERT_TRUE(parse_dotted_cuda[0].option["cuda"].is_object()); + ASSERT_EQ(parse_dotted_cuda[0].option["cuda"]["device_id"], 0); + + auto parse_dotted_session = Orts::onnx::session_key_with_option::parse( + "model:version(session_options.intra_op_num_threads=4, session_options.graph_optimization_level=all)" + ); + ASSERT_TRUE(parse_dotted_session[0].option["session_options"].is_object()); + ASSERT_EQ(parse_dotted_session[0].option["session_options"]["intra_op_num_threads"], 4); + ASSERT_EQ(parse_dotted_session[0].option["session_options"]["graph_optimization_level"], "all"); + + // scalar followed by dotted on the same group: dotted wins (scalar discarded) + auto parse_scalar_then_dotted = Orts::onnx::session_key_with_option::parse( + "model:version(cuda=true, cuda.device_id=1)" + ); + ASSERT_TRUE(parse_scalar_then_dotted[0].option["cuda"].is_object()); + ASSERT_EQ(parse_scalar_then_dotted[0].option["cuda"]["device_id"], 1); + + // extensions key accumulates as an array + auto parse_extensions = Orts::onnx::session_key_with_option::parse( + "model:version(extensions=/lib1.so, extensions=/lib2.so)" + ); + ASSERT_TRUE(parse_extensions[0].option["extensions"].is_array()); + ASSERT_EQ(parse_extensions[0].option["extensions"].size(), 2); + ASSERT_EQ(parse_extensions[0].option["extensions"][0], "/lib1.so"); + ASSERT_EQ(parse_extensions[0].option["extensions"][1], "/lib2.so"); + + // extensions dedupe + auto parse_extensions_dedup = Orts::onnx::session_key_with_option::parse( + 
"model:version(extensions=/lib.so, extensions=/lib.so)" + ); + ASSERT_EQ(parse_extensions_dedup[0].option["extensions"].size(), 1); + + // legacy ortextensions_path is normalized into the extensions array + auto parse_legacy_ext = Orts::onnx::session_key_with_option::parse( + "model:version(ortextensions_path=/usr/local/lib/libortextensions.so)" + ); + ASSERT_FALSE(parse_legacy_ext[0].option.contains("ortextensions_path")); + ASSERT_TRUE(parse_legacy_ext[0].option["extensions"].is_array()); + ASSERT_EQ(parse_legacy_ext[0].option["extensions"].size(), 1); + ASSERT_EQ(parse_legacy_ext[0].option["extensions"][0], "/usr/local/lib/libortextensions.so"); + + // extensions + legacy ortextensions_path mixed, with dedupe + auto parse_mixed_ext = Orts::onnx::session_key_with_option::parse( + "model:version(extensions=/a.so, ortextensions_path=/a.so, extensions=/b.so)" + ); + ASSERT_EQ(parse_mixed_ext[0].option["extensions"].size(), 2); + + // value type inference: bool, int, string + auto parse_types = Orts::onnx::session_key_with_option::parse( + "model:version(session_options.enable_cpu_mem_arena=false, " + "session_options.intra_op_num_threads=8, " + "session_options.logid=my-model)" + ); + ASSERT_EQ(parse_types[0].option["session_options"]["enable_cpu_mem_arena"], false); + ASSERT_EQ(parse_types[0].option["session_options"]["intra_op_num_threads"], 8); + ASSERT_EQ(parse_types[0].option["session_options"]["logid"], "my-model"); + + // unknown / unrecognized option keys pass through silently (caller decides what to do) + auto parse_unknown = Orts::onnx::session_key_with_option::parse( + "model:version(some_unknown_key=hello, another.deep.key=42)" + ); + ASSERT_EQ(parse_unknown[0].option["some_unknown_key"], "hello"); + ASSERT_EQ(parse_unknown[0].option["another"]["deep"]["key"], 42); + + // malformed option entries inside parens are silently skipped, well-formed ones still apply + auto parse_malformed_options = Orts::onnx::session_key_with_option::parse( + 
"model:version(=garbage, cuda=1, !!!, session_options.intra_op_num_threads=2)" + ); + ASSERT_EQ(parse_malformed_options.size(), 1); + ASSERT_EQ(parse_malformed_options[0].option["cuda"], 1); + ASSERT_EQ(parse_malformed_options[0].option["session_options"]["intra_op_num_threads"], 2); }