diff --git a/.gitignore b/.gitignore
index 2379f36..8efb67f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,8 @@
 .idea/
 .claude/
 cmake-build-debug*/
+.claude/
+CLAUDE.md
 
 *.onnx
 
diff --git a/README.md b/README.md
index b02a1f8..c3a0b22 100644
--- a/README.md
+++ b/README.md
@@ -150,7 +150,7 @@ sudo cmake --install build --prefix /usr/local/onnxruntime-server
 | `--workers`               | `ONNX_SERVER_WORKERS`               | Worker thread pool size.<br/>Default: `4`                                                                                                                                                                                                                                                                                                       |
 | `--request-payload-limit` | `ONNX_SERVER_REQUEST_PAYLOAD_LIMIT` | HTTP/HTTPS request payload size limit.<br />Default: 1024 * 1024 * 10(10MB)`                                                                                                                                                                                                                                                                    |
 | `--model-dir`             | `ONNX_SERVER_MODEL_DIR`             | Model directory path<br/>The onnx model files must be located in the following path:<br/>`${model_dir}/${model_name}/${model_version}/model.onnx` or<br/>`${model_dir}/${model_name}/${model_version}.onnx`<br/>Default: `models`                                                                                                               |
-| `--prepare-model`         | `ONNX_SERVER_PREPARE_MODEL`         | Pre-create some model sessions at server startup.<br/><br/>Format as a space-separated list of `model_name:model_version` or `model_name:model_version(session_options, ...)`.<br/><br/>Available session_options are<br/>- cuda=device_id`[ or true or false]`<br/><br/>eg) `model1:v1 model2:v9`<br/>`model1:v1(cuda=true) model2:v9(cuda=1)` |
+| `--prepare-model`         | `ONNX_SERVER_PREPARE_MODEL`         | Pre-create some model sessions at server startup.<br/><br/>Format as a space-separated list of `model_name:model_version` or `model_name:model_version(opt1=val1, opt2=val2, ...)`. Option keys may use dotted notation to address nested groups (e.g. `cuda.device_id`, `session_options.intra_op_num_threads`). Repeating the `extensions` key accumulates a deduplicated array. Option entries that do not match the grammar are skipped silently rather than failing the whole list.<br/><br/>Examples:<br/>- `model1:v1 model2:v9`<br/>- `model1:v1(cuda=true) model2:v9(cuda=1)`<br/>- `bert:v1(cuda.device_id=0, cuda.gpu_mem_limit=2147483648)`<br/>- `bert:v1(session_options.intra_op_num_threads=4, session_options.graph_optimization_level=all)`<br/>- `bert:v1(extensions=/usr/local/lib/libortextensions.so)` |
 
 ### Backend options
 
@@ -223,8 +223,9 @@ docker run --name onnxruntime_server_container -d --rm --gpus all \
 
 ## ONNXRuntime Extensions Support
 
-To use the [onnxruntime-extensions](https://github.com/microsoft/onnxruntime-extensions)(Custom Ops Library), set the
-options as follows when creating a session.
+To use the [onnxruntime-extensions](https://github.com/microsoft/onnxruntime-extensions) (Custom Ops Library), supply
+one or more library paths through the `extensions` array. The server registers each path with ORT in order and
+deduplicates entries.
 
 ```json
 {
@@ -232,11 +233,82 @@ options as follows when creating a session.
   "version": "string",
   "option": {
     "cuda": ...,
-    "ortextensions_path": "/absolute/path/to/libonnxruntime_extensions.so"
+    "extensions": [
+      "/absolute/path/to/libonnxruntime_extensions.so"
+    ]
   }
 }
 ```
 
+The legacy `ortextensions_path` (single string) is still accepted for backward compatibility; it is normalized into the
+`extensions` array on the server side and the response always echoes the normalized form.
+
+## Session-level options
+
+The optional `session_options` object on a session-create request forwards the listed keys to the underlying
+onnxruntime `SessionOptions`. Only the JSON shape (types and our enum-string mapping) is validated on the server side;
+the actual value validation is delegated to ORT, and the response echoes only the values ORT accepted.
+
+```json
+{
+  "model": "string",
+  "version": "string",
+  "option": {
+    "session_options": {
+      "intra_op_num_threads": 4,
+      "inter_op_num_threads": 1,
+      "execution_mode": "sequential",
+      "graph_optimization_level": "all",
+      "enable_cpu_mem_arena": true,
+      "enable_mem_pattern": true,
+      "log_severity_level": 2,
+      "logid": "my-model",
+      "enable_profiling": false,
+      "profile_file_prefix": "/var/log/onnx/profile-",
+      "optimized_model_filepath": "/cache/optimized.onnx",
+      "free_dimension_overrides": { "batch": 1 },
+      "config_entries": {
+        "session.disable_prepacking": "1"
+      }
+    }
+  }
+}
+```
+
+`config_entries` is round-tripped through `GetSessionConfigEntry`, so the response shows what ORT actually stored
+(string values; `true`/`42` become `"1"`/`"42"`).
+
+## CUDA execution provider options
+
+When CUDA is enabled, the `cuda` field accepts either a boolean / integer (legacy shorthand) or an object that maps to
+[CUDA Execution Provider options](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html). The
+server forwards the object to ORT via `UpdateCUDAProviderOptions` in a single batched call (per-key calls trigger a
+sibling-reset quirk in ORT V2). If any key is rejected by ORT, session creation fails with the ORT error message
+identifying the offending key. The response is built from `GetCUDAProviderOptionsAsString` readback, so it reflects
+exactly what ORT stored.
+
+```json
+{
+  "model": "string",
+  "version": "string",
+  "option": {
+    "cuda": {
+      "device_id": 0,
+      "gpu_mem_limit": 2147483648,
+      "arena_extend_strategy": "kNextPowerOfTwo",
+      "cudnn_conv_algo_search": "EXHAUSTIVE",
+      "cudnn_conv_use_max_workspace": true,
+      "do_copy_in_default_stream": true,
+      "enable_cuda_graph": false
+    }
+  }
+}
+```
+
+Backward-compatible shortcuts:
+- `"cuda": true`  — enable CUDA with all defaults (`device_id=0`).
+- `"cuda": 1`     — enable CUDA on `device_id=1`.
+
 For more details on the session creation request, please refer to
 the [API documentation](https://kibae.github.io/onnxruntime-server/swagger/#/ONNX%20Runtime%20Session/createSession).
 
diff --git a/docs/swagger/openapi.yaml b/docs/swagger/openapi.yaml
index 686e2f4..04ed05c 100644
--- a/docs/swagger/openapi.yaml
+++ b/docs/swagger/openapi.yaml
@@ -269,6 +269,11 @@ components:
           $ref: '#/components/schemas/ONNXSessionOption'
     ONNXSessionOption:
       type: object
+      description: |
+        Normalized echo of the options applied to the session. The server only includes
+        keys whose corresponding ORT calls succeeded; values reflect what ORT actually
+        stored (read back via GetCUDAProviderOptionsAsString and GetSessionConfigEntry
+        where applicable).
       nullable: true
       properties:
         cuda:
@@ -276,8 +281,18 @@ components:
           required: false
           oneOf:
             - type: boolean
-              description: Use CUDA
+              description: CUDA disabled (false) — present for backward compatibility.
             - $ref: '#/components/schemas/ONNXSessionOptionCUDA'
+        extensions:
+          type: array
+          description: Registered onnxruntime-extensions library paths in registration order, deduplicated.
+          required: false
+          items:
+            type: string
+          example:
+            - /absolute/path/to/libonnxruntime_extensions.so
+        session_options:
+          $ref: '#/components/schemas/ONNXSessionOptionsGroup'
     ONNXSessionOptionRequest:
       type: object
       nullable: true
@@ -287,11 +302,13 @@ components:
           required: false
           oneOf:
             - type: boolean
-              description: Use CUDA
+              description: Enable CUDA with all defaults (device_id=0).
+            - type: integer
+              description: Enable CUDA on the given device_id.
             - $ref: '#/components/schemas/ONNXSessionOptionCUDA'
         input_shape:
           type: object
-          description: Input shape
+          description: Input shape overrides keyed by input name.
           nullable: false
           required: false
           example: {
@@ -301,25 +318,157 @@ components:
           }
         output_shape:
           type: object
-          description: Output shape
+          description: Output shape overrides keyed by output name.
           nullable: false
           required: false
           example: {
             "output": [ 1, 1 ]
           }
+        extensions:
+          type: array
+          description: |
+            One or more absolute paths to onnxruntime-extensions custom-ops libraries.
+            Each path is registered with ORT in array order; duplicate paths are deduplicated.
+          nullable: false
+          required: false
+          items:
+            type: string
+          example:
+            - /absolute/path/to/libonnxruntime_extensions.so
         ortextensions_path:
           type: string
-          description: To use the ONNXRuntime Extension (Custom Ops Library), you must provide the library path.
+          description: |
+            Deprecated alias for `extensions`. A single library path. The server normalizes
+            it into the `extensions` array on input and the response always echoes the
+            normalized form.
+          deprecated: true
           nullable: false
           required: false
-          example: /absolute/path/to/libonnxruntime_extensions
+          example: /absolute/path/to/libonnxruntime_extensions.so
+        session_options:
+          $ref: '#/components/schemas/ONNXSessionOptionsGroup'
     ONNXSessionOptionCUDA:
       type: object
+      description: |
+        CUDA Execution Provider V2 options. The server forwards every supplied key to
+        UpdateCUDAProviderOptions in a single batched call; if ORT rejects any key the
+        whole session creation fails with the ORT error message. The response is built
+        from GetCUDAProviderOptionsAsString readback, so it shows exactly what ORT
+        stored (which may differ from the requested value if ORT normalized it).
       properties:
         device_id:
           type: integer
           description: CUDA device ID
           nullable: false
+        gpu_mem_limit:
+          type: integer
+          description: Per-session GPU memory limit, in bytes.
+          nullable: false
+        arena_extend_strategy:
+          type: string
+          description: Arena extension strategy, e.g. "kNextPowerOfTwo" or "kSameAsRequested".
+          nullable: false
+        cudnn_conv_algo_search:
+          type: string
+          description: cuDNN convolution algorithm search policy. Accepted values are ORT-defined enum names.
+          nullable: false
+        cudnn_conv_use_max_workspace:
+          type: boolean
+          nullable: false
+        do_copy_in_default_stream:
+          type: boolean
+          nullable: false
+        enable_cuda_graph:
+          type: boolean
+          description: Capture and replay a CUDA graph (requires static input shapes).
+          nullable: false
+        tunable_op_enable:
+          type: boolean
+          nullable: false
+        tunable_op_tuning_enable:
+          type: boolean
+          nullable: false
+        cudnn_conv1d_pad_to_nc1d:
+          type: boolean
+          nullable: false
+      additionalProperties:
+        description: |
+          Any additional CUDA Execution Provider V2 key understood by your ORT build is
+          forwarded as-is. Refer to the ORT CUDA EP documentation for the full list of
+          accepted keys.
+    ONNXSessionOptionsGroup:
+      type: object
+      description: |
+        Session-level options forwarded to onnxruntime SessionOptions. The server only
+        validates JSON shape (types and our enum-string mapping); ORT decides whether the
+        value itself is acceptable. Keys whose ORT setter throws are silently dropped from
+        the echoed response. The `config_entries` object is round-tripped through
+        GetSessionConfigEntry so the echo shows what ORT actually stored (always strings).
+      nullable: false
+      required: false
+      properties:
+        intra_op_num_threads:
+          type: integer
+          description: Number of threads used for parallelizing operators. 0 means ORT default.
+          nullable: false
+        inter_op_num_threads:
+          type: integer
+          description: Number of threads used for parallelizing the graph. 0 means ORT default.
+          nullable: false
+        execution_mode:
+          type: string
+          enum: [sequential, parallel]
+          nullable: false
+        graph_optimization_level:
+          type: string
+          enum: [disable, basic, extended, all]
+          nullable: false
+        enable_cpu_mem_arena:
+          type: boolean
+          nullable: false
+        enable_mem_pattern:
+          type: boolean
+          nullable: false
+        log_severity_level:
+          type: integer
+          description: ORT log severity level (0=verbose ... 4=fatal).
+          nullable: false
+        logid:
+          type: string
+          nullable: false
+        enable_profiling:
+          type: boolean
+          description: Enable profiling. When true, profile_file_prefix should also be supplied; if it is omitted the server passes an empty prefix to ORT.
+          nullable: false
+        profile_file_prefix:
+          type: string
+          nullable: false
+        optimized_model_filepath:
+          type: string
+          description: Filepath where ORT writes the optimized model after graph transformations.
+          nullable: false
+        free_dimension_overrides:
+          type: object
+          description: Map of free dimension name to a fixed integer size.
+          additionalProperties:
+            type: integer
+          nullable: false
+          example:
+            batch: 1
+        config_entries:
+          type: object
+          description: |
+            Generic passthrough to AddSessionConfigEntry (e.g. "session.disable_prepacking").
+            Booleans and integers are stringified before being passed to ORT; values in the
+            response are always strings (round-tripped through GetSessionConfigEntry).
+          additionalProperties:
+            oneOf:
+              - type: string
+              - type: boolean
+              - type: integer
+          nullable: false
+          example:
+            session.disable_prepacking: "1"
     ONNXSessionCreateRequest:
       type: object
       properties:
diff --git a/src/onnx/cuda/session_options.cpp b/src/onnx/cuda/session_options.cpp
index 5cb0603..ef371f0 100644
--- a/src/onnx/cuda/session_options.cpp
+++ b/src/onnx/cuda/session_options.cpp
@@ -3,18 +3,185 @@
 //
 #include "session_options.hpp"
 
+#include <set>
+
+namespace {
+
+std::string to_provider_string(const json &v) { // JSON value -> string form handed to the CUDA provider options API
+	if (v.is_boolean())
+		return v.get<bool>() ? "1" : "0";
+	if (v.is_number_unsigned()) // must come first: is_number_integer() is also true for unsigned values
+		return std::to_string(v.get<uint64_t>());
+	if (v.is_number_integer())
+		return std::to_string(v.get<int64_t>());
+	if (v.is_string())
+		return v.get<std::string>();
+	return v.dump(); // anything else (float, null, array, object) is passed as its serialized JSON text
+}
+
+// Apply all caller-supplied CUDA provider options in a single UpdateCUDAProviderOptions call.
+//
+// Why a single call: ORT V2's UpdateCUDAProviderOptions silently resets sibling keys that share
+// an internal options group (e.g. updating arena_extend_strategy alone reverts gpu_mem_limit to
+// its default). Calling it once with the full key/value set is the only way to apply multiple
+// keys safely. The trade-off is that any single invalid key aborts the whole batch; that is
+// acceptable here because ORT's error message identifies the offending key, so the caller can
+// see exactly what was rejected.
+void update_all(OrtCUDAProviderOptionsV2 *cuda_options, const std::vector<std::string> &keys,
+				const std::vector<std::string> &values) {
+	if (keys.empty()) // nothing supplied: cuda_options is left untouched
+		return;
+	std::vector<const char *> ck; // C-string views into keys; only valid while keys is alive
+	std::vector<const char *> cv; // C-string views into values, index-aligned with ck
+	ck.reserve(keys.size());
+	cv.reserve(values.size());
+	for (size_t i = 0; i < keys.size(); ++i) { // NOTE(review): indexes values[i], so values must be at least as long as keys
+		ck.push_back(keys[i].c_str());
+		cv.push_back(values[i].c_str());
+	}
+	OrtStatus *st = Ort::GetApi().UpdateCUDAProviderOptions(cuda_options, ck.data(), cv.data(), ck.size());
+	if (st != nullptr) { // a non-null status is treated as failure
+		const char *err = Ort::GetApi().GetErrorMessage(st);
+		std::string msg = err ? err : "unknown error";
+		Ort::GetApi().ReleaseStatus(st); // msg already holds a copy of the text, so the status can be released
+		throw onnxruntime_server::runtime_error(std::string("Failed to update CUDA provider options: ") + msg);
+	}
+}
+
+// Convert the readback value (always a string from ORT) back to the most natural JSON type so
+// the response shape matches what callers typically send: integers as integers, "true"/"false"
+// as booleans, anything else as a string.
+json infer_readback_value(const std::string &raw) {
+	if (raw == "true")
+		return true;
+	if (raw == "false")
+		return false;
+	if (!raw.empty()) {
+		bool numeric = (raw[0] == '-' || (raw[0] >= '0' && raw[0] <= '9')); // first char: minus sign or decimal digit
+		if (numeric) {
+			for (size_t i = 1; i < raw.size(); ++i) { // every remaining char must be a decimal digit
+				if (raw[i] < '0' || raw[i] > '9') {
+					numeric = false;
+					break;
+				}
+			}
+			if (numeric) {
+				try {
+					return json(std::stoll(raw)); // a lone "-" or an out-of-range value throws and is kept as a string
+				} catch (...) {
+					// fall through to string
+				}
+			}
+		}
+	}
+	return raw; // empty, non-numeric or unparseable input is returned as a JSON string
+}
+
+// Parse "key1=value1;key2=value2" produced by GetCUDAProviderOptionsAsString.
+json parse_options_string(const std::string &s) {
+	json out = json::object();
+	size_t pos = 0; // start of the current "key=value" segment
+	while (pos < s.size()) {
+		auto eq = s.find('=', pos);
+		if (eq == std::string::npos) // no further '=': stop and return what was parsed so far
+			break;
+		auto sc = s.find(';', eq);
+		if (sc == std::string::npos) // final segment without a trailing ';'
+			sc = s.size();
+		auto k = s.substr(pos, eq - pos);
+		auto v = s.substr(eq + 1, sc - eq - 1);
+		if (!k.empty()) // segments with an empty key are dropped
+			out[k] = infer_readback_value(v);
+		pos = sc + 1;
+	}
+	return out;
+}
+
+} // namespace
+
+// Apply CUDA provider options.
+//
+// Every caller-supplied entry is stringified and forwarded to ORT in one batched update_all() call
+// (not one key at a time); if ORT rejects any key, session creation fails. The echoed object is built
+// from GetCUDAProviderOptionsAsString readback (the ground truth of what ORT stored), filtered to the
+// keys the caller actually supplied (plus device_id, which is always meaningful).
 json append_cuda_session_options(OrtSessionOptions *session_options, const json &option) {
 	auto cuda = option["cuda"];
 
-	json result = json::object();
+	OrtCUDAProviderOptionsV2 *cuda_options = nullptr; // released manually on every exit path below
+	Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&cuda_options));
+
+	// Track which keys the caller asked about — these are the keys we will echo from readback.
+	// device_id is always included for backward compatibility with the previous response shape.
+	std::set<std::string> requested_keys; // NOTE(review): a throw between here and the try below would skip ReleaseCUDAProviderOptions
+	requested_keys.insert("device_id");
 
-	// device_id
-	int device_id = 0;
-	if (cuda.is_object() && cuda.contains("device_id"))
-		device_id = cuda["device_id"].get<int>();
-	result["device_id"] = device_id;
+	std::vector<std::string> keys; // option names, index-aligned with values
+	std::vector<std::string> values; // stringified option values
+	if (cuda.is_object()) {
+		for (auto it = cuda.begin(); it != cuda.end(); ++it) { // object form: forward every entry as-is
+			keys.push_back(it.key());
+			values.push_back(to_provider_string(it.value()));
+			requested_keys.insert(it.key());
+		}
+	} else if (cuda.is_number_integer()) { // integer shorthand: the number is the device_id
+		keys.push_back("device_id");
+		values.push_back(std::to_string(cuda.get<int>()));
+	}
+	// cuda == true or false: nothing to update; default V2 options are used.
+
+	try {
+		update_all(cuda_options, keys, values);
+	} catch (...) {
+		Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
+		throw;
+	}
+
+	OrtStatus *append_status =
+		Ort::GetApi().SessionOptionsAppendExecutionProvider_CUDA_V2(session_options, cuda_options);
+	if (append_status != nullptr) { // a non-null status is treated as failure
+		const char *err = Ort::GetApi().GetErrorMessage(append_status);
+		std::string msg = err ? err : "unknown error";
+		Ort::GetApi().ReleaseStatus(append_status);
+		Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
+		throw onnxruntime_server::runtime_error(std::string("Failed to append CUDA EP: ") + msg);
+	}
+
+	// Readback the full options string and echo only the keys the caller cared about.
+	// The whole readback section is wrapped in a try/catch so that an exception in any of the
+	// allocations (std::bad_alloc, json construction) cannot leak the ORT allocator buffer or the
+	// cuda_options handle.
+	json result = json::object(); // stays empty when the allocator or the readback call reports an error
+	try {
+		OrtAllocator *allocator = nullptr;
+		OrtStatus *alloc_st = Ort::GetApi().GetAllocatorWithDefaultOptions(&allocator);
+		if (alloc_st != nullptr) {
+			Ort::GetApi().ReleaseStatus(alloc_st); // readback is best-effort: the error is dropped, not thrown
+		} else {
+			char *cstr = nullptr; // filled by the readback call; freed through allocator below
+			OrtStatus *st = Ort::GetApi().GetCUDAProviderOptionsAsString(cuda_options, allocator, &cstr);
+			if (st != nullptr) {
+				Ort::GetApi().ReleaseStatus(st); // best-effort as above
+			} else if (cstr != nullptr) {
+				try {
+					auto all = parse_options_string(std::string(cstr));
+					for (auto it = all.begin(); it != all.end(); ++it) {
+						if (requested_keys.count(it.key())) // echo only requested keys (device_id is always requested)
+							result[it.key()] = it.value();
+					}
+				} catch (...) {
+					allocator->Free(allocator, cstr);
+					throw;
+				}
+				allocator->Free(allocator, cstr);
+			}
+		}
+	} catch (...) {
+		Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options);
+		throw;
+	}
 
-	Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, device_id));
+	Ort::GetApi().ReleaseCUDAProviderOptions(cuda_options); // normal path: release after the readback is done
 
 	return result;
 }
diff --git a/src/onnx/session.cpp b/src/onnx/session.cpp
index 66a8ac5..4c9cc29 100644
--- a/src/onnx/session.cpp
+++ b/src/onnx/session.cpp
@@ -10,30 +10,282 @@
 #include "cuda/session_options.hpp"
 #endif
 
-Orts::onnx::session::session(session_key key, const json &option)
-	: session_options(), created_at(std::chrono::system_clock::now()), allocator(), key(std::move(key)) {
-	_option["cuda"] = false;
+namespace {
+
+#ifdef _WIN32
+std::wstring to_wide(const std::string &s) {
+	int size_needed = MultiByteToWideChar(CP_ACP, 0, s.c_str(), -1, NULL, 0); // sizing call: no output buffer is passed
+	std::wstring wstr(size_needed, 0);
+	MultiByteToWideChar(CP_ACP, 0, s.c_str(), -1, &wstr[0], size_needed); // conversion call into the sized buffer
+	return wstr; // NOTE(review): CP_ACP is used for the conversion; confirm callers never pass non-ASCII UTF-8 paths
+}
+#endif
+
+void register_extension(Ort::SessionOptions &session_options, const std::string &path) { // registers one custom-ops library; throws on failure
+#ifdef _WIN32
+	auto wpath = to_wide(path); // kept in a named local so p stays valid for the call below
+	auto p = wpath.c_str();
+#else
+	auto p = path.c_str();
+#endif
+	OrtStatus *status = Ort::GetApi().RegisterCustomOpsLibrary_V2(session_options, p);
+	if (status != nullptr) { // a non-null status is treated as failure
+		const char *err = Ort::GetApi().GetErrorMessage(status);
+		std::string msg = err ? err : "unknown error";
+		Ort::GetApi().ReleaseStatus(status); // msg already holds a copy of the text
+		throw onnxruntime_server::runtime_error(
+			std::string("Failed to register ORT extensions (") + path + "): " + msg
+		);
+	}
+}
+
+GraphOptimizationLevel parse_graph_opt_level(const std::string &v, bool &valid) { // maps an option string to the ORT enum; valid reports whether v matched
+	valid = true;
+	if (v == "disable" || v == "disabled" || v == "off") // three accepted spellings for the same level
+		return ORT_DISABLE_ALL;
+	if (v == "basic")
+		return ORT_ENABLE_BASIC;
+	if (v == "extended")
+		return ORT_ENABLE_EXTENDED;
+	if (v == "all")
+		return ORT_ENABLE_ALL;
+	valid = false; // unknown string: callers must check valid before using the result
+	return ORT_ENABLE_ALL; // placeholder return for the unmatched case
+}
+
+ExecutionMode parse_execution_mode(const std::string &v, bool &valid) { // maps "parallel"/"sequential" to the ORT enum; valid reports whether v matched
+	valid = true;
+	if (v == "parallel")
+		return ORT_PARALLEL;
+	if (v == "sequential")
+		return ORT_SEQUENTIAL;
+	valid = false; // unknown string: callers must check valid before using the result
+	return ORT_SEQUENTIAL; // placeholder return for the unmatched case
+}
+
+// Apply session-level options.
+//
+// Validation policy: we only check JSON shape (types, our enum string mapping). We do NOT
+// re-implement ORT's own value validation (allowed ranges, defaults, etc.) — that knowledge
+// belongs to ORT and would force us to track every ORT version's rules. Instead, every shape-
+// valid value is forwarded to ORT, and the setter's outcome decides the echo:
+//   - setter succeeds  -> echo the value (or the readback value where an API exists)
+//   - setter throws    -> skip silently, do not echo (the option was rejected by ORT)
+// Where ORT exposes a readback (currently config_entries via GetSessionConfigEntry), the echo
+// uses the readback value so it reflects what ORT actually stored, not what we sent.
+template <typename F>
+bool try_apply(F &&f) { // runs f(); returns true if it completed, false if it threw a std::exception-derived error
+	try {
+		f();
+		return true;
+	} catch (const Ort::Exception &) {
+		return false;
+	} catch (const std::exception &) {
+		return false;
+	} // exceptions not derived from either type propagate to the caller
+}
+
+json apply_session_options(Ort::SessionOptions &session_options, const json &input) { // returns the subset of input that was applied
+	json applied = json::object();
+	if (!input.is_object()) // non-object input: nothing applied, empty echo
+		return applied;
+
+	if (input.contains("intra_op_num_threads") && input["intra_op_num_threads"].is_number_integer()) {
+		auto v = input["intra_op_num_threads"].get<int>();
+		if (try_apply([&] { session_options.SetIntraOpNumThreads(v); }))
+			applied["intra_op_num_threads"] = v;
+	}
+
+	if (input.contains("inter_op_num_threads") && input["inter_op_num_threads"].is_number_integer()) {
+		auto v = input["inter_op_num_threads"].get<int>();
+		if (try_apply([&] { session_options.SetInterOpNumThreads(v); }))
+			applied["inter_op_num_threads"] = v;
+	}
+
+	if (input.contains("execution_mode") && input["execution_mode"].is_string()) {
+		bool valid = false;
+		auto s = input["execution_mode"].get<std::string>();
+		auto mode = parse_execution_mode(s, valid);
+		if (valid && try_apply([&] { session_options.SetExecutionMode(mode); }))
+			applied["execution_mode"] = (mode == ORT_PARALLEL) ? "parallel" : "sequential";
+	}
+
+	if (input.contains("graph_optimization_level") && input["graph_optimization_level"].is_string()) {
+		bool valid = false;
+		auto s = input["graph_optimization_level"].get<std::string>();
+		auto lvl = parse_graph_opt_level(s, valid);
+		if (valid && try_apply([&] { session_options.SetGraphOptimizationLevel(lvl); }))
+			applied["graph_optimization_level"] = (lvl == ORT_DISABLE_ALL) ? std::string("disable") : s; // echo the canonical name, not the "disabled"/"off" aliases
+	}
+
+	if (input.contains("enable_cpu_mem_arena") && input["enable_cpu_mem_arena"].is_boolean()) {
+		auto v = input["enable_cpu_mem_arena"].get<bool>();
+		if (try_apply([&] {
+				if (v)
+					session_options.EnableCpuMemArena();
+				else
+					session_options.DisableCpuMemArena();
+			}))
+			applied["enable_cpu_mem_arena"] = v;
+	}
 
-	if (option.contains("ortextensions_path") && option["ortextensions_path"].is_string()) {
-		auto ext_path_str = option["ortextensions_path"].get<std::string>();
+	if (input.contains("enable_mem_pattern") && input["enable_mem_pattern"].is_boolean()) {
+		auto v = input["enable_mem_pattern"].get<bool>();
+		if (try_apply([&] {
+				if (v)
+					session_options.EnableMemPattern();
+				else
+					session_options.DisableMemPattern();
+			}))
+			applied["enable_mem_pattern"] = v;
+	}
+
+	if (input.contains("log_severity_level") && input["log_severity_level"].is_number_integer()) {
+		auto v = input["log_severity_level"].get<int>();
+		if (try_apply([&] { session_options.SetLogSeverityLevel(v); }))
+			applied["log_severity_level"] = v;
+	}
+
+	if (input.contains("logid") && input["logid"].is_string()) {
+		auto v = input["logid"].get<std::string>();
+		if (try_apply([&] { session_options.SetLogId(v.c_str()); }))
+			applied["logid"] = v;
+	}
+
+	if (input.contains("enable_profiling") && input["enable_profiling"].is_boolean() &&
+		input["enable_profiling"].get<bool>()) { // enable_profiling=false is a no-op and is not echoed
+		std::string prefix; // stays empty when profile_file_prefix is absent or not a string
+		if (input.contains("profile_file_prefix") && input["profile_file_prefix"].is_string())
+			prefix = input["profile_file_prefix"].get<std::string>();
+		bool ok = try_apply([&] {
 #ifdef _WIN32
-		int size_needed = MultiByteToWideChar(CP_ACP, 0, ext_path_str.c_str(), -1, NULL, 0);
-		std::wstring wstr(size_needed, 0);
-		MultiByteToWideChar(CP_ACP, 0, ext_path_str.c_str(), -1, &wstr[0], size_needed);
+			auto wprefix = to_wide(prefix);
+			session_options.EnableProfiling(wprefix.c_str());
+#else
+			session_options.EnableProfiling(prefix.c_str());
+#endif
+		});
+		if (ok) {
+			applied["enable_profiling"] = true;
+			applied["profile_file_prefix"] = prefix;
+		}
+	}
 
-		auto ext_path = wstr.c_str();
+	if (input.contains("optimized_model_filepath") && input["optimized_model_filepath"].is_string()) {
+		auto s = input["optimized_model_filepath"].get<std::string>();
+		bool ok = try_apply([&] {
+#ifdef _WIN32
+			auto ws = to_wide(s);
+			session_options.SetOptimizedModelFilePath(ws.c_str());
 #else
-		auto ext_path = ext_path_str.c_str();
+			session_options.SetOptimizedModelFilePath(s.c_str());
 #endif
-		OrtStatus *status = Ort::GetApi().RegisterCustomOpsLibrary_V2(session_options, ext_path);
-		if (status != nullptr) {
-			const char *err = Ort::GetApi().GetErrorMessage(status);
-			std::string msg = err ? err : "unknown error";
-			Ort::GetApi().ReleaseStatus(status);
-			throw runtime_error(std::string("Failed to register ORT extensions: ") + msg);
+		});
+		if (ok)
+			applied["optimized_model_filepath"] = s;
+	}
+
+	if (input.contains("free_dimension_overrides") && input["free_dimension_overrides"].is_object()) {
+		json normalized = json::object(); // only the overrides that were applied
+		for (auto it = input["free_dimension_overrides"].begin();
+			 it != input["free_dimension_overrides"].end(); ++it) {
+			if (!it.value().is_number_integer()) // non-integer sizes are skipped
+				continue;
+			auto dim = it.value().get<int64_t>();
+			auto name = it.key();
+			if (try_apply([&] { session_options.AddFreeDimensionOverrideByName(name.c_str(), dim); }))
+				normalized[name] = dim;
 		}
+		if (!normalized.empty())
+			applied["free_dimension_overrides"] = normalized;
+	}
+
+	// config_entries: AddSessionConfigEntry accepts any string key, so we readback each entry
+	// via GetSessionConfigEntry to ensure the echo reflects what ORT actually stored.
+	if (input.contains("config_entries") && input["config_entries"].is_object()) {
+		json normalized = json::object(); // key -> value as read back from ORT
+		for (auto it = input["config_entries"].begin(); it != input["config_entries"].end(); ++it) {
+			std::string sv; // string form of the value that is sent to ORT
+			if (it.value().is_string())
+				sv = it.value().get<std::string>();
+			else if (it.value().is_boolean())
+				sv = it.value().get<bool>() ? "1" : "0";
+			else if (it.value().is_number_integer())
+				sv = std::to_string(it.value().get<int64_t>());
+			else
+				continue; // floats, null, arrays and objects are not forwarded
+			auto key = it.key();
+			if (!try_apply([&] { session_options.AddConfigEntry(key.c_str(), sv.c_str()); }))
+				continue;
+
+			size_t needed = 0; // first call passes no buffer and only asks for the required size
+			OrtStatus *st = Ort::GetApi().GetSessionConfigEntry(
+				session_options, key.c_str(), nullptr, &needed
+			);
+			if (st != nullptr) {
+				Ort::GetApi().ReleaseStatus(st);
+				continue;
+			}
+			std::string out(needed, '\0');
+			st = Ort::GetApi().GetSessionConfigEntry(
+				session_options, key.c_str(), out.data(), &needed
+			);
+			if (st != nullptr) {
+				Ort::GetApi().ReleaseStatus(st);
+				continue;
+			}
+			if (!out.empty() && out.back() == '\0') // drop a trailing NUL left in the buffer
+				out.pop_back();
+			normalized[key] = out;
+		}
+		if (!normalized.empty())
+			applied["config_entries"] = normalized;
+	}
+
+	return applied;
+}
+
+} // namespace
+
+json Orts::onnx::session::collect_extensions(const json &option) { // returns a deduplicated JSON array of extension library paths
+	json result = json::array();
+	if (!option.is_object()) // non-object option: empty array
+		return result;
+	auto add = [&](const std::string &path) { // appends path unless an equal string is already present
+		for (auto &e : result) {
+			if (e.is_string() && e.get<std::string>() == path)
+				return;
+		}
+		result.push_back(path);
+	};
+	if (option.contains("extensions") && option["extensions"].is_array()) {
+		for (auto &e : option["extensions"]) {
+			if (e.is_string()) // non-string array elements are ignored
+				add(e.get<std::string>());
+		}
+	}
+	if (option.contains("ortextensions_path") && option["ortextensions_path"].is_string()) // legacy key is appended after the array entries
+		add(option["ortextensions_path"].get<std::string>());
+	return result;
+}
+
+Orts::onnx::session::session(session_key key, const json &option)
+	: session_options(), created_at(std::chrono::system_clock::now()), allocator(), key(std::move(key)) {
+	_option["cuda"] = false;
+
+	// session-level options (apply before EP/extension registration)
+	if (option.contains("session_options") && option["session_options"].is_object()) {
+		auto applied = apply_session_options(session_options, option["session_options"]);
+		if (!applied.empty())
+			_option["session_options"] = applied;
+	}
 
-		_option["ortextensions_path"] = option["ortextensions_path"];
+	// register custom op libraries: extensions array + legacy ortextensions_path, deduplicated
+	auto extensions = collect_extensions(option);
+	if (!extensions.empty()) {
+		for (auto &e : extensions)
+			register_extension(session_options, e.get<std::string>());
+		_option["extensions"] = extensions;
 	}
 
 	if (providers::available_providers.has_cuda() && option.contains("cuda") && (
diff --git a/src/onnx/session_key_with_option.cpp b/src/onnx/session_key_with_option.cpp
index 8d2dee6..9cd711b 100644
--- a/src/onnx/session_key_with_option.cpp
+++ b/src/onnx/session_key_with_option.cpp
@@ -6,18 +6,88 @@
 
 #include "../onnxruntime_server.hpp"
 
+namespace {
+
 std::regex space_re(R"(\s+)");
 std::regex trim_re(R"(^\s*|\s*$)");
 
 std::string key_rule = R"(([-_a-zA-Z0-9]+):([-_/a-zA-Z0-9]+)(\(([^)]+)\))?)";
 std::regex key_re(key_rule);
 
-std::string option_rule = R"(([_a-zA-Z0-9]+)\s*=\s*([^,\s]+))";
+// option key supports dotted notation (e.g. cuda.device_id, session_options.intra_op_num_threads)
+std::string option_rule = R"(([_a-zA-Z0-9][_a-zA-Z0-9.]*)\s*=\s*([^,\s]+))";
 std::regex option_re(option_rule);
 
+std::regex int_re(R"(^-?[0-9]+$)");
+
+const std::string EXTENSIONS_KEY = "extensions";
+const std::string LEGACY_EXTENSION_KEY = "ortextensions_path";
+
+json infer_value(const std::string &raw) {
+	// Type an option value written as text: "true"/"false" become booleans, an optionally
+	// signed run of decimal digits becomes an integer, and anything else stays a string.
+	if (raw == "true" || raw == "false")
+		return raw == "true";
+	if (!std::regex_match(raw, int_re))
+		return raw;
+	try {
+		return json(std::stoll(raw));
+	} catch (...) {
+		// digit runs too large for long long are kept verbatim as strings
+		return raw;
+	}
+}
+
+std::vector<std::string> split_dot(const std::string &k) {
+	// Break a dotted key into its segments. Empty segments are dropped, so leading,
+	// trailing and doubled dots contribute nothing ("a..b" -> {"a", "b"}, "." -> {}).
+	std::vector<std::string> segments;
+	std::string::size_type begin = 0;
+	while (begin <= k.size()) {
+		auto end = k.find('.', begin);
+		if (end == std::string::npos)
+			end = k.size();
+		// [begin, end) is the current segment; skip it when it is empty
+		if (end > begin)
+			segments.emplace_back(k, begin, end - begin);
+		begin = end + 1;
+	}
+	return segments;
+}
+
+void set_nested(json &option, const std::vector<std::string> &path, const json &value) {
+	// Store value at option[path[0]][path[1]]..., creating one object level per key. Anything
+	// in the way that is not an object (including option itself) is replaced by an object.
+	// An empty path is a no-op: there is no key to assign to, and path.back() would be undefined.
+	json *node = &option;
+	for (const auto &key : path) {
+		if (!node->is_object())
+			*node = json::object();
+		node = &(*node)[key];
+	}
+	if (!path.empty())
+		*node = value;
+}
+
+void append_extension(json &option, const std::string &path) {
+	// (Re)create the list when it is missing or has the wrong type, then add path once.
+	json &list = option[EXTENSIONS_KEY];
+	if (!list.is_array())
+		list = json::array();
+	for (const auto &existing : list)
+		if (existing.is_string() && existing.get<std::string>() == path)
+			return;
+	list.push_back(path);
+}
+
+} // namespace
+
 std::vector<Orts::onnx::session_key_with_option>
 onnxruntime_server::onnx::session_key_with_option::parse(const std::string &model_key_list) {
-	// model_key_list is a space separated list of model_name:model_version
+	// model_key_list is a space separated list of model_name:model_version[(opt1=val1, opt2=val2)]
+	// option keys may be dotted (cuda.device_id, session_options.intra_op_num_threads) producing nested objects.
+	// extensions/ortextensions_path keys accumulate into an "extensions" array (deduplicated).
+	// option entries that don't match the grammar are silently skipped.
 	std::vector<Orts::onnx::session_key_with_option> models;
 	std::string list = std::regex_replace(std::regex_replace(model_key_list, space_re, " "), trim_re, "");
 	if (list.empty())
@@ -27,20 +97,26 @@ onnxruntime_server::onnx::session_key_with_option::parse(const std::string &mode
 	while (std::regex_search(list, keys, key_re)) {
 		json option = json::object();
 
-		// parse option
 		auto option_str = keys[4].str();
 		if (!option_str.empty()) {
 			std::smatch options;
 			while (std::regex_search(option_str, options, option_re)) {
-				auto option_key = options[1].str();
-				auto option_val = options[2].str();
-
-				// cuda option: device_id or true/false
-				if (option_key == "cuda") {
-					if (option_val == "true" || option_val == "false")
-						option[option_key] = option_val == "true";
-					else
-						option[option_key] = std::stoi(option_val);
+				auto raw_key = options[1].str();
+				auto raw_val = options[2].str();
+				auto value = infer_value(raw_val);
+
+				auto parts = split_dot(raw_key);
+				if (parts.empty()) {
+					option_str = options.suffix().str();
+					continue;
+				}
+
+				if (parts.size() == 1 &&
+					(parts[0] == EXTENSIONS_KEY || parts[0] == LEGACY_EXTENSION_KEY) &&
+					value.is_string()) {
+					append_extension(option, value.get<std::string>());
+				} else {
+					set_nested(option, parts, value);
 				}
 
 				option_str = options.suffix().str();
diff --git a/src/onnxruntime_server.hpp b/src/onnxruntime_server.hpp
index ba6873b..629507b 100644
--- a/src/onnxruntime_server.hpp
+++ b/src/onnxruntime_server.hpp
@@ -138,6 +138,11 @@ namespace onnxruntime_server {
 
 			[[nodiscard]] const std::vector<value_info> &inputs() const;
 			[[nodiscard]] const std::vector<value_info> &outputs() const;
+
+			// Merge the "extensions" array and the legacy "ortextensions_path" string into one
+			// deduplicated array of paths, in registration order (array entries first). Non-string
+			// values and non-object input are ignored. Pure: no file system or onnxruntime calls.
+			static json collect_extensions(const json &option);
 		};
 
 		typedef std::shared_ptr<session> session_ptr;
diff --git a/src/test/unit/unit_test_context_cuda.cpp b/src/test/unit/unit_test_context_cuda.cpp
index 077e401..42bfdfa 100644
--- a/src/test/unit/unit_test_context_cuda.cpp
+++ b/src/test/unit/unit_test_context_cuda.cpp
@@ -4,6 +4,8 @@
 #include "../../onnxruntime_server.hpp"
 #include "../test_common.hpp"
 
+// End-to-end smoke test: build a session on the CUDA EP with the simplest "cuda": true input,
+// run the BERT SQuAD sample model, and assert the output tensor shape.
 TEST(test_onnxruntime_server_context_cuda, BertSquadModelTest) {
 	Orts::onnx::session_key key("sample", "2");
 	auto session = std::make_shared<Orts::onnx::session>(key, model2_path.string(), json::parse(R"({"cuda": true})"));
@@ -31,3 +33,58 @@ TEST(test_onnxruntime_server_context_cuda, BertSquadModelTest) {
 	std::cout << json.dump(4) << "\n";
 	ASSERT_EQ(json["output"].size(), 3);
 }
+
+// Passing "cuda" as an object forwards every key to ORT as CUDA EP V2 provider options. The
+// keys go through one batched UpdateCUDAProviderOptions call, because ORT silently resets
+// sibling keys when it is called once per key. The echo under option.cuda is read back via
+// GetCUDAProviderOptionsAsString, so it shows what ORT actually stored rather than what was
+// requested. Each accepted key is therefore expected to round-trip unchanged; a key ORT
+// rejects makes session construction throw instead of yielding a partial echo.
+TEST(test_onnxruntime_server_context_cuda, CudaObjectOptionsEcho) {
+	const auto requested =
+		R"({
+			"cuda": {
+				"device_id": 0,
+				"gpu_mem_limit": 2147483648,
+				"arena_extend_strategy": "kNextPowerOfTwo",
+				"cudnn_conv_algo_search": "HEURISTIC"
+			}
+		})"_json;
+	Orts::onnx::session_key key("sample", "2");
+	auto session = std::make_shared<Orts::onnx::session>(key, model2_path.string(), requested);
+
+	auto described = session->to_json();
+	ASSERT_TRUE(described["option"]["cuda"].is_object());
+	auto &echoed = described["option"]["cuda"];
+	// every requested key must come back with the value that was supplied
+	ASSERT_EQ(echoed["device_id"], 0);
+	ASSERT_EQ(echoed["gpu_mem_limit"], 2147483648);
+	ASSERT_EQ(echoed["arena_extend_strategy"], "kNextPowerOfTwo");
+	ASSERT_EQ(echoed["cudnn_conv_algo_search"], "HEURISTIC");
+}
+
+// A CUDA option key ORT does not know (or a value it cannot parse) has to abort session
+// construction with an error; it must not produce a partial echo. That follows from the
+// batched-update strategy and is the contract callers can rely on.
+TEST(test_onnxruntime_server_context_cuda, CudaObjectRejectsUnknownKey) {
+	// device_id is a valid key on its own; only the second key is bogus
+	const auto with_bogus_key =
+		R"({"cuda": {"device_id": 0, "totally_not_a_real_cuda_option": "xyz"}})"_json;
+	Orts::onnx::session_key key("sample", "2");
+	EXPECT_ANY_THROW(
+		auto session = std::make_shared<Orts::onnx::session>(key, model2_path.string(), with_bogus_key);
+	);
+}
+
+// Backward compatibility: the legacy scalar shortcuts ("cuda": true and "cuda": <int device_id>)
+// must keep working under the V2 EP path and still echo as a normalized object with device_id.
+// Both spellings below select device 0, so both must echo device_id 0.
+TEST(test_onnxruntime_server_context_cuda, CudaScalarShortcutStillWorks) {
+	Orts::onnx::session_key key("sample", "2");
+	for (const auto &shortcut : {R"({"cuda": true})"_json, R"({"cuda": 0})"_json}) {
+		auto session = std::make_shared<Orts::onnx::session>(key, model2_path.string(), shortcut);
+		auto echoed = session->to_json();
+		ASSERT_TRUE(echoed["option"]["cuda"].is_object()) << shortcut.dump();
+		ASSERT_EQ(echoed["option"]["cuda"]["device_id"], 0) << shortcut.dump();
+	}
+}
diff --git a/src/test/unit/unit_test_session.cpp b/src/test/unit/unit_test_session.cpp
index 266ce74..69a39ff 100644
--- a/src/test/unit/unit_test_session.cpp
+++ b/src/test/unit/unit_test_session.cpp
@@ -4,6 +4,8 @@
 #include "../../onnxruntime_server.hpp"
 #include "../test_common.hpp"
 
+// input_shape / output_shape options must override the model's dynamic dimensions with the
+// supplied static values, and any shape whose rank does not match the model's must be rejected.
 TEST(unit_test_session, SesionWithShapeOption) {
 	Orts::onnx::session_key key("sample", "1");
 	const auto session1 = std::make_shared<Orts::onnx::session>(key, model1_path.string());
@@ -45,6 +47,207 @@ TEST(unit_test_session, SesionWithShapeOption) {
 	);
 }
 
+// Every key of the session_options group (threads, execution mode, graph optimization level,
+// memory, logging, config_entries) is expected to reach onnxruntime's SessionOptions and to be
+// echoed under option.session_options in normalized form.
+TEST(unit_test_session, SessionWithSessionOptions) {
+	const auto requested =
+		R"({
+			"session_options": {
+				"intra_op_num_threads": 2,
+				"inter_op_num_threads": 1,
+				"execution_mode": "sequential",
+				"graph_optimization_level": "all",
+				"enable_cpu_mem_arena": false,
+				"enable_mem_pattern": true,
+				"logid": "test-session",
+				"log_severity_level": 3,
+				"config_entries": {
+					"session.disable_prepacking": "1"
+				}
+			}
+		})"_json;
+	Orts::onnx::session_key key("sample", "1");
+	auto session = std::make_shared<Orts::onnx::session>(key, model1_path.string(), requested);
+
+	auto described = session->to_json();
+	ASSERT_TRUE(described["option"].contains("session_options"));
+	auto &echoed = described["option"]["session_options"];
+	ASSERT_EQ(echoed["intra_op_num_threads"], 2);
+	ASSERT_EQ(echoed["inter_op_num_threads"], 1);
+	ASSERT_EQ(echoed["execution_mode"], "sequential");
+	ASSERT_EQ(echoed["graph_optimization_level"], "all");
+	ASSERT_EQ(echoed["enable_cpu_mem_arena"], false);
+	ASSERT_EQ(echoed["enable_mem_pattern"], true);
+	ASSERT_EQ(echoed["logid"], "test-session");
+	ASSERT_EQ(echoed["log_severity_level"], 3);
+	ASSERT_EQ(echoed["config_entries"]["session.disable_prepacking"], "1");
+}
+
+// Entries with the wrong JSON type (a string where an int is expected), enum strings outside
+// the server's mapping, and keys the server never forwards to ORT are dropped from the echo
+// without an error. Siblings that pass the shape check and the enum mapping are still applied
+// and echoed. ORT's own validity checks (e.g. allowed numeric ranges) are deliberately not
+// duplicated: only JSON shape and the enum string -> ORT enum mapping are validated here.
+TEST(unit_test_session, SessionOptionsIgnoresInvalidEntries) {
+	const auto requested =
+		R"({
+			"session_options": {
+				"intra_op_num_threads": "not-a-number",
+				"graph_optimization_level": "absurd-level",
+				"execution_mode": "weird",
+				"logid": "still-applies",
+				"totally_unknown_key": "ignore-me"
+			}
+		})"_json;
+	Orts::onnx::session_key key("sample", "1");
+	auto session = std::make_shared<Orts::onnx::session>(key, model1_path.string(), requested);
+	auto described = session->to_json();
+	ASSERT_TRUE(described["option"].contains("session_options"));
+	auto &echoed = described["option"]["session_options"];
+	// wrong type, unmapped enum strings, unknown key: none of them may show up
+	ASSERT_FALSE(echoed.contains("intra_op_num_threads"));
+	ASSERT_FALSE(echoed.contains("graph_optimization_level"));
+	ASSERT_FALSE(echoed.contains("execution_mode"));
+	ASSERT_FALSE(echoed.contains("totally_unknown_key"));
+	// the one well-formed sibling is unaffected by its neighbours
+	ASSERT_EQ(echoed["logid"], "still-applies");
+}
+
+// Config entries written with AddSessionConfigEntry are read back with GetSessionConfigEntry,
+// so the echo is what ORT stored. This pins the bool/int -> string conversion the server does
+// before forwarding (true -> "1", 42 -> "42") to what ORT returns on lookup.
+TEST(unit_test_session, SessionOptionsConfigEntriesReadback) {
+	const auto requested =
+		R"({
+			"session_options": {
+				"config_entries": {
+					"key.string": "hello",
+					"key.bool": true,
+					"key.int": 42
+				}
+			}
+		})"_json;
+	Orts::onnx::session_key key("sample", "1");
+	auto session = std::make_shared<Orts::onnx::session>(key, model1_path.string(), requested);
+	auto described = session->to_json();
+	auto &stored = described["option"]["session_options"]["config_entries"];
+	// every entry is echoed as a string, whatever JSON type it was supplied as
+	ASSERT_EQ(stored["key.string"], "hello");
+	ASSERT_EQ(stored["key.bool"], "1");
+	ASSERT_EQ(stored["key.int"], "42");
+}
+
+// free_dimension_overrides cannot be read back from ORT, and AddFreeDimensionOverrideByName
+// accepts any name without raising, so the echo only confirms what ORT was asked to store.
+// Whether a name matches a real model dimension is ORT's concern at session creation time.
+// Values that are not integers are dropped by the server's shape check.
+TEST(unit_test_session, SessionOptionsFreeDimensionOverrides) {
+	const auto requested =
+		R"({
+			"session_options": {
+				"free_dimension_overrides": {
+					"batch": 1,
+					"seq": 128,
+					"bad": "not-an-int"
+				}
+			}
+		})"_json;
+	Orts::onnx::session_key key("sample", "1");
+	auto session = std::make_shared<Orts::onnx::session>(key, model1_path.string(), requested);
+	auto described = session->to_json();
+	auto &options = described["option"]["session_options"];
+	ASSERT_TRUE(options.contains("free_dimension_overrides"));
+	auto &overrides = options["free_dimension_overrides"];
+	ASSERT_EQ(overrides["batch"], 1);
+	ASSERT_EQ(overrides["seq"], 128);
+	ASSERT_FALSE(overrides.contains("bad"));
+}
+
+// session::collect_extensions merges the "extensions" array and the legacy
+// "ortextensions_path" string into one ordered, deduplicated array: the paths session
+// construction would try to register, in that order. It is a pure function, so these checks
+// need no loadable shared library on disk.
+TEST(unit_test_session, CollectExtensionsNormalization) {
+	const auto collect = [](const json &option) { return Orts::onnx::session::collect_extensions(option); };
+	const json none = json::array();
+
+	// no paths supplied: missing key, or an empty list
+	ASSERT_EQ(collect(json::object()), none);
+	ASSERT_EQ(collect(R"({"extensions": []})"_json), none);
+
+	// a one-element list comes back as is
+	ASSERT_EQ(collect(R"({"extensions": ["/lib1.so"]})"_json), json::array({"/lib1.so"}));
+
+	// several entries keep the order they were given in
+	ASSERT_EQ(
+		collect(R"({"extensions": ["/lib1.so", "/lib2.so", "/lib3.so"]})"_json),
+		json::array({"/lib1.so", "/lib2.so", "/lib3.so"})
+	);
+
+	// repeats inside the list collapse to the first occurrence
+	ASSERT_EQ(collect(R"({"extensions": ["/lib.so", "/lib.so", "/lib.so"]})"_json), json::array({"/lib.so"}));
+
+	// the legacy key on its own yields a one-element list
+	ASSERT_EQ(collect(R"({"ortextensions_path": "/legacy.so"})"_json), json::array({"/legacy.so"}));
+
+	// legacy path already present in the list: not added a second time
+	ASSERT_EQ(
+		collect(R"({"extensions": ["/a.so", "/b.so"], "ortextensions_path": "/a.so"})"_json),
+		json::array({"/a.so", "/b.so"})
+	);
+
+	// legacy path not in the list: appended after the list entries
+	ASSERT_EQ(
+		collect(R"({"extensions": ["/a.so"], "ortextensions_path": "/b.so"})"_json),
+		json::array({"/a.so", "/b.so"})
+	);
+
+	// list entries that are not strings are skipped
+	ASSERT_EQ(
+		collect(R"({"extensions": ["/a.so", 42, null, {"x":1}, "/b.so"]})"_json),
+		json::array({"/a.so", "/b.so"})
+	);
+
+	// "extensions" that is not a list counts as absent
+	ASSERT_EQ(collect(R"({"extensions": "/lib.so"})"_json), none);
+	ASSERT_EQ(collect(R"({"extensions": 123})"_json), none);
+
+	// "ortextensions_path" that is not a string counts as absent
+	ASSERT_EQ(collect(R"({"ortextensions_path": 123})"_json), none);
+
+	// an option value that is not an object yields an empty list without crashing
+	ASSERT_EQ(collect(json("/raw-string")), none);
+	ASSERT_EQ(collect(json(nullptr)), none);
+}
+
+// Both the "extensions" array and the legacy "ortextensions_path" string must reach the
+// registration path when a session is constructed: a library that cannot be loaded has to make
+// construction throw instead of being dropped silently. Successful registration needs a real
+// onnxruntime_extensions shared library on disk and is not exercised here; the normalization
+// feeding it is covered by CollectExtensionsNormalization and by the parse-level tests in
+// this file.
+TEST(unit_test_session, ExtensionsRegistrationFailsLoudly) {
+	Orts::onnx::session_key key("sample", "1");
+	const auto build_with = [&key](const json &option) {
+		return std::make_shared<Orts::onnx::session>(key, model1_path.string(), option);
+	};
+
+	// legacy single-path key
+	EXPECT_ANY_THROW(
+		build_with(R"({"ortextensions_path": "/nonexistent/path/to/lib.so"})"_json)
+	);
+	// list key
+	EXPECT_ANY_THROW(
+		build_with(R"({"extensions": ["/nonexistent/path/to/lib.so"]})"_json)
+	);
+}
+
+// Cover the option string grammar end-to-end: empty/whitespace input, malformed model keys that
+// must throw, well-formed lists with various spacing, the legacy scalar cuda shortcut, dotted
+// notation producing nested objects, repeated "extensions" keys accumulating into a deduped
+// array, the legacy ortextensions_path normalization, value type inference (bool/int/string),
+// pass-through of unknown keys, and lenient skipping of malformed option entries.
 TEST(unit_test_session_key, Parse) {
 	// empty cases
 	std::string empty_cases[] = {"", " ", "\n", "\r\n", "\n \n", " \r \n \r \n "};
@@ -92,4 +295,78 @@ TEST(unit_test_session_key, Parse) {
 
 	auto parse_case4 = Orts::onnx::session_key_with_option::parse("model:version(cuda=true)");
 	ASSERT_TRUE(parse_case4[0].option["cuda"]);
+
+	// dotted notation produces nested objects
+	auto parse_dotted_cuda = Orts::onnx::session_key_with_option::parse("model:version(cuda.device_id=0)");
+	ASSERT_TRUE(parse_dotted_cuda[0].option["cuda"].is_object());
+	ASSERT_EQ(parse_dotted_cuda[0].option["cuda"]["device_id"], 0);
+
+	auto parse_dotted_session = Orts::onnx::session_key_with_option::parse(
+		"model:version(session_options.intra_op_num_threads=4, session_options.graph_optimization_level=all)"
+	);
+	ASSERT_TRUE(parse_dotted_session[0].option["session_options"].is_object());
+	ASSERT_EQ(parse_dotted_session[0].option["session_options"]["intra_op_num_threads"], 4);
+	ASSERT_EQ(parse_dotted_session[0].option["session_options"]["graph_optimization_level"], "all");
+
+	// scalar followed by dotted on the same group: dotted wins (scalar discarded)
+	auto parse_scalar_then_dotted = Orts::onnx::session_key_with_option::parse(
+		"model:version(cuda=true, cuda.device_id=1)"
+	);
+	ASSERT_TRUE(parse_scalar_then_dotted[0].option["cuda"].is_object());
+	ASSERT_EQ(parse_scalar_then_dotted[0].option["cuda"]["device_id"], 1);
+
+	// extensions key accumulates as an array
+	auto parse_extensions = Orts::onnx::session_key_with_option::parse(
+		"model:version(extensions=/lib1.so, extensions=/lib2.so)"
+	);
+	ASSERT_TRUE(parse_extensions[0].option["extensions"].is_array());
+	ASSERT_EQ(parse_extensions[0].option["extensions"].size(), 2);
+	ASSERT_EQ(parse_extensions[0].option["extensions"][0], "/lib1.so");
+	ASSERT_EQ(parse_extensions[0].option["extensions"][1], "/lib2.so");
+
+	// extensions dedupe
+	auto parse_extensions_dedup = Orts::onnx::session_key_with_option::parse(
+		"model:version(extensions=/lib.so, extensions=/lib.so)"
+	);
+	ASSERT_EQ(parse_extensions_dedup[0].option["extensions"].size(), 1);
+
+	// legacy ortextensions_path is normalized into the extensions array
+	auto parse_legacy_ext = Orts::onnx::session_key_with_option::parse(
+		"model:version(ortextensions_path=/usr/local/lib/libortextensions.so)"
+	);
+	ASSERT_FALSE(parse_legacy_ext[0].option.contains("ortextensions_path"));
+	ASSERT_TRUE(parse_legacy_ext[0].option["extensions"].is_array());
+	ASSERT_EQ(parse_legacy_ext[0].option["extensions"].size(), 1);
+	ASSERT_EQ(parse_legacy_ext[0].option["extensions"][0], "/usr/local/lib/libortextensions.so");
+
+	// extensions + legacy ortextensions_path mixed, with dedupe
+	auto parse_mixed_ext = Orts::onnx::session_key_with_option::parse(
+		"model:version(extensions=/a.so, ortextensions_path=/a.so, extensions=/b.so)"
+	);
+	ASSERT_EQ(parse_mixed_ext[0].option["extensions"].size(), 2);
+
+	// value type inference: bool, int, string
+	auto parse_types = Orts::onnx::session_key_with_option::parse(
+		"model:version(session_options.enable_cpu_mem_arena=false, "
+		"session_options.intra_op_num_threads=8, "
+		"session_options.logid=my-model)"
+	);
+	ASSERT_EQ(parse_types[0].option["session_options"]["enable_cpu_mem_arena"], false);
+	ASSERT_EQ(parse_types[0].option["session_options"]["intra_op_num_threads"], 8);
+	ASSERT_EQ(parse_types[0].option["session_options"]["logid"], "my-model");
+
+	// unknown / unrecognized option keys pass through silently (caller decides what to do)
+	auto parse_unknown = Orts::onnx::session_key_with_option::parse(
+		"model:version(some_unknown_key=hello, another.deep.key=42)"
+	);
+	ASSERT_EQ(parse_unknown[0].option["some_unknown_key"], "hello");
+	ASSERT_EQ(parse_unknown[0].option["another"]["deep"]["key"], 42);
+
+	// malformed option entries inside parens are silently skipped, well-formed ones still apply
+	auto parse_malformed_options = Orts::onnx::session_key_with_option::parse(
+		"model:version(=garbage, cuda=1, !!!, session_options.intra_op_num_threads=2)"
+	);
+	ASSERT_EQ(parse_malformed_options.size(), 1);
+	ASSERT_EQ(parse_malformed_options[0].option["cuda"], 1);
+	ASSERT_EQ(parse_malformed_options[0].option["session_options"]["intra_op_num_threads"], 2);
 }