@@ -16,6 +16,7 @@ import { ConnectionOptions } from '../contracts/IDBSQLClient';
1616import { InternalConnectionOptions } from '../contracts/InternalConnectionOptions' ;
1717import AuthenticationError from '../errors/AuthenticationError' ;
1818import HiveDriverError from '../errors/HiveDriverError' ;
19+ import { buildUserAgentString } from '../utils' ;
1920
2021/**
2122 * Default local listener port for the U2M authorization-code callback.
@@ -113,12 +114,54 @@ export interface SeaTlsOptions {
113114 * `customCaCert` over disabling verification entirely.
114115 */
115116 checkServerCertificate ?: boolean ;
117+ /**
118+ * Verify the server certificate's hostname (hostname-vs-SNI), independently
119+ * of chain validation. Omit ⇒ kernel default (on). `false` skips only the
120+ * hostname check. No-op when `checkServerCertificate` is `false`. Mirrors
121+ * the kernel napi `checkServerCertificateHostname` / Python
122+ * `tls_verify_hostname`.
123+ */
124+ checkServerCertificateHostname ?: boolean ;
116125 /** PEM-encoded CA bytes to add to the trust store. */
117126 customCaCert ?: Buffer ;
127+ /**
128+ * PEM-encoded client certificate for mutual TLS (kernel
129+ * `TlsConfig::client_cert_pem`). Paired with {@link clientKeyPem} —
130+ * `buildSeaTlsOptions` rejects supplying only one before the FFI hop.
131+ * The napi shape takes a `Buffer`; the public surface also accepts a
132+ * PEM string, normalised here.
133+ */
134+ clientCertPem ?: Buffer ;
135+ /**
136+ * PEM-encoded private key for the mTLS client certificate (kernel
137+ * `TlsConfig::client_key_pem`). Paired with {@link clientCertPem}.
138+ */
139+ clientKeyPem ?: Buffer ;
140+ }
141+
/**
 * HTTP options shared across all auth-mode variants. Mirrors the napi
 * binding's `ConnectionOptions.customHeaders` (kernel
 * `HttpConfig::custom_headers`).
 *
 * Carries the extra request headers the SEA path sends on every request:
 * the caller's `customHeaders` plus the composed `User-Agent` (the kernel
 * appends a `User-Agent` entry to its base UA rather than replacing it).
 *
 * The headers are an **ordered list** of `{ name, value }` pairs — the napi
 * shape (`Array<HeaderEntry>`), which mirrors the kernel core's
 * `Vec<(String, String)>` and the Python connector's `http_headers`
 * `List[Tuple[str, str]]`. Order is preserved and duplicate names are
 * allowed (e.g. a caller `User-Agent` followed by the connector's, which
 * the kernel folds last-wins).
 */
export interface SeaHttpOptions {
  /** Extra request headers, in send order; duplicate names are permitted. */
  customHeaders?: Array<{ name: string; value: string }>;
}
119161
120162export type SeaNativeConnectionOptions = SeaSessionDefaults &
121163 SeaTlsOptions &
164+ SeaHttpOptions &
122165 (
123166 | {
124167 hostName : string ;
@@ -168,57 +211,160 @@ export function isBlankOrReserved(s: string): boolean {
/** Largest value representable as an unsigned 32-bit integer (2^32 - 1). */
const MAX_U32 = 0xffffffff;
169212
/**
 * Normalise a PEM input (`string` or `Buffer`) accepted on the public
 * surface into the `Buffer` the napi shape requires.
 *
 * Strings get a light, *ordered* BEGIN…END sanity check so a
 * truncated/headerless blob (or a stray page that merely contains the
 * literals out of order, e.g. a proxy-intercept page) is rejected here
 * rather than surfacing as an opaque kernel TLS error. The bytes are NOT
 * fully parsed in JS — that is deferred to the kernel, which returns a
 * meaningful error on a malformed PEM/key. Buffers are only checked for
 * being non-empty and are returned as-is (same instance, no copy).
 *
 * @param value - PEM text or a Buffer of PEM bytes.
 * @param optionName - Public option name, used verbatim in error messages.
 * @param kind - Expected block: `'certificate'` matches a `CERTIFICATE`
 *   block; `'private key'` matches any `… PRIVATE KEY` block (PKCS#8
 *   `PRIVATE KEY`, PKCS#1 `RSA PRIVATE KEY`, SEC1 `EC PRIVATE KEY`).
 * @returns The PEM bytes as a `Buffer` (UTF-8 encoded for string input).
 * @throws HiveDriverError when a string lacks the expected ordered PEM
 *   block (this includes the empty string), when a Buffer is empty, or when
 *   the value is neither a string nor a Buffer.
 */
function normalizePemBytes(value: Buffer | string, optionName: string, kind: 'certificate' | 'private key'): Buffer {
  if (typeof value === 'string') {
    // One ordered match (BEGIN, at least one character, END) rather than two
    // independent substring checks, so out-of-order literals do not pass.
    const re =
      kind === 'certificate'
        ? /-----BEGIN CERTIFICATE-----[\s\S]+?-----END CERTIFICATE-----/
        : /-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----[\s\S]+?-----END [A-Z0-9 ]*PRIVATE KEY-----/;
    if (!re.test(value)) {
      const expected =
        kind === 'certificate'
          ? "a '-----BEGIN CERTIFICATE-----' … '-----END CERTIFICATE-----' block"
          : "a 'BEGIN … PRIVATE KEY' / 'END … PRIVATE KEY' PEM block (PKCS#8, PKCS#1, or SEC1)";
      throw new HiveDriverError(
        `SEA backend: \`${optionName}\` string does not look like a PEM ${kind} (expected ${expected}). ` +
          'Pass PEM text or a Buffer of PEM bytes.',
      );
    }
    return Buffer.from(value, 'utf8');
  }
  if (Buffer.isBuffer(value)) {
    if (value.length === 0) {
      throw new HiveDriverError(`SEA backend: \`${optionName}\` Buffer is empty.`);
    }
    return value;
  }
  // Reachable from untyped (plain JS) callers despite the declared signature.
  throw new HiveDriverError(`SEA backend: \`${optionName}\` must be a PEM string or a Buffer.`);
}
256+
/**
 * Normalise the public TLS options into the napi shape.
 *
 * - `checkServerCertificate` passes through verbatim (only when set; an
 *   absent value leaves the kernel default, which is secure — verify on).
 * - `checkServerCertificateHostname` passes through verbatim — the
 *   independent hostname-vs-SNI toggle (kernel applies it only when the
 *   master verify toggle is on). Mirrors Python's `tls_verify_hostname`.
 * - `customCaCert` accepts a PEM string or `Buffer`; normalised to a
 *   `Buffer` via {@link normalizePemBytes}.
 * - `clientCertPem` / `clientKeyPem` carry the mutual-TLS client identity.
 *   They must be supplied **together** — supplying only one is rejected
 *   here with an actionable error (rather than waiting for the kernel's
 *   `InvalidArgument` at `openSession`). Each accepts a PEM string or
 *   `Buffer`, normalised the same way.
 *
 * @param options - Public connection options; the SEA-only TLS fields are
 *   read through `InternalConnectionOptions`.
 * @returns A `SeaTlsOptions` object containing only the fields that were set.
 * @throws HiveDriverError when a cert/key is empty, mis-typed, lacks the
 *   expected PEM header, or when only one half of the mTLS pair is set.
 */
export function buildSeaTlsOptions(options: ConnectionOptions): SeaTlsOptions {
  // Read the SEA-only fields through the purpose-built internal options type
  // rather than an ad-hoc inline cast, so the shape can't silently drift from
  // its declaration and a typo'd key fails to compile.
  const { checkServerCertificate, checkServerCertificateHostname, customCaCert, clientCertPem, clientKeyPem } =
    options as ConnectionOptions & InternalConnectionOptions;

  const tls: SeaTlsOptions = {};

  if (checkServerCertificate !== undefined) {
    tls.checkServerCertificate = checkServerCertificate;
  }

  if (checkServerCertificateHostname !== undefined) {
    tls.checkServerCertificateHostname = checkServerCertificateHostname;
  }

  if (customCaCert !== undefined) {
    tls.customCaCert = normalizePemBytes(customCaCert, 'customCaCert', 'certificate');
  }

  // mTLS client identity. Enforce both-or-neither up front so a caller who
  // sets only one gets a clear message naming the missing half, instead of
  // the kernel's generic `InvalidArgument` after the FFI hop.
  const hasCert = clientCertPem !== undefined;
  const hasKey = clientKeyPem !== undefined;
  if (hasCert !== hasKey) {
    const supplied = hasCert ? 'clientCertPem' : 'clientKeyPem';
    const missing = hasCert ? 'private key (`clientKeyPem`)' : 'certificate (`clientCertPem`)';
    throw new HiveDriverError(
      'SEA backend: mutual TLS requires both `clientCertPem` and `clientKeyPem`; only ' +
        `\`${supplied}\` was supplied. ` +
        `Provide the matching ${missing}, ` +
        'or omit both.',
    );
  }
  // Past the guard above hasCert === hasKey, so checking one half is enough.
  // The casts are needed because the boolean flags do not narrow the
  // destructured values for the compiler.
  if (hasCert) {
    tls.clientCertPem = normalizePemBytes(clientCertPem as Buffer | string, 'clientCertPem', 'certificate');
    tls.clientKeyPem = normalizePemBytes(clientKeyPem as Buffer | string, 'clientKeyPem', 'private key');
  }

  return tls;
}
317+
/**
 * Header names (lower-cased) that the kernel manages itself and that are
 * therefore dropped from caller-supplied `customHeaders` before forwarding.
 */
const KERNEL_MANAGED_HEADERS = new Set(['authorization', 'x-databricks-org-id']);

/**
 * Build the napi HTTP options (`customHeaders`) from the public
 * `customHeaders` map and `userAgentEntry`.
 *
 * Mirrors the Python connector's `use_kernel` path (`session.py` +
 * `backend/kernel/client.py`), which:
 *   1. composes a single connector `User-Agent` and **unconditionally**
 *      appends it last —
 *      `all_headers = (http_headers or []) + [("User-Agent", useragent_header)]`;
 *   2. before forwarding to the kernel, **drops** the kernel-managed
 *      reserved names `Authorization` / `x-databricks-org-id`
 *      (case-insensitive) — the kernel applies the auth token itself and
 *      re-derives the org id from the `?o=` in the http path, and would
 *      otherwise skip-and-warn on every request.
 *
 * The result is an ordered list (the napi `Array<HeaderEntry>` shape,
 * matching the kernel core `Vec<(String, String)>`): the caller's
 * `customHeaders` first (minus reserved names), then the connector's
 * `User-Agent` last. The connector UA is always present and, being last,
 * is authoritative (the kernel folds the last `User-Agent` into its base
 * UA — `DatabricksJDBCDriverOSS/...` — preserving the result-disposition
 * gating token). The value is composed via the same `buildUserAgentString`
 * the Thrift path uses, so the SEA UA carries the identical
 * `NodejsDatabricksSqlConnector/...` identity (with `userAgentEntry`
 * folded in). A caller `User-Agent` in `customHeaders` is forwarded too
 * (mirroring Python, which doesn't dedupe it); the kernel's last-wins fold
 * means the connector UA still wins.
 *
 * @param options - Public connection options; only `customHeaders` and
 *   `userAgentEntry` are read.
 * @returns `{ customHeaders }` — never empty, since the composed
 *   `User-Agent` entry is always appended.
 */
export function buildSeaHttpOptions(options: ConnectionOptions): SeaHttpOptions {
  const { customHeaders, userAgentEntry } = options;

  const headers: Array<{ name: string; value: string }> = [];
  if (customHeaders) {
    for (const [name, value] of Object.entries(customHeaders)) {
      // Drop kernel-managed reserved names before the FFI hop — same
      // double-wall as the Python connector's `_KERNEL_MANAGED_HEADERS`.
      if (KERNEL_MANAGED_HEADERS.has(name.toLowerCase())) {
        continue;
      }
      headers.push({ name, value });
    }
  }

  // Always append the connector's composed User-Agent last — exactly the
  // Python connector's unconditional `base_headers` append.
  headers.push({ name: 'User-Agent', value: buildUserAgentString(userAgentEntry) });

  return { customHeaders: headers };
}
223369
224370/**
@@ -282,7 +428,8 @@ export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNative
282428 httpPath : string ;
283429 intervalsAsString : boolean ;
284430 maxConnections ?: number ;
285- } & SeaTlsOptions = {
431+ } & SeaTlsOptions &
432+ SeaHttpOptions = {
286433 hostName : options . host ,
287434 httpPath : prependSlash ( options . path ) ,
288435 // Match the NodeJS Thrift driver, which surfaces INTERVAL columns as
@@ -292,9 +439,12 @@ export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNative
292439 // (native Arrow) — they already decode identically to Thrift via the
293440 // shared Arrow converter, so `complexTypesAsJson` is not forced on.
294441 intervalsAsString : true ,
295- // TLS knobs (server-cert verification toggle + custom CA). Validated and
296- // normalised (string PEM → Buffer) here so the napi shape only sees a Buffer.
442+ // TLS knobs (server-cert verification toggle + custom CA + mTLS client
443+ // identity). Validated and normalised (string PEM → Buffer) here so the
444+ // napi shape only sees a Buffer.
297445 ...buildSeaTlsOptions ( options ) ,
446+ // HTTP headers (caller `customHeaders` + composed `User-Agent`).
447+ ...buildSeaHttpOptions ( options ) ,
298448 } ;
299449
300450 // SEA-only pool sizing; read via cast to match how this function reads the
0 commit comments