Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions KERNEL_REV
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
639e19ef97decc1c5aa2365c0b3a229c1ccd5b58
25 changes: 20 additions & 5 deletions lib/DBSQLParameter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ export enum DBSQLParameterType {
STRING = 'STRING',
DATE = 'DATE',
TIMESTAMP = 'TIMESTAMP',
// `TIMESTAMP_NTZ` binds a timezone-free (wall-clock) timestamp. It is a real
// Spark type, bound natively on both the Thrift and kernel backends (requires
// a server that supports TIMESTAMP_NTZ; Spark 3.4+ / recent DBR).
TIMESTAMP_NTZ = 'TIMESTAMP_NTZ',
// `TIMESTAMP_LTZ` is an alias for `TIMESTAMP`: Spark has no distinct
// TIMESTAMP_LTZ type — `TIMESTAMP` already carries local/instant (LTZ)
// semantics. `toSparkParameter` therefore binds it as `TIMESTAMP` on the wire
// (valid on both backends); it exists only as a self-documenting alias.
TIMESTAMP_LTZ = 'TIMESTAMP_LTZ',
FLOAT = 'FLOAT',
DECIMAL = 'DECIMAL',
DOUBLE = 'DOUBLE',
Expand Down Expand Up @@ -50,10 +59,16 @@ export class DBSQLParameter {
return new TSparkParameter({ name }); // for NULL neither `type` nor `value` should be set
}

// Map timezone-explicit timestamp aliases to their Spark wire type. Spark
// has no distinct TIMESTAMP_LTZ type (TIMESTAMP carries LTZ semantics), so
// bind it as TIMESTAMP — valid on both the Thrift and kernel backends.
// TIMESTAMP_NTZ is a real Spark type and is bound natively.
const wireType = this.type === DBSQLParameterType.TIMESTAMP_LTZ ? DBSQLParameterType.TIMESTAMP : this.type;

if (typeof this.value === 'boolean') {
return new TSparkParameter({
name,
type: this.type ?? DBSQLParameterType.BOOLEAN,
type: wireType ?? DBSQLParameterType.BOOLEAN,
value: new TSparkParameterValue({
stringValue: this.value ? 'TRUE' : 'FALSE',
}),
Expand All @@ -63,7 +78,7 @@ export class DBSQLParameter {
if (typeof this.value === 'number') {
return new TSparkParameter({
name,
type: this.type ?? (Number.isInteger(this.value) ? DBSQLParameterType.INTEGER : DBSQLParameterType.DOUBLE),
type: wireType ?? (Number.isInteger(this.value) ? DBSQLParameterType.INTEGER : DBSQLParameterType.DOUBLE),
value: new TSparkParameterValue({
stringValue: Number(this.value).toString(),
}),
Expand All @@ -73,7 +88,7 @@ export class DBSQLParameter {
if (this.value instanceof Int64 || typeof this.value === 'bigint') {
return new TSparkParameter({
name,
type: this.type ?? DBSQLParameterType.BIGINT,
type: wireType ?? DBSQLParameterType.BIGINT,
value: new TSparkParameterValue({
stringValue: this.value.toString(),
}),
Expand All @@ -83,7 +98,7 @@ export class DBSQLParameter {
if (this.value instanceof Date) {
return new TSparkParameter({
name,
type: this.type ?? DBSQLParameterType.TIMESTAMP,
type: wireType ?? DBSQLParameterType.TIMESTAMP,
value: new TSparkParameterValue({
stringValue: this.value.toISOString(),
}),
Expand All @@ -92,7 +107,7 @@ export class DBSQLParameter {

return new TSparkParameter({
name,
type: this.type ?? DBSQLParameterType.STRING,
type: wireType ?? DBSQLParameterType.STRING,
value: new TSparkParameterValue({
stringValue: this.value,
}),
Expand Down
25 changes: 24 additions & 1 deletion lib/contracts/IDBSQLSession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,18 @@ export type ExecuteStatementOptions = {
*/
queryTimeout?: number | bigint | Int64;
/**
* @deprecated This option is no longer supported and will be removed in future releases
* Selects the execution lifecycle. The only observable effect is WHEN
* `executeStatement` resolves; the result data, schema, and error classes are
* identical regardless.
*
* - **Thrift backend:** no-op. The Thrift path always submits asynchronously
* (`runAsync: true` on the wire) and polls during fetch; this option is not
* read.
* - **Kernel backend (`useSEA`):** selects the kernel execution path —
* `false`/unset (default) runs the blocking direct-results path (faster,
* cancellable mid-compute); `true` submits and polls (returns a pending
* handle before completion). Default is sync, matching the python
* connector's `cursor.execute()`.
*/
runAsync?: boolean;
maxRows?: number | bigint | Int64 | null;
Expand All @@ -27,6 +38,18 @@ export type ExecuteStatementOptions = {
* These tags apply only to this statement and do not persist across queries.
*/
queryTags?: Record<string, string | null | undefined>;
/**
* SEA-only: server-side row cap for this statement (kernel `row_limit`). The
* Thrift backend has no execute-time server cap, so this is a no-op there;
* use `maxRows` for the cross-backend client-side fetch limit.
*/
rowLimit?: number;
/**
* SEA-only: per-statement Spark conf overlay (kernel `statement_conf`).
* Merged with the serialized `queryTags` (which land under the reserved
* `query_tags` key). Ignored by the Thrift backend.
*/
statementConf?: Record<string, string>;
};

export type TypeInfoRequest = {
Expand Down
23 changes: 23 additions & 0 deletions lib/contracts/InternalConnectionOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,27 @@ export interface InternalConnectionOptions {
* @internal Not stable; M0 stub only.
*/
useSEA?: boolean;

/**
* SEA-only: kernel connection-pool size (`ConnectionOptions.max_connections`).
* Validated as a positive integer within the napi `u32` range.
* @internal SEA path only.
*/
maxConnections?: number;

/**
* SEA-only: verify the server's TLS certificate. Secure-by-default — omit
* to keep full chain + hostname verification; set `false` only to opt into
* the insecure accept-anything mode.
* @internal SEA path only.
*/
checkServerCertificate?: boolean;

/**
* SEA-only: PEM-encoded CA certificate (string or `Buffer`) added to the
* trust store on top of the system roots — for TLS-inspecting proxies or
* on-prem internal CAs. Honoured regardless of `checkServerCertificate`.
* @internal SEA path only.
*/
customCaCert?: Buffer | string;
}
141 changes: 140 additions & 1 deletion lib/sea/SeaAuth.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

import { ConnectionOptions } from '../contracts/IDBSQLClient';
import { InternalConnectionOptions } from '../contracts/InternalConnectionOptions';
import AuthenticationError from '../errors/AuthenticationError';
import HiveDriverError from '../errors/HiveDriverError';

Expand Down Expand Up @@ -66,9 +67,58 @@ export interface SeaSessionDefaults {
catalog?: string;
schema?: string;
sessionConf?: Record<string, string>;
/**
* Render `INTERVAL` / `DURATION` result columns as strings
* (kernel `ResultConfig.intervals_as_string`). The kernel default is
* native Arrow `month_interval` / `duration[us]`, but the NodeJS
* Thrift driver surfaces intervals as strings — so the SEA path sets
* this `true` so its result shape is a byte-compatible drop-in for the
* Thrift backend. Omitting it falls back to the kernel's native types.
*/
intervalsAsString?: boolean;
/**
* Render complex (`ARRAY` / `MAP` / `STRUCT` / `VARIANT`) result
* columns as JSON strings (kernel `ResultConfig.complex_types_as_json`).
* Left unset on the SEA path: native Arrow nested types already decode
* identically to the Thrift backend through the shared Arrow converter,
* so forcing JSON here would *introduce* a divergence rather than
* remove one.
*/
complexTypesAsJson?: boolean;
/**
* Per-session kernel connection-pool size
* (kernel `ConnectionOptions.max_connections`). Validated as a positive
* integer within the napi `u32` range by `buildSeaConnectionOptions`.
*/
maxConnections?: number;
}

/**
* TLS options shared across all auth-mode variants. Mirror the napi
* binding's `ConnectionOptions.checkServerCertificate` / `.customCaCert`
* (kernel `Session::builder().tls(TlsConfig)`).
*
* The napi shape takes `customCaCert` as a `Buffer` only; the public
* `ConnectionOptions` additionally accepts a PEM string, which
* `buildSeaConnectionOptions` normalises to a `Buffer` before crossing
* the FFI boundary.
*/
export interface SeaTlsOptions {
/**
* Verify the server's TLS certificate. The SEA backend is
* **secure-by-default**: omitting this leaves the kernel default of
* `true` (full chain + hostname verification). Set `false` only to opt
* into the insecure, accept-anything mode (analogous to Thrift's
* `rejectUnauthorized: false`); prefer pairing strict checking with
* `customCaCert` over disabling verification entirely.
*/
checkServerCertificate?: boolean;
/** PEM-encoded CA bytes to add to the trust store. */
customCaCert?: Buffer;
}

export type SeaNativeConnectionOptions = SeaSessionDefaults &
SeaTlsOptions &
(
| {
hostName: string;
Expand Down Expand Up @@ -114,6 +164,63 @@ export function isBlankOrReserved(s: string): boolean {
return normalized.length === 0 || normalized === 'undefined' || normalized === 'null';
}

/** napi-rs marshals `maxConnections` as a `u32`; reject values it can't hold. */
const MAX_U32 = 0xffffffff;

/**
* Normalise the public TLS options (`checkServerCertificate` /
* `customCaCert`) into the napi shape.
*
* - `checkServerCertificate` passes through verbatim (only when set; an
* absent value leaves the kernel default, which is secure — verify on).
* - `customCaCert` accepts a PEM string or `Buffer` on the public
* surface; we convert a string to a `Buffer` here and do a light PEM
* sanity check. The bytes are NOT parsed in JS — the kernel returns a
* meaningful error if the PEM is malformed.
*
* Throws `HiveDriverError` when `customCaCert` is supplied but empty or
* (for strings) lacks a PEM certificate header.
*/
export function buildSeaTlsOptions(options: ConnectionOptions): SeaTlsOptions {
// Read the SEA-only fields through the purpose-built internal options type
// rather than an ad-hoc inline cast, so the shape can't silently drift from
// its declaration and a typo'd key fails to compile.
const { checkServerCertificate, customCaCert } = options as ConnectionOptions & InternalConnectionOptions;

const tls: SeaTlsOptions = {};

if (checkServerCertificate !== undefined) {
tls.checkServerCertificate = checkServerCertificate;
}

if (customCaCert !== undefined) {
if (typeof customCaCert === 'string') {
// Light PEM sanity check — require a well-ordered BEGIN…END block so a
// truncated/headerless cert (or a stray page that merely contains both
// literals out of order, e.g. a proxy-intercept page) is rejected here
// rather than surfacing as an opaque kernel TLS error. Ordered match, not
// two independent substring checks. Full parsing is deferred to the kernel.
if (!/-----BEGIN CERTIFICATE-----[\s\S]+?-----END CERTIFICATE-----/.test(customCaCert)) {
throw new HiveDriverError(
'SEA backend: `customCaCert` string does not look like a PEM certificate ' +
"(expected a '-----BEGIN CERTIFICATE-----' … '-----END CERTIFICATE-----' block). " +
'Pass PEM text or a Buffer of PEM bytes.',
);
}
tls.customCaCert = Buffer.from(customCaCert, 'utf8');
} else if (Buffer.isBuffer(customCaCert)) {
if (customCaCert.length === 0) {
throw new HiveDriverError('SEA backend: `customCaCert` Buffer is empty.');
}
tls.customCaCert = customCaCert;
} else {
throw new HiveDriverError('SEA backend: `customCaCert` must be a PEM string or a Buffer.');
}
}

return tls;
}

/**
* Validate the user-supplied `ConnectionOptions` and build the
* napi-binding's connection-options shape.
Expand Down Expand Up @@ -170,11 +277,43 @@ export function isBlankOrReserved(s: string): boolean {
export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNativeConnectionOptions {
const { authType } = options as { authType?: string };

const base = {
const base: {
hostName: string;
httpPath: string;
intervalsAsString: boolean;
maxConnections?: number;
} & SeaTlsOptions = {
hostName: options.host,
httpPath: prependSlash(options.path),
// Match the NodeJS Thrift driver, which surfaces INTERVAL columns as
// strings. The kernel defaults to native Arrow interval/duration types;
// forcing the string rendering here keeps the SEA path a byte-compatible
// drop-in. Complex types are intentionally left at the kernel default
// (native Arrow) — they already decode identically to Thrift via the
// shared Arrow converter, so `complexTypesAsJson` is not forced on.
intervalsAsString: true,
// TLS knobs (server-cert verification toggle + custom CA). Validated and
// normalised (string PEM → Buffer) here so the napi shape only sees a Buffer.
...buildSeaTlsOptions(options),
};

// SEA-only pool sizing; read via cast to match how this function reads the
// other SEA-specific options (TLS) — they live on the internal options
// surface, not the published public `ConnectionOptions` `.d.ts`.
const { maxConnections } = options as ConnectionOptions & InternalConnectionOptions;
if (maxConnections !== undefined) {
if (!Number.isInteger(maxConnections) || maxConnections < 1) {
throw new HiveDriverError(`SEA backend: \`maxConnections\` must be a positive integer; got ${maxConnections}.`);
}
if (maxConnections > MAX_U32) {
throw new HiveDriverError(
`SEA backend: \`maxConnections\` exceeds the napi u32 limit (${MAX_U32}); got ${maxConnections}. ` +
'Typical pool sizes are 10-500.',
);
}
base.maxConnections = maxConnections;
}

const oauth = options as {
oauthClientId?: string;
oauthClientSecret?: string;
Expand Down
18 changes: 18 additions & 0 deletions lib/sea/SeaBackend.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import IBackend from '../contracts/IBackend';
import ISessionBackend from '../contracts/ISessionBackend';
import IClientContext from '../contracts/IClientContext';
import { ConnectionOptions, OpenSessionRequest } from '../contracts/IDBSQLClient';
import { InternalConnectionOptions } from '../contracts/InternalConnectionOptions';
import { LogLevel } from '../contracts/IDBSQLLogger';
import HiveDriverError from '../errors/HiveDriverError';
import { getSeaNative, SeaNativeBinding, SeaConnection } from './SeaNativeLoader';
import { decodeNapiKernelError } from './SeaErrorMapping';
Expand Down Expand Up @@ -78,6 +80,22 @@ export default class SeaBackend implements IBackend {
// Any non-PAT mode (or a missing/empty token) throws here, before
// we ever touch the native binding.
this.nativeOptions = buildSeaConnectionOptions(options);

// Warn on the insecure combo: a `customCaCert` paired with
// `checkServerCertificate: false` is almost always a mistake — verification
// is fully off, so the custom trust anchor is never used. The combo is
// still honoured (kernel contract), but a secure-looking `customCaCert`
// shouldn't silently mask disabled verification.
const tlsOpts = options as ConnectionOptions & InternalConnectionOptions;
if (tlsOpts.checkServerCertificate === false && tlsOpts.customCaCert !== undefined) {
this.context
.getLogger()
.log(
LogLevel.warn,
'SEA: `customCaCert` is set but `checkServerCertificate: false` disables certificate ' +
'verification entirely — the custom CA is not used. Set `checkServerCertificate: true` to use it.',
);
}
}

public async openSession(request: OpenSessionRequest): Promise<ISessionBackend> {
Expand Down
19 changes: 19 additions & 0 deletions lib/sea/SeaNativeLoader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ import type {
ExecuteOptions as NativeExecuteOptions,
TypedValueInput as NativeTypedValueInput,
NamedTypedValueInput as NativeNamedTypedValueInput,
AsyncStatement as NativeAsyncStatement,
AsyncResultHandle as NativeAsyncResultHandle,
CancellableExecution as NativeCancellableExecution,
} from '../../native/sea';

// SEA-prefixed re-exports. The kernel-generated `.d.ts` keeps the
Expand All @@ -59,6 +62,22 @@ export type SeaNativeExecuteOptions = NativeExecuteOptions;
export type SeaNativeTypedValueInput = NativeTypedValueInput;
export type SeaNativeNamedTypedValueInput = NativeNamedTypedValueInput;

// Async-submit surface: `Connection.submitStatement` returns an
// `AsyncStatement` (status / awaitResult / cancel / close); `awaitResult`
// yields an `AsyncResultHandle` whose `fetchNextBatch` / `schema` match the
// blocking `Statement`'s fetch surface, so the results pipeline consumes
// either interchangeably.
export type SeaNativeAsyncStatement = NativeAsyncStatement;
export type SeaNativeAsyncResultHandle = NativeAsyncResultHandle;

// Cancellable sync-execute surface: `Connection.executeStatementCancellable`
// returns a `CancellableExecution` that captures a detached StatementCanceller
// BEFORE dispatching the blocking `execute()`, so a concurrent `cancel()`
// interrupts a still-running query mid-compute. `result()` drives the blocking
// execute and resolves to the same terminal `Statement` `executeStatement`
// returns.
export type SeaNativeCancellableExecution = NativeCancellableExecution;

/**
* The full native binding surface, derived from the generated module
* so it can never drift from the `.d.ts` contract: when the kernel
Expand Down
Loading
Loading