Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion KERNEL_REV
Original file line number Diff line number Diff line change
@@ -1 +1 @@
80b68e1eef3b613910183a50dfa4dace854d50dd
fcc459bbf3f39bf57e2ee02f14b99c0ec7a70123
7 changes: 7 additions & 0 deletions lib/DBSQLClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I

useLZ4Compression: true,

preserveBigNumericPrecision: false,

// Telemetry defaults are sourced from DEFAULT_TELEMETRY_CONFIG so
// every component reads from the same single frozen const. Mapping the
// unprefixed TelemetryConfiguration keys to the `telemetry`-prefixed
Expand Down Expand Up @@ -604,6 +606,11 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
this.config.enableMetricViewMetadata = options.enableMetricViewMetadata;
}

// Opt-in: preserve DECIMAL (string) / BIGINT (bigint) precision in results.
if (options.preserveBigNumericPrecision !== undefined) {
this.config.preserveBigNumericPrecision = options.preserveBigNumericPrecision;
}

// Override telemetry config if provided in options. Per-key narrowed copy
// preserves the structural type system: `ConnectionOptions` and
// `ClientConfig` declare identical types for these knobs, so a user
Expand Down
41 changes: 39 additions & 2 deletions lib/DBSQLParameter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,37 @@ export enum DBSQLParameterType {
INTERVALDAY = 'INTERVAL DAY',
}

// Inclusive limits of a signed 32-bit integer, i.e. the value range of the
// Spark `INT` type: [-2^31, 2^31 - 1].
const INT32_MAX = 2 ** 31 - 1;
const INT32_MIN = -(2 ** 31);

/**
 * Choose the Spark parameter type for a JS `number` whose type the caller left
 * unspecified.
 *
 * Every JS `number` is an IEEE-754 double, so being a whole number does not
 * imply fitting into `INT` (consider `1e30`). Sending such a value as
 * `INTEGER` gets it rejected by the server (`invalid INT literal "1e+30"`),
 * so the narrowest type that really holds the value is selected:
 * - whole number inside the INT (i32) range → `INTEGER`
 * - any other safe integer → `BIGINT`
 * - everything else (fractions, `NaN`, `±Infinity`, integers beyond the
 *   safe-integer range) → `DOUBLE`. Integers that large are not exactly
 *   representable anyway; callers needing exact 64-bit integers should pass a
 *   `bigint`.
 *
 * @param value The numeric parameter value to classify.
 * @returns The inferred `DBSQLParameterType`.
 */
function inferNumberType(value: number): DBSQLParameterType {
  const fitsInt32 = Number.isInteger(value) && INT32_MIN <= value && value <= INT32_MAX;
  if (fitsInt32) {
    return DBSQLParameterType.INTEGER;
  }
  // `isSafeInteger` is false for fractions and non-finite values as well, so a
  // single check separates BIGINT from every remaining DOUBLE case.
  return Number.isSafeInteger(value) ? DBSQLParameterType.BIGINT : DBSQLParameterType.DOUBLE;
}

interface DBSQLParameterOptions {
type?: DBSQLParameterType;
value: DBSQLParameterValue;
Expand Down Expand Up @@ -78,7 +109,7 @@ export class DBSQLParameter {
if (typeof this.value === 'number') {
return new TSparkParameter({
name,
type: wireType ?? (Number.isInteger(this.value) ? DBSQLParameterType.INTEGER : DBSQLParameterType.DOUBLE),
type: wireType ?? inferNumberType(this.value),
value: new TSparkParameterValue({
stringValue: Number(this.value).toString(),
}),
Expand All @@ -96,11 +127,17 @@ export class DBSQLParameter {
}

if (this.value instanceof Date) {
// A `Date` bound as `DATE` must project a calendar date (`yyyy-mm-dd`),
// not a full ISO-8601 timestamp: the SEA wire rejects
// `2024-03-14T00:00:00.000Z` as a DATE literal ("trailing input"), and
// Thrift accepts the date-only form just as well. Without an explicit
// DATE type the value still binds as a TIMESTAMP from the full ISO string.
const isDateType = wireType === DBSQLParameterType.DATE;
return new TSparkParameter({
name,
type: wireType ?? DBSQLParameterType.TIMESTAMP,
value: new TSparkParameterValue({
stringValue: this.value.toISOString(),
stringValue: isDateType ? this.value.toISOString().slice(0, 10) : this.value.toISOString(),
}),
});
}
Expand Down
6 changes: 6 additions & 0 deletions lib/contracts/IClientContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ export interface ClientConfig {
useLZ4Compression: boolean;
enableMetricViewMetadata?: boolean;

// When true, DECIMAL values are returned as exact strings and 64-bit
// integers as JS `bigint`, instead of being coerced to a lossy `number`.
// Off by default to preserve the long-standing representation on both the
// Thrift and SEA backends. See `ConnectionOptions.preserveBigNumericPrecision`.
preserveBigNumericPrecision?: boolean;

// Telemetry configuration
telemetryEnabled?: boolean;
telemetryBatchSize?: number;
Expand Down
9 changes: 9 additions & 0 deletions lib/contracts/IDBSQLClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@ export type ConnectionOptions = {
proxy?: ProxyOptions;
enableMetricViewMetadata?: boolean;

/**
* Preserve full numeric precision in results. When `true`, DECIMAL columns
* are returned as exact strings and 64-bit integers (BIGINT) as JS `bigint`,
* instead of the default lossy coercion to a JS `number` (which silently
* rounds DECIMALs and integers beyond 2^53). Applies to both the Thrift and
* SEA backends. Defaults to `false` to preserve the existing representation.
*/
preserveBigNumericPrecision?: boolean;

/**
* Extra HTTP headers attached to driver-owned out-of-band requests
* (telemetry POSTs and feature-flag GETs). Not applied to the primary
Expand Down
35 changes: 34 additions & 1 deletion lib/contracts/InternalConnectionOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,49 @@ export interface InternalConnectionOptions {
/**
* SEA-only: verify the server's TLS certificate. Secure-by-default — omit
* to keep full chain + hostname verification; set `false` only to opt into
* the insecure accept-anything mode.
* the insecure accept-anything mode. This is the master verify toggle:
* `false` also subsumes the hostname check (see
* `checkServerCertificateHostname`). Mirrors the Python connector's
* `_tls_no_verify` (inverted).
* @internal SEA path only.
*/
checkServerCertificate?: boolean;

/**
* SEA-only: verify that the server certificate matches the host
* (hostname-vs-SNI check), independently of full chain validation. Omit
* to keep the secure default (on); set `false` to skip only the hostname
* check while still validating the chain — e.g. connecting via an IP
* literal or a host the cert wasn't issued for. No-op when
* `checkServerCertificate` is `false` (that disables everything). Mirrors
* the Python connector's `_tls_verify_hostname`.
* @internal SEA path only.
*/
checkServerCertificateHostname?: boolean;

/**
* SEA-only: PEM-encoded CA certificate (string or `Buffer`) added to the
* trust store on top of the system roots — for TLS-inspecting proxies or
* on-prem internal CAs. Honoured regardless of `checkServerCertificate`.
* @internal SEA path only.
*/
customCaCert?: Buffer | string;

/**
* SEA-only: PEM-encoded client certificate (string or `Buffer`) for
* mutual TLS (mTLS). Must be supplied together with `clientKeyPem`; a
* leaf cert optionally followed by its intermediate chain is accepted.
* Mirrors the Python connector's `_tls_client_cert_file`.
* @internal SEA path only.
*/
clientCertPem?: Buffer | string;

/**
* SEA-only: PEM-encoded private key (string or `Buffer`) for the mTLS
* client certificate. Must be supplied together with `clientCertPem`.
* For portability supply a PKCS#8 key (`BEGIN PRIVATE KEY`). Mirrors the
* Python connector's `_tls_client_cert_key_file`.
* @internal SEA path only.
*/
clientKeyPem?: Buffer | string;
}
29 changes: 29 additions & 0 deletions lib/contracts/OperationStatus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,33 @@ export interface OperationStatus {
* to `WaitUntilReadyOptions.callback` for the consumer to interpret.
*/
progressUpdateResponse?: unknown;

/**
* Number of rows modified by a DML statement (UPDATE / INSERT / DELETE /
* MERGE). `undefined`/`null` for SELECT and on backends/warehouses that do
* not surface the counter. Mirrors Thrift's
* `TGetOperationStatusResp.numModifiedRows`.
*/
numModifiedRows?: number | null;

/**
* Server-supplied user-facing message, when the backend exposes one. Mirrors
* Thrift's `TGetOperationStatusResp.displayMessage`. May contain SQL
* fragments or parameter values — treat as potentially sensitive.
*/
displayMessage?: string | null;

/**
* Server-supplied diagnostic detail (multi-line operator / stack context),
* when available. Mirrors Thrift's `TGetOperationStatusResp.diagnosticInfo`.
* For support surfaces, not user-facing.
*/
diagnosticInfo?: string | null;

/**
* Server-supplied JSON blob with extended error details, when available.
* Mirrors Thrift's `TGetOperationStatusResp.errorDetailsJson`. Pass-through
* string — callers parse with `JSON.parse` if they need structured access.
*/
errorDetailsJson?: string | null;
}
67 changes: 64 additions & 3 deletions lib/result/ArrowResultConverter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ import {
RecordBatchReader,
util as arrowUtils,
} from 'apache-arrow';
import { TTableSchema, TColumnDesc } from '../../thrift/TCLIService_types';
import { TTableSchema, TColumnDesc, TTypeId } from '../../thrift/TCLIService_types';
import IClientContext from '../contracts/IClientContext';
import HiveDriverError from '../errors/HiveDriverError';
import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider';
Expand Down Expand Up @@ -169,13 +169,41 @@ function formatDayTimeFromTotal(totalNanos: bigint): string {
return `${sign}${days.toString()} ${pad2(hours)}:${pad2(minutes)}:${pad2(seconds)}${fraction}`;
}

/**
 * Render an Arrow `Decimal` value — supplied as its unscaled integer (from
 * `bigNumToBigInt`) plus the column `scale` — as an exact decimal string,
 * e.g. unscaled `1234567890` / scale `5` → `"12345.67890"`. Used by the
 * precision-preserving path so high-precision DECIMALs survive the round-trip
 * instead of being flattened to an IEEE-754 double.
 *
 * The represented value is `unscaled × 10^(-scale)`:
 * - `scale > 0`: the last `scale` digits form the fractional part;
 * - `scale === 0`: the integer is printed unchanged;
 * - `scale < 0`: `-scale` trailing zeros are appended (`5` / `-2` → `"500"`),
 *   which agrees with the numeric `Number(x) / 10 ** scale` conversion.
 *
 * @param unscaled The decimal's unscaled integer value.
 * @param scale The decimal scale (digits to the right of the point; may be
 *   negative).
 * @returns The exact base-10 rendering, with a leading `-` for negatives.
 */
export function bigNumDecimalToString(unscaled: bigint, scale: number): string {
  const text = unscaled.toString();
  if (scale === 0) {
    return text;
  }
  if (scale < 0) {
    // Negative scale multiplies by a power of ten; zero stays a plain "0".
    return text === '0' ? text : text + '0'.repeat(-scale);
  }
  // Work on the magnitude's digits; the sign is re-attached at the end.
  const negative = text.startsWith('-');
  // `padStart(scale + 1)` guarantees at least one digit before the point
  // (e.g. unscaled `5` / scale `2` → `"005"` → `"0.05"`).
  const digits = (negative ? text.slice(1) : text).padStart(scale + 1, '0');
  const cut = digits.length - scale;
  return `${negative ? '-' : ''}${digits.slice(0, cut)}.${digits.slice(cut)}`;
}

export default class ArrowResultConverter implements IResultsProvider<Array<any>> {
private readonly context: IClientContext;

private readonly source: IResultsProvider<ArrowBatch>;

private readonly schema: Array<TColumnDesc>;

// When true, DECIMAL and 64-bit integer values keep full precision —
// DECIMAL as an exact string and BIGINT as a JS `bigint` — instead of being
// coerced to a lossy `number`. Enabled by the SEA backend, which always
// receives native Arrow `Decimal128` / `Int64` from the kernel and has no
// server-side "send as string" escape hatch (the Thrift backend gets the
// string form via `useArrowNativeTypes=false`). Off by default so the Thrift
// path keeps its long-standing `number` representation unchanged.
private readonly preserveBigNumericPrecision: boolean;

private recordBatchReader?: IterableIterator<RecordBatch<TypeMap>>;

// Remaining rows in current Arrow batch (not the record batch!)
Expand All @@ -193,10 +221,16 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
// operation backend and the SEA backend's neutral `ResultMetadata` —
// which both carry `schema?: TTableSchema` — can construct the converter
// without an adapter at the call site.
constructor(context: IClientContext, source: IResultsProvider<ArrowBatch>, { schema }: { schema?: TTableSchema }) {
constructor(
context: IClientContext,
source: IResultsProvider<ArrowBatch>,
{ schema }: { schema?: TTableSchema },
{ preserveBigNumericPrecision = false }: { preserveBigNumericPrecision?: boolean } = {},
) {
this.context = context;
this.source = source;
this.schema = getSchemaColumns(schema);
this.preserveBigNumericPrecision = preserveBigNumericPrecision;
}

public async hasMore() {
Expand Down Expand Up @@ -374,6 +408,11 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
if (value instanceof Object && value[isArrowBigNumSymbol]) {
const result = bigNumToBigInt(value);
if (DataType.isDecimal(valueType)) {
// Preserve full precision as an exact string when requested (SEA);
// otherwise keep the historical lossy `number` form.
if (this.preserveBigNumericPrecision) {
return bigNumDecimalToString(result, valueType.scale);
}
return Number(result) / 10 ** valueType.scale;
}
// A rewritten Duration Int64 surfaces as a raw `bigint`, not a BigNum
Expand All @@ -397,6 +436,12 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
if (durationUnit) {
return formatDurationToIntervalDayTime(value, durationUnit);
}
// Keep the exact `bigint` when precision must be preserved (SEA); the
// default path narrows to `number` for backward compatibility (the
// Thrift backend has always returned BIGINT as a JS `number`).
if (this.preserveBigNumericPrecision) {
return value;
}
return Number(value);
}

Expand All @@ -411,7 +456,23 @@ export default class ArrowResultConverter implements IResultsProvider<Array<any>
const typeDescriptor = column.typeDesc.types[0]?.primitiveEntry;
const field = column.columnName;
const value = record[field];
result[field] = value === null ? null : convertThriftValue(typeDescriptor, value);
if (value === null) {
result[field] = null;
return;
}
// When preserving precision, DECIMAL and BIGINT values were already
// produced in their exact form by `convertArrowTypes` (string / bigint).
// `convertThriftValue` would narrow both back to a lossy `number`
// (DECIMAL_TYPE → `Number(value)`, BIGINT_TYPE → `convertBigInt`), so
// pass them through untouched on this path.
if (
this.preserveBigNumericPrecision &&
(typeDescriptor?.type === TTypeId.DECIMAL_TYPE || typeDescriptor?.type === TTypeId.BIGINT_TYPE)
) {
result[field] = value;
return;
}
result[field] = convertThriftValue(typeDescriptor, value);
});

return result;
Expand Down
Loading
Loading