Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions js/src/factories.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ export function makeBuilder<T extends dtypes.DataType = any, TNull = any>(option
export function vectorFromArray(values: readonly (null | undefined)[], type?: dtypes.Null): Vector<dtypes.Null>;
export function vectorFromArray(values: readonly (null | undefined | boolean)[], type?: dtypes.Bool): Vector<dtypes.Bool>;
export function vectorFromArray<T extends dtypes.Utf8 | dtypes.Dictionary<dtypes.Utf8> = dtypes.Dictionary<dtypes.Utf8, dtypes.Int32>>(values: readonly (null | undefined | string)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Date_>(values: readonly (null | undefined | Date)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.TimestampMillisecond>(values: readonly (null | undefined | Date)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Int>(values: readonly (null | undefined | number)[], type: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Int64 | dtypes.Uint64 = dtypes.Int64>(values: readonly (null | undefined | bigint)[], type?: T): Vector<T>;
export function vectorFromArray<T extends dtypes.Float = dtypes.Float64>(values: readonly (null | undefined | number)[], type?: T): Vector<T>;
Expand Down Expand Up @@ -145,7 +145,7 @@ function inferType(value: readonly unknown[]): dtypes.DataType {
} else if (booleansCount + nullsCount === value.length) {
return new dtypes.Bool;
} else if (datesCount + nullsCount === value.length) {
return new dtypes.DateMillisecond;
return new dtypes.TimestampMillisecond;
} else if (arraysCount + nullsCount === value.length) {
const array = value as Array<unknown>[];
const childType = inferType(array[array.findIndex((ary) => ary != null)]);
Expand Down
40 changes: 31 additions & 9 deletions js/src/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -333,23 +333,47 @@ export class Decimal extends DataType<Type.Decimal> {
/** @ignore */
export type Dates = Type.Date | Type.DateDay | Type.DateMillisecond;
/** @ignore */
export interface Date_<T extends Dates = Dates> extends DataType<T> { TArray: Int32Array; TValue: Date; ArrayType: TypedArrayConstructor<Int32Array> }
type DateType = {
[Type.Date]: { TArray: Int32Array | BigInt64Array };
[Type.DateDay]: { TArray: Int32Array };
[Type.DateMillisecond]: { TArray: BigInt64Array };
};
/** @ignore */
export interface Date_<T extends Dates = Dates> extends DataType<T> {
TArray: DateType[T]['TArray'];
TValue: number;
}
/** @ignore */
export class Date_<T extends Dates = Dates> extends DataType<T> {
constructor(public readonly unit: DateUnit) {
super(Type.Date as T);
}
public toString() { return `Date${(this.unit + 1) * 32}<${DateUnit[this.unit]}>`; }

public get ArrayType() {
return this.unit === DateUnit.DAY ? Int32Array : BigInt64Array;
}
protected static [Symbol.toStringTag] = ((proto: Date_) => {
(<any>proto).unit = null;
(<any>proto).ArrayType = Int32Array;
return proto[Symbol.toStringTag] = 'Date';
})(Date_.prototype);
}

/** @ignore */
export class DateDay extends Date_<Type.DateDay> { constructor() { super(DateUnit.DAY); } }
/** @ignore */
/**
* A signed 64-bit date representing the elapsed time since UNIX epoch (1970-01-01) in milliseconds.
* According to the specification, this should be treated as the number of days, in milliseconds, since the UNIX epoch.
* Therefore, values must be evenly divisible by `86_400_000` (the number of milliseconds in a standard day).
*
* Practically, validation that values of this type are evenly divisible by `86_400_000` is not enforced by this library
* for performance and usability reasons.
*
* Users should prefer to use {@link DateDay} to cleanly represent the number of days. For JS dates,
* {@link TimestampMillisecond} is the preferred type.
*
* @ignore
*/
export class DateMillisecond extends Date_<Type.DateMillisecond> { constructor() { super(DateUnit.MILLISECOND); } }

/** @ignore */
Expand Down Expand Up @@ -405,9 +429,9 @@ export class TimeNanosecond extends Time_<Type.TimeNanosecond> { constructor() {
type Timestamps = Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond;
/** @ignore */
interface Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> {
TArray: Int32Array;
TArray: BigInt64Array;
TValue: number;
ArrayType: TypedArrayConstructor<Int32Array>;
ArrayType: BigIntArrayConstructor<BigInt64Array>;
}

/** @ignore */
Expand All @@ -420,7 +444,7 @@ class Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> {
protected static [Symbol.toStringTag] = ((proto: Timestamp_) => {
(<any>proto).unit = null;
(<any>proto).timezone = null;
(<any>proto).ArrayType = Int32Array;
(<any>proto).ArrayType = BigInt64Array;
return proto[Symbol.toStringTag] = 'Timestamp';
})(Timestamp_.prototype);
}
Expand Down Expand Up @@ -471,7 +495,7 @@ type Durations = Type.Duration | Type.DurationSecond | Type.DurationMillisecond
export interface Duration<T extends Durations = Durations> extends DataType<T> {
TArray: BigInt64Array;
TValue: bigint;
ArrayType: BigInt64Array;
ArrayType: BigIntArrayConstructor<BigInt64Array>;
}

/** @ignore */
Expand Down Expand Up @@ -725,8 +749,6 @@ export function strideForType(type: DataType) {
const t: any = type;
switch (type.typeId) {
case Type.Decimal: return (type as Decimal).bitWidth / 32;
case Type.Timestamp: return 2;
case Type.Date: return 1 + (t as Date_).unit;
case Type.Interval: return 1 + (t as Interval_).unit;
// case Type.Int: return 1 + +((t as Int_).bitWidth > 32);
// case Type.Time: return 1 + +((t as Time_).bitWidth > 32);
Expand Down
13 changes: 13 additions & 0 deletions js/src/util/bigint.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,16 @@ export function bigIntToNumber(number: bigint | number): number {
}
return Number(number);
}

/**
 * Divides a bigint by a bigint divisor and returns the quotient as a number,
 * including its fractional part.
 *
 * BigInt division truncates, so `number / divisor` alone would discard the
 * fraction. The truncated quotient and the remainder (`number % divisor`) are
 * therefore converted to numbers separately and recombined.
 * Assumes that the result fits into a number.
 *
 * @param number The number to divide.
 * @param divisor The divisor.
 * @returns The result of the division as a number.
 */
export function divideBigInts(number: bigint, divisor: bigint): number {
    // Whole part of the quotient (bigint division truncates).
    const wholePart = bigIntToNumber(number / divisor);
    // What the truncating division left over.
    const remainder = bigIntToNumber(number % divisor);
    // Turn the leftover into the fractional part of the quotient.
    const fraction = remainder / bigIntToNumber(divisor);
    return wholePart + fraction;
}
21 changes: 7 additions & 14 deletions js/src/visitor/get.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import { Vector } from '../vector.js';
import { Visitor } from '../visitor.js';
import { MapRow } from '../row/map.js';
import { StructRow, StructRowProxy } from '../row/struct.js';
import { bigIntToNumber } from '../util/bigint.js';
import { bigIntToNumber, divideBigInts } from '../util/bigint.js';
import { decodeUtf8 } from '../util/utf8.js';
import { TypeToDataType } from '../interfaces.js';
import { uint16ToFloat64 } from '../util/math.js';
Expand Down Expand Up @@ -106,13 +106,6 @@ function wrapGet<T extends DataType>(fn: (data: Data<T>, _1: any) => any) {
}

/**
 * Reads the value stored at `index` and scales it by 86400000
 * (the number of milliseconds in a day).
 * @ignore
 */
const epochDaysToMs = (data: Int32Array, index: number) => {
    const MS_PER_DAY = 86400000;
    return data[index] * MS_PER_DAY;
};
/** @ignore */const epochMillisecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1]) + (data[index] >>> 0);
/** @ignore */const epochMicrosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000) + ((data[index] >>> 0) / 1000);
/** @ignore */const epochNanosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000000) + ((data[index] >>> 0) / 1000000);

/** @ignore */const epochMillisecondsToDate = (epochMs: number) => new Date(epochMs);
/** @ignore */const epochDaysToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochDaysToMs(data, index));
/** @ignore */const epochMillisecondsLongToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochMillisecondsLongToMs(data, index));

/** @ignore */
const getNull = <T extends Null>(_data: Data<T>, _index: number): T['TValue'] => null;
Expand All @@ -139,9 +132,9 @@ type Numeric1X = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32 | Float32 | Floa
type Numeric2X = Int64 | Uint64;

/** @ignore */
const getDateDay = <T extends DateDay>({ values }: Data<T>, index: number): T['TValue'] => epochDaysToDate(values, index);
const getDateDay = <T extends DateDay>({ values }: Data<T>, index: number): T['TValue'] => epochDaysToMs(values, index);
/** @ignore */
const getDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number): T['TValue'] => epochMillisecondsLongToDate(values, index * 2);
const getDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number): T['TValue'] => bigIntToNumber(values[index]);
/**
 * Returns the element of `values` located at `stride * index`.
 * @ignore
 */
const getNumeric = <T extends Numeric1X>(data: Data<T>, index: number): T['TValue'] => {
    const { stride, values } = data;
    const offset = stride * index;
    return values[offset];
};
/** @ignore */
Expand Down Expand Up @@ -178,13 +171,13 @@ const getDate = <T extends Date_>(data: Data<T>, index: number): T['TValue'] =>
);

/** @ignore */
const getTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number): T['TValue'] => 1000 * epochMillisecondsLongToMs(values, index * 2);
const getTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number): T['TValue'] => 1000 * bigIntToNumber(values[index]);
/** @ignore */
const getTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number): T['TValue'] => epochMillisecondsLongToMs(values, index * 2);
const getTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number): T['TValue'] => bigIntToNumber(values[index]);
/** @ignore */
const getTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number): T['TValue'] => epochMicrosecondsLongToMs(values, index * 2);
const getTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number): T['TValue'] => divideBigInts(values[index], 1000n);
/** @ignore */
const getTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number): T['TValue'] => epochNanosecondsLongToMs(values, index * 2);
const getTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number): T['TValue'] => divideBigInts(values[index], 1000000n);
/* istanbul ignore next */
/** @ignore */
const getTimestamp = <T extends Timestamp>(data: Data<T>, index: number): T['TValue'] => {
Expand Down
9 changes: 5 additions & 4 deletions js/src/visitor/iterator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,11 @@ function vectorIterator<T extends DataType>(vector: Vector<T>): IterableIterator

// Fast case, defer to native iterators if possible
if (vector.nullCount === 0 && vector.stride === 1 && (
(type.typeId === Type.Timestamp) ||
(type instanceof Int && (type as Int).bitWidth !== 64) ||
(type instanceof Time && (type as Time).bitWidth !== 64) ||
(type instanceof Float && (type as Float).precision !== Precision.HALF)
// Don't defer to native iterator for timestamps since Numbers are expected
// (DataType.isTimestamp(type)) && type.unit === TimeUnit.MILLISECOND ||
(DataType.isInt(type) && type.bitWidth !== 64) ||
(DataType.isTime(type) && type.bitWidth !== 64) ||
(DataType.isFloat(type) && type.precision !== Precision.HALF)
)) {
return new ChunkedIterator(vector.data.length, (chunkIndex) => {
const data = vector.data[chunkIndex];
Expand Down
25 changes: 5 additions & 20 deletions js/src/visitor/set.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,21 +109,6 @@ function wrapSet<T extends DataType>(fn: (data: Data<T>, _1: any, _2: any) => vo

/**
 * Converts `epochMs` to a whole number of days (rounded down, 86400000 ms per day)
 * and writes it to `data[index]`.
 * @ignore
 */
export const setEpochMsToDays = (data: Int32Array, index: number, epochMs: number) => {
    const MS_PER_DAY = 86400000;
    const wholeDays = Math.floor(epochMs / MS_PER_DAY);
    data[index] = wholeDays;
};
/** @ignore */
export const setEpochMsToMillisecondsLong = (data: Int32Array, index: number, epochMs: number) => {
data[index] = Math.floor(epochMs % 4294967296);
data[index + 1] = Math.floor(epochMs / 4294967296);
};
/** @ignore */
export const setEpochMsToMicrosecondsLong = (data: Int32Array, index: number, epochMs: number) => {
data[index] = Math.floor((epochMs * 1000) % 4294967296);
data[index + 1] = Math.floor((epochMs * 1000) / 4294967296);
};
/** @ignore */
export const setEpochMsToNanosecondsLong = (data: Int32Array, index: number, epochMs: number) => {
data[index] = Math.floor((epochMs * 1000000) % 4294967296);
data[index + 1] = Math.floor((epochMs * 1000000) / 4294967296);
};

/** @ignore */
export const setVariableWidthBytes = <T extends Int32Array | BigInt64Array>(values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => {
Expand Down Expand Up @@ -161,7 +146,7 @@ export const setAnyFloat = <T extends Float>(data: Data<T>, index: number, value
/** @ignore */
export const setDateDay = <T extends DateDay>({ values }: Data<T>, index: number, value: T['TValue']): void => { setEpochMsToDays(values, index, value.valueOf()); };
/** @ignore */
export const setDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { setEpochMsToMillisecondsLong(values, index * 2, value.valueOf()); };
export const setDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(value); };
/**
 * Copies the first `stride` elements of `value` into `values`,
 * starting at offset `stride * index`.
 * @ignore
 */
export const setFixedSizeBinary = <T extends FixedSizeBinary>(data: Data<T>, index: number, value: T['TValue']): void => {
    const { stride, values } = data;
    // Only the leading `stride` elements of the incoming value are written.
    const slice = value.subarray(0, stride);
    values.set(slice, stride * index);
};

Expand All @@ -178,13 +163,13 @@ export const setDate = <T extends Date_>(data: Data<T>, index: number, value: T[
};

/** @ignore */
export const setTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value / 1000);
export const setTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(value / 1000); };
/** @ignore */
export const setTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value);
export const setTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(value); };
/** @ignore */
export const setTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToMicrosecondsLong(values, index * 2, value);
export const setTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(value * 1000); };
/** @ignore */
export const setTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToNanosecondsLong(values, index * 2, value);
export const setTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { values[index] = BigInt(value * 1000000); };
/* istanbul ignore next */
/** @ignore */
export const setTimestamp = <T extends Timestamp>(data: Data<T>, index: number, value: T['TValue']): void => {
Expand Down
22 changes: 5 additions & 17 deletions js/test/generate-test-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -402,10 +402,7 @@ function generateDate<T extends Date_>(this: TestDataVectorGenerator, type: T, l
const data = type.unit === DateUnit.DAY
? createDate32(length, nullBitmap, values)
: createDate64(length, nullBitmap, values);
return {
values: () => values.map((x) => x == null ? null : new Date(x)),
vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })])
};
return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) };
}

function generateTimestamp<T extends Timestamp>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector<T> {
Expand Down Expand Up @@ -750,32 +747,23 @@ function createDate32(length: number, nullBitmap: Uint8Array, values: (number |
}

function createDate64(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) {
const data = new Int32Array(length * 2).fill(0);
const data = new BigInt64Array(length).fill(0n);
const data32 = createDate32(length, nullBitmap, values);
iterateBitmap(length, nullBitmap, (i, valid) => {
if (valid) {
const value = data32[i] * 86400000;
const hi = Math.trunc(value / 4294967296);
const lo = Math.trunc(value - 4294967296 * hi);
values[i] = value;
data[i * 2 + 0] = lo;
data[i * 2 + 1] = hi;
data[i] = BigInt(data32[i] * 86400000);
}
});
return data;
}

function createTimestamp(length: number, nullBitmap: Uint8Array, multiple: number, values: (number | null)[] = []) {
const mult = 86400 * multiple;
const data = new Int32Array(length * 2).fill(0);
const data = new BigInt64Array(length).fill(0n);
const data32 = createDate32(length, nullBitmap, values);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit weird, don't you want to create timestamp data that does not represent whole days?

Copy link
Member Author

@domoritz domoritz Apr 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. I rewrote the timestamp generation logic to generate meaningful timestamps.

iterateBitmap(length, nullBitmap, (i, valid) => {
if (valid) {
const value = data32[i] * mult;
const hi = Math.trunc(value / 4294967296);
const lo = Math.trunc(value - 4294967296 * hi);
data[i * 2 + 0] = lo;
data[i * 2 + 1] = hi;
data[i] = BigInt(data32[i] * mult);
}
});
return data;
Expand Down
20 changes: 9 additions & 11 deletions js/test/unit/builders/date-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@

import 'web-streams-polyfill';
import {
date32sNoNulls,
date32sWithNulls,
date64sNoNulls,
date64sWithNulls,
dateNoNulls,
dateWithNulls,
encodeAll,
encodeEach,
encodeEachDOM,
Expand All @@ -41,14 +39,14 @@ describe('DateDayBuilder', () => {
testDOMStreams && runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(() => new DateDay(), 25));
testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(() => new DateDay(), 25));

function runTestsWithEncoder(name: string, encode: (vals: (Date | null)[], nullVals?: any[]) => Promise<Vector<DateDay>>) {
function runTestsWithEncoder(name: string, encode: (vals: (number | null)[], nullVals?: any[]) => Promise<Vector<DateDay>>) {
describe(`${encode.name} ${name}`, () => {
it(`encodes dates no nulls`, async () => {
const vals = date32sNoNulls(20);
const vals = dateNoNulls(20);
validateVector(vals, await encode(vals, []), []);
});
it(`encodes dates with nulls`, async () => {
const vals = date32sWithNulls(20);
const vals = dateWithNulls(20);
validateVector(vals, await encode(vals, [null]), [null]);
});
});
Expand All @@ -63,14 +61,14 @@ describe('DateMillisecondBuilder', () => {
testDOMStreams && runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(() => new DateMillisecond(), 25));
testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(() => new DateMillisecond(), 25));

function runTestsWithEncoder(name: string, encode: (vals: (Date | null)[], nullVals?: any[]) => Promise<Vector<DateMillisecond>>) {
function runTestsWithEncoder(name: string, encode: (vals: (number | null)[], nullVals?: any[]) => Promise<Vector<DateMillisecond>>) {
describe(`${encode.name} ${name}`, () => {
it(`encodes dates no nulls`, async () => {
const vals = date64sNoNulls(20);
const vals = dateNoNulls(20);
validateVector(vals, await encode(vals, []), []);
});
it(`encodes dates with nulls`, async () => {
const vals = date64sWithNulls(20);
const vals = dateWithNulls(20);
validateVector(vals, await encode(vals, [null]), [null]);
});
});
Expand Down Expand Up @@ -100,7 +98,7 @@ describe('DateMillisecondBuilder with nulls', () => {
'2019-03-10T21:15:32.237Z',
'2019-03-21T07:25:34.864Z',
null
].map((x) => x === null ? x : new Date(x));
].map((x) => x === null ? x : new Date(x).getTime());
it(`encodes dates with nulls`, async () => {
const vals = dates.slice();
validateVector(vals, await encode(vals, [null]), [null]);
Expand Down
Loading