Skip to content

Commit 54d4f5b

Browse files
committed
lazily allocate table and recordbatch columns, support NestedView's getChildAt(i) method in ChunkedView
1 parent 40b3638 commit 54d4f5b

File tree

8 files changed

+50
-45
lines changed

8 files changed

+50
-45
lines changed

js/src/predicate.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ export class Col<T= any> extends Value<T> {
6161
}
6262
if (this.colidx < 0) { throw new Error(`Failed to bind Col "${this.name}"`); }
6363
}
64-
this.vector = batch.columns[this.colidx];
64+
this.vector = batch.getChildAt(this.colidx);
6565
return this.vector.get.bind(this.vector);
6666
}
6767

js/src/recordbatch.ts

+2-7
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import { Schema, Struct } from './type';
1919
import { flatbuffers } from 'flatbuffers';
2020
import { View, Vector, StructVector } from './vector';
21-
import { Data, NestedData, ChunkedData } from './data';
21+
import { Data, NestedData } from './data';
2222

2323
import Long = flatbuffers.Long;
2424

@@ -32,7 +32,6 @@ export class RecordBatch extends StructVector {
3232
public readonly schema: Schema;
3333
public readonly length: number;
3434
public readonly numCols: number;
35-
public readonly columns: Vector<any>[];
3635
constructor(schema: Schema, data: Data<Struct>, view: View<Struct>);
3736
constructor(schema: Schema, numRows: Long | number, cols: Data<any> | Vector[]);
3837
constructor(...args: any[]) {
@@ -42,9 +41,6 @@ export class RecordBatch extends StructVector {
4241
this.schema = args[0];
4342
this.length = data.length;
4443
this.numCols = this.schema.fields.length;
45-
this.columns = data instanceof ChunkedData
46-
? data.childVectors
47-
: data.childData.map((col) => Vector.create(col));
4844
} else {
4945
const [schema, numRows, cols] = args;
5046
const columns: Vector<any>[] = new Array(cols.length);
@@ -59,7 +55,6 @@ export class RecordBatch extends StructVector {
5955
}
6056
super(new NestedData(new Struct(schema.fields), numRows, null, columnsData));
6157
this.schema = schema;
62-
this.columns = columns;
6358
this.length = numRows;
6459
this.numCols = schema.fields.length;
6560
}
@@ -72,7 +67,7 @@ export class RecordBatch extends StructVector {
7267
const namesToKeep = columnNames.reduce((xs, x) => (xs[x] = true) && xs, Object.create(null));
7368
return new RecordBatch(
7469
this.schema.select(...columnNames), this.length,
75-
this.columns.filter((_, index) => namesToKeep[fields[index].name])
70+
this.childData.filter((_, index) => namesToKeep[fields[index].name])
7671
);
7772
}
7873
}

js/src/table.ts

+6-7
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ export class Table implements DataFrame {
6868
// List of inner RecordBatches
6969
public readonly batches: RecordBatch[];
7070
// List of inner Vectors, possibly spanning batches
71-
public readonly columns: Vector<any>[];
71+
protected readonly _columns: Vector<any>[] = [];
7272
// Union of all inner RecordBatches into one RecordBatch, possibly chunked.
7373
// If the Table has just one inner RecordBatch, this points to that.
7474
// If the Table has multiple inner RecordBatches, then this is a Chunked view
@@ -94,10 +94,7 @@ export class Table implements DataFrame {
9494
this.schema = schema;
9595
this.batches = batches;
9696
this.batchesUnion = batches.reduce((union, batch) => union.concat(batch));
97-
this.columns = batches.slice(1).reduce((columns, batch) =>
98-
columns.map((col, idx) => col.concat(batch.columns[idx])),
99-
batches[0].columns
100-
);
97+
// this.columns = schema.fields.map((_, i) => this.batchesUnion.getChildAt(i));
10198
this.length = this.batchesUnion.length;
10299
this.numCols = this.batchesUnion.numCols;
103100
}
@@ -108,7 +105,8 @@ export class Table implements DataFrame {
108105
return this.getColumnAt(this.getColumnIndex(name));
109106
}
110107
/**
 * Returns the column Vector at position `index`.
 * After this change the column is created on first access by asking
 * `batchesUnion` for its child at `index`, then cached in `_columns` so later
 * calls return the same Vector. The removed line read from an eagerly built
 * `columns` array instead.
 */
public getColumnAt(index: number) {
111-
return this.columns[index];
108+
return this._columns[index] || (
109+
this._columns[index] = this.batchesUnion.getChildAt(index));
112110
}
113111
public getColumnIndex(name: string) {
114112
return this.schema.fields.findIndex((f) => f.name === name);
@@ -265,7 +263,8 @@ export class CountByResult extends Table implements DataFrame {
265263
));
266264
}
267265
public toJSON(): Object {
268-
const [values, counts] = this.columns;
266+
const values = this.getColumnAt(0);
267+
const counts = this.getColumnAt(1);
269268
const result = {} as { [k: string]: number | null };
270269
for (let i = -1; ++i < this.length;) {
271270
result[values.get(i)] = counts.get(i);

js/src/vector.ts

+5-2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ export class Vector<T extends DataType = any> implements VectorLike, View<T>, Vi
3535
/**
 * Builds a Vector for `data` by delegating to `createVector`.
 */
public static create<T extends DataType>(data: Data<T>): Vector<T> {
3636
return createVector(data);
3737
}
38+
/**
 * Joins `sources` into one Vector by folding them left to right with the
 * instance `concat()` method. A single source is returned unchanged.
 * NOTE(review): with zero sources, `reduce()` runs on an empty array without
 * an initial value and throws a TypeError — confirm callers always pass at
 * least one vector.
 */
public static concat<T extends DataType>(...sources: Vector<T>[]): Vector<T> {
39+
return sources.length === 1 ? sources[0] : sources.reduce((a, b) => a.concat(b));
40+
}
3841
public type: T;
3942
public length: number;
4043
public readonly data: Data<T>;
@@ -84,7 +87,7 @@ export class Vector<T extends DataType = any> implements VectorLike, View<T>, Vi
8487
const { view } = this;
8588
const vecs = !(view instanceof ChunkedView)
8689
? [this, ...others]
87-
: [...view.chunks, ...others];
90+
: [...view.childVectors, ...others];
8891
const offsets = ChunkedData.computeOffsets(vecs);
8992
const chunksLength = offsets[offsets.length - 1];
9093
const chunkedData = new ChunkedData(this.type, chunksLength, vecs, 0, -1, offsets);
@@ -377,7 +380,7 @@ export class DictionaryVector<T extends DataType = DataType> extends Vector<Dict
377380
this.indicies = view.indicies;
378381
this.dictionary = data.dictionary;
379382
} else if (data instanceof ChunkedData && view instanceof ChunkedView) {
380-
const chunks = view.chunks as DictionaryVector<T>[];
383+
const chunks = view.childVectors as DictionaryVector<T>[];
381384
// Assume the last chunk's dictionary data is the most up-to-date,
382385
// including data from DictionaryBatches that were marked as deltas
383386
this.dictionary = chunks[chunks.length - 1].dictionary;

js/src/vector/chunked.ts

+19-13
Original file line numberDiff line numberDiff line change
@@ -16,31 +16,37 @@
1616
// under the License.
1717

1818
import { ChunkedData } from '../data';
19-
import { View, Vector } from '../vector';
19+
import { View, Vector, NestedVector } from '../vector';
2020
import { DataType, TypedArray, IterableArrayLike } from '../type';
2121

2222
export class ChunkedView<T extends DataType> implements View<T> {
23-
public chunks: Vector<T>[];
24-
public offsets: Uint32Array;
23+
public childVectors: Vector<T>[];
24+
public childOffsets: Uint32Array;
25+
protected _childColumns: Vector<any>[];
2526
/**
 * Captures the chunk vectors and their offsets from `data`.
 * `_childColumns` is not assigned here; `getChildAt()` creates it on first use.
 */
constructor(data: ChunkedData<T>) {
26-
this.chunks = data.childVectors;
27-
this.offsets = data.childOffsets;
27+
this.childVectors = data.childVectors;
28+
this.childOffsets = data.childOffsets;
2829
}
2930
/**
 * Returns a new ChunkedView built from `data`.
 * The class is named explicitly (not `this.constructor`), and nothing cached
 * on this view is handed to the new one.
 */
public clone(data: ChunkedData<T>): this {
3031
return new ChunkedView(data) as this;
3132
}
3233
/**
 * Yields the values of every chunk, in chunk order, by delegating to each
 * child vector's own iterator.
 */
public *[Symbol.iterator](): IterableIterator<T['TValue'] | null> {
33-
for (const vector of this.chunks) {
34+
for (const vector of this.childVectors) {
3435
yield* vector;
3536
}
3637
}
38+
/**
 * Returns the child column at `index` across all chunks.
 * On first access it asks every chunk for its child at `index`, joins the
 * results with `Vector.concat`, and caches the result in `_childColumns`
 * (the cache array itself is created on the first call).
 * NOTE(review): the chunks are cast to `NestedVector` through `any`, so this
 * assumes every chunk exposes `getChildAt` — confirm it is only reached for
 * nested types.
 */
public getChildAt<R extends DataType = DataType>(index: number) {
39+
return (this._childColumns || (this._childColumns = []))[index] || (
40+
this._childColumns[index] = Vector.concat<R>(
41+
...(<any> this.childVectors as NestedVector<any>[]).map((v) => v.getChildAt(index))));
42+
}
3743
public isValid(index: number): boolean {
3844
// binary search to find the child vector and value index offset (inlined for speed)
39-
let offsets = this.offsets, pos = 0;
45+
let offsets = this.childOffsets, pos = 0;
4046
let lhs = 0, mid = 0, rhs = offsets.length - 1;
4147
while (index < offsets[rhs] && index >= (pos = offsets[lhs])) {
4248
if (lhs + 1 === rhs) {
43-
return this.chunks[lhs].isValid(index - pos);
49+
return this.childVectors[lhs].isValid(index - pos);
4450
}
4551
mid = lhs + ((rhs - lhs) / 2) | 0;
4652
index >= offsets[mid] ? (lhs = mid) : (rhs = mid);
@@ -49,11 +55,11 @@ export class ChunkedView<T extends DataType> implements View<T> {
4955
}
5056
public get(index: number): T['TValue'] | null {
5157
// binary search to find the child vector and value index offset (inlined for speed)
52-
let offsets = this.offsets, pos = 0;
58+
let offsets = this.childOffsets, pos = 0;
5359
let lhs = 0, mid = 0, rhs = offsets.length - 1;
5460
while (index < offsets[rhs] && index >= (pos = offsets[lhs])) {
5561
if (lhs + 1 === rhs) {
56-
return this.chunks[lhs].get(index - pos);
62+
return this.childVectors[lhs].get(index - pos);
5763
}
5864
mid = lhs + ((rhs - lhs) / 2) | 0;
5965
index >= offsets[mid] ? (lhs = mid) : (rhs = mid);
@@ -62,18 +68,18 @@ export class ChunkedView<T extends DataType> implements View<T> {
6268
}
6369
/**
 * Writes `value` at logical position `index`.
 * Finds the chunk whose offset range contains `index`, then delegates to that
 * chunk's `set()` with the chunk-relative index (`index - pos`).
 * If `index` is outside the range covered by the offsets, the loop condition
 * fails immediately and the method returns without writing anything.
 */
public set(index: number, value: T['TValue'] | null): void {
6470
// binary search to find the child vector and value index offset (inlined for speed)
65-
let offsets = this.offsets, pos = 0;
71+
let offsets = this.childOffsets, pos = 0;
6672
let lhs = 0, mid = 0, rhs = offsets.length - 1;
6773
while (index < offsets[rhs] && index >= (pos = offsets[lhs])) {
6874
if (lhs + 1 === rhs) {
69-
return this.chunks[lhs].set(index - pos, value);
75+
return this.childVectors[lhs].set(index - pos, value);
7076
}
7177
// `+` binds tighter than `|`, so `| 0` truncates the whole midpoint to an integer
mid = lhs + ((rhs - lhs) / 2) | 0;
7278
// keep the half of the offset range that still contains `index`
index >= offsets[mid] ? (lhs = mid) : (rhs = mid);
7379
}
7480
}
7581
public toArray(): IterableArrayLike<T['TValue'] | null> {
76-
const chunks = this.chunks;
82+
const chunks = this.childVectors;
7783
const numChunks = chunks.length;
7884
if (numChunks === 1) {
7985
return chunks[0].toArray();

js/src/vector/nested.ts

+9-9
Original file line numberDiff line numberDiff line change
@@ -16,23 +16,23 @@
1616
// under the License.
1717

1818
import { Data } from '../data';
19+
import { View, Vector } from '../vector';
1920
import { IterableArrayLike } from '../type';
20-
import { View, Vector, createVector } from '../vector';
2121
import { DataType, NestedType, DenseUnion, SparseUnion, Struct, Map_ } from '../type';
2222

2323
export abstract class NestedView<T extends NestedType> implements View<T> {
2424
public length: number;
2525
public numChildren: number;
2626
public childData: Data<any>[];
27-
protected children: Vector<any>[];
27+
protected _childColumns: Vector<any>[];
2828
/**
 * Records the length and child data of `data`.
 * `children` is an optional, already-populated cache of child vectors; when it
 * is omitted an empty array with one slot per child is allocated, and
 * `getChildAt()` fills the slots on demand.
 */
constructor(data: Data<T>, children?: Vector<any>[]) {
2929
this.length = data.length;
3030
this.childData = data.childData;
3131
this.numChildren = data.childData.length;
32-
this.children = children || new Array(this.numChildren);
32+
this._childColumns = children || new Array(this.numChildren);
3333
}
3434
/**
 * Creates a new view of the same concrete class over `data`.
 * The child-vector cache array is passed by reference, so this view and the
 * clone share it.
 * NOTE(review): vectors already cached from this view's `childData` are reused
 * by the clone even if `data.childData` differs — confirm that is intended.
 */
public clone(data: Data<T>): this {
35-
return new (<any> this.constructor)(data, this.children) as this;
35+
return new (<any> this.constructor)(data, this._childColumns) as this;
3636
}
3737
public isValid(): boolean {
3838
return true;
@@ -53,8 +53,8 @@ export abstract class NestedView<T extends NestedType> implements View<T> {
5353
protected abstract getNested(self: NestedView<T>, index: number): T['TValue'];
5454
protected abstract setNested(self: NestedView<T>, index: number, value: T['TValue']): void;
5555
/**
 * Returns the child Vector at `index`.
 * On first access it is created from `childData[index]` and stored in the
 * cache array; later calls return the cached Vector.
 */
public getChildAt<R extends DataType = DataType>(index: number) {
56-
return this.children[index] || (
57-
this.children[index] = createVector<R>(this.childData[index]));
56+
return this._childColumns[index] || (
57+
this._childColumns[index] = Vector.create<R>(this.childData[index]));
5858
}
5959
public *[Symbol.iterator](): IterableIterator<T['TValue']> {
6060
const get = this.getNested;
@@ -120,7 +120,7 @@ export class DenseUnionView extends UnionView<DenseUnion> {
120120

121121
export class StructView extends NestedView<Struct> {
122122
/**
 * Produces the value for row `index` as a RowView.
 * The RowView receives `self` (cast to `any`) and `self`'s child-vector cache
 * array itself, not a copy.
 */
protected getNested(self: StructView, index: number) {
123-
return new RowView(self as any, self.children, index);
123+
return new RowView(self as any, self._childColumns, index);
124124
}
125125
protected setNested(self: StructView, index: number, value: any): void {
126126
let idx = -1, len = self.numChildren;
@@ -140,7 +140,7 @@ export class MapView extends NestedView<Map_> {
140140
(xs[x.name] = i) && xs || xs, Object.create(null));
141141
}
142142
/**
 * Produces the value for row `index` as a MapRowView.
 * The MapRowView receives `self` (cast to `any`) and `self`'s child-vector
 * cache array itself, not a copy.
 */
protected getNested(self: MapView, index: number) {
143-
return new MapRowView(self as any, self.children, index);
143+
return new MapRowView(self as any, self._childColumns, index);
144144
}
145145
protected setNested(self: MapView, index: number, value: { [k: string]: any }): void {
146146
const typeIds = self.typeIds as any;
@@ -160,7 +160,7 @@ export class RowView extends UnionView<SparseUnion> {
160160
this.length = data.numChildren;
161161
}
162162
/**
 * Re-creates this row view over `data` using the same concrete class.
 * The same child-vector cache array and the same `rowIndex` are passed on.
 */
public clone(data: Data<SparseUnion> & NestedView<any>): this {
163-
return new (<any> this.constructor)(data, this.children, this.rowIndex) as this;
163+
return new (<any> this.constructor)(data, this._childColumns, this.rowIndex) as this;
164164
}
165165
protected getChildValue(self: RowView, index: number, _typeIds: any, _valueOffsets?: any): any | null {
166166
const child = self.getChildAt(index);

js/test/integration/validate-tests.ts

+4-4
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ function testReaderIntegration(jsonData: any, arrowBuffer: Uint8Array) {
132132
expect(jsonRecordBatch.length).toEqual(binaryRecordBatch.length);
133133
expect(jsonRecordBatch.numCols).toEqual(binaryRecordBatch.numCols);
134134
for (let i = -1, n = jsonRecordBatch.numCols; ++i < n;) {
135-
(jsonRecordBatch.columns[i] as any).name = jsonRecordBatch.schema.fields[i].name;
136-
(expect(jsonRecordBatch.columns[i]) as any).toEqualVector(binaryRecordBatch.columns[i]);
135+
(jsonRecordBatch.getChildAt(i) as any).name = jsonRecordBatch.schema.fields[i].name;
136+
(expect(jsonRecordBatch.getChildAt(i)) as any).toEqualVector(binaryRecordBatch.getChildAt(i));
137137
}
138138
}
139139
});
@@ -147,8 +147,8 @@ function testTableFromBuffersIntegration(jsonData: any, arrowBuffer: Uint8Array)
147147
expect(jsonTable.length).toEqual(binaryTable.length);
148148
expect(jsonTable.numCols).toEqual(binaryTable.numCols);
149149
for (let i = -1, n = jsonTable.numCols; ++i < n;) {
150-
(jsonTable.columns[i] as any).name = jsonTable.schema.fields[i].name;
151-
(expect(jsonTable.columns[i]) as any).toEqualVector(binaryTable.columns[i]);
150+
(jsonTable.getColumnAt(i) as any).name = jsonTable.schema.fields[i].name;
151+
(expect(jsonTable.getColumnAt(i)) as any).toEqualVector(binaryTable.getColumnAt(i));
152152
}
153153
});
154154
}

js/test/unit/table-tests.ts

+4-2
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ describe(`Table`, () => {
143143
test(`scans expected values`, () => {
144144
let expected_idx = 0;
145145
table.scan((idx, batch) => {
146-
expect(batch.columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
146+
const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i));
147+
expect(columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
147148
});
148149
});
149150
test(`count() returns the correct length`, () => {
@@ -348,7 +349,8 @@ describe(`Table`, () => {
348349
test(`scans expected values`, () => {
349350
let expected_idx = 0;
350351
table.scan((idx, batch) => {
351-
expect(batch.columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
352+
const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i));
353+
expect(columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
352354
});
353355
});
354356
test(`count() returns the correct length`, () => {

0 commit comments

Comments
 (0)