16
16
// under the License.
17
17
18
18
// Use the ES5 UMD target as the perf baseline; uncomment exactly one require below to select the target under test.
19
- // const { Table, readVectors } = require('../targets/es5/umd');
20
- // const { Table, readVectors } = require('../targets/es5/cjs');
21
- const { Table, readVectors } = require ( '../targets/es2015/umd' ) ;
22
- // const { Table, readVectors } = require('../targets/es2015/cjs');
19
+ // const { DataFrame, Table, readVectors } = require('../targets/es5/umd');
20
+ // const { DataFrame, Table, readVectors } = require('../targets/es5/cjs');
21
+ // const { DataFrame, Table, readVectors } = require('../targets/es2015/umd');
22
+ const { DataFrame , Table, readVectors } = require ( '../targets/es2015/cjs' ) ;
23
23
24
24
const config = require ( './config' ) ;
25
25
const Benchmark = require ( 'benchmark' ) ;
26
26
27
27
const suites = [ ] ;
28
28
29
- for ( let { name, buffers} of config ) {
30
- const parseSuite = new Benchmark . Suite ( `Parse ${ name } ` , { async : true } ) ;
31
- const sliceSuite = new Benchmark . Suite ( `Slice ${ name } vectors` , { async : true } ) ;
32
- const iterateSuite = new Benchmark . Suite ( `Iterate ${ name } vectors` , { async : true } ) ;
33
- const getByIndexSuite = new Benchmark . Suite ( `Get ${ name } values by index` , { async : true } ) ;
34
- parseSuite . add ( createFromTableTest ( name , buffers ) ) ;
35
- parseSuite . add ( createReadVectorsTest ( name , buffers ) ) ;
36
- for ( const vector of Table . from ( buffers ) . columns ) {
37
- sliceSuite . add ( createSliceTest ( vector ) ) ;
38
- iterateSuite . add ( createIterateTest ( vector ) ) ;
39
- getByIndexSuite . add ( createGetByIndexTest ( vector ) ) ;
40
- }
41
- suites . push ( getByIndexSuite , iterateSuite , sliceSuite , parseSuite ) ;
42
- }
29
+ // for (let { name, buffers} of config) {
30
+ // const parseSuite = new Benchmark.Suite(`Parse " ${name}" `, { async: true });
31
+ // const sliceSuite = new Benchmark.Suite(`Slice " ${name}" vectors`, { async: true });
32
+ // const iterateSuite = new Benchmark.Suite(`Iterate " ${name}" vectors`, { async: true });
33
+ // const getByIndexSuite = new Benchmark.Suite(`Get " ${name}" values by index`, { async: true });
34
+ // parseSuite.add(createFromTableTest(name, buffers));
35
+ // parseSuite.add(createReadVectorsTest(name, buffers));
36
+ // for (const vector of Table.from(buffers).columns) {
37
+ // sliceSuite.add(createSliceTest(vector));
38
+ // iterateSuite.add(createIterateTest(vector));
39
+ // getByIndexSuite.add(createGetByIndexTest(vector));
40
+ // }
41
+ // suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
42
+ // }
43
43
44
44
// Build one group of suites per entry exported by ./table_config. Each entry
// supplies a display `name`, the `buffers` handed to Table.from, and a list of
// `tests`, each read for `col`, `test` and `value`.
for (const { name, buffers, tests } of require('./table_config')) {
    const tableIteratorSuite = new Benchmark.Suite(`Table Iterator "${name} "`, { async: true });
    const tableCountSuite = new Benchmark.Suite(`Table Count "${name} "`, { async: true });
    const dfIteratorSuite = new Benchmark.Suite(`DataFrame Iterator "${name} "`, { async: true });
    const dfIteratorCountSuite = new Benchmark.Suite(`DataFrame Iterator Count "${name} "`, { async: true });
    const dfDirectCountSuite = new Benchmark.Suite(`DataFrame Direct Count "${name} "`, { async: true });
    const dfScanCountSuite = new Benchmark.Suite(`DataFrame Scan Count "${name} "`, { async: true });
    const vectorCountSuite = new Benchmark.Suite(`Vector Count "${name} "`, { async: true });

    // Parse the buffers once; every test below shares this table.
    const table = Table.from(buffers);

    tableIteratorSuite.add(createTableIteratorTest(table));
    dfIteratorSuite.add(createDataFrameIteratorTest(table));

    // `test` must be declared here: a bare `for (test of tests)` assigns an
    // undeclared variable, which leaks a global in sloppy mode and throws a
    // ReferenceError in strict mode.
    for (const test of tests) {
        tableCountSuite.add(createTableCountTest(table, test.col, test.test, test.value));
        dfIteratorCountSuite.add(createDataFrameIteratorCountTest(table, test.col, test.test, test.value));
        dfDirectCountSuite.add(createDataFrameDirectCountTest(table, test.col, test.test, test.value));
        dfScanCountSuite.add(createDataFrameScanCountTest(table, test.col, test.test, test.value));
        vectorCountSuite.add(createVectorCountTest(table.columns[test.col], test.test, test.value));
    }

    suites.push(
        tableIteratorSuite,
        tableCountSuite,
        dfIteratorSuite,
        dfIteratorCountSuite,
        dfDirectCountSuite,
        dfScanCountSuite,
        vectorCountSuite
    );
}
58
66
59
67
console . log ( 'Running apache-arrow performance tests...\n' ) ;
@@ -125,7 +133,7 @@ function createGetByIndexTest(vector) {
125
133
} ;
126
134
}
127
135
128
- function createVectorCountByTest ( vector , test , value ) {
136
+ function createVectorCountTest ( vector , test , value ) {
129
137
let op ;
130
138
if ( test == 'gteq' ) {
131
139
op = function ( ) {
@@ -152,7 +160,7 @@ function createVectorCountByTest(vector, test, value) {
152
160
} ;
153
161
}
154
162
155
- function createTableIterateTest ( table ) {
163
+ function createTableIteratorTest ( table ) {
156
164
let row ;
157
165
return {
158
166
async : true ,
@@ -161,7 +169,7 @@ function createTableIterateTest(table) {
161
169
} ;
162
170
}
163
171
164
- function createTableCountByTest ( table , column , test , value ) {
172
+ function createTableCountTest ( table , column , test , value ) {
165
173
let op ;
166
174
if ( test == 'gteq' ) {
167
175
op = function ( ) {
@@ -187,3 +195,110 @@ function createTableCountByTest(table, column, test, value) {
187
195
fn : op
188
196
} ;
189
197
}
198
+
199
/**
 * Builds a Benchmark.js test descriptor whose timed function iterates a
 * DataFrame created from `table`.
 *
 * @param {object} table - Source table; passed to `DataFrame.from` and read for `length`.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor for `Suite#add`.
 */
function createDataFrameIteratorTest(table) {
    // Build the DataFrame once, outside the timed function.
    const df = DataFrame.from(table);
    // Loop target declared in the closure, mirroring the sibling table iterator test.
    let idx;
    return {
        async: true,
        name: `length: ${table.length} `,
        fn() {
            // Iterate the DataFrame, not the source table; iterating `table`
            // here would make this suite a duplicate of the Table Iterator one.
            for (idx of df) { }
        }
    };
}
208
+
209
/**
 * Builds a Benchmark.js test descriptor that counts matching rows by walking
 * the DataFrame batch by batch: for each entry of `df.lengths` it fetches the
 * batch's columns with `df.getBatch` and reads `columns[column].get(idx)` for
 * every index in the batch.
 *
 * @param {object} table - Source table; passed to `DataFrame.from` and read for the test name.
 * @param {number|string} column - Key used to select the column from each batch's columns.
 * @param {string} test - Comparison to run: 'gteq' (>=) or 'eq' (==).
 * @param {*} value - Right-hand operand of the comparison.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor; `fn` returns the match count.
 * @throws {Error} If `test` is neither 'gteq' nor 'eq'.
 */
function createDataFrameDirectCountTest(table, column, test, value) {
    const df = DataFrame.from(table);
    // Declared locally: assigning an undeclared `op` leaks a global in sloppy
    // mode and throws a ReferenceError in strict mode.
    let op;

    // The two loops are intentionally kept inline rather than sharing a
    // predicate callback, so the timed path contains no extra call per row.
    if (test === 'gteq') {
        op = function () {
            let sum = 0;
            for (let batch = -1; ++batch < df.lengths.length;) {
                const length = df.lengths[batch];

                // load batches
                const columns = df.getBatch(batch);

                // visit all indices; a boolean added to a number counts as 0 or 1
                for (let idx = -1; ++idx < length;) {
                    sum += (columns[column].get(idx) >= value);
                }
            }
            return sum;
        };
    } else if (test === 'eq') {
        op = function () {
            let sum = 0;
            for (let batch = -1; ++batch < df.lengths.length;) {
                const length = df.lengths[batch];

                // load batches
                const columns = df.getBatch(batch);

                // visit all indices; loose equality is kept on purpose so the
                // comparison semantics match the original benchmark
                for (let idx = -1; ++idx < length;) {
                    sum += (columns[column].get(idx) == value);
                }
            }
            return sum;
        };
    } else {
        throw new Error(`Unrecognized test "${test} "`);
    }

    return {
        async: true,
        name: `name: '${table.columns[column].name} ', length: ${table.length} , type: ${table.columns[column].type} , test: ${test} , value: ${value} `,
        fn: op
    };
}
252
+
253
/**
 * Builds a Benchmark.js test descriptor that counts matching rows through
 * `df.scan`, whose callback receives an index and the columns to read it from.
 *
 * @param {object} table - Source table; passed to `DataFrame.from` and read for the test name.
 * @param {number|string} column - Key used to select the column inside the scan callback.
 * @param {string} test - Comparison to run: 'gteq' (>=) or 'eq' (==).
 * @param {*} value - Right-hand operand of the comparison.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor; `fn` returns the match count.
 * @throws {Error} If `test` is neither 'gteq' nor 'eq'.
 */
function createDataFrameScanCountTest(table, column, test, value) {
    const df = DataFrame.from(table);
    // Declared locally: assigning an undeclared `op` leaks a global in sloppy
    // mode and throws a ReferenceError in strict mode.
    let op;

    if (test === 'gteq') {
        op = function () {
            let sum = 0;
            df.scan((idx, cols) => { sum += cols[column].get(idx) >= value; });
            return sum;
        };
    } else if (test === 'eq') {
        op = function () {
            let sum = 0;
            // Loose equality is kept on purpose to match the original comparison.
            df.scan((idx, cols) => { sum += cols[column].get(idx) == value; });
            // No logging here: printing inside the timed function would add
            // console I/O to every iteration of the 'eq' case only.
            return sum;
        };
    } else {
        throw new Error(`Unrecognized test "${test} "`);
    }

    return {
        async: true,
        name: `name: '${table.columns[column].name} ', length: ${table.length} , type: ${table.columns[column].type} , test: ${test} , value: ${value} `,
        fn: op
    };
}
277
+
278
/**
 * Builds a Benchmark.js test descriptor that counts matching rows by iterating
 * the DataFrame and passing each yielded value to `df.columns[column].get`.
 *
 * @param {object} table - Source table; passed to `DataFrame.from` and read for the test name.
 * @param {number|string} column - Key used to select the column from `df.columns`.
 * @param {string} test - Comparison to run: 'gteq' (>=) or 'eq' (==).
 * @param {*} value - Right-hand operand of the comparison.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor; `fn` returns the match count.
 * @throws {Error} If `test` is neither 'gteq' nor 'eq'.
 */
function createDataFrameIteratorCountTest(table, column, test, value) {
    const df = DataFrame.from(table);
    // `op`, `sum` and the loop variable are all declared locally: assigning
    // undeclared names leaks globals in sloppy mode and throws a
    // ReferenceError in strict mode.
    let op;

    if (test === 'gteq') {
        op = function () {
            let sum = 0;
            for (const idx of df) {
                // a boolean added to a number counts as 0 or 1
                sum += (df.columns[column].get(idx) >= value);
            }
            return sum;
        };
    } else if (test === 'eq') {
        op = function () {
            let sum = 0;
            for (const idx of df) {
                // Loose equality is kept on purpose to match the original comparison.
                sum += (df.columns[column].get(idx) == value);
            }
            return sum;
        };
    } else {
        throw new Error(`Unrecognized test "${test} "`);
    }

    return {
        async: true,
        name: `name: '${table.columns[column].name} ', length: ${table.length} , type: ${table.columns[column].type} , test: ${test} , value: ${value} `,
        fn: op
    };
}
0 commit comments