16
16
// under the License.
17
17
18
18
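// Build target under test: enable exactly one of the require lines below (ES5 UMD is the stated perf baseline; es2015/cjs is the one currently enabled).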
19
- // const { Table, readVectors } = require('../targets/es5/umd');
20
- // const { Table, readVectors } = require('../targets/es5/cjs');
21
- const { Table, readVectors } = require ( '../targets/es2015/umd' ) ;
22
- // const { Table, readVectors } = require('../targets/es2015/cjs');
19
+ // const { col, Table, read: readBatches } = require('../targets/es5/umd');
20
+ // const { col, Table, read: readBatches } = require('../targets/es5/cjs');
21
+ // const { col, Table, read: readBatches } = require('../targets/es2015/umd');
22
+ const { col , Table, read : readBatches } = require ( '../targets/es2015/cjs' ) ;
23
23
24
- const config = require ( './config' ) ;
25
24
const Benchmark = require ( 'benchmark' ) ;
26
25
27
26
// Queue of Benchmark suites, filled below in the order they were registered.
const suites = [];

// Parsing benchmarks plus one suite per column for each vector-level operation.
for (const { name, buffers } of require('./table_config')) {
    const parseSuiteName = `Parse "${name} "`;
    suites.push(createTestSuite(parseSuiteName, createFromTableTest(name, buffers)));
    suites.push(createTestSuite(parseSuiteName, createReadBatchesTest(name, buffers)));

    const table = Table.from(buffers);

    // [suite label, test factory] pairs; registration order is significant.
    const perColumnBenchmarks = [
        [`Get "${name} " values by index`, createGetByIndexTest],
        [`Iterate "${name} " vectors`, createIterateTest],
        [`Slice toArray "${name} " vectors`, createSliceToArrayTest],
        [`Slice "${name} " vectors`, createSliceTest],
    ];

    for (const [suiteName, makeTest] of perColumnBenchmarks) {
        table.columns.forEach((vector, i) => {
            // Each vector is labelled with the schema field name at the same index.
            const fieldName = table.schema.fields[i].name;
            suites.push(createTestSuite(suiteName, makeTest(vector, fieldName)));
        });
    }
}

// DataFrame benchmarks: count-by, filter + count, and a hand-written counting loop.
for (const { name, buffers, countBys, counts } of require('./table_config')) {
    const table = Table.from(buffers);

    const countBySuiteName = `DataFrame Count By "${name} "`;
    const filterCountSuiteName = `DataFrame Filter-Scan Count "${name} "`;
    const directCountSuiteName = `DataFrame Direct Count "${name} "`;

    const dataFrameSuites = [
        ...countBys.map((countBy) =>
            createTestSuite(countBySuiteName, createDataFrameCountByTest(table, countBy))),
        // `col` is renamed while destructuring so the imported `col` helper is not shadowed.
        ...counts.map(({ col: column, test, value }) =>
            createTestSuite(filterCountSuiteName, createDataFrameFilterCountTest(table, column, test, value))),
        ...counts.map(({ col: column, test, value }) =>
            createTestSuite(directCountSuiteName, createDataFrameDirectCountTest(table, column, test, value))),
    ];
    suites.push(...dataFrameSuites);
}

console.log('Running apache-arrow performance tests...\n');
@@ -52,7 +63,7 @@ function run() {
52
63
var str = x . toString ( ) ;
53
64
var meanMsPerOp = Math . round ( x . stats . mean * 100000 ) / 100 ;
54
65
var sliceOf60FPS = Math . round ( ( meanMsPerOp / ( 1000 / 60 ) ) * 100000 ) / 1000 ;
55
- return `${ str } ( avg: ${ meanMsPerOp } ms, or ${ sliceOf60FPS } % of a frame @ 60FPS) ${ x . suffix || '' } ` ;
66
+ return `${ str } \n avg: ${ meanMsPerOp } ms\n ${ sliceOf60FPS } % of a frame @ 60FPS ${ x . suffix || '' } ` ;
56
67
} ) . join ( '\n' ) + '\n' ) ;
57
68
if ( suites . length > 0 ) {
58
69
setTimeout ( run , 1000 ) ;
@@ -61,51 +72,141 @@ function run() {
61
72
. run ( { async : true } ) ;
62
73
}
63
74
75
/**
 * Creates an async Benchmark.Suite that contains exactly one test.
 *
 * @param {string} name - Label given to the suite.
 * @param {Object} test - Test definition forwarded to the suite's `add`.
 * @returns {*} Whatever `add` returns (Benchmark.js documents this as the suite instance).
 */
function createTestSuite(name, test) {
    const suite = new Benchmark.Suite(name, { async: true });
    return suite.add(test);
}
78
+
64
79
/**
 * Describes a benchmark whose body builds a Table from `buffers` via `Table.from`.
 *
 * @param {string} name - Not read by this function.
 * @param {*} buffers - Passed unchanged to `Table.from` on every run.
 * @returns {{async: boolean, name: string, fn: Function}} Benchmark test definition.
 */
function createFromTableTest(name, buffers) {
    // Receives the result of each run; it is never read back here.
    let parsed;
    const parse = () => { parsed = Table.from(buffers); };
    return {
        async: true,
        name: `Table.from\n `,
        fn: parse
    };
}
72
87
73
- function createReadVectorsTest ( name , buffers ) {
74
- let vectors ;
88
/**
 * Describes a benchmark whose body iterates everything yielded by
 * `readBatches(buffers)`.
 *
 * @param {string} name - Not read by this function.
 * @param {*} buffers - Passed unchanged to `readBatches` on every run.
 * @returns {{async: boolean, name: string, fn: Function}} Benchmark test definition.
 */
function createReadBatchesTest(name, buffers) {
    // Loop target declared outside the body so each yielded item is assigned to it.
    let recordBatch;
    function drainBatches() {
        for (recordBatch of readBatches(buffers)) {
            // Intentionally empty: only the iteration itself is being timed.
        }
    }
    return {
        async: true,
        name: `readBatches\n `,
        fn: drainBatches
    };
}
81
96
82
- function createSliceTest ( vector ) {
97
/**
 * Describes a benchmark whose body calls `vector.slice()` with no arguments.
 *
 * @param {Object} vector - Object exposing `length`, `type` and `slice()`.
 * @param {string} name - Column label embedded in the benchmark name.
 * @returns {{async: boolean, name: string, fn: Function}} Benchmark test definition.
 */
function createSliceTest(vector, name) {
    // Receives the slice produced by each run; never read back here.
    let sliced;
    const label = `name: '${name} ', length: ${vector.length} , type: ${vector.type} \n `;
    return {
        async: true,
        name: label,
        fn: () => { sliced = vector.slice(); }
    };
}
90
105
91
- function createIterateTest ( vector ) {
106
/**
 * Describes a benchmark whose body calls `vector.slice()` and then
 * `toArray()` on the result.
 *
 * @param {Object} vector - Object exposing `length`, `type` and `slice()`.
 * @param {string} name - Column label embedded in the benchmark name.
 * @returns {{async: boolean, name: string, fn: Function}} Benchmark test definition.
 */
function createSliceToArrayTest(vector, name) {
    // Receives the array produced by each run; never read back here.
    let materialized;
    const label = `name: '${name} ', length: ${vector.length} , type: ${vector.type} \n`;
    return {
        async: true,
        name: label,
        fn: () => { materialized = vector.slice().toArray(); }
    };
}
114
+
115
/**
 * Describes a benchmark whose body walks `vector` with `for...of`.
 *
 * @param {Iterable} vector - Iterable that also exposes `length` and `type`.
 * @param {string} name - Column label embedded in the benchmark name.
 * @returns {{async: boolean, name: string, fn: Function}} Benchmark test definition.
 */
function createIterateTest(vector, name) {
    // Loop target declared outside the body so each element is assigned to it.
    let value;
    function iterateAll() {
        for (value of vector) {
            // Intentionally empty: only the iteration itself is being timed.
        }
    }
    return {
        async: true,
        name: `name: '${name} ', length: ${vector.length} , type: ${vector.type} \n `,
        fn: iterateAll
    };
}
99
123
100
- function createGetByIndexTest ( vector ) {
124
/**
 * Describes a benchmark whose body reads every element of `vector` through
 * `vector.get(i)` for i in [0, vector.length).
 *
 * @param {Object} vector - Object exposing `length`, `type` and `get(index)`.
 * @param {string} name - Column label embedded in the benchmark name.
 * @returns {{async: boolean, name: string, fn: Function}} Benchmark test definition.
 */
function createGetByIndexTest(vector, name) {
    // Receives each element read; never read back here.
    let value;
    const readEveryIndex = () => {
        // Length is read once per run, before the loop starts.
        const total = vector.length;
        for (let idx = 0; idx < total; idx += 1) {
            value = vector.get(idx);
        }
    };
    return {
        async: true,
        name: `name: '${name} ', length: ${vector.length} , type: ${vector.type} \n `,
        fn: readEveryIndex
    };
}
136
+
137
/**
 * Describes a benchmark that counts matching rows of one column with a
 * hand-written loop over `table.batches`, without using the DataFrame API.
 *
 * @param {Object} table - Object exposing `schema.fields`, `columns`, `batches` and `numRows`.
 * @param {string} column - Name of the field whose values are compared.
 * @param {string} test - Comparison to run: 'gteq' (>=) or 'eq' (===).
 * @param {*} value - Right-hand operand of the comparison.
 * @returns {{async: boolean, name: string, fn: Function}} Benchmark test definition.
 * @throws {Error} If `column` is not a field of the table's schema, or `test` is not recognized.
 */
function createDataFrameDirectCountTest(table, column, test, value) {
    const colidx = table.schema.fields.findIndex((c) => c.name === column);
    if (colidx === -1) {
        // Fail with a readable message instead of a TypeError on `columns[-1].type` below.
        throw new Error(`Unknown column "${column}"`);
    }

    // Written by every run; kept outside the benchmark body.
    let sum;
    // Declared locally. Assigning `op` without a declaration creates an implicit
    // global in sloppy mode and throws a ReferenceError in strict mode / ES modules.
    let op;

    // The two loops are deliberately kept as separate closures with the comparison
    // inlined, so the timed hot loop contains no extra indirection.
    if (test === 'gteq') {
        op = function () {
            sum = 0;
            const batches = table.batches;
            const numBatches = batches.length;
            for (let batchIndex = -1; ++batchIndex < numBatches;) {
                // load batches
                const { numRows, columns } = batches[batchIndex];
                const vector = columns[colidx];
                // yield all indices
                for (let index = -1; ++index < numRows;) {
                    // The boolean result is coerced to 0 or 1 by `+=`.
                    sum += (vector.get(index) >= value);
                }
            }
        };
    } else if (test === 'eq') {
        op = function () {
            sum = 0;
            const batches = table.batches;
            const numBatches = batches.length;
            for (let batchIndex = -1; ++batchIndex < numBatches;) {
                // load batches
                const { numRows, columns } = batches[batchIndex];
                const vector = columns[colidx];
                // yield all indices
                for (let index = -1; ++index < numRows;) {
                    // The boolean result is coerced to 0 or 1 by `+=`.
                    sum += (vector.get(index) === value);
                }
            }
        };
    } else {
        throw new Error(`Unrecognized test "${test} "`);
    }

    return {
        async: true,
        name: `name: '${column} ', length: ${table.numRows} , type: ${table.columns[colidx].type} , test: ${test} , value: ${value} \n`,
        fn: op
    };
}
180
+
181
/**
 * Describes a benchmark whose body calls `table.countBy(column)`.
 *
 * @param {Object} table - Object exposing `schema.fields`, `columns`, `numRows` and `countBy()`.
 * @param {string} column - Field name passed to `countBy` and shown in the benchmark name.
 * @returns {{async: boolean, name: string, fn: Function}} Benchmark test definition.
 */
function createDataFrameCountByTest(table, column) {
    // Position of the named field; used only to look up the column type for the label.
    const position = table.schema.fields.findIndex((field) => field.name === column);
    const label = `name: '${column} ', length: ${table.numRows} , type: ${table.columns[position].type} \n`;
    const runCountBy = () => {
        table.countBy(column);
    };
    return {
        async: true,
        name: label,
        fn: runCountBy
    };
}
192
+
193
/**
 * Describes a benchmark whose body calls `count()` on a filtered view of
 * `table`. The filter is built once, when this factory runs, so only
 * `count()` is inside the benchmark body.
 *
 * @param {Object} table - Object exposing `schema.fields`, `columns`, `numRows` and `filter()`.
 * @param {string} column - Field name handed to `col(...)` to build the predicate.
 * @param {string} test - Predicate kind: 'gteq' or 'eq'.
 * @param {*} value - Operand passed to the predicate builder.
 * @returns {{async: boolean, name: string, fn: Function}} Benchmark test definition.
 * @throws {Error} If `test` is neither 'gteq' nor 'eq'.
 */
function createDataFrameFilterCountTest(table, column, test, value) {
    // Position of the named field; used only to look up the column type for the label.
    const position = table.schema.fields.findIndex((field) => field.name === column);

    let predicate;
    if (test == 'gteq') {
        predicate = col(column).gteq(value);
    } else if (test == 'eq') {
        predicate = col(column).eq(value);
    } else {
        throw new Error(`Unrecognized test "${test} "`);
    }
    const filtered = table.filter(predicate);

    return {
        async: true,
        name: `name: '${column} ', length: ${table.numRows} , type: ${table.columns[position].type} , test: ${test} , value: ${value} \n`,
        fn: () => {
            filtered.count();
        }
    };
}
0 commit comments