dremio
diff --git a/‎js/gulp/uglify-task.js
+1 b/‎js/gulp/uglify-task.js
+1
diff --git a/‎js/gulp/util.js
+1-1 b/‎js/gulp/util.js
+1-1
diff --git a/‎js/package.json
+7-7 b/‎js/package.json
+7-7
diff --git a/‎js/perf/index.js
+131-30 b/‎js/perf/index.js
+131-30
diff --git a/‎js/perf/table_config.js
+48 b/‎js/perf/table_config.js
+48
@@ -91,6 +91,7 @@ const reservePublicNames = ((ESKeywords) => function reservePublicNames(target,
         `../${src}/table.js`,
         `../${src}/vector.js`,
         `../${src}/util/int.js`,
+        `../${src}/recordbatch.js`,
         `../${src}/${mainExport}.js`,
     ];
     return publicModulePaths.reduce((keywords, publicModulePath) => [
 
@@ -87,7 +87,7 @@ const ESKeywords = [
     // EventTarget
     `addListener`, `removeListener`, `addEventListener`, `removeEventListener`,
     // Arrow properties
-    `low`, `high`, `data`, `index`, `field`, `validity`, `columns`, `fieldNode`, `subarray`,
+    `low`, `high`, `data`, `index`, `field`, `columns`, 'numCols', 'numRows', `values`, `valueOffsets`, `nullBitmap`, `subarray`
 ];
 
 function taskName(target, format) {
 
@@ -12,6 +12,7 @@
     "clean": "gulp clean",
     "debug": "gulp debug",
     "perf": "node ./perf/index.js",
+    "create:perfdata": "python ./test/data/tables/generate.py ./test/data/tables/tracks.arrow",
     "release": "./npm-release.sh",
     "clean:all": "run-p clean clean:testdata",
     "clean:testdata": "gulp clean:testdata",
@@ -51,16 +52,15 @@
   ],
   "dependencies": {
     "@types/text-encoding-utf-8": "1.0.1",
-    "command-line-args": "5.0.0",
+    "command-line-args": "5.0.1",
     "command-line-usage": "4.1.0",
     "flatbuffers": "trxcllnt/flatbuffers-esm",
     "json-bignum": "0.0.3",
     "text-encoding-utf-8": "^1.0.2",
-    "ts-node": "4.1.0",
-    "tslib": "1.8.1"
+    "tslib": "1.9.0"
   },
   "devDependencies": {
-    "@std/esm": "0.19.6",
+    "@std/esm": "0.19.7",
     "@types/flatbuffers": "1.6.5",
     "@types/glob": "5.0.34",
     "@types/jest": "22.0.1",
@@ -80,11 +80,11 @@
     "gulp-transform-js-ast": "1.0.2",
     "gulp-typescript": "3.2.3",
     "ix": "2.3.4",
-    "jest": "22.1.2",
+    "jest": "22.1.3",
     "jest-environment-node-debug": "2.0.0",
     "json": "9.0.6",
     "lerna": "2.7.1",
-    "lint-staged": "6.0.0",
+    "lint-staged": "6.0.1",
     "merge2": "1.2.1",
     "mkdirp": "0.5.1",
     "npm-run-all": "4.1.2",
@@ -130,7 +130,7 @@
       "lcov"
     ],
     "coveragePathIgnorePatterns": [
-      "format\\/(File|Message|Schema|Tensor)_generated\\.(js|ts)$",
+      "fb\\/(File|Message|Schema|Tensor)_generated\\.(js|ts)$",
       "test\\/.*\\.(ts|tsx|js)$",
       "/node_modules/"
     ],
 
@@ -16,29 +16,40 @@
 // under the License.
 
 // Use the ES5 UMD target as perf baseline
-// const { Table, readVectors } = require('../targets/es5/umd');
-// const { Table, readVectors } = require('../targets/es5/cjs');
-const { Table, readVectors } = require('../targets/es2015/umd');
-// const { Table, readVectors } = require('../targets/es2015/cjs');
+// const { col, Table, read: readBatches } = require('../targets/es5/umd');
+// const { col, Table, read: readBatches } = require('../targets/es5/cjs');
+// const { col, Table, read: readBatches } = require('../targets/es2015/umd');
+const { col, Table, read: readBatches } = require('../targets/es2015/cjs');
 
-const config = require('./config');
 const Benchmark = require('benchmark');
 
 const suites = [];
 
-for (let { name, buffers} of config) {
-    const parseSuite = new Benchmark.Suite(`Parse ${name}`, { async: true });
-    const sliceSuite = new Benchmark.Suite(`Slice ${name} vectors`, { async: true });
-    const iterateSuite = new Benchmark.Suite(`Iterate ${name} vectors`, { async: true });
-    const getByIndexSuite = new Benchmark.Suite(`Get ${name} values by index`, { async: true });
-    parseSuite.add(createFromTableTest(name, buffers));
-    parseSuite.add(createReadVectorsTest(name, buffers));
-    for (const vector of Table.from(buffers).columns) {
-        sliceSuite.add(createSliceTest(vector));
-        iterateSuite.add(createIterateTest(vector));
-        getByIndexSuite.add(createGetByIndexTest(vector));
-    }
-    suites.push(getByIndexSuite, iterateSuite, sliceSuite, parseSuite);
+// Register the parse benchmarks and the per-column vector benchmarks for each configured table.
+for (const { name, buffers } of require('./table_config')) {
+    const parseLabel = `Parse "${name}"`;
+    suites.push(
+        createTestSuite(parseLabel, createFromTableTest(name, buffers)),
+        createTestSuite(parseLabel, createReadBatchesTest(name, buffers))
+    );
+
+    const table = Table.from(buffers);
+    // Suite label paired with the factory that builds one benchmark per column.
+    const perColumnBenchmarks = [
+        [`Get "${name}" values by index`, createGetByIndexTest],
+        [`Iterate "${name}" vectors`, createIterateTest],
+        [`Slice toArray "${name}" vectors`, createSliceToArrayTest],
+        [`Slice "${name}" vectors`, createSliceTest],
+    ];
+    for (const [suiteLabel, createTest] of perColumnBenchmarks) {
+        // Column i is labelled with the name of schema field i.
+        suites.push(...table.columns.map((vector, i) => createTestSuite(suiteLabel, createTest(vector, table.schema.fields[i].name))));
+    }
+}
+
+// Register the DataFrame benchmarks (countBy, filtered count, hand-written count) for each configured table.
+for (const { name, buffers, countBys, counts } of require('./table_config')) {
+    const table = Table.from(buffers);
+
+    const countByLabel = `DataFrame Count By "${name}"`;
+    const filterCountLabel = `DataFrame Filter-Scan Count "${name}"`;
+    const directCountLabel = `DataFrame Direct Count "${name}"`;
+
+    const countBySuites = countBys.map((countBy) => createTestSuite(countByLabel, createDataFrameCountByTest(table, countBy)));
+    suites.push(...countBySuites);
+
+    // `col` is renamed to `column` while destructuring so the imported `col` helper is not shadowed.
+    const filterCountSuites = counts.map(({ col: column, test, value }) => createTestSuite(filterCountLabel, createDataFrameFilterCountTest(table, column, test, value)));
+    suites.push(...filterCountSuites);
+
+    const directCountSuites = counts.map(({ col: column, test, value }) => createTestSuite(directCountLabel, createDataFrameDirectCountTest(table, column, test, value)));
+    suites.push(...directCountSuites);
 }
 
 console.log('Running apache-arrow performance tests...\n');
@@ -52,7 +63,7 @@ function run() {
             var str = x.toString();
             var meanMsPerOp = Math.round(x.stats.mean * 100000)/100;
             var sliceOf60FPS = Math.round((meanMsPerOp / (1000/60)) * 100000)/1000;
-            return `${str} (avg: ${meanMsPerOp}ms, or ${sliceOf60FPS}% of a frame @ 60FPS) ${x.suffix || ''}`;
+            return `${str}\n   avg: ${meanMsPerOp}ms\n   ${sliceOf60FPS}% of a frame @ 60FPS ${x.suffix || ''}`;
         }).join('\n') + '\n');
         if (suites.length > 0) {
             setTimeout(run, 1000);
@@ -61,51 +72,141 @@ function run() {
     .run({ async: true });
 }
 
+/**
+ * Creates an asynchronous Benchmark.Suite holding a single benchmark.
+ *
+ * @param {string} name - label given to the suite.
+ * @param {Object} test - benchmark definition passed to the suite's `add`.
+ * @returns whatever `add` returns (presumably the suite itself - confirm against Benchmark.js).
+ */
+function createTestSuite(name, test) {
+    const suite = new Benchmark.Suite(name, { async: true });
+    return suite.add(test);
+}
+
 function createFromTableTest(name, buffers) {
     let table;
     return {
         async: true,
-        name: `Table.from`,
+        name: `Table.from\n`,
         fn() { table = Table.from(buffers); }
     };
 }
 
-function createReadVectorsTest(name, buffers) {
-    let vectors;
+function createReadBatchesTest(name, buffers) {
+    let recordBatch;
     return {
         async: true,
-        name: `readVectors`,
-        fn() { for (vectors of readVectors(buffers)) {} }
+        name: `readBatches\n`,
+        fn() { for (recordBatch of readBatches(buffers)) {} }
     };
 }
 
-function createSliceTest(vector) {
+function createSliceTest(vector, name) {
     let xs;
     return {
         async: true,
-        name: `name: '${vector.name}', length: ${vector.length}, type: ${vector.type}`,
+        name: `name: '${name}', length: ${vector.length}, type: ${vector.type}\n`,
         fn() { xs = vector.slice(); }
     };
 }
 
-function createIterateTest(vector) {
+/**
+ * Builds a benchmark that slices the vector and converts the slice with `toArray()`.
+ *
+ * @param vector - vector to slice; its `length` and `type` appear in the benchmark name.
+ * @param {string} name - column name shown in the benchmark name.
+ * @returns {Object} benchmark definition with `async`, `name` and `fn`.
+ */
+function createSliceToArrayTest(vector, name) {
+    // Result is stored outside fn, presumably so the engine cannot discard the work.
+    let result;
+    const label = `name: '${name}', length: ${vector.length}, type: ${vector.type}\n`;
+    return {
+        async: true,
+        name: label,
+        fn() {
+            const sliced = vector.slice();
+            result = sliced.toArray();
+        }
+    };
+}
+
+function createIterateTest(vector, name) {
     let value;
     return {
         async: true,
-        name: `name: '${vector.name}', length: ${vector.length}, type: ${vector.type}`,
+        name: `name: '${name}', length: ${vector.length}, type: ${vector.type}\n`,
         fn() { for (value of vector) {} }
     };
 }
 
-function createGetByIndexTest(vector) {
+function createGetByIndexTest(vector, name) {
     let value;
     return {
         async: true,
-        name: `name: '${vector.name}', length: ${vector.length}, type: ${vector.type}`,
+        name: `name: '${name}', length: ${vector.length}, type: ${vector.type}\n`,
         fn() {
             for (let i = -1, n = vector.length; ++i < n;) {
                 value = vector.get(i);
             }
         }
     };
 }
+
+/**
+ * Builds a benchmark that counts matching rows by scanning one column by hand,
+ * record batch by record batch, instead of using `table.filter(...).count()`.
+ *
+ * @param table - table whose `batches` are scanned on every run.
+ * @param {string} column - name of the schema field to test.
+ * @param {string} test - comparison to apply: 'gteq' (>=) or 'eq' (===).
+ * @param {*} value - right-hand operand of the comparison.
+ * @returns {Object} benchmark definition with `async`, `name` and `fn`.
+ * @throws {Error} if `test` is neither 'gteq' nor 'eq'.
+ */
+function createDataFrameDirectCountTest(table, column, test, value) {
+    const colidx = table.schema.fields.findIndex((c) => c.name === column);
+    // Reset and accumulated by the benchmark body on every run.
+    let sum;
+    // Declared locally: a bare assignment would create an implicit global
+    // and throws a ReferenceError in strict mode or an ES module.
+    let op;
+
+    // The two branches differ only in the comparison operator.
+    if (test === 'gteq') {
+        op = function () {
+            sum = 0;
+            const batches = table.batches;
+            const numBatches = batches.length;
+            for (let batchIndex = -1; ++batchIndex < numBatches;) {
+                // load batches
+                const { numRows, columns } = batches[batchIndex];
+                const vector = columns[colidx];
+                // visit every row; a true comparison adds 1 to the count
+                for (let index = -1; ++index < numRows;) {
+                    sum += (vector.get(index) >= value);
+                }
+            }
+        };
+    } else if (test === 'eq') {
+        op = function () {
+            sum = 0;
+            const batches = table.batches;
+            const numBatches = batches.length;
+            for (let batchIndex = -1; ++batchIndex < numBatches;) {
+                // load batches
+                const { numRows, columns } = batches[batchIndex];
+                const vector = columns[colidx];
+                // visit every row; a true comparison adds 1 to the count
+                for (let index = -1; ++index < numRows;) {
+                    sum += (vector.get(index) === value);
+                }
+            }
+        };
+    } else {
+        throw new Error(`Unrecognized test "${test}"`);
+    }
+
+    return {
+        async: true,
+        name: `name: '${column}', length: ${table.numRows}, type: ${table.columns[colidx].type}, test: ${test}, value: ${value}\n`,
+        fn: op
+    };
+}
+
+/**
+ * Builds a benchmark that calls `table.countBy(column)`.
+ *
+ * @param table - table to run `countBy` on.
+ * @param {string} column - name of the schema field to count by.
+ * @returns {Object} benchmark definition with `async`, `name` and `fn`.
+ */
+function createDataFrameCountByTest(table, column) {
+    // Position of the field in the schema, used to look up the column's type for the label.
+    const fieldIndex = table.schema.fields.findIndex((field) => field.name === column);
+    const label = `name: '${column}', length: ${table.numRows}, type: ${table.columns[fieldIndex].type}\n`;
+
+    return {
+        async: true,
+        name: label,
+        fn() {
+            table.countBy(column);
+        }
+    };
+}
+
+/**
+ * Builds a benchmark that counts the rows of a filtered view of the table.
+ *
+ * @param table - table to filter.
+ * @param {string} column - name of the schema field the predicate tests.
+ * @param {string} test - predicate to build: 'gteq' or 'eq'.
+ * @param {*} value - operand passed to the predicate.
+ * @returns {Object} benchmark definition with `async`, `name` and `fn`.
+ * @throws {Error} if `test` is neither 'gteq' nor 'eq'.
+ */
+function createDataFrameFilterCountTest(table, column, test, value) {
+    const colidx = table.schema.fields.findIndex((c) => c.name === column);
+
+    let predicate;
+    if (test === 'gteq') {
+        predicate = col(column).gteq(value);
+    } else if (test === 'eq') {
+        predicate = col(column).eq(value);
+    } else {
+        throw new Error(`Unrecognized test "${test}"`);
+    }
+
+    // The filtered view is built once here, so the benchmark body only runs count().
+    const df = table.filter(predicate);
+
+    return {
+        async: true,
+        name: `name: '${column}', length: ${table.numRows}, type: ${table.columns[colidx].type}, test: ${test}, value: ${value}\n`,
+        fn() {
+            df.count();
+        }
+    };
+}
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+const fs = require('fs');
+const path = require('path');
+const glob = require('glob');
+
+// Benchmark configuration: one entry per *.arrow file in test/data/tables
+// that has benchmark parameters defined below.
+const config = [];
+const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.arrow`));
+
+// Columns to benchmark with countBy, keyed by table name (file name without extension).
+const countBys = {
+    "tracks": ['origin', 'destination']
+};
+// Filter/count benchmarks keyed by table name: column, comparison ('gteq' or 'eq') and operand.
+const counts = {
+    "tracks": [
+        {col: 'lat',    test: 'gteq', value: 0        },
+        {col: 'lng',    test: 'gteq', value: 0        },
+        {col: 'origin', test:   'eq', value: 'Seattle'},
+    ]
+};
+
+for (const filename of filenames) {
+    const { name } = path.parse(filename);
+    // Own-property check so a file named e.g. "constructor.arrow" does not match an inherited key.
+    if (Object.prototype.hasOwnProperty.call(counts, name)) {
+        config.push({
+            name,
+            buffers: [fs.readFileSync(filename)],
+            countBys: countBys[name],
+            counts: counts[name],
+        });
+    }
+}
+
+module.exports = config;