ARROW-89: [Python] Add benchmarks for Arrow<->Pandas conversion

apache · xhochy · Mar 29, 2016 · Mar 30, 2016 · Mar 29, 2016 · 8f7452877a84236be8dcf9274064e14ae09a483b
commit 8f7452877a84236be8dcf9274064e14ae09a483b
diff --git a/python/benchmarks/array.py b/python/benchmarks/array.py
@@ -15,22 +15,52 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import pyarrow
+import numpy as np
+import pandas as pd
+import pyarrow as A
 
-class Conversions(object):
+
+class PyListConversions(object):
+    param_names = ('size',)
     params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
 
+    def setup(self, n):
+        self.data = list(range(n))
+
     def time_from_pylist(self, n):
-        pyarrow.from_pylist(list(range(n)))
+        A.from_pylist(self.data)
 
     def peakmem_from_pylist(self, n):
-        pyarrow.from_pylist(list(range(n)))
+        A.from_pylist(self.data)
+
+
+class PandasConversions(object):
+    param_names = ('size', 'dtype')
+    params = ((1, 10 ** 5, 10 ** 6, 10 ** 7), ('int64', 'float64', 'str'))
+
+    def setup(self, n, dtype):
+        self.data = pd.DataFrame({'column': pd.Series(np.arange(n).astype(dtype))})
+        self.arrow_data = A.from_pandas_dataframe(self.data)
+
+    def time_from_series(self, n, dtype):
+        A.from_pandas_dataframe(self.data)
+
+    def peakmem_from_series(self, n, dtype):
+        A.from_pandas_dataframe(self.data)
+
+    def time_to_series(self, n, dtype):
+        self.arrow_data.to_pandas()
+
+    def peakmem_to_series(self, n, dtype):
+        self.arrow_data.to_pandas()
+
 
 class ScalarAccess(object):
+    param_names = ('size',)
     params = (1, 10 ** 5, 10 ** 6, 10 ** 7)
 
     def setUp(self, n):
-        self._array = pyarrow.from_pylist(list(range(n)))
+        self._array = A.from_pylist(list(range(n)))
 
     def time_as_py(self, n):
         for i in range(n):