diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 47876e6..3afd8dc 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -41,6 +41,17 @@ add_library(spqlios-testlib SHARED
 		testlib/negacyclic_polynomial.cpp
 		testlib/negacyclic_polynomial.h
 		testlib/negacyclic_polynomial_impl.h
+		testlib/reim4_elem.cpp
+		testlib/reim4_elem.h
+		testlib/fft64_dft.cpp
+		testlib/fft64_dft.h
+		testlib/fft64_layouts.h
+		testlib/fft64_layouts.cpp
+		testlib/test_hash.cpp
+		testlib/sha3.h
+		testlib/sha3.c
+		testlib/polynomial_vector.h
+		testlib/polynomial_vector.cpp
 )
 if (VALGRIND_DIR)
 	target_include_directories(spqlios-testlib PRIVATE ${VALGRIND_DIR})
@@ -60,6 +71,10 @@ add_executable(spqlios-test
 		spqlios_q120_ntt_test.cpp
 		spqlios_q120_arithmetic_test.cpp
 		spqlios_znx_small_test.cpp
+		spqlios_coeffs_arithmetic_test.cpp
+		spqlios_reim4_arithmetic_test.cpp
+		spqlios_vec_znx_big_test.cpp
+		spqlios_vmp_product_test.cpp
 )
 target_link_libraries(spqlios-test spqlios-testlib libspqlios ${gtest_libs})
 target_include_directories(spqlios-test PRIVATE ${test_incs})
@@ -80,3 +95,10 @@ if (${X86})
     target_link_libraries(spqlios-q120-arithmetic-bench libspqlios benchmark pthread)
     target_include_directories(spqlios-q120-arithmetic-bench PRIVATE ${test_incs})
 endif ()
+
+if (${X86})
+	add_executable(spqlios_reim4_arithmetic_bench spqlios_reim4_arithmetic_bench.cpp)
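+	# note: -march=native is assumed here; the benchmark calls the AVX2 kernels
+	# directly, so the build machine must support them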
+	target_compile_options(spqlios_reim4_arithmetic_bench PRIVATE "-march=native")
+	target_link_libraries(spqlios_reim4_arithmetic_bench benchmark libspqlios pthread)
+	target_include_directories(spqlios_reim4_arithmetic_bench PRIVATE ../)
+endif ()
\ No newline at end of file
diff --git a/test/spqlios_coeffs_arithmetic_test.cpp b/test/spqlios_coeffs_arithmetic_test.cpp
new file mode 100644
index 0000000..ba1843f
--- /dev/null
+++ b/test/spqlios_coeffs_arithmetic_test.cpp
@@ -0,0 +1,439 @@
+#include <gtest/gtest.h>
+#include <sys/types.h>
+
+#include <cstdint>
+#include <limits>
+#include <random>
+#include <vector>
+
+#include "../spqlios/coeffs/coeffs_arithmetic.h"
+#include "test/testlib/mod_q120.h"
+#include "testlib/negacyclic_polynomial.h"
+#include "testlib/test_commons.h"
+
+/// tests of element-wise operations
+template <typename T, typename F, typename G>
+void test_elemw_op(F elemw_op, G poly_elemw_op) {
+  for (uint64_t n : {1, 2, 4, 8, 16, 64, 256, 4096}) {
+    polynomial<T> a = polynomial<T>::random(n);
+    polynomial<T> b = polynomial<T>::random(n);
+    polynomial<T> expect(n);
+    polynomial<T> actual(n);
+    // out of place
+    expect = poly_elemw_op(a, b);
+    elemw_op(n, actual.data(), a.data(), b.data());
+    ASSERT_EQ(actual, expect);
+    // in place 1
+    actual = polynomial<T>::random(n);
+    expect = poly_elemw_op(actual, b);
+    elemw_op(n, actual.data(), actual.data(), b.data());
+    ASSERT_EQ(actual, expect);
+    // in place 2
+    actual = polynomial<T>::random(n);
+    expect = poly_elemw_op(a, actual);
+    elemw_op(n, actual.data(), a.data(), actual.data());
+    ASSERT_EQ(actual, expect);
+    // in place 3
+    actual = polynomial<T>::random(n);
+    expect = poly_elemw_op(actual, actual);
+    elemw_op(n, actual.data(), actual.data(), actual.data());
+    ASSERT_EQ(actual, expect);
+  }
+}
+
+static polynomial<int64_t> poly_i64_add(const polynomial<int64_t>& u, const polynomial<int64_t>& v) { return u + v; }
+static polynomial<int64_t> poly_i64_sub(const polynomial<int64_t>& u, const polynomial<int64_t>& v) { return u - v; }
+TEST(coeffs_arithmetic, znx_add_i64_ref) { test_elemw_op<int64_t>(znx_add_i64_ref, poly_i64_add); }
+TEST(coeffs_arithmetic, znx_sub_i64_ref) { test_elemw_op<int64_t>(znx_sub_i64_ref, poly_i64_sub); }
+#ifdef __x86_64__
+TEST(coeffs_arithmetic, znx_add_i64_avx) { test_elemw_op<int64_t>(znx_add_i64_avx, poly_i64_add); }
+TEST(coeffs_arithmetic, znx_sub_i64_avx) { test_elemw_op<int64_t>(znx_sub_i64_avx, poly_i64_sub); }
+#endif
+
+/// tests of element-wise unary operations
+template <typename T, typename F, typename G>
+void test_elemw_unary_op(F elemw_op, G poly_elemw_op) {
+  for (uint64_t n : {1, 2, 4, 8, 16, 64, 256, 4096}) {
+    polynomial<T> a = polynomial<T>::random(n);
+    polynomial<T> expect(n);
+    polynomial<T> actual(n);
+    // out of place
+    expect = poly_elemw_op(a);
+    elemw_op(n, actual.data(), a.data());
+    ASSERT_EQ(actual, expect);
+    // in place
+    actual = polynomial<T>::random(n);
+    expect = poly_elemw_op(actual);
+    elemw_op(n, actual.data(), actual.data());
+    ASSERT_EQ(actual, expect);
+  }
+}
+
+static polynomial<int64_t> poly_i64_neg(const polynomial<int64_t>& u) { return -u; }
+static polynomial<int64_t> poly_i64_copy(const polynomial<int64_t>& u) { return u; }
+TEST(coeffs_arithmetic, znx_neg_i64_ref) { test_elemw_unary_op<int64_t>(znx_negate_i64_ref, poly_i64_neg); }
+TEST(coeffs_arithmetic, znx_copy_i64_ref) { test_elemw_unary_op<int64_t>(znx_copy_i64_ref, poly_i64_copy); }
+#ifdef __x86_64__
+TEST(coeffs_arithmetic, znx_neg_i64_avx) { test_elemw_unary_op<int64_t>(znx_negate_i64_avx, poly_i64_neg); }
+#endif
+
+/// tests of the rotations out of place
+template <typename T, typename F>
+void test_rotation_outplace(F rotate) {
+  for (uint64_t n : {1, 2, 4, 8, 16, 64, 256, 4096}) {
+    polynomial<T> poly = polynomial<T>::random(n);
+    polynomial<T> expect(n);
+    polynomial<T> actual(n);
+    for (uint64_t trial = 0; trial < 10; ++trial) {
+      int64_t p = uniform_i64_bits(32);
+      // rotate by p
+      for (uint64_t i = 0; i < n; ++i) {
+        expect.set_coeff(i, poly.get_coeff(i - p));
+      }
+      // rotate using the function
+      rotate(n, p, actual.data(), poly.data());
+      ASSERT_EQ(actual, expect);
+    }
+  }
+}
+
+TEST(coeffs_arithmetic, rnx_rotate_f64) { test_rotation_outplace<double>(rnx_rotate_f64); }
+TEST(coeffs_arithmetic, znx_rotate_i64) { test_rotation_outplace<int64_t>(znx_rotate_i64); }
+
+/// tests of the rotations in place
+template <typename T, typename F>
+void test_rotation_inplace(F rotate) {
+  for (uint64_t n : {1, 2, 4, 8, 16, 64, 256, 4096}) {
+    polynomial<T> poly = polynomial<T>::random(n);
+    polynomial<T> expect(n);
+    for (uint64_t trial = 0; trial < 10; ++trial) {
+      polynomial<T> actual = poly;
+      int64_t p = uniform_i64_bits(32);
+      // rotate by p
+      for (uint64_t i = 0; i < n; ++i) {
+        expect.set_coeff(i, poly.get_coeff(i - p));
+      }
+      // rotate using the function
+      rotate(n, p, actual.data());
+      ASSERT_EQ(actual, expect);
+    }
+  }
+}
+
+TEST(coeffs_arithmetic, rnx_rotate_inplace_f64) { test_rotation_inplace<double>(rnx_rotate_inplace_f64); }
+
+TEST(coeffs_arithmetic, znx_rotate_inplace_i64) { test_rotation_inplace<int64_t>(znx_rotate_inplace_i64); }
+
+/// tests of the automorphisms out of place
+template <typename T, typename F>
+void test_automorphism_outplace(F automorphism) {
+  for (uint64_t n : {1, 2, 4, 8, 16, 64, 256, 4096}) {
+    polynomial<T> poly = polynomial<T>::random(n);
+    polynomial<T> expect(n);
+    polynomial<T> actual(n);
+    for (uint64_t trial = 0; trial < 10; ++trial) {
+      int64_t p = uniform_i64_bits(32) | int64_t(1);  // make it odd
+      // automorphism p
+      for (uint64_t i = 0; i < n; ++i) {
+        expect.set_coeff(i * p, poly.get_coeff(i));
+      }
+      // rotate using the function
+      automorphism(n, p, actual.data(), poly.data());
+      ASSERT_EQ(actual, expect);
+    }
+  }
+}
+
+TEST(coeffs_arithmetic, rnx_automorphism_f64) { test_automorphism_outplace<double>(rnx_automorphism_f64); }
+TEST(coeffs_arithmetic, znx_automorphism_i64) { test_automorphism_outplace<int64_t>(znx_automorphism_i64); }
+
+/// tests of the automorphisms in place
+template <typename T, typename F>
+void test_automorphism_inplace(F automorphism) {
+  for (uint64_t n : {1, 2, 4, 8, 16, 64, 256, 4096, 16384}) {
+    polynomial<T> poly = polynomial<T>::random(n);
+    polynomial<T> expect(n);
+    for (uint64_t trial = 0; trial < 20; ++trial) {
+      polynomial<T> actual = poly;
+      int64_t p = uniform_i64_bits(32) | int64_t(1);  // make it odd
+      // automorphism p
+      for (uint64_t i = 0; i < n; ++i) {
+        expect.set_coeff(i * p, poly.get_coeff(i));
+      }
+      automorphism(n, p, actual.data());
+      if (!(actual == expect)) {
+        std::cerr << "automorphism p: " << p << std::endl;
+        for (uint64_t i = 0; i < n; ++i) {
+          std::cerr << i << " " << actual.get_coeff(i) << " vs " << expect.get_coeff(i) << " "
+                    << (actual.get_coeff(i) == expect.get_coeff(i)) << std::endl;
+        }
+      }
+      ASSERT_EQ(actual, expect);
+    }
+  }
+}
+TEST(coeffs_arithmetic, rnx_automorphism_inplace_f64) {
+  test_automorphism_inplace<double>(rnx_automorphism_inplace_f64);
+}
+TEST(coeffs_arithmetic, znx_automorphism_inplace_i64) {
+  test_automorphism_inplace<int64_t>(znx_automorphism_inplace_i64);
+}
+
+// TODO: write a real test for these; a minimal sketch follows the declarations below.
+
+/**
+ * @brief res = (X^p-1).in
+ * @param nn the ring dimension
+ * @param p must be between -2nn <= p <= 2nn
+ * @param in is a rnx/znx vector of dimension nn
+ */
+EXPORT void rnx_mul_xp_minus_one(uint64_t nn, int64_t p, double* res, const double* in);
+EXPORT void znx_mul_xp_minus_one(uint64_t nn, int64_t p, int64_t* res, const int64_t* in);
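+
+// A minimal sketch of that test (not registered as a TEST yet), assuming only
+// helpers already used above (polynomial<T>, uniform_i64_bits) and the
+// negacyclic identity (X^p - 1).in = rotate_by_p(in) - in,
+// where rotate_by_p(in)[i] = in[i - p]:
+template <typename T, typename F>
+void test_mul_xp_minus_one(F mul_xp_minus_one) {
+  for (uint64_t n : {2, 4, 16, 256}) {
+    polynomial<T> in = polynomial<T>::random(n);
+    polynomial<T> expect(n);
+    polynomial<T> actual(n);
+    int64_t p = uniform_i64_bits(32) % (2 * int64_t(n));  // |p| < 2n, as required
+    for (uint64_t i = 0; i < n; ++i) {
+      expect.set_coeff(i, in.get_coeff(i - p) - in.get_coeff(i));
+    }
+    mul_xp_minus_one(n, p, actual.data(), in.data());
+    ASSERT_EQ(actual, expect);
+  }
+}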
+
+// normalize with neither carry-in nor carry-out
+template <uint8_t inplace_flag, typename F>
+void test_znx_normalize(F normalize) {
+  for (const uint64_t n : {1, 2, 4, 8, 16, 64, 256, 4096}) {
+    polynomial<int64_t> inp = znx_i64::random_log2bound(n, 62);
+    if (n >= 2) {
+      inp.set_coeff(0, -(1l << 62));
+      inp.set_coeff(1, (1l << 62));
+    }
+    for (const uint64_t base_k : {2, 3, 19, 35, 62}) {
+      polynomial<int64_t> out;
+      int64_t* inp_ptr;
+      if (inplace_flag == 1) {
+        out = polynomial<int64_t>(inp);
+        inp_ptr = out.data();
+      } else {
+        out = polynomial<int64_t>(n);
+        inp_ptr = inp.data();
+      }
+
+      normalize(n, base_k, out.data(), nullptr, inp_ptr, nullptr);
+      for (uint64_t i = 0; i < n; ++i) {
+        const int64_t x = inp.get_coeff(i);
+        const int64_t y = out.get_coeff(i);
+        const int64_t y_exp = centermod(x, 1l << base_k);
+        ASSERT_EQ(y, y_exp) << n << " " << base_k << " " << i << " " << x << " " << y;
+      }
+    }
+  }
+}
+
+TEST(coeffs_arithmetic, znx_normalize_outplace) { test_znx_normalize<0>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_inplace) { test_znx_normalize<1>(znx_normalize); }
+
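+// Worked example of the normalization contract (values chosen for
+// illustration): with base_k = 4 and x = 23, y = centermod(23, 1 << 4) = 7 and
+// carry = (23 - 7) >> 4 = 1, i.e. x == y + (carry << base_k) with y in [-8, 8).
+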
+// normalize with carry-out (no carry-in)
+template <uint8_t inplace_flag, bool has_output, typename F>
+void test_znx_normalize_cout(F normalize) {
+  static_assert(inplace_flag < 3, "either out or cout can be inplace with inp");
+  for (const uint64_t n : {1, 2, 4, 8, 16, 64, 256, 4096}) {
+    polynomial<int64_t> inp = znx_i64::random_log2bound(n, 62);
+    if (n >= 2) {
+      inp.set_coeff(0, -(1l << 62));
+      inp.set_coeff(1, (1l << 62));
+    }
+    for (const uint64_t base_k : {2, 3, 19, 35, 62}) {
+      polynomial<int64_t> out, cout;
+      int64_t* inp_ptr;
+      if (inplace_flag == 1) {
+        // out and inp are the same
+        out = polynomial<int64_t>(inp);
+        inp_ptr = out.data();
+        cout = polynomial<int64_t>(n);
+      } else if (inplace_flag == 2) {
+        // carry out and inp are the same
+        cout = polynomial<int64_t>(inp);
+        inp_ptr = cout.data();
+        out = polynomial<int64_t>(n);
+      } else {
+        // inp, out and carry out are distinct
+        out = polynomial<int64_t>(n);
+        cout = polynomial<int64_t>(n);
+        inp_ptr = inp.data();
+      }
+
+      normalize(n, base_k, has_output ? out.data() : nullptr, cout.data(), inp_ptr, nullptr);
+      for (uint64_t i = 0; i < n; ++i) {
+        const int64_t x = inp.get_coeff(i);
+        const int64_t co = cout.get_coeff(i);
+        const int64_t y_exp = centermod((int64_t)x, 1l << base_k);
+        const int64_t co_exp = (x - y_exp) >> base_k;
+        ASSERT_EQ(co, co_exp);
+
+        if (has_output) {
+          const int64_t y = out.get_coeff(i);
+          ASSERT_EQ(y, y_exp);
+        }
+      }
+    }
+  }
+}
+
+TEST(coeffs_arithmetic, znx_normalize_cout_outplace) { test_znx_normalize_cout<0, false>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_out_cout_outplace) { test_znx_normalize_cout<0, true>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_cout_inplace1) { test_znx_normalize_cout<1, false>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_out_cout_inplace1) { test_znx_normalize_cout<1, true>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_cout_inplace2) { test_znx_normalize_cout<2, false>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_out_cout_inplace2) { test_znx_normalize_cout<2, true>(znx_normalize); }
+
+// normalize with carry-in (no carry-out)
+template <uint8_t inplace_flag, typename F>
+void test_znx_normalize_cin(F normalize) {
+  static_assert(inplace_flag < 3, "either inp or cin can be inplace with out");
+  for (const uint64_t n : {1, 2, 4, 8, 16, 64, 256, 4096}) {
+    polynomial<int64_t> inp = znx_i64::random_log2bound(n, 62);
+    if (n >= 4) {
+      inp.set_coeff(0, -(1l << 62));
+      inp.set_coeff(1, -(1l << 62));
+      inp.set_coeff(2, (1l << 62));
+      inp.set_coeff(3, (1l << 62));
+    }
+    for (const uint64_t base_k : {2, 3, 19, 35, 62}) {
+      polynomial<int64_t> cin = znx_i64::random_log2bound(n, 62);
+      if (n >= 4) {
+        // extreme carry-in values: combined with the coefficients forced
+        // above, the sums reach INT64_MIN and INT64_MAX without overflowing
+        cin.set_coeff(0, -(1l << 62));
+        cin.set_coeff(1, (1l << 62));
+        cin.set_coeff(2, -(1l << 62));
+        cin.set_coeff(3, (1l << 62) - 1);
+      }
+
+      polynomial<int64_t> out;
+      int64_t *inp_ptr, *cin_ptr;
+      if (inplace_flag == 1) {
+        // out and inp are the same
+        out = polynomial<int64_t>(inp);
+        inp_ptr = out.data();
+        cin_ptr = cin.data();
+      } else if (inplace_flag == 2) {
+        // out and carry in are the same
+        out = polynomial<int64_t>(cin);
+        inp_ptr = inp.data();
+        cin_ptr = out.data();
+      } else {
+        // inp, carry in and out are distinct
+        out = polynomial<int64_t>(n);
+        inp_ptr = inp.data();
+        cin_ptr = cin.data();
+      }
+
+      normalize(n, base_k, out.data(), nullptr, inp_ptr, cin_ptr);
+      for (uint64_t i = 0; i < n; ++i) {
+        const int64_t x = inp.get_coeff(i);
+        const int64_t ci = cin.get_coeff(i);
+        const int64_t y = out.get_coeff(i);
+
+        const __int128_t xp = (__int128_t)x + ci;
+        const int64_t y_exp = centermod((int64_t)xp, 1l << base_k);
+
+        ASSERT_EQ(y, y_exp) << n << " " << base_k << " " << i << " " << x << " " << y << " " << ci;
+      }
+    }
+  }
+}
+
+TEST(coeffs_arithmetic, znx_normalize_cin_outplace) { test_znx_normalize_cin<0>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_cin_inplace1) { test_znx_normalize_cin<1>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_cin_inplace2) { test_znx_normalize_cin<2>(znx_normalize); }
+
+// normalize with both carry-in and carry-out
+template <uint8_t inplace_flag, bool has_output, typename F>
+void test_znx_normalize_cin_cout(F normalize) {
+  static_assert(inplace_flag < 7, "inplace_flag selects one of the 7 aliasing modes below");
+  for (const uint64_t n : {1, 2, 4, 8, 16, 64, 256, 4096}) {
+    polynomial<int64_t> inp = znx_i64::random_log2bound(n, 62);
+    if (n >= 4) {
+      inp.set_coeff(0, -(1l << 62));
+      inp.set_coeff(1, -(1l << 62));
+      inp.set_coeff(2, (1l << 62));
+      inp.set_coeff(3, (1l << 62));
+    }
+    for (const uint64_t base_k : {2, 3, 19, 35, 62}) {
+      polynomial<int64_t> cin = znx_i64::random_log2bound(n, 62);
+      if (n >= 4) {
+        // extreme carry-in values: combined with the coefficients forced
+        // above, the sums reach INT64_MIN and INT64_MAX without overflowing
+        cin.set_coeff(0, -(1l << 62));
+        cin.set_coeff(1, (1l << 62));
+        cin.set_coeff(2, -(1l << 62));
+        cin.set_coeff(3, (1l << 62) - 1);
+      }
+
+      polynomial<int64_t> out, cout;
+      int64_t *inp_ptr, *cin_ptr;
+      if (inplace_flag == 1) {
+        // out == inp
+        out = polynomial<int64_t>(inp);
+        cout = polynomial<int64_t>(n);
+        inp_ptr = out.data();
+        cin_ptr = cin.data();
+      } else if (inplace_flag == 2) {
+        // cout == inp
+        out = polynomial<int64_t>(n);
+        cout = polynomial<int64_t>(inp);
+        inp_ptr = cout.data();
+        cin_ptr = cin.data();
+      } else if (inplace_flag == 3) {
+        // out == cin
+        out = polynomial<int64_t>(cin);
+        cout = polynomial<int64_t>(n);
+        inp_ptr = inp.data();
+        cin_ptr = out.data();
+      } else if (inplace_flag == 4) {
+        // cout == cin
+        out = polynomial<int64_t>(n);
+        cout = polynomial<int64_t>(cin);
+        inp_ptr = inp.data();
+        cin_ptr = cout.data();
+      } else if (inplace_flag == 5) {
+        // out == inp, cout == cin
+        out = polynomial<int64_t>(inp);
+        cout = polynomial<int64_t>(cin);
+        inp_ptr = out.data();
+        cin_ptr = cout.data();
+      } else if (inplace_flag == 6) {
+        // out == cin, cout == inp
+        out = polynomial<int64_t>(cin);
+        cout = polynomial<int64_t>(inp);
+        inp_ptr = cout.data();
+        cin_ptr = out.data();
+      } else {
+        out = polynomial<int64_t>(n);
+        cout = polynomial<int64_t>(n);
+        inp_ptr = inp.data();
+        cin_ptr = cin.data();
+      }
+
+      normalize(n, base_k, has_output ? out.data() : nullptr, cout.data(), inp_ptr, cin_ptr);
+      for (uint64_t i = 0; i < n; ++i) {
+        const int64_t x = inp.get_coeff(i);
+        const int64_t ci = cin.get_coeff(i);
+        const int64_t co = cout.get_coeff(i);
+
+        const __int128_t xp = (__int128_t)x + ci;
+        const int64_t y_exp = centermod((int64_t)xp, 1l << base_k);
+        const int64_t co_exp = (xp - y_exp) >> base_k;
+        ASSERT_EQ(co, co_exp);
+
+        if (has_output) {
+          const int64_t y = out.get_coeff(i);
+          ASSERT_EQ(y, y_exp);
+        }
+      }
+    }
+  }
+}
+
+TEST(coeffs_arithmetic, znx_normalize_cin_cout_outplace) { test_znx_normalize_cin_cout<0, false>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_out_cin_cout_outplace) { test_znx_normalize_cin_cout<0, true>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_cin_cout_inplace1) { test_znx_normalize_cin_cout<1, false>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_out_cin_cout_inplace1) { test_znx_normalize_cin_cout<1, true>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_cin_cout_inplace2) { test_znx_normalize_cin_cout<2, false>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_out_cin_cout_inplace2) { test_znx_normalize_cin_cout<2, true>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_cin_cout_inplace3) { test_znx_normalize_cin_cout<3, false>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_out_cin_cout_inplace3) { test_znx_normalize_cin_cout<3, true>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_cin_cout_inplace4) { test_znx_normalize_cin_cout<4, false>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_out_cin_cout_inplace4) { test_znx_normalize_cin_cout<4, true>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_cin_cout_inplace5) { test_znx_normalize_cin_cout<5, false>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_out_cin_cout_inplace5) { test_znx_normalize_cin_cout<5, true>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_cin_cout_inplace6) { test_znx_normalize_cin_cout<6, false>(znx_normalize); }
+TEST(coeffs_arithmetic, znx_normalize_out_cin_cout_inplace6) { test_znx_normalize_cin_cout<6, true>(znx_normalize); }
diff --git a/test/spqlios_reim4_arithmetic_bench.cpp b/test/spqlios_reim4_arithmetic_bench.cpp
new file mode 100644
index 0000000..e4c365b
--- /dev/null
+++ b/test/spqlios_reim4_arithmetic_bench.cpp
@@ -0,0 +1,51 @@
+#include <benchmark/benchmark.h>
+
+#include "spqlios/reim4/reim4_arithmetic.h"
+
+// fills v with pseudo-random doubles in [-2.0, 2.0) with granularity 2^-12
+void init_random_values(uint64_t n, double* v) {
+  for (uint64_t i = 0; i < n; ++i) v[i] = (double(rand() % (1ul << 14)) - (1ul << 13)) / (1ul << 12);
+}
+
+// Run the benchmark
+BENCHMARK_MAIN();
+
+#undef ARGS
+#define ARGS Args({47, 16384})->Args({93, 32768})
+
+/*
+ *  reim4_vec_mat1col_product
+ *  reim4_vec_mat2col_product
+ *  reim4_vec_mat3col_product
+ *  reim4_vec_mat4col_product
+ */
+
+template <uint64_t X,
+          void (*fnc)(const uint64_t nrows, double* const dst, const double* const u, const double* const v)>
+void benchmark_reim4_vec_matXcols_product(benchmark::State& state) {
+  const uint64_t nrows = state.range(0);
+
+  double* u = new double[nrows * 8];
+  init_random_values(8 * nrows, u);
+  double* v = new double[nrows * X * 8];
+  init_random_values(X * 8 * nrows, v);
+  double* dst = new double[X * 8];
+
+  for (auto _ : state) {
+    fnc(nrows, dst, u, v);
+  }
+
+  delete[] dst;
+  delete[] v;
+  delete[] u;
+}
+
+#undef ARGS
+#define ARGS Arg(128)->Arg(1024)->Arg(4096)
+
+#ifdef __x86_64__
+BENCHMARK(benchmark_reim4_vec_matXcols_product<1, reim4_vec_mat1col_product_avx2>)->ARGS;
+// TODO: please remove when fixed:
+BENCHMARK(benchmark_reim4_vec_matXcols_product<2, reim4_vec_mat2cols_product_avx2>)->ARGS;
+#endif
+BENCHMARK(benchmark_reim4_vec_matXcols_product<1, reim4_vec_mat1col_product_ref>)->ARGS;
+BENCHMARK(benchmark_reim4_vec_matXcols_product<2, reim4_vec_mat2cols_product_ref>)->ARGS;
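+
+// Usage note: as with any Google Benchmark binary, a subset can be run with
+// e.g. ./spqlios_reim4_arithmetic_bench --benchmark_filter=mat1col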
diff --git a/test/spqlios_reim4_arithmetic_test.cpp b/test/spqlios_reim4_arithmetic_test.cpp
new file mode 100644
index 0000000..671bb7e
--- /dev/null
+++ b/test/spqlios_reim4_arithmetic_test.cpp
@@ -0,0 +1,253 @@
+#include <gtest/gtest.h>
+
+#include <iostream>
+#include <random>
+
+#include "../spqlios/reim4/reim4_arithmetic.h"
+#include "test/testlib/reim4_elem.h"
+
+/// Actual tests
+
+typedef typeof(reim4_extract_1blk_from_reim_ref) reim4_extract_1blk_from_reim_f;
+void test_reim4_extract_1blk_from_reim(reim4_extract_1blk_from_reim_f reim4_extract_1blk_from_reim) {
+  static const uint64_t numtrials = 100;
+  for (uint64_t m : {4, 8, 16, 1024, 4096, 32768}) {
+    double* v = (double*)malloc(2 * m * sizeof(double));
+    double* w = (double*)malloc(8 * sizeof(double));
+    reim_view vv(m, v);
+    for (uint64_t i = 0; i < numtrials; ++i) {
+      reim4_elem el = gaussian_reim4();
+      uint64_t blk = rand() % (m / 4);
+      vv.set_blk(blk, el);
+      reim4_extract_1blk_from_reim(m, blk, w, v);
+      reim4_elem actual(w);
+      ASSERT_EQ(el, actual);
+    }
+    free(v);
+    free(w);
+  }
+}
+
+TEST(reim4_arithmetic, reim4_extract_1blk_from_reim_ref) {
+  test_reim4_extract_1blk_from_reim(reim4_extract_1blk_from_reim_ref);
+}
+#ifdef __x86_64__
+TEST(reim4_arithmetic, reim4_extract_1blk_from_reim_avx) {
+  test_reim4_extract_1blk_from_reim(reim4_extract_1blk_from_reim_avx);
+}
+#endif
+
+typedef typeof(reim4_save_1blk_to_reim_ref) reim4_save_1blk_to_reim_f;
+void test_reim4_save_1blk_to_reim(reim4_save_1blk_to_reim_f reim4_save_1blk_to_reim) {
+  static const uint64_t numtrials = 100;
+  for (uint64_t m : {4, 8, 16, 1024, 4096, 32768}) {
+    double* v = (double*)malloc(2 * m * sizeof(double));
+    double* w = (double*)malloc(8 * sizeof(double));
+    reim_view vv(m, v);
+    for (uint64_t i = 0; i < numtrials; ++i) {
+      reim4_elem el = gaussian_reim4();
+      el.save_as(w);
+      uint64_t blk = rand() % (m / 4);
+      reim4_save_1blk_to_reim(m, blk, v, w);
+      reim4_elem actual = vv.get_blk(blk);
+      ASSERT_EQ(el, actual);
+    }
+    free(v);
+    free(w);
+  }
+}
+
+TEST(reim4_arithmetic, reim4_save_1blk_to_reim_ref) { test_reim4_save_1blk_to_reim(reim4_save_1blk_to_reim_ref); }
+#ifdef __x86_64__
+TEST(reim4_arithmetic, reim4_save_1blk_to_reim_avx) { test_reim4_save_1blk_to_reim(reim4_save_1blk_to_reim_avx); }
+#endif
+
+typedef typeof(reim4_extract_1blk_from_contiguous_reim_ref) reim4_extract_1blk_from_contiguous_reim_f;
+void test_reim4_extract_1blk_from_contiguous_reim(
+    reim4_extract_1blk_from_contiguous_reim_f reim4_extract_1blk_from_contiguous_reim) {
+  static const uint64_t numtrials = 20;
+  for (uint64_t m : {4, 8, 16, 1024, 4096, 32768}) {
+    for (uint64_t nrows : {1, 2, 5, 128}) {
+      double* v = (double*)malloc(2 * m * nrows * sizeof(double));
+      double* w = (double*)malloc(8 * nrows * sizeof(double));
+      reim_vector_view vv(m, nrows, v);
+      reim4_array_view ww(nrows, w);
+      for (uint64_t i = 0; i < numtrials; ++i) {
+        uint64_t blk = rand() % (m / 4);
+        for (uint64_t j = 0; j < nrows; ++j) {
+          reim4_elem el = gaussian_reim4();
+          vv.row(j).set_blk(blk, el);
+        }
+        reim4_extract_1blk_from_contiguous_reim(m, nrows, blk, w, v);
+        for (uint64_t j = 0; j < nrows; ++j) {
+          reim4_elem el = vv.row(j).get_blk(blk);
+          reim4_elem actual = ww.get(j);
+          ASSERT_EQ(el, actual);
+        }
+      }
+      free(v);
+      free(w);
+    }
+  }
+}
+
+TEST(reim4_arithmetic, reim4_extract_1blk_from_contiguous_reim_ref) {
+  test_reim4_extract_1blk_from_contiguous_reim(reim4_extract_1blk_from_contiguous_reim_ref);
+}
+#ifdef __x86_64__
+TEST(reim4_arithmetic, reim4_extract_1blk_from_contiguous_reim_avx) {
+  test_reim4_extract_1blk_from_contiguous_reim(reim4_extract_1blk_from_contiguous_reim_avx);
+}
+#endif
+
+// test of basic arithmetic functions
+
+TEST(reim4_arithmetic, add) {
+  reim4_elem x = gaussian_reim4();
+  reim4_elem y = gaussian_reim4();
+  reim4_elem expect = x + y;
+  reim4_elem actual;
+  reim4_add(actual.value, x.value, y.value);
+  ASSERT_EQ(actual, expect);
+}
+
+TEST(reim4_arithmetic, mul) {
+  reim4_elem x = gaussian_reim4();
+  reim4_elem y = gaussian_reim4();
+  reim4_elem expect = x * y;
+  reim4_elem actual;
+  reim4_mul(actual.value, x.value, y.value);
+  ASSERT_EQ(actual, expect);
+}
+
+TEST(reim4_arithmetic, add_mul) {
+  reim4_elem x = gaussian_reim4();
+  reim4_elem y = gaussian_reim4();
+  reim4_elem z = gaussian_reim4();
+  reim4_elem expect = z;
+  reim4_elem actual = z;
+  expect += x * y;
+  reim4_add_mul(actual.value, x.value, y.value);
+  ASSERT_EQ(actual, expect) << infty_dist(expect, actual);
+}
+
+// test of dot products
+
+typedef typeof(reim4_vec_mat1col_product_ref) reim4_vec_mat1col_product_f;
+void test_reim4_vec_mat1col_product(reim4_vec_mat1col_product_f product) {
+  for (uint64_t ell : {1, 2, 5, 13, 69, 129}) {
+    std::vector<double> actual(8);
+    std::vector<double> a(ell * 8);
+    std::vector<double> b(ell * 8);
+    reim4_array_view va(ell, a.data());
+    reim4_array_view vb(ell, b.data());
+    reim4_array_view vactual(1, actual.data());
+    // initialize random values
+    for (uint64_t i = 0; i < ell; ++i) {
+      va.set(i, gaussian_reim4());
+      vb.set(i, gaussian_reim4());
+    }
+    // compute the mat1col product
+    reim4_elem expect;
+    for (uint64_t i = 0; i < ell; ++i) {
+      expect += va.get(i) * vb.get(i);
+    }
+    // compute the actual product
+    product(ell, actual.data(), a.data(), b.data());
+    // compare
+    ASSERT_LE(infty_dist(vactual.get(0), expect), 1e-10);
+  }
+}
+
+TEST(reim4_arithmetic, reim4_vec_mat1col_product_ref) { test_reim4_vec_mat1col_product(reim4_vec_mat1col_product_ref); }
+#ifdef __x86_64__
+TEST(reim4_arithmetic, reim4_vec_mat1col_product_avx2) { test_reim4_vec_mat1col_product(reim4_vec_mat1col_product_avx2); }
+#endif
+
+typedef typeof(reim4_vec_mat2cols_product_ref) reim4_vec_mat2col_product_f;
+void test_reim4_vec_mat2cols_product(reim4_vec_mat2col_product_f product) {
+  for (uint64_t ell : {1, 2, 5, 13, 69, 129}) {
+    std::vector<double> actual(16);
+    std::vector<double> a(ell * 8);
+    std::vector<double> b(ell * 16);
+    reim4_array_view va(ell, a.data());
+    reim4_matrix_view vb(ell, 2, b.data());
+    reim4_array_view vactual(2, actual.data());
+    // initialize random values
+    for (uint64_t i = 0; i < ell; ++i) {
+      va.set(i, gaussian_reim4());
+      vb.set(i, 0, gaussian_reim4());
+      vb.set(i, 1, gaussian_reim4());
+    }
+    // compute the expected mat2cols product
+    reim4_elem expect[2];
+    for (uint64_t i = 0; i < ell; ++i) {
+      expect[0] += va.get(i) * vb.get(i, 0);
+      expect[1] += va.get(i) * vb.get(i, 1);
+    }
+    // compute the actual product
+    product(ell, actual.data(), a.data(), b.data());
+    // compare
+    ASSERT_LE(infty_dist(vactual.get(0), expect[0]), 1e-10);
+    ASSERT_LE(infty_dist(vactual.get(1), expect[1]), 1e-10);
+  }
+}
+
+TEST(reim4_arithmetic, reim4_vec_mat2cols_product_ref) {
+  test_reim4_vec_mat2cols_product(reim4_vec_mat2cols_product_ref);
+}
+#ifdef __x86_64__
+TEST(reim4_arithmetic, reim4_vec_mat2cols_product_avx2) {
+  test_reim4_vec_mat2cols_product(reim4_vec_mat2cols_product_avx2);
+}
+#endif
+
+// for now, we do not need avx implementations,
+// so we will keep a single test function.
+// reim4_convolution_ref is an internal helper; declare it before use:
+EXPORT void reim4_convolution_ref(double* dest, uint64_t dest_size, uint64_t dest_offset, const double* a,
+                                  uint64_t sizea, const double* b, uint64_t sizeb);
+TEST(reim4_arithmetic, reim4_vec_convolution_ref) {
+  for (uint64_t sizea : {1, 2, 3, 5, 8}) {
+    for (uint64_t sizeb : {1, 3, 6, 9, 13}) {
+      std::vector<double> a(8 * sizea);
+      std::vector<double> b(8 * sizeb);
+      std::vector<double> expect(8 * (sizea + sizeb - 1));
+      std::vector<double> actual(8 * (sizea + sizeb - 1));
+      reim4_array_view va(sizea, a.data());
+      reim4_array_view vb(sizeb, b.data());
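+      // vexpect is padded with 4 zero slots past the true support so the
+      // 1coeff/2coeff checks below can also verify the zero tail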
+      std::vector<reim4_elem> vexpect(sizea + sizeb + 3);
+      reim4_array_view vactual(sizea + sizeb - 1, actual.data());
+      for (uint64_t i = 0; i < sizea; ++i) {
+        va.set(i, gaussian_reim4());
+      }
+      for (uint64_t j = 0; j < sizeb; ++j) {
+        vb.set(j, gaussian_reim4());
+      }
+      // manual convolution
+      for (uint64_t i = 0; i < sizea; ++i) {
+        for (uint64_t j = 0; j < sizeb; ++j) {
+          vexpect[i + j] += va.get(i) * vb.get(j);
+        }
+      }
+      // partial convolution single coeff
+      for (uint64_t k = 0; k < sizea + sizeb + 3; ++k) {
+        double dest[8] = {0};
+        reim4_convolution_1coeff_ref(k, dest, a.data(), sizea, b.data(), sizeb);
+        ASSERT_LE(infty_dist(reim4_elem(dest), vexpect[k]), 1e-10);
+      }
+      // partial convolution dual coeff
+      for (uint64_t k = 0; k < sizea + sizeb + 2; ++k) {
+        double dest[16] = {0};
+        reim4_convolution_2coeff_ref(k, dest, a.data(), sizea, b.data(), sizeb);
+        ASSERT_LE(infty_dist(reim4_elem(dest), vexpect[k]), 1e-10);
+        ASSERT_LE(infty_dist(reim4_elem(dest + 8), vexpect[k + 1]), 1e-10);
+      }
+      // actual convolution
+      reim4_convolution_ref(actual.data(), sizea + sizeb - 1, 0, a.data(), sizea, b.data(), sizeb);
+      for (uint64_t k = 0; k < sizea + sizeb - 1; ++k) {
+        ASSERT_LE(infty_dist(vactual.get(k), vexpect[k]), 1e-10) << k;
+      }
+    }
+  }
+}
diff --git a/test/spqlios_vec_znx_big_test.cpp b/test/spqlios_vec_znx_big_test.cpp
new file mode 100644
index 0000000..a2c6981
--- /dev/null
+++ b/test/spqlios_vec_znx_big_test.cpp
@@ -0,0 +1,265 @@
+#include <gtest/gtest.h>
+
+#include "spqlios/arithmetic/vec_znx_arithmetic_private.h"
+#include "test/testlib/polynomial_vector.h"
+#include "testlib/fft64_layouts.h"
+#include "testlib/test_commons.h"
+
+#define def_rand_big(varname, ringdim, varsize)       \
+  fft64_vec_znx_big_layout varname(ringdim, varsize); \
+  varname.fill_random()
+
+#define def_rand_small(varname, ringdim, varsize)            \
+  znx_vec_i64_layout varname(ringdim, varsize, 2 * ringdim); \
+  varname.fill_random()
+
+#define test_prelude(ringdim, moduletype, dim1, dim2, dim3) \
+  uint64_t n = ringdim;                                     \
+  MODULE* module = new_module_info(ringdim, moduletype);    \
+  for (uint64_t sa : {dim1, dim2, dim3}) {                  \
+    for (uint64_t sb : {dim1, dim2, dim3}) {                \
+      for (uint64_t sr : {dim1, dim2, dim3})
+
+#define test_end()          \
+  }                         \
+  }                         \
+  delete_module_info(module)
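+
+// test_prelude opens a triple loop over the sizes (sa, sb, sr); every use
+// below is closed by test_end(), which closes the two outer loops and
+// deletes the module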
+
+void test_fft64_vec_znx_big_add(VEC_ZNX_BIG_ADD_F vec_znx_big_add_fcn) {
+  test_prelude(8, FFT64, 3, 5, 7) {
+    fft64_vec_znx_big_layout r(n, sr);
+    def_rand_big(a, n, sa);
+    def_rand_big(b, n, sb);
+    vec_znx_big_add_fcn(module, r.data, sr, a.data, sa, b.data, sb);
+    for (uint64_t i = 0; i < sr; ++i) {
+      ASSERT_EQ(r.get_copy(i), a.get_copy_zext(i) + b.get_copy_zext(i));
+    }
+  }
+  test_end();
+}
+void test_fft64_vec_znx_big_add_small(VEC_ZNX_BIG_ADD_SMALL_F vec_znx_big_add_fcn) {
+  test_prelude(16, FFT64, 2, 4, 5) {
+    fft64_vec_znx_big_layout r(n, sr);
+    def_rand_big(a, n, sa);
+    def_rand_small(b, n, sb);
+    vec_znx_big_add_fcn(module, r.data, sr, a.data, sa, b.data(), sb, 2 * n);
+    for (uint64_t i = 0; i < sr; ++i) {
+      ASSERT_EQ(r.get_copy(i), a.get_copy_zext(i) + b.get_copy_zext(i));
+    }
+  }
+  test_end();
+}
+void test_fft64_vec_znx_big_add_small2(VEC_ZNX_BIG_ADD_SMALL2_F vec_znx_big_add_fcn) {
+  test_prelude(64, FFT64, 3, 6, 7) {
+    fft64_vec_znx_big_layout r(n, sr);
+    def_rand_small(a, n, sa);
+    def_rand_small(b, n, sb);
+    vec_znx_big_add_fcn(module, r.data, sr, a.data(), sa, 2 * n, b.data(), sb, 2 * n);
+    for (uint64_t i = 0; i < sr; ++i) {
+      ASSERT_EQ(r.get_copy(i), a.get_copy_zext(i) + b.get_copy_zext(i));
+    }
+  }
+  test_end();
+}
+
+TEST(fft64_vec_znx_big, fft64_vec_znx_big_add) { test_fft64_vec_znx_big_add(fft64_vec_znx_big_add); }
+TEST(vec_znx_big, vec_znx_big_add) { test_fft64_vec_znx_big_add(vec_znx_big_add); }
+
+TEST(fft64_vec_znx_big, fft64_vec_znx_big_add_small) { test_fft64_vec_znx_big_add_small(fft64_vec_znx_big_add_small); }
+TEST(vec_znx_big, vec_znx_big_add_small) { test_fft64_vec_znx_big_add_small(vec_znx_big_add_small); }
+
+TEST(fft64_vec_znx_big, fft64_vec_znx_big_add_small2) {
+  test_fft64_vec_znx_big_add_small2(fft64_vec_znx_big_add_small2);
+}
+TEST(vec_znx_big, vec_znx_big_add_small2) { test_fft64_vec_znx_big_add_small2(vec_znx_big_add_small2); }
+
+void test_fft64_vec_znx_big_sub(VEC_ZNX_BIG_SUB_F vec_znx_big_sub_fcn) {
+  test_prelude(16, FFT64, 3, 5, 7) {
+    fft64_vec_znx_big_layout r(n, sr);
+    def_rand_big(a, n, sa);
+    def_rand_big(b, n, sb);
+    vec_znx_big_sub_fcn(module, r.data, sr, a.data, sa, b.data, sb);
+    for (uint64_t i = 0; i < sr; ++i) {
+      ASSERT_EQ(r.get_copy(i), a.get_copy_zext(i) - b.get_copy_zext(i));
+    }
+  }
+  test_end();
+}
+void test_fft64_vec_znx_big_sub_small_a(VEC_ZNX_BIG_SUB_SMALL_A_F vec_znx_big_sub_fcn) {
+  test_prelude(32, FFT64, 2, 4, 5) {
+    fft64_vec_znx_big_layout r(n, sr);
+    def_rand_small(a, n, sa);
+    def_rand_big(b, n, sb);
+    vec_znx_big_sub_fcn(module, r.data, sr, a.data(), sa, 2 * n, b.data, sb);
+    for (uint64_t i = 0; i < sr; ++i) {
+      ASSERT_EQ(r.get_copy(i), a.get_copy_zext(i) - b.get_copy_zext(i));
+    }
+  }
+  test_end();
+}
+void test_fft64_vec_znx_big_sub_small_b(VEC_ZNX_BIG_SUB_SMALL_B_F vec_znx_big_sub_fcn) {
+  test_prelude(16, FFT64, 2, 4, 5) {
+    fft64_vec_znx_big_layout r(n, sr);
+    def_rand_big(a, n, sa);
+    def_rand_small(b, n, sb);
+    vec_znx_big_sub_fcn(module, r.data, sr, a.data, sa, b.data(), sb, 2 * n);
+    for (uint64_t i = 0; i < sr; ++i) {
+      ASSERT_EQ(r.get_copy(i), a.get_copy_zext(i) - b.get_copy_zext(i));
+    }
+  }
+  test_end();
+}
+void test_fft64_vec_znx_big_sub_small2(VEC_ZNX_BIG_SUB_SMALL2_F vec_znx_big_sub_fcn) {
+  test_prelude(8, FFT64, 3, 6, 7) {
+    fft64_vec_znx_big_layout r(n, sr);
+    def_rand_small(a, n, sa);
+    def_rand_small(b, n, sb);
+    vec_znx_big_sub_fcn(module, r.data, sr, a.data(), sa, 2 * n, b.data(), sb, 2 * n);
+    for (uint64_t i = 0; i < sr; ++i) {
+      ASSERT_EQ(r.get_copy(i), a.get_copy_zext(i) - b.get_copy_zext(i));
+    }
+  }
+  test_end();
+}
+
+TEST(fft64_vec_znx_big, fft64_vec_znx_big_sub) { test_fft64_vec_znx_big_sub(fft64_vec_znx_big_sub); }
+TEST(vec_znx_big, vec_znx_big_sub) { test_fft64_vec_znx_big_sub(vec_znx_big_sub); }
+
+TEST(fft64_vec_znx_big, fft64_vec_znx_big_sub_small_a) {
+  test_fft64_vec_znx_big_sub_small_a(fft64_vec_znx_big_sub_small_a);
+}
+TEST(vec_znx_big, vec_znx_big_sub_small_a) { test_fft64_vec_znx_big_sub_small_a(vec_znx_big_sub_small_a); }
+
+TEST(fft64_vec_znx_big, fft64_vec_znx_big_sub_small_b) {
+  test_fft64_vec_znx_big_sub_small_b(fft64_vec_znx_big_sub_small_b);
+}
+TEST(vec_znx_big, vec_znx_big_sub_small_b) { test_fft64_vec_znx_big_sub_small_b(vec_znx_big_sub_small_b); }
+
+TEST(fft64_vec_znx_big, fft64_vec_znx_big_sub_small2) {
+  test_fft64_vec_znx_big_sub_small2(fft64_vec_znx_big_sub_small2);
+}
+TEST(vec_znx_big, vec_znx_big_sub_small2) { test_fft64_vec_znx_big_sub_small2(vec_znx_big_sub_small2); }
+
+static void test_vec_znx_big_normalize(VEC_ZNX_BIG_NORMALIZE_BASE2K_F normalize,
+                                       VEC_ZNX_BIG_NORMALIZE_BASE2K_TMP_BYTES_F normalize_tmp_bytes) {
+  // in the FFT64 case, big_normalize is just a forward.
+  // we will just test that the functions are callable
+  uint64_t n = 16;
+  uint64_t k = 12;
+  MODULE* module = new_module_info(n, FFT64);
+  for (uint64_t sa : {3, 5, 7}) {
+    for (uint64_t sr : {3, 5, 7}) {
+      uint64_t r_sl = n + 3;
+      def_rand_big(a, n, sa);
+      znx_vec_i64_layout r(n, sr, r_sl);
+      std::vector<uint8_t> tmp_space(normalize_tmp_bytes(module, sr, sa));
+      normalize(module, k, r.data(), sr, r_sl, a.data, sa, tmp_space.data());
+    }
+  }
+  delete_module_info(module);
+}
+
+TEST(vec_znx_big, fft64_vec_znx_big_normalize_base2k) {
+  test_vec_znx_big_normalize(fft64_vec_znx_big_normalize_base2k, fft64_vec_znx_big_normalize_base2k_tmp_bytes);
+}
+TEST(vec_znx_big, vec_znx_big_normalize_base2k) {
+  test_vec_znx_big_normalize(vec_znx_big_normalize_base2k, vec_znx_big_normalize_base2k_tmp_bytes);
+}
+
+static void test_vec_znx_big_range_normalize(  //
+    VEC_ZNX_BIG_RANGE_NORMALIZE_BASE2K_F normalize,
+    VEC_ZNX_BIG_RANGE_NORMALIZE_BASE2K_TMP_BYTES_F normalize_tmp_bytes) {
+  // in the FFT64 case, big_normalize is just a forward;
+  // we check the range variant against plain big_normalize on the extracted range
+  uint64_t n = 16;
+  uint64_t k = 11;
+  MODULE* module = new_module_info(n, FFT64);
+  for (uint64_t sa : {6, 15, 21}) {
+    for (uint64_t sr : {3, 5, 7}) {
+      uint64_t r_sl = n + 3;
+      def_rand_big(a, n, sa);
+      uint64_t a_start = uniform_u64_bits(30) % (sa / 2);
+      uint64_t a_end = sa - (uniform_u64_bits(30) % (sa / 2));
+      uint64_t a_step = (uniform_u64_bits(30) % 3) + 1;
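+      // the range covers indices a_start, a_start + a_step, ... strictly below
+      // a_end, hence range_size = ceil((a_end - a_start) / a_step)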
+      uint64_t range_size = (a_end + a_step - 1 - a_start) / a_step;
+      fft64_vec_znx_big_layout aextr(n, range_size);
+      for (uint64_t i = 0, idx = a_start; idx < a_end; ++i, idx += a_step) {
+        aextr.set(i, a.get_copy(idx));
+      }
+      znx_vec_i64_layout r(n, sr, r_sl);
+      znx_vec_i64_layout r2(n, sr, r_sl);
+      // tmp_space is large enough for both calls
+      std::vector<uint8_t> tmp_space(normalize_tmp_bytes(module, sr, sa));
+      normalize(module, k, r.data(), sr, r_sl, a.data, a_start, a_end, a_step, tmp_space.data());
+      fft64_vec_znx_big_normalize_base2k(module, k, r2.data(), sr, r_sl, aextr.data, range_size, tmp_space.data());
+      for (uint64_t i = 0; i < sr; ++i) {
+        ASSERT_EQ(r.get_copy(i), r2.get_copy(i));
+      }
+    }
+  }
+  delete_module_info(module);
+}
+
+TEST(vec_znx_big, fft64_vec_znx_big_range_normalize_base2k) {
+  test_vec_znx_big_range_normalize(fft64_vec_znx_big_range_normalize_base2k,
+                                   fft64_vec_znx_big_range_normalize_base2k_tmp_bytes);
+}
+TEST(vec_znx_big, vec_znx_big_range_normalize_base2k) {
+  test_vec_znx_big_range_normalize(vec_znx_big_range_normalize_base2k, vec_znx_big_range_normalize_base2k_tmp_bytes);
+}
+
+static void test_vec_znx_big_rotate(VEC_ZNX_BIG_ROTATE_F rotate) {
+  // in the FFT64 case, a big rotation acts directly on the int64 coefficients;
+  // we check it against a coefficient-wise rotation
+  uint64_t n = 16;
+  int64_t p = 12;
+  MODULE* module = new_module_info(n, FFT64);
+  for (uint64_t sa : {3, 5, 7}) {
+    for (uint64_t sr : {3, 5, 7}) {
+      def_rand_big(a, n, sa);
+      fft64_vec_znx_big_layout r(n, sr);
+      rotate(module, p, r.data, sr, a.data, sa);
+      for (uint64_t i = 0; i < sr; ++i) {
+        znx_i64 aa = a.get_copy_zext(i);
+        znx_i64 expect(n);
+        for (uint64_t j = 0; j < n; ++j) {
+          expect.set_coeff(j, aa.get_coeff(int64_t(j) - p));
+        }
+        znx_i64 actual = r.get_copy(i);
+        ASSERT_EQ(expect, actual);
+      }
+    }
+  }
+  delete_module_info(module);
+}
+
+TEST(vec_znx_big, fft64_vec_znx_big_rotate) { test_vec_znx_big_rotate(fft64_vec_znx_big_rotate); }
+TEST(vec_znx_big, vec_znx_big_rotate) { test_vec_znx_big_rotate(vec_znx_big_rotate); }
+
+static void test_vec_znx_big_automorphism(VEC_ZNX_BIG_AUTOMORPHISM_F automorphism) {
+  // in the FFT64 case, a big automorphism acts directly on the int64 coefficients;
+  // we check it against a coefficient-wise automorphism
+  uint64_t n = 16;
+  int64_t p = 11;
+  MODULE* module = new_module_info(n, FFT64);
+  for (uint64_t sa : {3, 5, 7}) {
+    for (uint64_t sr : {3, 5, 7}) {
+      def_rand_big(a, n, sa);
+      fft64_vec_znx_big_layout r(n, sr);
+      automorphism(module, p, r.data, sr, a.data, sa);
+      for (uint64_t i = 0; i < sr; ++i) {
+        znx_i64 aa = a.get_copy_zext(i);
+        znx_i64 expect(n);
+        for (uint64_t j = 0; j < n; ++j) {
+          expect.set_coeff(p * j, aa.get_coeff(j));
+        }
+        znx_i64 actual = r.get_copy(i);
+        ASSERT_EQ(expect, actual);
+      }
+    }
+  }
+  delete_module_info(module);
+}
+
+TEST(vec_znx_big, fft64_vec_znx_big_automorphism) { test_vec_znx_big_automorphism(fft64_vec_znx_big_automorphism); }
+TEST(vec_znx_big, vec_znx_big_automorphism) { test_vec_znx_big_automorphism(vec_znx_big_automorphism); }
diff --git a/test/spqlios_vmp_product_test.cpp b/test/spqlios_vmp_product_test.cpp
new file mode 100644
index 0000000..a785f3f
--- /dev/null
+++ b/test/spqlios_vmp_product_test.cpp
@@ -0,0 +1,121 @@
+#include <gtest/gtest.h>
+
+#include <cmath>
+
+#include "../spqlios/arithmetic/vec_znx_arithmetic_private.h"
+#include "testlib/fft64_layouts.h"
+#include "testlib/polynomial_vector.h"
+
+static void test_vmp_prepare_contiguous(VMP_PREPARE_CONTIGUOUS_F* prepare_contiguous,
+                                        VMP_PREPARE_CONTIGUOUS_TMP_BYTES_F* tmp_bytes) {
+  // tests when n < 8
+  for (uint64_t nn : {2, 4}) {
+    MODULE* module = new_module_info(nn, FFT64);
+    for (uint64_t nrows : {1, 2, 5}) {
+      for (uint64_t ncols : {2, 6, 7}) {
+        znx_vec_i64_layout mat(nn, nrows * ncols, nn);
+        fft64_vmp_pmat_layout pmat(nn, nrows, ncols);
+        mat.fill_random(30);
+        std::vector<uint8_t> tmp_space(tmp_bytes(module, nrows, ncols));
+        thash hash_before = mat.content_hash();
+        prepare_contiguous(module, pmat.data, mat.data(), nrows, ncols, tmp_space.data());
+        ASSERT_EQ(mat.content_hash(), hash_before);
+        for (uint64_t row = 0; row < nrows; ++row) {
+          for (uint64_t col = 0; col < ncols; ++col) {
+            const double* pmatv = (double*)pmat.data + (col * nrows + row) * nn;
+            reim_fft64vec tmp = simple_fft64(mat.get_copy(row * ncols + col));
+            const double* tmpv = tmp.data();
+            for (uint64_t i = 0; i < nn; ++i) {
+              ASSERT_LE(std::abs(pmatv[i] - tmpv[i]), 1e-10);
+            }
+          }
+        }
+      }
+    }
+    delete_module_info(module);
+  }
+  // tests when n >= 8
+  for (uint64_t nn : {8, 32}) {
+    MODULE* module = new_module_info(nn, FFT64);
+    uint64_t nblk = nn / 8;
+    for (uint64_t nrows : {1, 2, 5}) {
+      for (uint64_t ncols : {2, 6, 7}) {
+        znx_vec_i64_layout mat(nn, nrows * ncols, nn);
+        fft64_vmp_pmat_layout pmat(nn, nrows, ncols);
+        mat.fill_random(30);
+        std::vector<uint8_t> tmp_space(tmp_bytes(module, nrows, ncols));
+        thash hash_before = mat.content_hash();
+        prepare_contiguous(module, pmat.data, mat.data(), nrows, ncols, tmp_space.data());
+        ASSERT_EQ(mat.content_hash(), hash_before);
+        for (uint64_t row = 0; row < nrows; ++row) {
+          for (uint64_t col = 0; col < ncols; ++col) {
+            reim_fft64vec tmp = simple_fft64(mat.get_copy(row * ncols + col));
+            for (uint64_t blk = 0; blk < nblk; ++blk) {
+              reim4_elem expect = tmp.get_blk(blk);
+              reim4_elem actual = pmat.get(row, col, blk);
+              ASSERT_LE(infty_dist(actual, expect), 1e-10);
+            }
+          }
+        }
+      }
+    }
+    delete_module_info(module);
+  }
+}
+
+TEST(vec_znx, vmp_prepare_contiguous) {
+  test_vmp_prepare_contiguous(vmp_prepare_contiguous, vmp_prepare_contiguous_tmp_bytes);
+}
+TEST(vec_znx, fft64_vmp_prepare_contiguous_ref) {
+  test_vmp_prepare_contiguous(fft64_vmp_prepare_contiguous_ref, fft64_vmp_prepare_contiguous_tmp_bytes);
+}
+#ifdef __x86_64__
+TEST(vec_znx, fft64_vmp_prepare_contiguous_avx) {
+  test_vmp_prepare_contiguous(fft64_vmp_prepare_contiguous_avx, fft64_vmp_prepare_contiguous_tmp_bytes);
+}
+#endif
+
+static void test_vmp_apply(VMP_APPLY_DFT_TO_DFT_F* apply, VMP_APPLY_DFT_TO_DFT_TMP_BYTES_F* tmp_bytes) {
+  for (uint64_t nn : {2, 4, 8, 64}) {
+    MODULE* module = new_module_info(nn, FFT64);
+    for (uint64_t mat_nrows : {1, 4, 7}) {
+      for (uint64_t mat_ncols : {1, 2, 5}) {
+        for (uint64_t in_size : {1, 4, 7}) {
+          for (uint64_t out_size : {1, 2, 5}) {
+            fft64_vec_znx_dft_layout in(nn, in_size);
+            fft64_vmp_pmat_layout pmat(nn, mat_nrows, mat_ncols);
+            fft64_vec_znx_dft_layout out(nn, out_size);
+            in.fill_random(0);
+            pmat.fill_random(0);
+            // naive computation of the product
+            std::vector<reim_fft64vec> expect(out_size, reim_fft64vec(nn));
+            for (uint64_t col = 0; col < out_size; ++col) {
+              reim_fft64vec ex = reim_fft64vec::zero(nn);
+              for (uint64_t row = 0; row < std::min(mat_nrows, in_size); ++row) {
+                ex += pmat.get_zext(row, col) * in.get_copy_zext(row);
+              }
+              expect[col] = ex;
+            }
+            // apply the product
+            std::vector<uint8_t> tmp(tmp_bytes(module, out_size, in_size, mat_nrows, mat_ncols));
+            apply(module, out.data, out_size, in.data, in_size, pmat.data, mat_nrows, mat_ncols, tmp.data());
+            // check that the output is close to the expectation
+            for (uint64_t col = 0; col < out_size; ++col) {
+              reim_fft64vec actual = out.get_copy_zext(col);
+              ASSERT_LE(infty_dist(actual, expect[col]), 1e-10);
+            }
+          }
+        }
+      }
+    }
+    delete_module_info(module);
+  }
+}
+
+TEST(vec_znx, vmp_apply_to_dft) { test_vmp_apply(vmp_apply_dft_to_dft, vmp_apply_dft_to_dft_tmp_bytes); }
+TEST(vec_znx, fft64_vmp_apply_dft_to_dft_ref) {
+  test_vmp_apply(fft64_vmp_apply_dft_to_dft_ref, fft64_vmp_apply_dft_to_dft_tmp_bytes);
+}
+#ifdef __x86_64__
+TEST(vec_znx, fft64_vmp_apply_dft_to_dft_avx) {
+  test_vmp_apply(fft64_vmp_apply_dft_to_dft_avx, fft64_vmp_apply_dft_to_dft_tmp_bytes);
+}
+#endif
diff --git a/test/testlib/fft64_dft.cpp b/test/testlib/fft64_dft.cpp
new file mode 100644
index 0000000..1ab2cab
--- /dev/null
+++ b/test/testlib/fft64_dft.cpp
@@ -0,0 +1,139 @@
+#include "fft64_dft.h"
+
+#include <cmath>
+#include <cstring>
+
+#include "../../spqlios/reim/reim_fft.h"
+#include "../../spqlios/reim/reim_fft_internal.h"
+
+reim_fft64vec::reim_fft64vec(uint64_t n) : v(n, 0) {}
+reim4_elem reim_fft64vec::get_blk(uint64_t blk) const {
+  return reim_view(v.size() / 2, (double*)v.data()).get_blk(blk);
+}
+double* reim_fft64vec::data() { return v.data(); }
+const double* reim_fft64vec::data() const { return v.data(); }
+uint64_t reim_fft64vec::nn() const { return v.size(); }
+reim_fft64vec::reim_fft64vec(uint64_t n, const double* data) : v(data, data + n) {}
+void reim_fft64vec::save_as(double* dest) const { memcpy(dest, v.data(), nn() * sizeof(double)); }
+reim_fft64vec reim_fft64vec::zero(uint64_t n) { return reim_fft64vec(n); }
+void reim_fft64vec::set_blk(uint64_t blk, const reim4_elem& value) {
+  reim_view(v.size() / 2, (double*)v.data()).set_blk(blk, value);
+}
+reim_fft64vec reim_fft64vec::dft_random(uint64_t n, uint64_t log2bound) {
+  return simple_fft64(znx_i64::random_log2bound(n, log2bound));
+}
+reim_fft64vec reim_fft64vec::random(uint64_t n, double log2bound) {
+  double bound = pow(2., log2bound);
+  reim_fft64vec res(n);
+  for (uint64_t i = 0; i < n; ++i) {
+    res.v[i] = uniform_f64_bounds(-bound, bound);
+  }
+  return res;
+}
+
+reim_fft64vec operator+(const reim_fft64vec& a, const reim_fft64vec& b) {
+  uint64_t nn = a.nn();
+  REQUIRE_DRAMATICALLY(b.nn() == a.nn(), "ring dimension mismatch");
+  reim_fft64vec res(nn);
+  double* rv = res.data();
+  const double* av = a.data();
+  const double* bv = b.data();
+  for (uint64_t i = 0; i < nn; ++i) {
+    rv[i] = av[i] + bv[i];
+  }
+  return res;
+}
+reim_fft64vec operator-(const reim_fft64vec& a, const reim_fft64vec& b) {
+  uint64_t nn = a.nn();
+  REQUIRE_DRAMATICALLY(b.nn() == a.nn(), "ring dimension mismatch");
+  reim_fft64vec res(nn);
+  double* rv = res.data();
+  const double* av = a.data();
+  const double* bv = b.data();
+  for (uint64_t i = 0; i < nn; ++i) {
+    rv[i] = av[i] - bv[i];
+  }
+  return res;
+}
+reim_fft64vec operator*(const reim_fft64vec& a, const reim_fft64vec& b) {
+  uint64_t nn = a.nn();
+  REQUIRE_DRAMATICALLY(b.nn() == a.nn(), "ring dimension mismatch");
+  REQUIRE_DRAMATICALLY(nn >= 2, "test not defined for nn=1");
+  uint64_t m = nn / 2;
+  reim_fft64vec res(nn);
+  double* rv = res.data();
+  const double* av = a.data();
+  const double* bv = b.data();
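+  // lanes [0, m) hold real parts and [m, 2m) imaginary parts, so this is a
+  // per-lane complex product: (a + ib)(c + id) = (ac - bd) + i(ad + bc)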
+  for (uint64_t i = 0; i < m; ++i) {
+    rv[i] = av[i] * bv[i] - av[m + i] * bv[m + i];
+    rv[m + i] = av[i] * bv[m + i] + av[m + i] * bv[i];
+  }
+  return res;
+}
+reim_fft64vec& operator+=(reim_fft64vec& a, const reim_fft64vec& b) {
+  uint64_t nn = a.nn();
+  REQUIRE_DRAMATICALLY(b.nn() == a.nn(), "ring dimension mismatch");
+  double* av = a.data();
+  const double* bv = b.data();
+  for (uint64_t i = 0; i < nn; ++i) {
+    av[i] = av[i] + bv[i];
+  }
+  return a;
+}
+reim_fft64vec& operator-=(reim_fft64vec& a, const reim_fft64vec& b) {
+  uint64_t nn = a.nn();
+  REQUIRE_DRAMATICALLY(b.nn() == a.nn(), "ring dimension mismatch");
+  double* av = a.data();
+  const double* bv = b.data();
+  for (uint64_t i = 0; i < nn; ++i) {
+    av[i] = av[i] - bv[i];
+  }
+  return a;
+}
+
+reim_fft64vec simple_fft64(const znx_i64& polynomial) {
+  const uint64_t nn = polynomial.nn();
+  const uint64_t m = nn / 2;
+  reim_fft64vec res(nn);
+  double* dat = res.data();
+  for (uint64_t i = 0; i < nn; ++i) dat[i] = polynomial.get_coeff(i);
+  reim_fft_simple(m, dat);
+  return res;
+}
+
+znx_i64 simple_rint_ifft64(const reim_fft64vec& fftvec) {
+  const uint64_t nn = fftvec.nn();
+  const uint64_t m = nn / 2;
+  std::vector<double> vv(fftvec.data(), fftvec.data() + nn);
+  double* v = vv.data();
+  reim_ifft_simple(m, v);
+  znx_i64 res(nn);
+  for (uint64_t i = 0; i < nn; ++i) {
+    res.set_coeff(i, rint(v[i] / m));
+  }
+  return res;
+}
+
+rnx_f64 naive_ifft64(const reim_fft64vec& fftvec) {
+  const uint64_t nn = fftvec.nn();
+  const uint64_t m = nn / 2;
+  std::vector<double> vv(fftvec.data(), fftvec.data() + nn);
+  double* v = vv.data();
+  reim_ifft_simple(m, v);
+  rnx_f64 res(nn);
+  for (uint64_t i = 0; i < nn; ++i) {
+    res.set_coeff(i, v[i] / m);
+  }
+  return res;
+}
+double infty_dist(const reim_fft64vec& a, const reim_fft64vec& b) {
+  const uint64_t n = a.nn();
+  REQUIRE_DRAMATICALLY(b.nn() == a.nn(), "dimensions mismatch");
+  const double* da = a.data();
+  const double* db = b.data();
+  double d = 0;
+  for (uint64_t i = 0; i < n; ++i) {
+    double di = std::abs(da[i] - db[i]);
+    if (di > d) d = di;
+  }
+  return d;
+}
diff --git a/test/testlib/fft64_dft.h b/test/testlib/fft64_dft.h
new file mode 100644
index 0000000..973ceed
--- /dev/null
+++ b/test/testlib/fft64_dft.h
@@ -0,0 +1,40 @@
+#ifndef SPQLIOS_FFT64_DFT_H
+#define SPQLIOS_FFT64_DFT_H
+
+#include "negacyclic_polynomial.h"
+#include "reim4_elem.h"
+
+class reim_fft64vec {
+  std::vector<double> v;
+
+ public:
+  reim_fft64vec() = default;
+  reim_fft64vec(uint64_t n);
+  reim_fft64vec(uint64_t n, const double* data);
+  uint64_t nn() const;
+  static reim_fft64vec zero(uint64_t n);
+  /** random complex coefficients (unstructured) */
+  static reim_fft64vec random(uint64_t n, double log2bound);
+  /** random fft of a small int polynomial */
+  static reim_fft64vec dft_random(uint64_t n, uint64_t log2bound);
+  double* data();
+  const double* data() const;
+  void save_as(double* dest) const;
+  reim4_elem get_blk(uint64_t blk) const;
+  void set_blk(uint64_t blk, const reim4_elem& value);
+};
+
+reim_fft64vec operator+(const reim_fft64vec& a, const reim_fft64vec& b);
+reim_fft64vec operator-(const reim_fft64vec& a, const reim_fft64vec& b);
+reim_fft64vec operator*(const reim_fft64vec& a, const reim_fft64vec& b);
+reim_fft64vec& operator+=(reim_fft64vec& a, const reim_fft64vec& b);
+reim_fft64vec& operator-=(reim_fft64vec& a, const reim_fft64vec& b);
+
+/** infty distance */
+double infty_dist(const reim_fft64vec& a, const reim_fft64vec& b);
+
+reim_fft64vec simple_fft64(const znx_i64& polynomial);
+znx_i64 simple_rint_ifft64(const reim_fft64vec& fftvec);
+rnx_f64 naive_ifft64(const reim_fft64vec& fftvec);
+
+#endif  // SPQLIOS_FFT64_DFT_H
diff --git a/test/testlib/fft64_layouts.cpp b/test/testlib/fft64_layouts.cpp
new file mode 100644
index 0000000..b815930
--- /dev/null
+++ b/test/testlib/fft64_layouts.cpp
@@ -0,0 +1,238 @@
+#include "fft64_layouts.h"
+#ifdef VALGRIND_MEM_TESTS
+#include "valgrind/memcheck.h"
+#endif
+
+void* alloc64(uint64_t size) {
+  static const uint64_t _msk64 = uint64_t(-64);  // keeps all but the low 6 bits
+  if (size == 0) return nullptr;
+  uint64_t rsize = (size + 63) & _msk64;
+  uint8_t* reps = (uint8_t*)std::aligned_alloc(64, rsize);
+  REQUIRE_DRAMATICALLY(reps != 0, "Out of memory");
+#ifdef VALGRIND_MEM_TESTS
+  VALGRIND_MAKE_MEM_NOACCESS(reps + size, rsize - size);
+#endif
+  return reps;
+}
+
+fft64_vec_znx_dft_layout::fft64_vec_znx_dft_layout(uint64_t n, uint64_t size)
+    : nn(n),                                      //
+      size(size),                                 //
+      data((VEC_ZNX_DFT*)alloc64(n * size * 8)),  //
+      view(n / 2, size, (double*)data) {}
+
+fft64_vec_znx_dft_layout::~fft64_vec_znx_dft_layout() { free(data); }
+
+double* fft64_vec_znx_dft_layout::get_addr(uint64_t idx) {
+  REQUIRE_DRAMATICALLY(idx < size, "index overflow " << idx << " / " << size);
+  return ((double*)data) + idx * nn;
+}
+const double* fft64_vec_znx_dft_layout::get_addr(uint64_t idx) const {
+  REQUIRE_DRAMATICALLY(idx < size, "index overflow " << idx << " / " << size);
+  return ((double*)data) + idx * nn;
+}
+reim_fft64vec fft64_vec_znx_dft_layout::get_copy_zext(uint64_t idx) const {
+  if (idx < size) {
+    return reim_fft64vec(nn, get_addr(idx));
+  } else {
+    return reim_fft64vec::zero(nn);
+  }
+}
+void fft64_vec_znx_dft_layout::fill_dft_random_log2bound(uint64_t bits) {
+  for (uint64_t i = 0; i < size; ++i) {
+    set(i, simple_fft64(znx_i64::random_log2bound(nn, bits)));
+  }
+}
+void fft64_vec_znx_dft_layout::set(uint64_t idx, const reim_fft64vec& value) {
+  REQUIRE_DRAMATICALLY(value.nn() == nn, "ring dimension mismatch");
+  value.save_as(get_addr(idx));
+}
+thash fft64_vec_znx_dft_layout::content_hash() const { return test_hash(data, size * nn * sizeof(double)); }
+
+reim4_elem fft64_vec_znx_dft_layout::get(uint64_t idx, uint64_t blk) const {
+  REQUIRE_DRAMATICALLY(idx < size, "index overflow: " << idx << " / " << size);
+  REQUIRE_DRAMATICALLY(blk < nn / 8, "blk overflow: " << blk << " / " << nn / 8);
+  double* reim = ((double*)data) + idx * nn;
+  return reim4_elem(reim + blk * 4, reim + nn / 2 + blk * 4);
+}
+reim4_elem fft64_vec_znx_dft_layout::get_zext(uint64_t idx, uint64_t blk) const {
+  REQUIRE_DRAMATICALLY(blk < nn / 8, "blk overflow: " << blk << " / " << nn / 8);
+  if (idx < size) {
+    return get(idx, blk);
+  } else {
+    return reim4_elem::zero();
+  }
+}
+void fft64_vec_znx_dft_layout::set(uint64_t idx, uint64_t blk, const reim4_elem& value) {
+  REQUIRE_DRAMATICALLY(idx < size, "index overflow: " << idx << " / " << size);
+  REQUIRE_DRAMATICALLY(blk < nn / 8, "blk overflow: " << blk << " / " << nn / 8);
+  double* reim = ((double*)data) + idx * nn;
+  value.save_re_im(reim + blk * 4, reim + nn / 2 + blk * 4);
+}
+void fft64_vec_znx_dft_layout::fill_random(double log2bound) {
+  for (uint64_t i = 0; i < size; ++i) {
+    set(i, reim_fft64vec::random(nn, log2bound));
+  }
+}
+void fft64_vec_znx_dft_layout::fill_dft_random(uint64_t log2bound) {
+  for (uint64_t i = 0; i < size; ++i) {
+    set(i, reim_fft64vec::dft_random(nn, log2bound));
+  }
+}
+
+fft64_vec_znx_big_layout::fft64_vec_znx_big_layout(uint64_t n, uint64_t size)
+    : nn(n),       //
+      size(size),  //
+      data((VEC_ZNX_BIG*)alloc64(n * size * 8)) {}
+
+znx_i64 fft64_vec_znx_big_layout::get_copy(uint64_t index) const {
+  REQUIRE_DRAMATICALLY(index < size, "index overflow: " << index << " / " << size);
+  return znx_i64(nn, ((int64_t*)data) + index * nn);
+}
+znx_i64 fft64_vec_znx_big_layout::get_copy_zext(uint64_t index) const {
+  if (index < size) {
+    return znx_i64(nn, ((int64_t*)data) + index * nn);
+  } else {
+    return znx_i64::zero(nn);
+  }
+}
+void fft64_vec_znx_big_layout::set(uint64_t index, const znx_i64& value) {
+  REQUIRE_DRAMATICALLY(index < size, "index overflow: " << index << " / " << size);
+  value.save_as(((int64_t*)data) + index * nn);
+}
+void fft64_vec_znx_big_layout::fill_random() {
+  for (uint64_t i = 0; i < size; ++i) {
+    set(i, znx_i64::random_log2bound(nn, 1));
+  }
+}
+fft64_vec_znx_big_layout::~fft64_vec_znx_big_layout() { free(data); }
+
+fft64_vmp_pmat_layout::fft64_vmp_pmat_layout(uint64_t n, uint64_t nrows, uint64_t ncols)
+    : nn(n),
+      nrows(nrows),
+      ncols(ncols),  //
+      data((VMP_PMAT*)alloc64(nrows * ncols * nn * 8)) {}
+
+double* fft64_vmp_pmat_layout::get_addr(uint64_t row, uint64_t col, uint64_t blk) const {
+  REQUIRE_DRAMATICALLY(row < nrows, "row overflow: " << row << " / " << nrows);
+  REQUIRE_DRAMATICALLY(col < ncols, "col overflow: " << col << " / " << ncols);
+  REQUIRE_DRAMATICALLY(blk < nn / 8, "block overflow: " << blk << " / " << (nn / 8));
+  double* d = (double*)data;
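+  // layout: reim4 blocks are blk-major; within a block, columns go by pairs,
+  // and within a pair the two reim4's of each row are adjacent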
+  if (col == (ncols - 1) && (ncols % 2 == 1)) {
+    // special case: the last column when ncols is odd
+    return d + blk * nrows * ncols * 8  // major: blk
+           + col * nrows * 8            // col == ncols-1
+           + row * 8;
+  } else {
+    // general case: columns go by pair
+    return d + blk * nrows * ncols * 8    // major: blk
+           + (col / 2) * (2 * nrows) * 8  // second: col pair index
+           + row * 2 * 8                  // third: row index
+           + (col % 2) * 8;               // minor: col in colpair
+  }
+}
+
+reim4_elem fft64_vmp_pmat_layout::get(uint64_t row, uint64_t col, uint64_t blk) const {
+  return reim4_elem(get_addr(row, col, blk));
+}
+reim4_elem fft64_vmp_pmat_layout::get_zext(uint64_t row, uint64_t col, uint64_t blk) const {
+  REQUIRE_DRAMATICALLY(blk < nn / 8, "block overflow: " << blk << " / " << (nn / 8));
+  if (row < nrows && col < ncols) {
+    return reim4_elem(get_addr(row, col, blk));
+  } else {
+    return reim4_elem::zero();
+  }
+}
+void fft64_vmp_pmat_layout::set(uint64_t row, uint64_t col, uint64_t blk, const reim4_elem& value) const {
+  value.save_as(get_addr(row, col, blk));
+}
+
+fft64_vmp_pmat_layout::~fft64_vmp_pmat_layout() { free(data); }
+
+reim_fft64vec fft64_vmp_pmat_layout::get_zext(uint64_t row, uint64_t col) const {
+  if (row >= nrows || col >= ncols) {
+    return reim_fft64vec::zero(nn);
+  }
+  if (nn < 8) {
+    // the pmat is just col major
+    double* addr = (double*)data + (row + col * nrows) * nn;
+    return reim_fft64vec(nn, addr);
+  }
+  // otherwise, reconstruct it block by block
+  reim_fft64vec res(nn);
+  for (uint64_t blk = 0; blk < nn / 8; ++blk) {
+    reim4_elem v = get(row, col, blk);
+    res.set_blk(blk, v);
+  }
+  return res;
+}
+void fft64_vmp_pmat_layout::set(uint64_t row, uint64_t col, const reim_fft64vec& value) {
+  REQUIRE_DRAMATICALLY(row < nrows, "row overflow: " << row << " / " << nrows);
+  REQUIRE_DRAMATICALLY(col < ncols, "row overflow: " << col << " / " << ncols);
+  if (nn < 8) {
+    // the pmat is just col major
+    double* addr = (double*)data + (row + col * nrows) * nn;
+    value.save_as(addr);
+    return;
+  }
+  // otherwise, write it block by block
+  for (uint64_t blk = 0; blk < nn / 8; ++blk) {
+    reim4_elem v = value.get_blk(blk);
+    set(row, col, blk, v);
+  }
+}
+void fft64_vmp_pmat_layout::fill_random(double log2bound) {
+  for (uint64_t row = 0; row < nrows; ++row) {
+    for (uint64_t col = 0; col < ncols; ++col) {
+      set(row, col, reim_fft64vec::random(nn, log2bound));
+    }
+  }
+}
+void fft64_vmp_pmat_layout::fill_dft_random(uint64_t log2bound) {
+  for (uint64_t row = 0; row < nrows; ++row) {
+    for (uint64_t col = 0; col < ncols; ++col) {
+      set(row, col, reim_fft64vec::dft_random(nn, log2bound));
+    }
+  }
+}
+
+fft64_svp_ppol_layout::fft64_svp_ppol_layout(uint64_t n)
+    : nn(n),  //
+      data((SVP_PPOL*)alloc64(nn * 8)) {}
+
+reim_fft64vec fft64_svp_ppol_layout::get_copy() const { return reim_fft64vec(nn, (double*)data); }
+
+void fft64_svp_ppol_layout::set(const reim_fft64vec& value) { value.save_as((double*)data); }
+
+void fft64_svp_ppol_layout::fill_dft_random(uint64_t log2bound) { set(reim_fft64vec::dft_random(nn, log2bound)); }
+
+void fft64_svp_ppol_layout::fill_random(double log2bound) { set(reim_fft64vec::random(nn, log2bound)); }
+
+fft64_svp_ppol_layout::~fft64_svp_ppol_layout() { free(data); }
+thash fft64_svp_ppol_layout::content_hash() const { return test_hash(data, nn * sizeof(double)); }
+
+fft64_cnv_left_layout::fft64_cnv_left_layout(uint64_t n, uint64_t size)
+    : nn(n),  //
+      size(size),
+      data((CNV_PVEC_L*)alloc64(size * nn * 8)) {}
+
+reim4_elem fft64_cnv_left_layout::get(uint64_t idx, uint64_t blk) {
+  REQUIRE_DRAMATICALLY(idx < size, "idx overflow: " << idx << " / " << size);
+  REQUIRE_DRAMATICALLY(blk < nn / 8, "block overflow: " << blk << " / " << (nn / 8));
+  return reim4_elem(((double*)data) + blk * size + idx);
+}
+
+fft64_cnv_left_layout::~fft64_cnv_left_layout() { free(data); }
+
+fft64_cnv_right_layout::fft64_cnv_right_layout(uint64_t n, uint64_t size)
+    : nn(n),  //
+      size(size),
+      data((CNV_PVEC_R*)alloc64(size * nn * 8)) {}
+
+reim4_elem fft64_cnv_right_layout::get(uint64_t idx, uint64_t blk) {
+  REQUIRE_DRAMATICALLY(idx < size, "idx overflow: " << idx << " / " << size);
+  REQUIRE_DRAMATICALLY(blk < nn / 8, "block overflow: " << blk << " / " << (nn / 8));
+  return reim4_elem(((double*)data) + blk * size + idx);
+}
+
+fft64_cnv_right_layout::~fft64_cnv_right_layout() { free(data); }
diff --git a/test/testlib/fft64_layouts.h b/test/testlib/fft64_layouts.h
new file mode 100644
index 0000000..ba71448
--- /dev/null
+++ b/test/testlib/fft64_layouts.h
@@ -0,0 +1,109 @@
+#ifndef SPQLIOS_FFT64_LAYOUTS_H
+#define SPQLIOS_FFT64_LAYOUTS_H
+
+#include "../../spqlios/arithmetic/vec_znx_arithmetic.h"
+#include "fft64_dft.h"
+#include "negacyclic_polynomial.h"
+#include "reim4_elem.h"
+
+/** @brief test layout for the VEC_ZNX_DFT */
+struct fft64_vec_znx_dft_layout {
+ public:
+  const uint64_t nn;
+  const uint64_t size;
+  VEC_ZNX_DFT* const data;
+  reim_vector_view view;
+  /** @brief fill with random double values (unstructured) */
+  void fill_random(double log2bound);
+  /** @brief fill with random ffts of small int polynomials */
+  void fill_dft_random(uint64_t log2bound);
+  reim4_elem get(uint64_t idx, uint64_t blk) const;
+  reim4_elem get_zext(uint64_t idx, uint64_t blk) const;
+  void set(uint64_t idx, uint64_t blk, const reim4_elem& value);
+  fft64_vec_znx_dft_layout(uint64_t n, uint64_t size);
+  void fill_random_log2bound(uint64_t bits);
+  void fill_dft_random_log2bound(uint64_t bits);
+  double* get_addr(uint64_t idx);
+  const double* get_addr(uint64_t idx) const;
+  reim_fft64vec get_copy_zext(uint64_t idx) const;
+  void set(uint64_t idx, const reim_fft64vec& value);
+  thash content_hash() const;
+  ~fft64_vec_znx_dft_layout();
+};
+
+/** @brief test layout for the VEC_ZNX_BIG */
+class fft64_vec_znx_big_layout {
+ public:
+  const uint64_t nn;
+  const uint64_t size;
+  VEC_ZNX_BIG* const data;
+  fft64_vec_znx_big_layout(uint64_t n, uint64_t size);
+  void fill_random();
+  znx_i64 get_copy(uint64_t index) const;
+  znx_i64 get_copy_zext(uint64_t index) const;
+  void set(uint64_t index, const znx_i64& value);
+  thash content_hash() const;
+  ~fft64_vec_znx_big_layout();
+};
+
+/** @brief test layout for the VMP_PMAT */
+class fft64_vmp_pmat_layout {
+ public:
+  const uint64_t nn;
+  const uint64_t nrows;
+  const uint64_t ncols;
+  VMP_PMAT* const data;
+  fft64_vmp_pmat_layout(uint64_t n, uint64_t nrows, uint64_t ncols);
+  double* get_addr(uint64_t row, uint64_t col, uint64_t blk) const;
+  reim4_elem get(uint64_t row, uint64_t col, uint64_t blk) const;
+  thash content_hash() const;
+  reim4_elem get_zext(uint64_t row, uint64_t col, uint64_t blk) const;
+  reim_fft64vec get_zext(uint64_t row, uint64_t col) const;
+  void set(uint64_t row, uint64_t col, uint64_t blk, const reim4_elem& v) const;
+  void set(uint64_t row, uint64_t col, const reim_fft64vec& value);
+  /** @brief fill with random double values (unstructured) */
+  void fill_random(double log2bound);
+  /** @brief fill with random ffts of small int polynomials */
+  void fill_dft_random(uint64_t log2bound);
+  ~fft64_vmp_pmat_layout();
+};
+
+/** @brief test layout for the SVP_PPOL */
+class fft64_svp_ppol_layout {
+ public:
+  const uint64_t nn;
+  SVP_PPOL* const data;
+  fft64_svp_ppol_layout(uint64_t n);
+  thash content_hash() const;
+  reim_fft64vec get_copy() const;
+  void set(const reim_fft64vec&);
+  /** @brief fill with random double values (unstructured) */
+  void fill_random(double log2bound);
+  /** @brief fill with random ffts of small int polynomials */
+  void fill_dft_random(uint64_t log2bound);
+  ~fft64_svp_ppol_layout();
+};
+
+/** @brief test layout for the CNV_PVEC_L */
+class fft64_cnv_left_layout {
+ public:
+  const uint64_t nn;
+  const uint64_t size;
+  CNV_PVEC_L* const data;
+  fft64_cnv_left_layout(uint64_t n, uint64_t size);
+  reim4_elem get(uint64_t idx, uint64_t blk);
+  thash content_hash() const;
+  ~fft64_cnv_left_layout();
+};
+
+/** @brief test layout for the CNV_PVEC_R */
+class fft64_cnv_right_layout {
+ public:
+  const uint64_t nn;
+  const uint64_t size;
+  CNV_PVEC_R* const data;
+  fft64_cnv_right_layout(uint64_t n, uint64_t size);
+  reim4_elem get(uint64_t idx, uint64_t blk);
+  thash content_hash() const;
+  ~fft64_cnv_right_layout();
+};
+
+#endif  // SPQLIOS_FFT64_LAYOUTS_H
diff --git a/test/testlib/polynomial_vector.cpp b/test/testlib/polynomial_vector.cpp
new file mode 100644
index 0000000..95d4e78
--- /dev/null
+++ b/test/testlib/polynomial_vector.cpp
@@ -0,0 +1,69 @@
+#include "polynomial_vector.h"
+
+#include <cstdlib>
+#include <cstring>
+
+#ifdef VALGRIND_MEM_TESTS
+#include "valgrind/memcheck.h"
+#endif
+
+#define CANARY_PADDING (1024)
+#define GARBAGE_VALUE (242)
+
+znx_vec_i64_layout::znx_vec_i64_layout(uint64_t n, uint64_t size, uint64_t slice) : n(n), size(size), slice(slice) {
+  REQUIRE_DRAMATICALLY(is_pow2(n), "not a power of 2: " << n);
+  REQUIRE_DRAMATICALLY(slice >= n, "slice too small: " << slice << " < " << n);
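+  // region layout: [canary | size slices of 'slice' int64_t each, of which the first n are valid | canary]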
+  this->region = (uint8_t*)malloc(size * slice * sizeof(int64_t) + 2 * CANARY_PADDING);
+  this->data_start = (int64_t*)(region + CANARY_PADDING);
+  // ensure that any invalid value is kind-of garbage
+  memset(region, GARBAGE_VALUE, size * slice * sizeof(int64_t) + 2 * CANARY_PADDING);
+  // mark inter-slice memory as non accessible
+#ifdef VALGRIND_MEM_TESTS
+  VALGRIND_MAKE_MEM_NOACCESS(region, CANARY_PADDING);
+  VALGRIND_MAKE_MEM_NOACCESS(region + size * slice * sizeof(int64_t) + CANARY_PADDING, CANARY_PADDING);
+  for (uint64_t i = 0; i < size; ++i) {
+    VALGRIND_MAKE_MEM_UNDEFINED(data_start + i * slice, n * sizeof(int64_t));
+  }
+  if (size != slice) {
+    for (uint64_t i = 0; i < size; ++i) {
+      VALGRIND_MAKE_MEM_NOACCESS(data_start + i * slice + n, (slice - n) * sizeof(int64_t));
+    }
+  }
+#endif
+}
+
+znx_vec_i64_layout::~znx_vec_i64_layout() { free(region); }
+
+znx_i64 znx_vec_i64_layout::get_copy_zext(uint64_t index) const {
+  if (index < size) {
+    return znx_i64(n, data_start + index * slice);
+  } else {
+    return znx_i64::zero(n);
+  }
+}
+
+znx_i64 znx_vec_i64_layout::get_copy(uint64_t index) const {
+  REQUIRE_DRAMATICALLY(index < size, "index overflow: " << index << " / " << size);
+  return znx_i64(n, data_start + index * slice);
+}
+
+void znx_vec_i64_layout::set(uint64_t index, const znx_i64& elem) {
+  REQUIRE_DRAMATICALLY(index < size, "index overflow: " << index << " / " << size);
+  REQUIRE_DRAMATICALLY(elem.nn() == n, "incompatible ring dimensions: " << elem.nn() << " / " << n);
+  elem.save_as(data_start + index * slice);
+}
+
+int64_t* znx_vec_i64_layout::data() { return data_start; }
+const int64_t* znx_vec_i64_layout::data() const { return data_start; }
+
+void znx_vec_i64_layout::fill_random(uint64_t bits) {
+  for (uint64_t i = 0; i < size; ++i) {
+    set(i, znx_i64::random_log2bound(n, bits));
+  }
+}
+__uint128_t znx_vec_i64_layout::content_hash() const {
+  test_hasher hasher;
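+  // hash only the n valid coefficients of each slice: the inter-slice padding is excluded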
+  for (uint64_t i = 0; i < size; ++i) {
+    hasher.update(data() + i * slice, n * sizeof(int64_t));
+  }
+  return hasher.hash();
+}
diff --git a/test/testlib/polynomial_vector.h b/test/testlib/polynomial_vector.h
new file mode 100644
index 0000000..d821193
--- /dev/null
+++ b/test/testlib/polynomial_vector.h
@@ -0,0 +1,42 @@
+#ifndef SPQLIOS_POLYNOMIAL_VECTOR_H
+#define SPQLIOS_POLYNOMIAL_VECTOR_H
+
+#include "negacyclic_polynomial.h"
+#include "test_commons.h"
+
+/** @brief a test memory layout for vectors of znx i64 polynomials */
+class znx_vec_i64_layout {
+  uint64_t n;
+  uint64_t size;
+  uint64_t slice;
+  int64_t* data_start;
+  uint8_t* region;
+
+ public:
+  // NO-COPY structure
+  znx_vec_i64_layout(const znx_vec_i64_layout&) = delete;
+  void operator=(const znx_vec_i64_layout&) = delete;
+  znx_vec_i64_layout(znx_vec_i64_layout&&) = delete;
+  void operator=(znx_vec_i64_layout&&) = delete;
+  /** @brief initialises a memory layout */
+  znx_vec_i64_layout(uint64_t n, uint64_t size, uint64_t slice);
+  /** @brief destructor */
+  ~znx_vec_i64_layout();
+
+  /** @brief gets a copy of the item at index (zero when index>=size) */
+  znx_i64 get_copy_zext(uint64_t index) const;
+  /** @brief gets a copy of the item at index (requires index<size) */
+  znx_i64 get_copy(uint64_t index) const;
+  /** @brief sets the item at index (requires index<size) */
+  void set(uint64_t index, const znx_i64& elem);
+  /** @brief fill with random values */
+  void fill_random(uint64_t bits = 63);
+  /** @brief raw pointer access */
+  int64_t* data();
+  /** @brief raw pointer access (const version) */
+  const int64_t* data() const;
+  /** @brief content hashcode */
+  __uint128_t content_hash() const;
+};
+
+#endif  // SPQLIOS_POLYNOMIAL_VECTOR_H
diff --git a/test/testlib/reim4_elem.cpp b/test/testlib/reim4_elem.cpp
new file mode 100644
index 0000000..2028a31
--- /dev/null
+++ b/test/testlib/reim4_elem.cpp
@@ -0,0 +1,145 @@
+#include "reim4_elem.h"
+
+reim4_elem::reim4_elem(const double* re, const double* im) {
+  for (uint64_t i = 0; i < 4; ++i) {
+    value[i] = re[i];
+    value[4 + i] = im[i];
+  }
+}
+reim4_elem::reim4_elem(const double* layout) {
+  for (uint64_t i = 0; i < 8; ++i) {
+    value[i] = layout[i];
+  }
+}
+reim4_elem::reim4_elem() {
+  for (uint64_t i = 0; i < 8; ++i) {
+    value[i] = 0.;
+  }
+}
+void reim4_elem::save_re_im(double* re, double* im) const {
+  for (uint64_t i = 0; i < 4; ++i) {
+    re[i] = value[i];
+    im[i] = value[4 + i];
+  }
+}
+void reim4_elem::save_as(double* reim4) const {
+  for (uint64_t i = 0; i < 8; ++i) {
+    reim4[i] = value[i];
+  }
+}
+reim4_elem reim4_elem::zero() { return reim4_elem(); }
+
+bool operator==(const reim4_elem& x, const reim4_elem& y) {
+  for (uint64_t i = 0; i < 8; ++i) {
+    if (x.value[i] != y.value[i]) return false;
+  }
+  return true;
+}
+
+reim4_elem gaussian_reim4() {
+  test_rng& gen = randgen();
+  std::normal_distribution<double> dist(0, 1);
+  reim4_elem res;
+  for (uint64_t i = 0; i < 8; ++i) {
+    res.value[i] = dist(gen);
+  }
+  return res;
+}
+
+reim4_array_view::reim4_array_view(uint64_t size, double* data) : size(size), data(data) {}
+reim4_elem reim4_array_view::get(uint64_t i) const {
+  REQUIRE_DRAMATICALLY(i < size, "reim4 array overflow");
+  return reim4_elem(data + 8 * i);
+}
+void reim4_array_view::set(uint64_t i, const reim4_elem& value) {
+  REQUIRE_DRAMATICALLY(i < size, "reim4 array overflow");
+  value.save_as(data + 8 * i);
+}
+
+reim_view::reim_view(uint64_t m, double* data) : m(m), data(data) {}
+reim4_elem reim_view::get_blk(uint64_t i) {
+  REQUIRE_DRAMATICALLY(i < m / 4, "block overflow");
+  return reim4_elem(data + 4 * i, data + m + 4 * i);
+}
+void reim_view::set_blk(uint64_t i, const reim4_elem& value) {
+  REQUIRE_DRAMATICALLY(i < m / 4, "block overflow");
+  value.save_re_im(data + 4 * i, data + m + 4 * i);
+}
+
+reim_vector_view::reim_vector_view(uint64_t m, uint64_t nrows, double* data) : m(m), nrows(nrows), data(data) {}
+reim_view reim_vector_view::row(uint64_t row) {
+  REQUIRE_DRAMATICALLY(row < nrows, "row overflow");
+  return reim_view(m, data + 2 * m * row);
+}
+
+/** @brief addition */
+reim4_elem operator+(const reim4_elem& x, const reim4_elem& y) {
+  reim4_elem reps;
+  for (uint64_t i = 0; i < 8; ++i) {
+    reps.value[i] = x.value[i] + y.value[i];
+  }
+  return reps;
+}
+reim4_elem& operator+=(reim4_elem& x, const reim4_elem& y) {
+  for (uint64_t i = 0; i < 8; ++i) {
+    x.value[i] += y.value[i];
+  }
+  return x;
+}
+/** @brief subtraction */
+reim4_elem operator-(const reim4_elem& x, const reim4_elem& y) {
+  reim4_elem reps;
+  for (uint64_t i = 0; i < 8; ++i) {
+    reps.value[i] = x.value[i] - y.value[i];
+  }
+  return reps;
+}
+reim4_elem& operator-=(reim4_elem& x, const reim4_elem& y) {
+  for (uint64_t i = 0; i < 8; ++i) {
+    x.value[i] -= y.value[i];
+  }
+  return x;
+}
+/** @brief product */
+reim4_elem operator*(const reim4_elem& x, const reim4_elem& y) {
+  reim4_elem reps;
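+  // 4 independent complex products: (xre + i*xim) * (yre + i*yim)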
+  for (uint64_t i = 0; i < 4; ++i) {
+    double xre = x.value[i];
+    double yre = y.value[i];
+    double xim = x.value[i + 4];
+    double yim = y.value[i + 4];
+    reps.value[i] = xre * yre - xim * yim;
+    reps.value[i + 4] = xre * yim + xim * yre;
+  }
+  return reps;
+}
+/** @brief distance in infty norm */
+double infty_dist(const reim4_elem& x, const reim4_elem& y) {
+  double dist = 0;
+  for (uint64_t i = 0; i < 8; ++i) {
+    double d = fabs(x.value[i] - y.value[i]);
+    if (d > dist) dist = d;
+  }
+  return dist;
+}
+
+std::ostream& operator<<(std::ostream& out, const reim4_elem& x) {
+  out << "[\n";
+  for (uint64_t i = 0; i < 4; ++i) {
+    out << "  re=" << x.value[i] << ", im=" << x.value[i + 4] << "\n";
+  }
+  return out << "]";
+}
+
+reim4_matrix_view::reim4_matrix_view(uint64_t nrows, uint64_t ncols, double* data)
+    : nrows(nrows), ncols(ncols), data(data) {}
+reim4_elem reim4_matrix_view::get(uint64_t row, uint64_t col) const {
+  REQUIRE_DRAMATICALLY(row < nrows, "rows out of bounds" << row << " / " << nrows);
+  REQUIRE_DRAMATICALLY(col < ncols, "cols out of bounds" << col << " / " << ncols);
+  return reim4_elem(data + 8 * (row * ncols + col));
+}
+void reim4_matrix_view::set(uint64_t row, uint64_t col, const reim4_elem& value) {
+  REQUIRE_DRAMATICALLY(row < nrows, "rows out of bounds" << row << " / " << nrows);
+  REQUIRE_DRAMATICALLY(col < ncols, "cols out of bounds" << col << " / " << ncols);
+  value.save_as(data + 8 * (row * ncols + col));
+}
diff --git a/test/testlib/reim4_elem.h b/test/testlib/reim4_elem.h
new file mode 100644
index 0000000..1b7e967
--- /dev/null
+++ b/test/testlib/reim4_elem.h
@@ -0,0 +1,95 @@
+#ifndef SPQLIOS_REIM4_ELEM_H
+#define SPQLIOS_REIM4_ELEM_H
+
+#include "test_commons.h"
+
+/** @brief test class representing one single reim4 element */
+class reim4_elem {
+ public:
+  /** @brief 8 components (4 real parts followed by 4 imag parts) */
+  double value[8];
+  /** @brief constructs from 4 real parts and 4 imaginary parts */
+  reim4_elem(const double* re, const double* im);
+  /** @brief constructs from 8 components */
+  reim4_elem(const double* layout);
+  /** @brief zero */
+  reim4_elem();
+  /** @brief saves the 4 real parts to re and the 4 imag parts to im */
+  void save_re_im(double* re, double* im) const;
+  /** @brief saves the 8 components to reim4 */
+  void save_as(double* reim4) const;
+  static reim4_elem zero();
+};
+
+/** @brief checks for equality */
+bool operator==(const reim4_elem& x, const reim4_elem& y);
+/** @brief random gaussian reim4 of stdev 1 and mean 0 */
+reim4_elem gaussian_reim4();
+/** @brief addition */
+reim4_elem operator+(const reim4_elem& x, const reim4_elem& y);
+reim4_elem& operator+=(reim4_elem& x, const reim4_elem& y);
+/** @brief subtraction */
+reim4_elem operator-(const reim4_elem& x, const reim4_elem& y);
+reim4_elem& operator-=(reim4_elem& x, const reim4_elem& y);
+/** @brief product */
+reim4_elem operator*(const reim4_elem& x, const reim4_elem& y);
+std::ostream& operator<<(std::ostream& out, const reim4_elem& x);
+/** @brief distance in infty norm */
+double infty_dist(const reim4_elem& x, const reim4_elem& y);
+
+/** @brief test class representing a view of an array of reim4 elements */
+class reim4_array_view {
+  uint64_t size;  ///< number of reim4 elements in the array
+  double* data;   ///< pointer to the start of the array
+ public:
+  /** @brief initializes a view at an existing given address */
+  reim4_array_view(uint64_t size, double* data);
+  /** @brief gets the i-th element */
+  reim4_elem get(uint64_t i) const;
+  /** @brief sets the i-th element */
+  void set(uint64_t i, const reim4_elem& value);
+};
+
+/** @brief test class representing a view of a matrix of nrows x ncols reim4's */
+class reim4_matrix_view {
+  uint64_t nrows;  ///< number of rows
+  uint64_t ncols;  ///< number of columns
+  double* data;    ///< pointer to the start of the matrix
+ public:
+  /** @brief initializes a view at an existing given address */
+  reim4_matrix_view(uint64_t nrows, uint64_t ncols, double* data);
+  /** @brief gets the element at (row, col) */
+  reim4_elem get(uint64_t row, uint64_t col) const;
+  /** @brief sets the element at (row, col) */
+  void set(uint64_t row, uint64_t col, const reim4_elem& value);
+};
+
+/** @brief test class representing the view of one reim of m complexes */
+class reim_view {
+  uint64_t m;    ///< (complex) dimension of the reim polynomial
+  double* data;  ///< address of the start of the reim polynomial
+ public:
+  /** @brief initializes a view at an existing given address */
+  reim_view(uint64_t m, double* data);
+  /** @brief extracts the i-th reim4 block (i<m/4) */
+  reim4_elem get_blk(uint64_t i);
+  /** @brief sets the i-th reim4 block (i<m/4) */
+  void set_blk(uint64_t i, const reim4_elem& value);
+};
+
+/** @brief view of one contiguous reim vector */
+class reim_vector_view {
+  uint64_t m;      ///< (complex) dimension of the reim polynomial
+  uint64_t nrows;  ///< number of reim polynomials
+  double* data;    ///< address of the start of the reim polynomial
+
+ public:
+  /** @brief initializes a view at an existing given address */
+  reim_vector_view(uint64_t m, uint64_t nrows, double* data);
+  /** @brief view of the given reim */
+  reim_view row(uint64_t row);
+};
+
+#endif  // SPQLIOS_REIM4_ELEM_H
diff --git a/test/testlib/sha3.c b/test/testlib/sha3.c
new file mode 100644
index 0000000..5dfcb4c
--- /dev/null
+++ b/test/testlib/sha3.c
@@ -0,0 +1,168 @@
+// sha3.c
+// 19-Nov-11  Markku-Juhani O. Saarinen <mjos@iki.fi>
+// https://github.com/mjosaarinen/tiny_sha3
+// LICENSE: MIT
+
+// Revised 07-Aug-15 to match with official release of FIPS PUB 202 "SHA3"
+// Revised 03-Sep-15 for portability + OpenSSL - style API
+
+#include "sha3.h"
+
+// update the state with given number of rounds
+
+void sha3_keccakf(uint64_t st[25]) {
+  // constants
+  const uint64_t keccakf_rndc[24] = {0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000,
+                                     0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009,
+                                     0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
+                                     0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003,
+                                     0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a,
+                                     0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008};
+  const int keccakf_rotc[24] = {1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14,
+                                27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44};
+  const int keccakf_piln[24] = {10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1};
+
+  // variables
+  int i, j, r;
+  uint64_t t, bc[5];
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+  uint8_t* v;
+
+  // endianess conversion. this is redundant on little-endian targets
+  for (i = 0; i < 25; i++) {
+    v = (uint8_t*)&st[i];
+    st[i] = ((uint64_t)v[0]) | (((uint64_t)v[1]) << 8) | (((uint64_t)v[2]) << 16) | (((uint64_t)v[3]) << 24) |
+            (((uint64_t)v[4]) << 32) | (((uint64_t)v[5]) << 40) | (((uint64_t)v[6]) << 48) | (((uint64_t)v[7]) << 56);
+  }
+#endif
+
+  // actual iteration
+  for (r = 0; r < KECCAKF_ROUNDS; r++) {
+    // Theta
+    for (i = 0; i < 5; i++) bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
+
+    for (i = 0; i < 5; i++) {
+      t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
+      for (j = 0; j < 25; j += 5) st[j + i] ^= t;
+    }
+
+    // Rho Pi
+    t = st[1];
+    for (i = 0; i < 24; i++) {
+      j = keccakf_piln[i];
+      bc[0] = st[j];
+      st[j] = ROTL64(t, keccakf_rotc[i]);
+      t = bc[0];
+    }
+
+    //  Chi
+    for (j = 0; j < 25; j += 5) {
+      for (i = 0; i < 5; i++) bc[i] = st[j + i];
+      for (i = 0; i < 5; i++) st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
+    }
+
+    //  Iota
+    st[0] ^= keccakf_rndc[r];
+  }
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+  // endianess conversion. this is redundant on little-endian targets
+  for (i = 0; i < 25; i++) {
+    v = (uint8_t*)&st[i];
+    t = st[i];
+    v[0] = t & 0xFF;
+    v[1] = (t >> 8) & 0xFF;
+    v[2] = (t >> 16) & 0xFF;
+    v[3] = (t >> 24) & 0xFF;
+    v[4] = (t >> 32) & 0xFF;
+    v[5] = (t >> 40) & 0xFF;
+    v[6] = (t >> 48) & 0xFF;
+    v[7] = (t >> 56) & 0xFF;
+  }
+#endif
+}
+
+// Initialize the context for SHA3
+
+int sha3_init(sha3_ctx_t* c, int mdlen) {
+  int i;
+
+  for (i = 0; i < 25; i++) c->st.q[i] = 0;
+  c->mdlen = mdlen;
+  c->rsiz = 200 - 2 * mdlen;
+  c->pt = 0;
+
+  return 1;
+}
+
+// update state with more data
+
+int sha3_update(sha3_ctx_t* c, const void* data, size_t len) {
+  size_t i;
+  int j;
+
+  j = c->pt;
+  for (i = 0; i < len; i++) {
+    c->st.b[j++] ^= ((const uint8_t*)data)[i];
+    if (j >= c->rsiz) {
+      sha3_keccakf(c->st.q);
+      j = 0;
+    }
+  }
+  c->pt = j;
+
+  return 1;
+}
+
+// finalize and output a hash
+
+int sha3_final(void* md, sha3_ctx_t* c) {
+  int i;
+
+  c->st.b[c->pt] ^= 0x06;
+  c->st.b[c->rsiz - 1] ^= 0x80;
+  sha3_keccakf(c->st.q);
+
+  for (i = 0; i < c->mdlen; i++) {
+    ((uint8_t*)md)[i] = c->st.b[i];
+  }
+
+  return 1;
+}
+
+// compute a SHA-3 hash (md) of given byte length from "in"
+
+void* sha3(const void* in, size_t inlen, void* md, int mdlen) {
+  sha3_ctx_t sha3;
+
+  sha3_init(&sha3, mdlen);
+  sha3_update(&sha3, in, inlen);
+  sha3_final(md, &sha3);
+
+  return md;
+}
+
+// SHAKE128 and SHAKE256 extensible-output functionality
+
+void shake_xof(sha3_ctx_t* c) {
+  c->st.b[c->pt] ^= 0x1F;
+  c->st.b[c->rsiz - 1] ^= 0x80;
+  sha3_keccakf(c->st.q);
+  c->pt = 0;
+}
+
+void shake_out(sha3_ctx_t* c, void* out, size_t len) {
+  size_t i;
+  int j;
+
+  j = c->pt;
+  for (i = 0; i < len; i++) {
+    if (j >= c->rsiz) {
+      sha3_keccakf(c->st.q);
+      j = 0;
+    }
+    ((uint8_t*)out)[i] = c->st.b[j++];
+  }
+  c->pt = j;
+}
diff --git a/test/testlib/sha3.h b/test/testlib/sha3.h
new file mode 100644
index 0000000..08a7c86
--- /dev/null
+++ b/test/testlib/sha3.h
@@ -0,0 +1,56 @@
+// sha3.h
+// 19-Nov-11  Markku-Juhani O. Saarinen <mjos@iki.fi>
+// https://github.com/mjosaarinen/tiny_sha3
+// License: MIT
+
+#ifndef SHA3_H
+#define SHA3_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifndef KECCAKF_ROUNDS
+#define KECCAKF_ROUNDS 24
+#endif
+
+#ifndef ROTL64
+#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
+#endif
+
+// state context
+typedef struct {
+  union {            // state:
+    uint8_t b[200];  // 8-bit bytes
+    uint64_t q[25];  // 64-bit words
+  } st;
+  int pt, rsiz, mdlen;  // these don't overflow
+} sha3_ctx_t;
+
+// Compression function.
+void sha3_keccakf(uint64_t st[25]);
+
+// OpenSSL-like interface
+int sha3_init(sha3_ctx_t* c, int mdlen);  // mdlen = hash output in bytes
+int sha3_update(sha3_ctx_t* c, const void* data, size_t len);
+int sha3_final(void* md, sha3_ctx_t* c);  // digest goes to md
+
+// compute a sha3 hash (md) of given byte length from "in"
+void* sha3(const void* in, size_t inlen, void* md, int mdlen);
+
+// SHAKE128 and SHAKE256 extensible-output functions
+#define shake128_init(c) sha3_init(c, 16)
+#define shake256_init(c) sha3_init(c, 32)
+#define shake_update sha3_update
+
+void shake_xof(sha3_ctx_t* c);
+void shake_out(sha3_ctx_t* c, void* out, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // SHA3_H
diff --git a/test/testlib/test_hash.cpp b/test/testlib/test_hash.cpp
new file mode 100644
index 0000000..2e065af
--- /dev/null
+++ b/test/testlib/test_hash.cpp
@@ -0,0 +1,24 @@
+#include "sha3.h"
+#include "test_commons.h"
+
+/** @brief returns some pseudorandom hash of the content */
+thash test_hash(const void* data, uint64_t size) {
+  thash res;
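+  // sha3 digest with an output length of sizeof(thash) bytes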
+  sha3(data, size, &res, sizeof(res));
+  return res;
+}
+/** @brief class to return a pseudorandom hash of the content */
+test_hasher::test_hasher() {
+  md = malloc(sizeof(sha3_ctx_t));
+  sha3_init((sha3_ctx_t*)md, 16);
+}
+
+void test_hasher::update(const void* data, uint64_t size) { sha3_update((sha3_ctx_t*)md, data, size); }
+
+thash test_hasher::hash() {
+  thash res;
+  sha3_final(&res, (sha3_ctx_t*)md);
+  return res;
+}
+
+test_hasher::~test_hasher() { free(md); }