#ifndef SPQLIOS_VEC_ZNX_ARITHMETIC_PRIVATE_H
#define SPQLIOS_VEC_ZNX_ARITHMETIC_PRIVATE_H

#include "../commons_private.h"
#include "../q120/q120_ntt.h"
#include "vec_znx_arithmetic.h"

/**
 * Layout families:
 *
 * fft64:
 *   K: <= 20, N: <= 65536, ell: <= 200
 *   vec<ZnX> normalized: represented by int64
 *   vec<ZnX> large: represented by int64 (expect <=52 bits)
 *   vec<ZnX> DFT: represented by double (reim_fft space)
 *   On AVX2 infrastructure, PMAT, LCNV, RCNV use a special reim4_fft space
 *
 * ntt120:
 *   K: <= 50, N: <= 65536, ell: <= 80
 *   vec<ZnX> normalized: represented by int64
 *   vec<ZnX> large: represented by int128 (expect <=120 bits)
 *   vec<ZnX> DFT: represented by int64x4 (ntt120 space)
 *   On AVX2 infrastructure, PMAT, LCNV, RCNV use a special ntt120 space
 *
 * ntt104:
 *   K: <= 40, N: <= 65536, ell: <= 80
 *   vec<ZnX> normalized: represented by int64
 *   vec<ZnX> large: represented by int128 (expect <=120 bits)
 *   vec<ZnX> DFT: represented by int64x4 (ntt120 space)
 *   On AVX512 infrastructure, PMAT, LCNV, RCNV use a special ntt104 space
 */
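
/*
 * Illustrative sketch (not part of the API): per-limb storage implied by the
 * representations listed above, for a module of ring dimension N. The byte
 * counts simply restate the declared representation types (int64, int128,
 * double, int64x4); the exact buffer layouts are defined by the alloc
 * functions declared later in this header.
 *
 *   fft64:  normalized/large limb: N * sizeof(int64_t)     =  8*N bytes
 *           DFT limb:              N * sizeof(double)      =  8*N bytes
 *   ntt120: large limb:            N * sizeof(__int128)    = 16*N bytes
 *           DFT limb:              4 * N * sizeof(int64_t) = 32*N bytes
 */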

struct fft64_module_info_t {
  // pre-computation for reim_fft
  REIM_FFT_PRECOMP* p_fft;
  // pre-computation for mul_fft
  REIM_FFTVEC_MUL_PRECOMP* mul_fft;
  // pre-computation for reim_from_znx64
  REIM_FROM_ZNX64_PRECOMP* p_conv;
  // pre-computation for reim_to_znx64
  REIM_TO_ZNX64_PRECOMP* p_reim_to_znx;
  // pre-computation for reim_ifft
  REIM_IFFT_PRECOMP* p_ifft;
  // pre-computation for reim_fftvec_addmul
  REIM_FFTVEC_ADDMUL_PRECOMP* p_addmul;
};

struct q120_module_info_t {
  // pre-computation for q120b to q120b ntt
  q120_ntt_precomp* p_ntt;
  // pre-computation for q120b to q120b intt
  q120_ntt_precomp* p_intt;
};

// TODO add function types here
typedef typeof(vmp_pmat_alloc) VMP_PMAT_ALLOC_F;
typedef typeof(vec_znx_dft_alloc) VEC_ZNX_DFT_ALLOC_F;
typedef typeof(vec_znx_big_alloc) VEC_ZNX_BIG_ALLOC_F;
typedef typeof(svp_ppol_alloc) SVP_PPOL_ALLOC_F;
typedef typeof(vec_znx_zero) VEC_ZNX_ZERO_F;
typedef typeof(vec_znx_copy) VEC_ZNX_COPY_F;
typedef typeof(vec_znx_negate) VEC_ZNX_NEGATE_F;
typedef typeof(vec_znx_add) VEC_ZNX_ADD_F;
typedef typeof(vec_znx_dft) VEC_ZNX_DFT_F;
typedef typeof(vec_znx_idft) VEC_ZNX_IDFT_F;
typedef typeof(vec_znx_idft_tmp_bytes) VEC_ZNX_IDFT_TMP_BYTES_F;
typedef typeof(vec_znx_idft_tmp_a) VEC_ZNX_IDFT_TMP_A_F;
typedef typeof(vec_znx_sub) VEC_ZNX_SUB_F;
typedef typeof(vec_znx_rotate) VEC_ZNX_ROTATE_F;
typedef typeof(vec_znx_automorphism) VEC_ZNX_AUTOMORPHISM_F;
typedef typeof(vec_znx_normalize_base2k) VEC_ZNX_NORMALIZE_BASE2K_F;
typedef typeof(vec_znx_normalize_base2k_tmp_bytes) VEC_ZNX_NORMALIZE_BASE2K_TMP_BYTES_F;
typedef typeof(vec_znx_big_normalize_base2k) VEC_ZNX_BIG_NORMALIZE_BASE2K_F;
typedef typeof(vec_znx_big_normalize_base2k_tmp_bytes) VEC_ZNX_BIG_NORMALIZE_BASE2K_TMP_BYTES_F;
typedef typeof(vec_znx_big_range_normalize_base2k) VEC_ZNX_BIG_RANGE_NORMALIZE_BASE2K_F;
typedef typeof(vec_znx_big_range_normalize_base2k_tmp_bytes) VEC_ZNX_BIG_RANGE_NORMALIZE_BASE2K_TMP_BYTES_F;
typedef typeof(vec_znx_big_add) VEC_ZNX_BIG_ADD_F;
typedef typeof(vec_znx_big_add_small) VEC_ZNX_BIG_ADD_SMALL_F;
typedef typeof(vec_znx_big_add_small2) VEC_ZNX_BIG_ADD_SMALL2_F;
typedef typeof(vec_znx_big_sub) VEC_ZNX_BIG_SUB_F;
typedef typeof(vec_znx_big_sub_small_a) VEC_ZNX_BIG_SUB_SMALL_A_F;
typedef typeof(vec_znx_big_sub_small_b) VEC_ZNX_BIG_SUB_SMALL_B_F;
typedef typeof(vec_znx_big_sub_small2) VEC_ZNX_BIG_SUB_SMALL2_F;
typedef typeof(vec_znx_big_rotate) VEC_ZNX_BIG_ROTATE_F;
typedef typeof(vec_znx_big_automorphism) VEC_ZNX_BIG_AUTOMORPHISM_F;
typedef typeof(svp_prepare) SVP_PREPARE;
typedef typeof(svp_apply_dft) SVP_APPLY_DFT_F;
typedef typeof(znx_small_single_product) ZNX_SMALL_SINGLE_PRODUCT_F;
typedef typeof(znx_small_single_product_tmp_bytes) ZNX_SMALL_SINGLE_PRODUCT_TMP_BYTES_F;
typedef typeof(vmp_prepare_contiguous) VMP_PREPARE_CONTIGUOUS_F;
typedef typeof(vmp_prepare_contiguous_tmp_bytes) VMP_PREPARE_CONTIGUOUS_TMP_BYTES_F;
typedef typeof(vmp_apply_dft) VMP_APPLY_DFT_F;
typedef typeof(vmp_apply_dft_tmp_bytes) VMP_APPLY_DFT_TMP_BYTES_F;
typedef typeof(vmp_apply_dft_to_dft) VMP_APPLY_DFT_TO_DFT_F;
typedef typeof(vmp_apply_dft_to_dft_tmp_bytes) VMP_APPLY_DFT_TO_DFT_TMP_BYTES_F;
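
/*
 * Note on the typedefs above: `typedef typeof(f) F_T;` captures the *function
 * type* of the public entry point f (declared in vec_znx_arithmetic.h), so
 * `F_T*` is a pointer to a function with exactly the same signature. This keeps
 * the virtual function table below in sync with the public API without
 * repeating any prototype. Minimal sketch of the pattern (hypothetical names):
 *
 *   void impl(int x);              // some implementation
 *   typedef typeof(impl) IMPL_F;   // IMPL_F is the function type `void (int)`
 *   IMPL_F* slot = impl;           // function pointer with the same signature
 *   slot(42);                      // dispatches to impl
 */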

struct module_virtual_functions_t {
  // TODO add functions here
  VMP_PMAT_ALLOC_F* vmp_pmat_alloc;
  VEC_ZNX_DFT_ALLOC_F* vec_znx_dft_alloc;
  VEC_ZNX_BIG_ALLOC_F* vec_znx_big_alloc;
  SVP_PPOL_ALLOC_F* svp_ppol_alloc;
  VEC_ZNX_ZERO_F* vec_znx_zero;
  VEC_ZNX_COPY_F* vec_znx_copy;
  VEC_ZNX_NEGATE_F* vec_znx_negate;
  VEC_ZNX_ADD_F* vec_znx_add;
  VEC_ZNX_DFT_F* vec_znx_dft;
  VEC_ZNX_IDFT_F* vec_znx_idft;
  VEC_ZNX_IDFT_TMP_BYTES_F* vec_znx_idft_tmp_bytes;
  VEC_ZNX_IDFT_TMP_A_F* vec_znx_idft_tmp_a;
  VEC_ZNX_SUB_F* vec_znx_sub;
  VEC_ZNX_ROTATE_F* vec_znx_rotate;
  VEC_ZNX_AUTOMORPHISM_F* vec_znx_automorphism;
  VEC_ZNX_NORMALIZE_BASE2K_F* vec_znx_normalize_base2k;
  VEC_ZNX_NORMALIZE_BASE2K_TMP_BYTES_F* vec_znx_normalize_base2k_tmp_bytes;
  VEC_ZNX_BIG_NORMALIZE_BASE2K_F* vec_znx_big_normalize_base2k;
  VEC_ZNX_BIG_NORMALIZE_BASE2K_TMP_BYTES_F* vec_znx_big_normalize_base2k_tmp_bytes;
  VEC_ZNX_BIG_RANGE_NORMALIZE_BASE2K_F* vec_znx_big_range_normalize_base2k;
  VEC_ZNX_BIG_RANGE_NORMALIZE_BASE2K_TMP_BYTES_F* vec_znx_big_range_normalize_base2k_tmp_bytes;
  VEC_ZNX_BIG_ADD_F* vec_znx_big_add;
  VEC_ZNX_BIG_ADD_SMALL_F* vec_znx_big_add_small;
  VEC_ZNX_BIG_ADD_SMALL2_F* vec_znx_big_add_small2;
  VEC_ZNX_BIG_SUB_F* vec_znx_big_sub;
  VEC_ZNX_BIG_SUB_SMALL_A_F* vec_znx_big_sub_small_a;
  VEC_ZNX_BIG_SUB_SMALL_B_F* vec_znx_big_sub_small_b;
  VEC_ZNX_BIG_SUB_SMALL2_F* vec_znx_big_sub_small2;
  VEC_ZNX_BIG_ROTATE_F* vec_znx_big_rotate;
  VEC_ZNX_BIG_AUTOMORPHISM_F* vec_znx_big_automorphism;
  SVP_PREPARE* svp_prepare;
  SVP_APPLY_DFT_F* svp_apply_dft;
  ZNX_SMALL_SINGLE_PRODUCT_F* znx_small_single_product;
  ZNX_SMALL_SINGLE_PRODUCT_TMP_BYTES_F* znx_small_single_product_tmp_bytes;
  VMP_PREPARE_CONTIGUOUS_F* vmp_prepare_contiguous;
  VMP_PREPARE_CONTIGUOUS_TMP_BYTES_F* vmp_prepare_contiguous_tmp_bytes;
  VMP_APPLY_DFT_F* vmp_apply_dft;
  VMP_APPLY_DFT_TMP_BYTES_F* vmp_apply_dft_tmp_bytes;
  VMP_APPLY_DFT_TO_DFT_F* vmp_apply_dft_to_dft;
  VMP_APPLY_DFT_TO_DFT_TMP_BYTES_F* vmp_apply_dft_to_dft_tmp_bytes;
};

union backend_module_info_t {
  struct fft64_module_info_t fft64;
  struct q120_module_info_t q120;
};

struct module_info_t {
  // generic parameters
  MODULE_TYPE module_type;
  uint64_t nn;
  uint64_t m;
  // backend_dependent functions
  union backend_module_info_t mod;
  // virtual functions
  struct module_virtual_functions_t func;
};

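/*
 * Dispatch sketch (illustrative only, not part of this header): each backend
 * fills `func` with its own implementations when the module is created, and
 * the public entry points forward through the table. A hypothetical wrapper
 * would look like:
 *
 *   void vec_znx_add(const MODULE* module,
 *                    int64_t* res, uint64_t res_size, uint64_t res_sl,
 *                    const int64_t* a, uint64_t a_size, uint64_t a_sl,
 *                    const int64_t* b, uint64_t b_size, uint64_t b_sl) {
 *     module->func.vec_znx_add(module, res, res_size, res_sl,
 *                              a, a_size, a_sl, b, b_size, b_sl);
 *   }
 *
 * with `module->func.vec_znx_add` pointing to vec_znx_add_ref or
 * vec_znx_add_avx depending on the backend and available CPU features.
 */
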
EXPORT VMP_PMAT* fft64_vmp_pmat_alloc(const MODULE* module,  // N
    uint64_t nrows, uint64_t ncols  // dimensions
);

EXPORT VEC_ZNX_DFT* fft64_vec_znx_dft_alloc(const MODULE* module,  // N
    uint64_t size);

EXPORT VEC_ZNX_BIG* fft64_vec_znx_big_alloc(const MODULE* module,  // N
    uint64_t size);

EXPORT SVP_PPOL* fft64_svp_ppol_alloc(const MODULE* module);  // N

EXPORT void vec_znx_zero_ref(const MODULE* module,  // N
    int64_t* res, uint64_t res_size, uint64_t res_sl  // res
);

EXPORT void vec_znx_copy_ref(const MODULE* precomp,  // N
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl  // a
);

EXPORT void vec_znx_negate_ref(const MODULE* module,  // N
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl  // a
);

EXPORT void vec_znx_negate_avx(const MODULE* module,  // N
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl  // a
);

EXPORT void vec_znx_add_ref(const MODULE* module,  // N
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl,  // a
    const int64_t* b, uint64_t b_size, uint64_t b_sl  // b
);
EXPORT void vec_znx_add_avx(const MODULE* module,  // N
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl,  // a
    const int64_t* b, uint64_t b_size, uint64_t b_sl  // b
);

EXPORT void vec_znx_sub_ref(const MODULE* precomp,  // N
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl,  // a
    const int64_t* b, uint64_t b_size, uint64_t b_sl  // b
);

EXPORT void vec_znx_sub_avx(const MODULE* module,  // N
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl,  // a
    const int64_t* b, uint64_t b_size, uint64_t b_sl  // b
);

EXPORT void vec_znx_normalize_base2k_ref(const MODULE* module,  // N
    uint64_t log2_base2k,  // output base 2^K
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl,  // inp
    uint8_t* tmp_space  // scratch space
);

EXPORT uint64_t vec_znx_normalize_base2k_tmp_bytes_ref(const MODULE* module,  // N
    uint64_t res_size,  // res size
    uint64_t inp_size  // inp size
);

EXPORT void vec_znx_rotate_ref(const MODULE* module,  // N
    const int64_t p,  // rotation value
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl  // a
);

EXPORT void vec_znx_automorphism_ref(const MODULE* module,  // N
    const int64_t p,  // X->X^p
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl  // a
);

EXPORT void vmp_prepare_ref(const MODULE* precomp,  // N
    VMP_PMAT* pmat,  // output
    const int64_t* mat, uint64_t nrows, uint64_t ncols  // a
);

EXPORT void vmp_apply_dft_ref(const MODULE* precomp,  // N
    VEC_ZNX_DFT* res, uint64_t res_size,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl,  // a
    const VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols  // prep matrix
);

EXPORT void vec_dft_zero_ref(const MODULE* precomp,  // N
    VEC_ZNX_DFT* res, uint64_t res_size  // res
);

EXPORT void vec_dft_add_ref(const MODULE* precomp,  // N
    VEC_ZNX_DFT* res, uint64_t res_size,  // res
    const VEC_ZNX_DFT* a, uint64_t a_size,  // a
    const VEC_ZNX_DFT* b, uint64_t b_size  // b
);

EXPORT void vec_dft_sub_ref(const MODULE* precomp,  // N
    VEC_ZNX_DFT* res, uint64_t res_size,  // res
    const VEC_ZNX_DFT* a, uint64_t a_size,  // a
    const VEC_ZNX_DFT* b, uint64_t b_size  // b
);

EXPORT void vec_dft_ref(const MODULE* precomp,  // N
    VEC_ZNX_DFT* res, uint64_t res_size,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl  // a
);

EXPORT void vec_idft_ref(const MODULE* precomp,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const VEC_ZNX_DFT* a_dft, uint64_t a_size);

EXPORT void vec_znx_big_normalize_ref(const MODULE* precomp,  // N
    uint64_t k,  // base-2^k
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const VEC_ZNX_BIG* a, uint64_t a_size  // a
);

/** @brief apply a svp product, result = ppol * a, presented in DFT space */
EXPORT void fft64_svp_apply_dft_ref(const MODULE* module,  // N
    const VEC_ZNX_DFT* res, uint64_t res_size,  // output
    const SVP_PPOL* ppol,  // prepared pol
    const int64_t* a, uint64_t a_size, uint64_t a_sl  // a
);
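
/*
 * Usage sketch for the svp (scalar-vector product) path, using only functions
 * declared in this file; buffer sizing and error handling are omitted, and the
 * free() call assumes the alloc functions return plain heap blocks (an
 * illustration, not a documented contract):
 *
 *   SVP_PPOL* ppol = fft64_svp_ppol_alloc(module);
 *   fft64_svp_prepare_ref(module, ppol, pol);            // pol: N int64 coefficients
 *   fft64_svp_apply_dft_ref(module, res_dft, res_size,   // res_dft: result in DFT space
 *                           ppol, a, a_size, a_sl);      // a: small int64 vector
 *   free(ppol);
 */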

/** @brief sets res = k-normalize(a) -- output in int64 coeffs space */
EXPORT void fft64_vec_znx_big_normalize_base2k(const MODULE* module,  // N
    uint64_t k,  // base-2^k
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const VEC_ZNX_BIG* a, uint64_t a_size,  // a
    uint8_t* tmp_space  // temp space
);

/** @brief returns the minimal byte length of scratch space for vec_znx_big_normalize_base2k */
EXPORT uint64_t fft64_vec_znx_big_normalize_base2k_tmp_bytes(const MODULE* module,  // N
    uint64_t res_size,  // res size
    uint64_t inp_size  // inp size
);

/** @brief sets res = k-normalize(a.subrange) -- output in int64 coeffs space */
EXPORT void fft64_vec_znx_big_range_normalize_base2k(const MODULE* module,  // N
    uint64_t log2_base2k,  // base-2^k
    int64_t* res, uint64_t res_size, uint64_t res_sl,  // res
    const VEC_ZNX_BIG* a, uint64_t a_range_begin,  // a
    uint64_t a_range_xend, uint64_t a_range_step,  // range
    uint8_t* tmp_space  // temp space
);

/** @brief returns the minimal byte length of scratch space for vec_znx_big_range_normalize_base2k */
EXPORT uint64_t fft64_vec_znx_big_range_normalize_base2k_tmp_bytes(const MODULE* module,  // N
    uint64_t res_size,  // res size
    uint64_t inp_size  // inp size
);

EXPORT void fft64_vec_znx_dft(const MODULE* module,  // N
    VEC_ZNX_DFT* res, uint64_t res_size,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl  // a
);

EXPORT void fft64_vec_znx_idft(const MODULE* module,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const VEC_ZNX_DFT* a_dft, uint64_t a_size,  // a
    uint8_t* tmp  // scratch space
);

EXPORT uint64_t fft64_vec_znx_idft_tmp_bytes(const MODULE* module);

EXPORT void fft64_vec_znx_idft_tmp_a(const MODULE* module,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    VEC_ZNX_DFT* a_dft, uint64_t a_size  // a is overwritten
);

EXPORT void ntt120_vec_znx_dft_avx(const MODULE* module,  // N
    VEC_ZNX_DFT* res, uint64_t res_size,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl  // a
);

EXPORT void ntt120_vec_znx_idft_avx(const MODULE* module,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const VEC_ZNX_DFT* a_dft, uint64_t a_size,  // a
    uint8_t* tmp  // scratch space
);

EXPORT uint64_t ntt120_vec_znx_idft_tmp_bytes_avx(const MODULE* module);

EXPORT void ntt120_vec_znx_idft_tmp_a_avx(const MODULE* module,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    VEC_ZNX_DFT* a_dft, uint64_t a_size  // a is overwritten
);

// big additions/subtractions

/** @brief sets res = a+b */
EXPORT void fft64_vec_znx_big_add(const MODULE* module,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const VEC_ZNX_BIG* a, uint64_t a_size,  // a
    const VEC_ZNX_BIG* b, uint64_t b_size  // b
);
/** @brief sets res = a+b */
EXPORT void fft64_vec_znx_big_add_small(const MODULE* module,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const VEC_ZNX_BIG* a, uint64_t a_size,  // a
    const int64_t* b, uint64_t b_size, uint64_t b_sl  // b
);
EXPORT void fft64_vec_znx_big_add_small2(const MODULE* module,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl,  // a
    const int64_t* b, uint64_t b_size, uint64_t b_sl  // b
);

/** @brief sets res = a-b */
EXPORT void fft64_vec_znx_big_sub(const MODULE* module,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const VEC_ZNX_BIG* a, uint64_t a_size,  // a
    const VEC_ZNX_BIG* b, uint64_t b_size  // b
);
EXPORT void fft64_vec_znx_big_sub_small_b(const MODULE* module,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const VEC_ZNX_BIG* a, uint64_t a_size,  // a
    const int64_t* b, uint64_t b_size, uint64_t b_sl  // b
);
EXPORT void fft64_vec_znx_big_sub_small_a(const MODULE* module,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl,  // a
    const VEC_ZNX_BIG* b, uint64_t b_size  // b
);
EXPORT void fft64_vec_znx_big_sub_small2(const MODULE* module,  // N
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl,  // a
    const int64_t* b, uint64_t b_size, uint64_t b_sl  // b
);

/** @brief sets res = a . X^p */
EXPORT void fft64_vec_znx_big_rotate(const MODULE* module,  // N
    int64_t p,  // rotation value
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const VEC_ZNX_BIG* a, uint64_t a_size  // a
);

/** @brief sets res = a(X^p) */
EXPORT void fft64_vec_znx_big_automorphism(const MODULE* module,  // N
    int64_t p,  // X->X^p
    VEC_ZNX_BIG* res, uint64_t res_size,  // res
    const VEC_ZNX_BIG* a, uint64_t a_size  // a
);

/** @brief prepares an svp polynomial */
EXPORT void fft64_svp_prepare_ref(const MODULE* module,  // N
    SVP_PPOL* ppol,  // output
    const int64_t* pol  // a
);

/** @brief res = a * b : small integer polynomial product */
EXPORT void fft64_znx_small_single_product(const MODULE* module,  // N
    int64_t* res,  // output
    const int64_t* a,  // a
    const int64_t* b,  // b
    uint8_t* tmp);

/** @brief tmp bytes required for znx_small_single_product */
EXPORT uint64_t fft64_znx_small_single_product_tmp_bytes(const MODULE* module);
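
/*
 * Usage sketch (illustrative): the tmp_bytes query / scratch buffer pattern
 * used by znx_small_single_product, assuming res, a and b each hold N
 * coefficients (N = ring dimension of the module) and that the scratch buffer
 * may simply live on the heap:
 *
 *   uint64_t tmp_bytes = fft64_znx_small_single_product_tmp_bytes(module);
 *   uint8_t* tmp = (uint8_t*)malloc(tmp_bytes);
 *   fft64_znx_small_single_product(module, res, a, b, tmp);  // res = a * b
 *   free(tmp);
 */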

/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void fft64_vmp_prepare_contiguous_ref(const MODULE* module,  // N
    VMP_PMAT* pmat,  // output
    const int64_t* mat, uint64_t nrows, uint64_t ncols,  // a
    uint8_t* tmp_space  // scratch space
);

/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void fft64_vmp_prepare_contiguous_avx(const MODULE* module,  // N
    VMP_PMAT* pmat,  // output
    const int64_t* mat, uint64_t nrows, uint64_t ncols,  // a
    uint8_t* tmp_space  // scratch space
);

/** @brief minimal scratch space byte-size required for the vmp_prepare function */
EXPORT uint64_t fft64_vmp_prepare_contiguous_tmp_bytes(const MODULE* module,  // N
    uint64_t nrows, uint64_t ncols);

/** @brief applies a vmp product (result in DFT space) */
EXPORT void fft64_vmp_apply_dft_ref(const MODULE* module,  // N
    VEC_ZNX_DFT* res, uint64_t res_size,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl,  // a
    const VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols,  // prep matrix
    uint8_t* tmp_space  // scratch space
);

/** @brief applies a vmp product (result in DFT space) */
EXPORT void fft64_vmp_apply_dft_avx(const MODULE* module,  // N
    VEC_ZNX_DFT* res, uint64_t res_size,  // res
    const int64_t* a, uint64_t a_size, uint64_t a_sl,  // a
    const VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols,  // prep matrix
    uint8_t* tmp_space  // scratch space
);

/** @brief applies a vmp product on an input that is already in DFT space */
EXPORT void fft64_vmp_apply_dft_to_dft_ref(const MODULE* module,  // N
    VEC_ZNX_DFT* res, const uint64_t res_size,  // res
    const VEC_ZNX_DFT* a_dft, uint64_t a_size,  // a
    const VMP_PMAT* pmat, const uint64_t nrows, const uint64_t ncols,  // prep matrix
    uint8_t* tmp_space  // scratch space (a_size*sizeof(reim4) bytes)
);

/** @brief applies a vmp product on an input that is already in DFT space */
EXPORT void fft64_vmp_apply_dft_to_dft_avx(const MODULE* module,  // N
    VEC_ZNX_DFT* res, const uint64_t res_size,  // res
    const VEC_ZNX_DFT* a_dft, uint64_t a_size,  // a
    const VMP_PMAT* pmat, const uint64_t nrows, const uint64_t ncols,  // prep matrix
    uint8_t* tmp_space  // scratch space (a_size*sizeof(reim4) bytes)
);

/** @brief minimal size of the tmp_space */
EXPORT uint64_t fft64_vmp_apply_dft_tmp_bytes(const MODULE* module,  // N
    uint64_t res_size,  // res
    uint64_t a_size,  // a
    uint64_t nrows, uint64_t ncols  // prep matrix
);

/** @brief minimal size of the tmp_space */
EXPORT uint64_t fft64_vmp_apply_dft_to_dft_tmp_bytes(const MODULE* module,  // N
    uint64_t res_size,  // res
    uint64_t a_size,  // a
    uint64_t nrows, uint64_t ncols  // prep matrix
);
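
/*
 * Usage sketch for the vmp (vector-matrix product) path, chaining the
 * functions declared above; the scratch sizing follows the *_tmp_bytes
 * queries, and the malloc/free-based scratch handling is for illustration
 * only:
 *
 *   VMP_PMAT* pmat = fft64_vmp_pmat_alloc(module, nrows, ncols);
 *   uint8_t* tmp1 = (uint8_t*)malloc(fft64_vmp_prepare_contiguous_tmp_bytes(module, nrows, ncols));
 *   fft64_vmp_prepare_contiguous_ref(module, pmat, mat, nrows, ncols, tmp1);
 *
 *   uint8_t* tmp2 = (uint8_t*)malloc(fft64_vmp_apply_dft_tmp_bytes(module, res_size, a_size, nrows, ncols));
 *   fft64_vmp_apply_dft_ref(module, res_dft, res_size, a, a_size, a_sl, pmat, nrows, ncols, tmp2);
 *
 *   free(tmp2);
 *   free(tmp1);
 *   free(pmat);  // assumes plain heap blocks (illustration only)
 */
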
#endif  // SPQLIOS_VEC_ZNX_ARITHMETIC_PRIVATE_H