@@ -55,6 +55,15 @@ namespace faiss {
55
55
#endif
56
56
#endif
57
57
58
// Decide whether the NEON (aarch64) code paths in this file are compiled.
// USE_NEON is defined on aarch64 unless the compiler is a real GCC older
// than version 8, in which case a build-time warning is emitted and the
// NEON-specific code guarded by USE_NEON is left out.
//
// Clang also defines __GNUC__ for GCC compatibility (it reports an old GCC
// version), so it has to be excluded explicitly; otherwise every Clang
// build on aarch64 would be mistaken for GCC<8 and lose the NEON paths.
#if defined(__aarch64__)
#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8
#warning \
        " Cannot enable NEON optimizations in scalar quantizer if the compiler is GCC<8"
#else
#define USE_NEON
#endif
#endif
66
+
58
67
namespace {
59
68
60
69
typedef ScalarQuantizer::QuantizerType QuantizerType;
@@ -105,7 +114,7 @@ struct Codec8bit {
105
114
}
106
115
#endif
107
116
108
- #ifdef __aarch64__
117
+ #ifdef USE_NEON
109
118
static FAISS_ALWAYS_INLINE float32x4x2_t
110
119
decode_8_components (const uint8_t * code, int i) {
111
120
float32_t result[8 ] = {};
@@ -175,7 +184,7 @@ struct Codec4bit {
175
184
}
176
185
#endif
177
186
178
- #ifdef __aarch64__
187
+ #ifdef USE_NEON
179
188
static FAISS_ALWAYS_INLINE float32x4x2_t
180
189
decode_8_components (const uint8_t * code, int i) {
181
190
float32_t result[8 ] = {};
@@ -336,7 +345,7 @@ struct Codec6bit {
336
345
337
346
#endif
338
347
339
- #ifdef __aarch64__
348
+ #ifdef USE_NEON
340
349
static FAISS_ALWAYS_INLINE float32x4x2_t
341
350
decode_8_components (const uint8_t * code, int i) {
342
351
float32_t result[8 ] = {};
@@ -437,7 +446,7 @@ struct QuantizerTemplate<Codec, QuantizerTemplateScaling::UNIFORM, 8>
437
446
438
447
#endif
439
448
440
- #ifdef __aarch64__
449
+ #ifdef USE_NEON
441
450
442
451
template <class Codec >
443
452
struct QuantizerTemplate <Codec, QuantizerTemplateScaling::UNIFORM, 8 >
@@ -546,7 +555,7 @@ struct QuantizerTemplate<Codec, QuantizerTemplateScaling::NON_UNIFORM, 8>
546
555
547
556
#endif
548
557
549
- #ifdef __aarch64__
558
+ #ifdef USE_NEON
550
559
551
560
template <class Codec >
552
561
struct QuantizerTemplate <Codec, QuantizerTemplateScaling::NON_UNIFORM, 8 >
@@ -634,7 +643,7 @@ struct QuantizerFP16<8> : QuantizerFP16<1> {
634
643
635
644
#endif
636
645
637
- #ifdef __aarch64__
646
+ #ifdef USE_NEON
638
647
639
648
template <>
640
649
struct QuantizerFP16 <8 > : QuantizerFP16<1 > {
@@ -714,7 +723,7 @@ struct QuantizerBF16<8> : QuantizerBF16<1> {
714
723
715
724
#endif
716
725
717
- #ifdef __aarch64__
726
+ #ifdef USE_NEON
718
727
719
728
template <>
720
729
struct QuantizerBF16 <8 > : QuantizerBF16<1 > {
@@ -795,7 +804,7 @@ struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> {
795
804
796
805
#endif
797
806
798
- #ifdef __aarch64__
807
+ #ifdef USE_NEON
799
808
800
809
template <>
801
810
struct Quantizer8bitDirect <8 > : Quantizer8bitDirect<1 > {
@@ -884,7 +893,7 @@ struct Quantizer8bitDirectSigned<8> : Quantizer8bitDirectSigned<1> {
884
893
885
894
#endif
886
895
887
- #ifdef __aarch64__
896
+ #ifdef USE_NEON
888
897
889
898
template <>
890
899
struct Quantizer8bitDirectSigned <8 > : Quantizer8bitDirectSigned<1 > {
@@ -1242,7 +1251,7 @@ struct SimilarityL2<8> {
1242
1251
1243
1252
#endif
1244
1253
1245
- #ifdef __aarch64__
1254
+ #ifdef USE_NEON
1246
1255
template <>
1247
1256
struct SimilarityL2 <8 > {
1248
1257
static constexpr int simdwidth = 8 ;
@@ -1402,7 +1411,7 @@ struct SimilarityIP<8> {
1402
1411
};
1403
1412
#endif
1404
1413
1405
- #ifdef __aarch64__
1414
+ #ifdef USE_NEON
1406
1415
1407
1416
template <>
1408
1417
struct SimilarityIP <8 > {
@@ -1599,7 +1608,7 @@ struct DCTemplate<Quantizer, Similarity, 8> : SQDistanceComputer {
1599
1608
1600
1609
#endif
1601
1610
1602
- #ifdef __aarch64__
1611
+ #ifdef USE_NEON
1603
1612
1604
1613
template <class Quantizer , class Similarity >
1605
1614
struct DCTemplate <Quantizer, Similarity, 8 > : SQDistanceComputer {
@@ -1815,7 +1824,7 @@ struct DistanceComputerByte<Similarity, 8> : SQDistanceComputer {
1815
1824
1816
1825
#endif
1817
1826
1818
- #ifdef __aarch64__
1827
+ #ifdef USE_NEON
1819
1828
1820
1829
template <class Similarity >
1821
1830
struct DistanceComputerByte <Similarity, 8 > : SQDistanceComputer {
@@ -2041,7 +2050,7 @@ ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const {
2041
2050
if (d % 16 == 0 ) {
2042
2051
return select_quantizer_1<16 >(qtype, d, trained);
2043
2052
} else
2044
- #elif defined(USE_F16C) || defined(__aarch64__ )
2053
+ #elif defined(USE_F16C) || defined(USE_NEON )
2045
2054
if (d % 8 == 0 ) {
2046
2055
return select_quantizer_1<8 >(qtype, d, trained);
2047
2056
} else
@@ -2082,7 +2091,7 @@ SQDistanceComputer* ScalarQuantizer::get_distance_computer(
2082
2091
qtype, d, trained);
2083
2092
}
2084
2093
} else
2085
- #elif defined(USE_F16C) || defined(__aarch64__ )
2094
+ #elif defined(USE_F16C) || defined(USE_NEON )
2086
2095
if (d % 8 == 0 ) {
2087
2096
if (metric == METRIC_L2) {
2088
2097
return select_distance_computer<SimilarityL2<8 >>(qtype, d, trained);
@@ -2457,7 +2466,7 @@ InvertedListScanner* ScalarQuantizer::select_InvertedListScanner(
2457
2466
return sel0_InvertedListScanner<16 >(
2458
2467
mt, this , quantizer, store_pairs, sel, by_residual);
2459
2468
} else
2460
- #elif defined(USE_F16C) || defined(__aarch64__ )
2469
+ #elif defined(USE_F16C) || defined(USE_NEON )
2461
2470
if (d % 8 == 0 ) {
2462
2471
return sel0_InvertedListScanner<8 >(
2463
2472
mt, this , quantizer, store_pairs, sel, by_residual);
0 commit comments