Skip to content

Commit 1ee7561

Browse files
JAicewizard authored and
facebook-github-bot committed
Disable the NEON optimisations on gcc < 8 (#3869)
Summary: GCC 7 doesn't support all the necessary NEON intrinsics, which is really a shame. However, this means that on aarch64 GCC 7 cannot compile faiss with NEON intrinsics, so we should avoid using them there. This is similar to #3860 (build issues on GCC 7), which I need. This one is a bit uglier, since GCC 7 does support NEON, just not all of the intrinsics. Pull Request resolved: #3869 Reviewed By: asadoughi Differential Revision: D63081962 Pulled By: junjieqi fbshipit-source-id: 69827cd447dd405b3ef70d651996f9ad00b6213e
1 parent d8aec60 commit 1ee7561

File tree

1 file changed

+25
-16
lines changed

1 file changed

+25
-16
lines changed

faiss/impl/ScalarQuantizer.cpp

+25-16
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ namespace faiss {
5555
#endif
5656
#endif
5757

58+
// Decide whether the NEON code paths of the scalar quantizer are compiled.
//
// USE_NEON is defined on aarch64 unless the compiler is a genuine GCC older
// than 8, which lacks some of the NEON intrinsics used here. Clang also
// defines __GNUC__ for GCC compatibility (4 by default), so it has to be
// excluded explicitly; otherwise every aarch64 Clang build would silently
// lose the NEON paths and emit a misleading warning.
#if defined(__aarch64__)
#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8
#warning \
        "Cannot enable NEON optimizations in scalar quantizer if the compiler is GCC<8"
#else
#define USE_NEON
#endif
#endif
66+
5867
namespace {
5968

6069
typedef ScalarQuantizer::QuantizerType QuantizerType;
@@ -105,7 +114,7 @@ struct Codec8bit {
105114
}
106115
#endif
107116

108-
#ifdef __aarch64__
117+
#ifdef USE_NEON
109118
static FAISS_ALWAYS_INLINE float32x4x2_t
110119
decode_8_components(const uint8_t* code, int i) {
111120
float32_t result[8] = {};
@@ -175,7 +184,7 @@ struct Codec4bit {
175184
}
176185
#endif
177186

178-
#ifdef __aarch64__
187+
#ifdef USE_NEON
179188
static FAISS_ALWAYS_INLINE float32x4x2_t
180189
decode_8_components(const uint8_t* code, int i) {
181190
float32_t result[8] = {};
@@ -336,7 +345,7 @@ struct Codec6bit {
336345

337346
#endif
338347

339-
#ifdef __aarch64__
348+
#ifdef USE_NEON
340349
static FAISS_ALWAYS_INLINE float32x4x2_t
341350
decode_8_components(const uint8_t* code, int i) {
342351
float32_t result[8] = {};
@@ -437,7 +446,7 @@ struct QuantizerTemplate<Codec, QuantizerTemplateScaling::UNIFORM, 8>
437446

438447
#endif
439448

440-
#ifdef __aarch64__
449+
#ifdef USE_NEON
441450

442451
template <class Codec>
443452
struct QuantizerTemplate<Codec, QuantizerTemplateScaling::UNIFORM, 8>
@@ -546,7 +555,7 @@ struct QuantizerTemplate<Codec, QuantizerTemplateScaling::NON_UNIFORM, 8>
546555

547556
#endif
548557

549-
#ifdef __aarch64__
558+
#ifdef USE_NEON
550559

551560
template <class Codec>
552561
struct QuantizerTemplate<Codec, QuantizerTemplateScaling::NON_UNIFORM, 8>
@@ -634,7 +643,7 @@ struct QuantizerFP16<8> : QuantizerFP16<1> {
634643

635644
#endif
636645

637-
#ifdef __aarch64__
646+
#ifdef USE_NEON
638647

639648
template <>
640649
struct QuantizerFP16<8> : QuantizerFP16<1> {
@@ -714,7 +723,7 @@ struct QuantizerBF16<8> : QuantizerBF16<1> {
714723

715724
#endif
716725

717-
#ifdef __aarch64__
726+
#ifdef USE_NEON
718727

719728
template <>
720729
struct QuantizerBF16<8> : QuantizerBF16<1> {
@@ -795,7 +804,7 @@ struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> {
795804

796805
#endif
797806

798-
#ifdef __aarch64__
807+
#ifdef USE_NEON
799808

800809
template <>
801810
struct Quantizer8bitDirect<8> : Quantizer8bitDirect<1> {
@@ -884,7 +893,7 @@ struct Quantizer8bitDirectSigned<8> : Quantizer8bitDirectSigned<1> {
884893

885894
#endif
886895

887-
#ifdef __aarch64__
896+
#ifdef USE_NEON
888897

889898
template <>
890899
struct Quantizer8bitDirectSigned<8> : Quantizer8bitDirectSigned<1> {
@@ -1242,7 +1251,7 @@ struct SimilarityL2<8> {
12421251

12431252
#endif
12441253

1245-
#ifdef __aarch64__
1254+
#ifdef USE_NEON
12461255
template <>
12471256
struct SimilarityL2<8> {
12481257
static constexpr int simdwidth = 8;
@@ -1402,7 +1411,7 @@ struct SimilarityIP<8> {
14021411
};
14031412
#endif
14041413

1405-
#ifdef __aarch64__
1414+
#ifdef USE_NEON
14061415

14071416
template <>
14081417
struct SimilarityIP<8> {
@@ -1599,7 +1608,7 @@ struct DCTemplate<Quantizer, Similarity, 8> : SQDistanceComputer {
15991608

16001609
#endif
16011610

1602-
#ifdef __aarch64__
1611+
#ifdef USE_NEON
16031612

16041613
template <class Quantizer, class Similarity>
16051614
struct DCTemplate<Quantizer, Similarity, 8> : SQDistanceComputer {
@@ -1815,7 +1824,7 @@ struct DistanceComputerByte<Similarity, 8> : SQDistanceComputer {
18151824

18161825
#endif
18171826

1818-
#ifdef __aarch64__
1827+
#ifdef USE_NEON
18191828

18201829
template <class Similarity>
18211830
struct DistanceComputerByte<Similarity, 8> : SQDistanceComputer {
@@ -2041,7 +2050,7 @@ ScalarQuantizer::SQuantizer* ScalarQuantizer::select_quantizer() const {
20412050
if (d % 16 == 0) {
20422051
return select_quantizer_1<16>(qtype, d, trained);
20432052
} else
2044-
#elif defined(USE_F16C) || defined(__aarch64__)
2053+
#elif defined(USE_F16C) || defined(USE_NEON)
20452054
if (d % 8 == 0) {
20462055
return select_quantizer_1<8>(qtype, d, trained);
20472056
} else
@@ -2082,7 +2091,7 @@ SQDistanceComputer* ScalarQuantizer::get_distance_computer(
20822091
qtype, d, trained);
20832092
}
20842093
} else
2085-
#elif defined(USE_F16C) || defined(__aarch64__)
2094+
#elif defined(USE_F16C) || defined(USE_NEON)
20862095
if (d % 8 == 0) {
20872096
if (metric == METRIC_L2) {
20882097
return select_distance_computer<SimilarityL2<8>>(qtype, d, trained);
@@ -2457,7 +2466,7 @@ InvertedListScanner* ScalarQuantizer::select_InvertedListScanner(
24572466
return sel0_InvertedListScanner<16>(
24582467
mt, this, quantizer, store_pairs, sel, by_residual);
24592468
} else
2460-
#elif defined(USE_F16C) || defined(__aarch64__)
2469+
#elif defined(USE_F16C) || defined(USE_NEON)
24612470
if (d % 8 == 0) {
24622471
return sel0_InvertedListScanner<8>(
24632472
mt, this, quantizer, store_pairs, sel, by_residual);

0 commit comments

Comments
 (0)