Skip to content

Commit 74e4ae9

Browse files
fix formatting
Signed-off-by: Alexandr Guzhva <alexanderguzhva@gmail.com>
1 parent 3be9fc9 commit 74e4ae9

File tree

3 files changed

+35
-29
lines changed

3 files changed

+35
-29
lines changed

faiss/impl/pq4_fast_scan_search_qbs.cpp

+24-23
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ void kernel_accumulate_block(
115115

116116
#else
117117

118-
// a special version for NQ=1.
118+
// a special version for NQ=1.
119119
// Despite the function being large in the text form, it compiles to a very
120120
// compact assembler code.
121121
FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
@@ -143,10 +143,8 @@ void kernel_accumulate_block_avx512_nq1(
143143

144144
// process "nsq - scaler.nscale" part
145145
const int nsq_minus_nscale = nsq - scaler.nscale;
146-
const int nsq_minus_nscale_8 =
147-
(nsq_minus_nscale / 8) * 8;
148-
const int nsq_minus_nscale_4 =
149-
(nsq_minus_nscale / 4) * 4;
146+
const int nsq_minus_nscale_8 = (nsq_minus_nscale / 8) * 8;
147+
const int nsq_minus_nscale_4 = (nsq_minus_nscale / 4) * 4;
150148

151149
// process in chunks of 8
152150
for (int sq = 0; sq < nsq_minus_nscale_8; sq += 8) {
@@ -291,7 +289,7 @@ void kernel_accumulate_block_avx512_nq1(
291289
accu[q][3] += scaler.scale_hi(res1); // handle vectors 48..63
292290
}
293291
}
294-
292+
295293
for (int q = 0; q < NQ; q++) {
296294
// load LUTs for 4 quantizers
297295
simd64uint8 lut(LUT);
@@ -352,12 +350,16 @@ void kernel_accumulate_block_avx512_nq1(
352350
LUT += 32;
353351

354352
simd32uint8 res0 = scaler.lookup(lut, clo);
355-
accu[q][0] += simd32uint16(scaler.scale_lo(res0)); // handle vectors 0..7
356-
accu[q][1] += simd32uint16(scaler.scale_hi(res0)); // handle vectors 8..15
353+
accu[q][0] +=
354+
simd32uint16(scaler.scale_lo(res0)); // handle vectors 0..7
355+
accu[q][1] +=
356+
simd32uint16(scaler.scale_hi(res0)); // handle vectors 8..15
357357

358358
simd32uint8 res1 = scaler.lookup(lut, chi);
359-
accu[q][2] += simd32uint16(scaler.scale_lo(res1)); // handle vectors 16..23
360-
accu[q][3] += simd32uint16(scaler.scale_hi(res1)); // handle vectors 24..31
359+
accu[q][2] += simd32uint16(
360+
scaler.scale_lo(res1)); // handle vectors 16..23
361+
accu[q][3] += simd32uint16(
362+
scaler.scale_hi(res1)); // handle vectors 24..31
361363
}
362364
}
363365

@@ -385,7 +387,6 @@ void kernel_accumulate_block_avx512_nqx(
385387
const uint8_t* LUT,
386388
ResultHandler& res,
387389
const Scaler& scaler) {
388-
389390
// dummy alloc to keep the windows compiler happy
390391
constexpr int NQA = NQ > 0 ? NQ : 1;
391392
// distance accumulators
@@ -400,8 +401,7 @@ void kernel_accumulate_block_avx512_nqx(
400401

401402
// process "nsq - scaler.nscale" part
402403
const int nsq_minus_nscale = nsq - scaler.nscale;
403-
const int nsq_minus_nscale_4 =
404-
(nsq_minus_nscale / 4) * 4;
404+
const int nsq_minus_nscale_4 = (nsq_minus_nscale / 4) * 4;
405405

406406
// process in chunks of 4
407407
for (int sq = 0; sq < nsq_minus_nscale_4; sq += 4) {
@@ -518,12 +518,16 @@ void kernel_accumulate_block_avx512_nqx(
518518
LUT += 32;
519519

520520
simd32uint8 res0 = scaler.lookup(lut, clo);
521-
accu[q][0] += simd32uint16(scaler.scale_lo(res0)); // handle vectors 0..7
522-
accu[q][1] += simd32uint16(scaler.scale_hi(res0)); // handle vectors 8..15
521+
accu[q][0] +=
522+
simd32uint16(scaler.scale_lo(res0)); // handle vectors 0..7
523+
accu[q][1] +=
524+
simd32uint16(scaler.scale_hi(res0)); // handle vectors 8..15
523525

524526
simd32uint8 res1 = scaler.lookup(lut, chi);
525-
accu[q][2] += simd32uint16(scaler.scale_lo(res1)); // handle vectors 16..23
526-
accu[q][3] += simd32uint16(scaler.scale_hi(res1)); // handle vectors 24..31
527+
accu[q][2] += simd32uint16(
528+
scaler.scale_lo(res1)); // handle vectors 16..23
529+
accu[q][3] += simd32uint16(
530+
scaler.scale_hi(res1)); // handle vectors 24..31
527531
}
528532
}
529533

@@ -542,16 +546,13 @@ void kernel_accumulate_block(
542546
const uint8_t* codes,
543547
const uint8_t* LUT,
544548
ResultHandler& res,
545-
const Scaler& scaler
546-
) {
549+
const Scaler& scaler) {
547550
if constexpr (NQ == 1) {
548551
kernel_accumulate_block_avx512_nq1<ResultHandler, Scaler>(
549-
nsq, codes, LUT, res, scaler
550-
);
552+
nsq, codes, LUT, res, scaler);
551553
} else {
552554
kernel_accumulate_block_avx512_nqx<NQ, ResultHandler, Scaler>(
553-
nsq, codes, LUT, res, scaler
554-
);
555+
nsq, codes, LUT, res, scaler);
555556
}
556557
}
557558

faiss/utils/simdlib.h

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@
1616

1717
#if defined(__AVX512F__)
1818

19-
#include <faiss/utils/simdlib_avx512.h>
2019
#include <faiss/utils/simdlib_avx2.h>
20+
#include <faiss/utils/simdlib_avx512.h>
2121

2222
#elif defined(__AVX2__)
2323

faiss/utils/simdlib_avx512.h

+10-5
Original file line number | Diff line number | Diff line change
@@ -43,13 +43,18 @@ struct simd512bit {
4343
: i(_mm512_loadu_si512((__m512i const*)x)) {}
4444

4545
// sets up a lower half of the register while keeping upper one as zero
46-
explicit simd512bit(simd256bit lo) :
47-
simd512bit(_mm512_inserti32x8(_mm512_castsi256_si512(lo.i), _mm256_setzero_si256(), 1)) {}
46+
explicit simd512bit(simd256bit lo)
47+
: simd512bit(_mm512_inserti32x8(
48+
_mm512_castsi256_si512(lo.i),
49+
_mm256_setzero_si256(),
50+
1)) {}
4851

4952
// constructs from lower and upper halves
50-
explicit simd512bit(simd256bit lo, simd256bit hi) :
51-
simd512bit(_mm512_inserti32x8(_mm512_castsi256_si512(lo.i), hi.i, 1)) {}
52-
53+
explicit simd512bit(simd256bit lo, simd256bit hi)
54+
: simd512bit(_mm512_inserti32x8(
55+
_mm512_castsi256_si512(lo.i),
56+
hi.i,
57+
1)) {}
5358

5459
void clear() {
5560
i = _mm512_setzero_si512();

0 commit comments

Comments
 (0)