Skip to content

Commit 3001b51

Browse files
authored
MB-61093 Add method to compute distance from codes for IVF index (#41)
* Added a new function, compute_distance_to_codes_for_list, to IndexIVF. Given a query vector x, this function computes the distances to the provided codes for the input list_no. This is a special-purpose method to be used as a flat distance computer for an inverted list where codes are provided externally. This allows the quantizer to be used independently while computing distances for the quantized codes. * This function is currently implemented only for IndexIVFScalarQuantizer. * Extended the c_api interface with the new function.
1 parent b747c55 commit 3001b51

7 files changed

+106
-1
lines changed

c_api/IndexIVF_c_ex.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,18 @@ int faiss_IndexIVF_search_preassigned_with_params(
7070
}
7171
CATCH_AND_HANDLE
7272
}
73+
74+
/// C binding for IndexIVF::compute_distance_to_codes_for_list.
///
/// Reinterprets the opaque handle as an IndexIVF and forwards list_no, x, n,
/// codes and dists to the C++ method unchanged. Returns 0 when the call
/// completes; exceptions are routed through CATCH_AND_HANDLE.
int faiss_IndexIVF_compute_distance_to_codes_for_list(
        FaissIndexIVF* index,
        idx_t list_no,
        const float* x,
        idx_t n,
        const uint8_t* codes,
        float* dists) {
    try {
        auto* ivf = reinterpret_cast<IndexIVF*>(index);
        ivf->compute_distance_to_codes_for_list(list_no, x, n, codes, dists);
        return 0;
    }
    CATCH_AND_HANDLE
}

c_api/IndexIVF_c_ex.h

+23-1
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,31 @@ int faiss_IndexIVF_search_preassigned_with_params(
6868
int store_pairs,
6969
const FaissSearchParametersIVF* params);
7070

71+
/*
72+
Given a query vector x, compute distance to provided codes
73+
for the input list_no. This is a special purpose method
74+
to be used as a flat distance computer for an inverted
75+
list where codes are provided externally. This allows
76+
using the quantizer independently while computing distance
77+
for the quantized codes.
78+
79+
@param list_no - list number for inverted list
80+
@param x - input query vector
81+
@param n - number of codes
82+
@param codes - input codes
83+
@param dists - output computed distances
84+
*/
85+
86+
int faiss_IndexIVF_compute_distance_to_codes_for_list(
87+
FaissIndexIVF* index,
88+
idx_t list_no,
89+
const float* x,
90+
idx_t n,
91+
const uint8_t* codes,
92+
float* dists);
7193

7294
#ifdef __cplusplus
7395
}
7496
#endif
7597

76-
#endif
98+
#endif

faiss/IndexIVF.h

+23
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,29 @@ struct IndexIVF : Index, IndexIVFInterface {
435435
size_t sa_code_size() const override;
436436
void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
437437

438+
439+
/** Given a query vector x, compute distance to provided codes
440+
* for the input list_no. This is a special purpose method
441+
* to be used as a flat distance computer for an inverted
442+
* list where codes are provided externally. This allows to
443+
* use the quantizer independently while computing distance
444+
* for the quantized codes.
445+
*
446+
* @param list_no list number for inverted list
447+
* @param x - input query vector
448+
* @param n - number of codes
449+
* @param codes - input codes
450+
* @param dists - output computed distances
451+
*/
452+
453+
virtual void compute_distance_to_codes_for_list(
454+
const idx_t list_no,
455+
const float* x,
456+
idx_t n,
457+
const uint8_t* codes,
458+
float* dists) const {};
459+
460+
438461
IndexIVF();
439462
};
440463

faiss/IndexScalarQuantizer.cpp

+26
Original file line numberDiff line numberDiff line change
@@ -282,4 +282,30 @@ void IndexIVFScalarQuantizer::reconstruct_from_offset(
282282
}
283283
}
284284

285+
void IndexIVFScalarQuantizer::compute_distance_to_codes_for_list(
286+
const idx_t list_no,
287+
const float* x,
288+
idx_t n,
289+
const uint8_t* codes,
290+
float* dists) const {
291+
292+
ScalarQuantizer::SQDistanceComputer* dc =
293+
sq.get_distance_computer(metric_type);
294+
dc->code_size = sq.code_size;
295+
296+
if (by_residual) {
297+
// shift of x_in wrt centroid
298+
std::vector<float> tmp(d);
299+
quantizer->compute_residual(x, tmp.data(), list_no);
300+
dc->set_query(tmp.data());
301+
} else {
302+
dc->set_query(x);
303+
}
304+
305+
dc->distance_to_codes(n, codes, dists);
306+
307+
return;
308+
}
309+
310+
285311
} // namespace faiss

faiss/IndexScalarQuantizer.h

+9
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,15 @@ struct IndexIVFScalarQuantizer : IndexIVF {
103103

104104
/* standalone codec interface */
105105
void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
106+
107+
108+
void compute_distance_to_codes_for_list(
109+
const idx_t list_no,
110+
const float* x,
111+
idx_t n,
112+
const uint8_t* codes,
113+
float* dists) const override;
114+
106115
};
107116

108117
} // namespace faiss

faiss/impl/ScalarQuantizer.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -1543,6 +1543,14 @@ SQDistanceComputer* ScalarQuantizer::get_distance_computer(
15431543
}
15441544
}
15451545

1546+
/// Evaluate query_to_code() on each of n codes stored back to back
/// (code_size bytes apart, starting at codes) and store the result for
/// code i in dists[i].
void SQDistanceComputer::distance_to_codes(
        idx_t n,
        const uint8_t* codes,
        float* dists) {
    idx_t i = 0;
    while (i < n) {
        dists[i] = query_to_code(codes + i * code_size);
        ++i;
    }
}
1553+
15461554
/*******************************************************************
15471555
* IndexScalarQuantizer/IndexIVFScalarQuantizer scanner object
15481556
*

faiss/impl/ScalarQuantizer.h

+2
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ struct ScalarQuantizer : Quantizer {
104104
float distance_to_code(const uint8_t* code) final {
105105
return query_to_code(code);
106106
}
107+
108+
/// Compute the distance from the current query to each of n codes laid
/// out consecutively in codes (code_size bytes per code), writing one
/// value per code into dists.
void distance_to_codes(idx_t n, const uint8_t* codes, float* dists);
107109
};
108110

109111
SQDistanceComputer* get_distance_computer(

0 commit comments

Comments
 (0)