Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removed index_shard_and_quantize OIVFBBS #3291

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 0 additions & 58 deletions demos/offline_ivf/offline_ivf.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,64 +227,6 @@ def _iterate_transformed(self, ds, start, batch_size, dt):
for buffer in ds.iterate(start, batch_size, dt):
yield buffer

def index_shard_and_quantize(self):
    """Build every missing index shard and record its coarse quantization.

    The template index is read from ``self.index_template_file``. For each
    shard number in ``range(self.nshards)``:

    * the shard is skipped when either its shard file
      (``self.index_shard_prefix`` + number) or its coarse-quantization
      file (``self.coarse_quantization_prefix`` + number) already exists;
    * otherwise the coarse-quantization file is created in exclusive mode
      (``"xb"``), the index is reset, and batches of
      ``EMBEDDINGS_BATCH_SIZE`` vectors are read from ``self.xb_ds``
      starting at ``shard_number * self.shard_size``;
    * every batch is searched against the IVF quantizer with
      ``self.nprobe`` probes, the returned ids are written to the
      coarse-quantization file, and the batch is added through
      ``self._index_add_core_wrapper`` using the first id of each row;
    * the filled index is written to the shard file.

    If processing fails after this call created the coarse-quantization
    file, that partial file is removed before the error propagates.
    Without this cleanup the leftover file would make every later run
    skip the shard, and it would never be built.

    Raises:
        AssertionError: if the template file is missing, ``self.nprobe``
            exceeds the number of vectors in the quantizer, a batch has an
            unexpected dimension, a returned id falls outside
            ``[0, nlist)``, or more than ``self.shard_size`` vectors were
            consumed for one shard.
    """
    assert os.path.exists(self.index_template_file)
    index = faiss.read_index(self.index_template_file)
    index_ivf = faiss.downcast_index(faiss.extract_index_ivf(index))
    assert self.nprobe <= index_ivf.quantizer.ntotal, (
        f"the number of vectors {index_ivf.quantizer.ntotal} is not enough"
        f" to retrieve {self.nprobe} neighbours, check."
    )

    # Expected width of each batch: the output dimension of the first
    # element of the transform chain when the index is a pre-transform
    # index, otherwise the raw input dimension.
    if is_pretransform_index(index):
        d = index.chain.at(0).d_out
    else:
        d = self.input_d

    for i in range(self.nshards):
        sfn = f"{self.index_shard_prefix}{i}"
        cqfn = f"{self.coarse_quantization_prefix}{i}"  # fixme
        if os.path.exists(sfn) or os.path.exists(cqfn):
            logging.info(f"skipping shard: {i}")
            continue

        claimed = False  # True once this call has created cqfn
        completed = False  # True once the shard index has been written
        try:
            # "xb" fails with FileExistsError if the file appeared after
            # the existence check above; that case is treated as a skip.
            with open(cqfn, "xb") as cqf:
                claimed = True
                index.reset()
                start = i * self.shard_size
                j = 0  # vectors of this shard consumed so far
                quantizer = faiss.index_cpu_to_all_gpus(
                    index_ivf.quantizer
                )
                for xb_j in tqdm(
                    self._iterate_transformed(
                        self.xb_ds,
                        start,
                        EMBEDDINGS_BATCH_SIZE,
                        np.float32,
                    ),
                    file=sys.stdout,
                ):
                    assert xb_j.shape[1] == d
                    _, coarse_ids = quantizer.search(xb_j, self.nprobe)
                    assert np.amin(coarse_ids) >= 0, f"{coarse_ids}"
                    assert np.amax(coarse_ids) < index_ivf.nlist
                    cqf.write(coarse_ids)
                    self._index_add_core_wrapper(  # fixme
                        index_ivf,
                        xb_j,
                        np.arange(start + j, start + j + xb_j.shape[0]),
                        coarse_ids[:, 0],
                    )
                    j += xb_j.shape[0]
                    assert j <= self.shard_size
                    if j == self.shard_size:
                        break
                logging.info(f"writing {sfn}...")
                faiss.write_index(index, sfn)
                completed = True
        except FileExistsError:
            logging.info(f"skipping shard: {i}")
            continue
        finally:
            # Runs after the file has been closed by the ``with`` block.
            # Only remove a file this call created and did not finish.
            if claimed and not completed:
                try:
                    os.remove(cqfn)
                except OSError:
                    # Best effort: never mask the original failure.
                    logging.warning(f"could not remove partial file {cqfn}")
    logging.info("done")

def index_shard(self):
assert os.path.exists(self.index_template_file)
index = faiss.read_index(self.index_template_file)
Expand Down