Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changing dependency for bench_fw to *_cpu instead of *_gpu #3889

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion benchs/bench_fw/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from statistics import mean, median
from typing import Any, Dict, List, Optional

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

import numpy as np

Expand Down Expand Up @@ -214,6 +214,7 @@ def set_io(self, benchmark_io: BenchmarkIO):
@dataclass
class TrainOperator(IndexOperator):
codec_descs: List[CodecDescriptor] = field(default_factory=lambda: [])
assemble_opaque: bool = True

def get_desc(self, name: str) -> Optional[CodecDescriptor]:
for desc in self.codec_descs:
Expand Down Expand Up @@ -248,6 +249,7 @@ def build_index_wrapper(self, codec_desc: CodecDescriptor):
factory=codec_desc.factory,
training_vectors=codec_desc.training_vectors,
codec_name=codec_desc.get_name(),
assemble_opaque=self.assemble_opaque,
)
index.set_io(self.io)
codec_desc.index = index
Expand Down
4 changes: 2 additions & 2 deletions benchs/bench_fw/benchmark_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
from typing import Any, Dict, List, Optional
from zipfile import ZipFile

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

import numpy as np
import submitit
from faiss.contrib.datasets import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.datasets import ( # @manual=//faiss/contrib:faiss_contrib
dataset_from_name,
)

Expand Down
19 changes: 13 additions & 6 deletions benchs/bench_fw/descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

from .benchmark_io import BenchmarkIO
from .utils import timer

logger = logging.getLogger(__name__)


# Important: filenames end with "." and carry no extension (npy, codec, index);
# when writing a file, you are required to append the extension yourself,
# i.e. use filename + "npy" etc.

@dataclass
class IndexDescriptorClassic:
bucket: Optional[str] = None
Expand Down Expand Up @@ -110,21 +113,25 @@ def get_filename(
filename += "."
return filename

def get_kmeans_filename(self, k):
    """Return the filename stem used for the k-means output with *k* centroids.

    The stem is ``self.get_filename()`` followed by ``kmeans_<k>.``; it ends
    with a dot and carries no extension.
    """
    stem = self.get_filename()
    return f"{stem}kmeans_{k}."

def k_means(self, io, k, dry_run):
logger.info(f"k_means {k} {self}")
kmeans_vectors = DatasetDescriptor(
tablename=f"{self.get_filename()}kmeans_{k}.npy"
tablename=f"{self.get_filename()}kmeans_{k}"
)
meta_filename = kmeans_vectors.tablename + ".json"
if not io.file_exist(kmeans_vectors.tablename) or not io.file_exist(
kmeans_filename = kmeans_vectors.get_filename() + "npy"
meta_filename = kmeans_vectors.get_filename() + "json"
if not io.file_exist(kmeans_filename) or not io.file_exist(
meta_filename
):
if dry_run:
return None, None, kmeans_vectors.tablename
return None, None, kmeans_filename
x = io.get_dataset(self)
kmeans = faiss.Kmeans(d=x.shape[1], k=k, gpu=True)
_, t, _ = timer("k_means", lambda: kmeans.train(x))
io.write_nparray(kmeans.centroids, kmeans_vectors.tablename)
io.write_nparray(kmeans.centroids, kmeans_filename)
io.write_json({"k_means_time": t}, meta_filename)
else:
t = io.read_json(meta_filename)["k_means_time"]
Expand Down
32 changes: 23 additions & 9 deletions benchs/bench_fw/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@
from dataclasses import dataclass
from typing import ClassVar, Dict, List, Optional

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss
import numpy as np
from faiss.benchs.bench_fw.descriptors import IndexBaseDescriptor

from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
knn_intersection_measure,
OperatingPointsWithRanges,
)
from faiss.contrib.factory_tools import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.factory_tools import ( # @manual=//faiss/contrib:faiss_contrib
reverse_index_factory,
)
from faiss.contrib.ivf_tools import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.ivf_tools import ( # @manual=//faiss/contrib:faiss_contrib
add_preassigned,
replace_ivf_quantizer,
)
Expand Down Expand Up @@ -635,11 +635,12 @@ def get_index_name(self) -> Optional[str]:

def fetch_index(self):
# read index from file if it is already available
index_filename = None
if self.index_path:
index_filename = os.path.basename(self.index_path)
else:
elif self.index_name:
index_filename = self.index_name + "index"
if self.io.file_exist(index_filename):
if index_filename and self.io.file_exist(index_filename):
if self.index_path:
index = self.io.read_index(
index_filename,
Expand Down Expand Up @@ -681,7 +682,7 @@ def fetch_index(self):
)
assert index.ntotal == xb.shape[0] or index_ivf.ntotal == xb.shape[0]
logger.info("Added vectors to index")
if self.serialize_full_index:
if self.serialize_full_index and index_filename:
codec_size = self.io.write_index(index, index_filename)
assert codec_size is not None

Expand Down Expand Up @@ -908,6 +909,7 @@ def get_codec(self):
class IndexFromFactory(Index):
factory: Optional[str] = None
training_vectors: Optional[DatasetDescriptor] = None
assemble_opaque: bool = True

def __post_init__(self):
super().__post_init__()
Expand All @@ -916,6 +918,19 @@ def __post_init__(self):
if self.factory != "Flat" and self.training_vectors is None:
raise ValueError(f"training_vectors is not set for {self.factory}")

# Return the codec name for this factory-built index. The name is first
# requested from the parent class; the statements below assemble one from the
# factory string, dimension, metric, training vectors and construction
# parameters.
# NOTE(review): indentation was lost in this view, so the exact nesting of the
# statements under each `if` cannot be confirmed from here.
def get_codec_name(self):
codec_name = super().get_codec_name()
if codec_name is None:
# Commas in the factory string become underscores; both pieces built here
# end with ".".
codec_name = f"{self.factory.replace(',', '_')}."
codec_name += f"d_{self.d}.{self.metric.upper()}."
if self.factory != "Flat":
# Any factory other than "Flat" is expected to have training vectors set.
assert self.training_vectors is not None
codec_name += self.training_vectors.get_filename("xt")
if self.construction_params is not None:
codec_name += IndexBaseDescriptor.param_dict_list_to_name(self.construction_params)
# The assembled name is stored on the instance; the method returns the
# instance attribute rather than the local variable.
self.codec_name = codec_name
return self.codec_name

def fetch_meta(self, dry_run=False):
meta_filename = self.get_codec_name() + "json"
if self.io.file_exist(meta_filename):
Expand Down Expand Up @@ -1021,14 +1036,13 @@ def get_quantizer(self, dry_run, pretransform=None):
def assemble(self, dry_run):
logger.info(f"assemble {self.factory}")
model = self.get_model()
opaque = True
t_aggregate = 0
# try:
# reverse_index_factory(model)
# opaque = False
# except NotImplementedError:
# opaque = True
if opaque:
if self.assemble_opaque:
codec = model
else:
if isinstance(model, faiss.IndexPreTransform):
Expand Down
4 changes: 2 additions & 2 deletions benchs/bench_fw/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
from dataclasses import dataclass
from typing import Dict, List, Tuple

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss

# from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
# from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
# OperatingPoints,
# )

Expand Down
4 changes: 2 additions & 2 deletions benchs/bench_fw/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from multiprocessing.pool import ThreadPool
from time import perf_counter

import faiss # @manual=//faiss/python:pyfaiss_gpu
import faiss # @manual=//faiss/python:pyfaiss
import numpy as np

from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
OperatingPoints,
)

Expand Down
Loading