Skip to content

Commit d8aec60

Browse files
kuarora and facebook-github-bot
authored and committed
Changing dependency for bench_fw to *_cpu instead of *_gpu (#3889)
Summary: Pull Request resolved: #3889. 1. Change the dependency for bench_fw to *_cpu instead of *_gpu: faiss_gpu and torch are currently incompatible. Once that is fixed, I'll add the GPU dependency back; today, we are not using the GPU in benchmarking yet. 2. Fix a naming issue in kmeans, which is used when assemble runs with opaque set to false. 3. Derive codec_name when it is not assigned explicitly, which happens when using assemble. Reviewed By: satymish Differential Revision: D62671870 fbshipit-source-id: 4a4ecfeef948c99fffba407cbf69d2349544bdfd
1 parent c8d1474 commit d8aec60

File tree

6 files changed

+45
-22
lines changed

6 files changed

+45
-22
lines changed

benchs/bench_fw/benchmark.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from statistics import mean, median
1010
from typing import Any, Dict, List, Optional
1111

12-
import faiss # @manual=//faiss/python:pyfaiss_gpu
12+
import faiss # @manual=//faiss/python:pyfaiss
1313

1414
import numpy as np
1515

@@ -214,6 +214,7 @@ def set_io(self, benchmark_io: BenchmarkIO):
214214
@dataclass
215215
class TrainOperator(IndexOperator):
216216
codec_descs: List[CodecDescriptor] = field(default_factory=lambda: [])
217+
assemble_opaque: bool = True
217218

218219
def get_desc(self, name: str) -> Optional[CodecDescriptor]:
219220
for desc in self.codec_descs:
@@ -248,6 +249,7 @@ def build_index_wrapper(self, codec_desc: CodecDescriptor):
248249
factory=codec_desc.factory,
249250
training_vectors=codec_desc.training_vectors,
250251
codec_name=codec_desc.get_name(),
252+
assemble_opaque=self.assemble_opaque,
251253
)
252254
index.set_io(self.io)
253255
codec_desc.index = index

benchs/bench_fw/benchmark_io.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@
1313
from typing import Any, Dict, List, Optional
1414
from zipfile import ZipFile
1515

16-
import faiss # @manual=//faiss/python:pyfaiss_gpu
16+
import faiss # @manual=//faiss/python:pyfaiss
1717

1818
import numpy as np
1919
import submitit
20-
from faiss.contrib.datasets import ( # @manual=//faiss/contrib:faiss_contrib_gpu
20+
from faiss.contrib.datasets import ( # @manual=//faiss/contrib:faiss_contrib
2121
dataset_from_name,
2222
)
2323

benchs/bench_fw/descriptors.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,17 @@
88
from dataclasses import dataclass
99
from typing import Any, Dict, List, Optional
1010

11-
import faiss # @manual=//faiss/python:pyfaiss_gpu
11+
import faiss # @manual=//faiss/python:pyfaiss
1212

1313
from .benchmark_io import BenchmarkIO
1414
from .utils import timer
1515

1616
logger = logging.getLogger(__name__)
1717

1818

19+
# Important: filenames end with "." and carry no extension (npy, codec, index);
20+
# when writing files, you must append the extension yourself, e.g. filename + "npy".
21+
1922
@dataclass
2023
class IndexDescriptorClassic:
2124
bucket: Optional[str] = None
@@ -110,21 +113,25 @@ def get_filename(
110113
filename += "."
111114
return filename
112115

116+
def get_kmeans_filename(self, k):
117+
return f"{self.get_filename()}kmeans_{k}."
118+
113119
def k_means(self, io, k, dry_run):
114120
logger.info(f"k_means {k} {self}")
115121
kmeans_vectors = DatasetDescriptor(
116-
tablename=f"{self.get_filename()}kmeans_{k}.npy"
122+
tablename=f"{self.get_filename()}kmeans_{k}"
117123
)
118-
meta_filename = kmeans_vectors.tablename + ".json"
119-
if not io.file_exist(kmeans_vectors.tablename) or not io.file_exist(
124+
kmeans_filename = kmeans_vectors.get_filename() + "npy"
125+
meta_filename = kmeans_vectors.get_filename() + "json"
126+
if not io.file_exist(kmeans_filename) or not io.file_exist(
120127
meta_filename
121128
):
122129
if dry_run:
123-
return None, None, kmeans_vectors.tablename
130+
return None, None, kmeans_filename
124131
x = io.get_dataset(self)
125132
kmeans = faiss.Kmeans(d=x.shape[1], k=k, gpu=True)
126133
_, t, _ = timer("k_means", lambda: kmeans.train(x))
127-
io.write_nparray(kmeans.centroids, kmeans_vectors.tablename)
134+
io.write_nparray(kmeans.centroids, kmeans_filename)
128135
io.write_json({"k_means_time": t}, meta_filename)
129136
else:
130137
t = io.read_json(meta_filename)["k_means_time"]

benchs/bench_fw/index.py

+23-9
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,18 @@
1111
from dataclasses import dataclass
1212
from typing import ClassVar, Dict, List, Optional
1313

14-
import faiss # @manual=//faiss/python:pyfaiss_gpu
14+
import faiss # @manual=//faiss/python:pyfaiss
1515
import numpy as np
1616
from faiss.benchs.bench_fw.descriptors import IndexBaseDescriptor
1717

18-
from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
18+
from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
1919
knn_intersection_measure,
2020
OperatingPointsWithRanges,
2121
)
22-
from faiss.contrib.factory_tools import ( # @manual=//faiss/contrib:faiss_contrib_gpu
22+
from faiss.contrib.factory_tools import ( # @manual=//faiss/contrib:faiss_contrib
2323
reverse_index_factory,
2424
)
25-
from faiss.contrib.ivf_tools import ( # @manual=//faiss/contrib:faiss_contrib_gpu
25+
from faiss.contrib.ivf_tools import ( # @manual=//faiss/contrib:faiss_contrib
2626
add_preassigned,
2727
replace_ivf_quantizer,
2828
)
@@ -635,11 +635,12 @@ def get_index_name(self) -> Optional[str]:
635635

636636
def fetch_index(self):
637637
# read index from file if it is already available
638+
index_filename = None
638639
if self.index_path:
639640
index_filename = os.path.basename(self.index_path)
640-
else:
641+
elif self.index_name:
641642
index_filename = self.index_name + "index"
642-
if self.io.file_exist(index_filename):
643+
if index_filename and self.io.file_exist(index_filename):
643644
if self.index_path:
644645
index = self.io.read_index(
645646
index_filename,
@@ -681,7 +682,7 @@ def fetch_index(self):
681682
)
682683
assert index.ntotal == xb.shape[0] or index_ivf.ntotal == xb.shape[0]
683684
logger.info("Added vectors to index")
684-
if self.serialize_full_index:
685+
if self.serialize_full_index and index_filename:
685686
codec_size = self.io.write_index(index, index_filename)
686687
assert codec_size is not None
687688

@@ -908,6 +909,7 @@ def get_codec(self):
908909
class IndexFromFactory(Index):
909910
factory: Optional[str] = None
910911
training_vectors: Optional[DatasetDescriptor] = None
912+
assemble_opaque: bool = True
911913

912914
def __post_init__(self):
913915
super().__post_init__()
@@ -916,6 +918,19 @@ def __post_init__(self):
916918
if self.factory != "Flat" and self.training_vectors is None:
917919
raise ValueError(f"training_vectors is not set for {self.factory}")
918920

921+
def get_codec_name(self):
922+
codec_name = super().get_codec_name()
923+
if codec_name is None:
924+
codec_name = f"{self.factory.replace(',', '_')}."
925+
codec_name += f"d_{self.d}.{self.metric.upper()}."
926+
if self.factory != "Flat":
927+
assert self.training_vectors is not None
928+
codec_name += self.training_vectors.get_filename("xt")
929+
if self.construction_params is not None:
930+
codec_name += IndexBaseDescriptor.param_dict_list_to_name(self.construction_params)
931+
self.codec_name = codec_name
932+
return self.codec_name
933+
919934
def fetch_meta(self, dry_run=False):
920935
meta_filename = self.get_codec_name() + "json"
921936
if self.io.file_exist(meta_filename):
@@ -1021,14 +1036,13 @@ def get_quantizer(self, dry_run, pretransform=None):
10211036
def assemble(self, dry_run):
10221037
logger.info(f"assemble {self.factory}")
10231038
model = self.get_model()
1024-
opaque = True
10251039
t_aggregate = 0
10261040
# try:
10271041
# reverse_index_factory(model)
10281042
# opaque = False
10291043
# except NotImplementedError:
10301044
# opaque = True
1031-
if opaque:
1045+
if self.assemble_opaque:
10321046
codec = model
10331047
else:
10341048
if isinstance(model, faiss.IndexPreTransform):

benchs/bench_fw/optimize.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
from dataclasses import dataclass
88
from typing import Dict, List, Tuple
99

10-
import faiss # @manual=//faiss/python:pyfaiss_gpu
10+
import faiss # @manual=//faiss/python:pyfaiss
1111

12-
# from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
12+
# from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
1313
# OperatingPoints,
1414
# )
1515

benchs/bench_fw/utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99
from multiprocessing.pool import ThreadPool
1010
from time import perf_counter
1111

12-
import faiss # @manual=//faiss/python:pyfaiss_gpu
12+
import faiss # @manual=//faiss/python:pyfaiss
1313
import numpy as np
1414

15-
from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib_gpu
15+
from faiss.contrib.evaluation import ( # @manual=//faiss/contrib:faiss_contrib
1616
OperatingPoints,
1717
)
1818

0 commit comments

Comments
 (0)