Skip to content

Commit ac85d10

Browse files
author
rsteca
committed
improve code formatting
1 parent a5e9b9a commit ac85d10

File tree

4 files changed

+347
-177
lines changed

4 files changed

+347
-177
lines changed

evolutionary_search/cv.py

+175-77
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@
1313
from sklearn.metrics import check_scoring
1414
from sklearn.utils.validation import _num_samples, indexable
1515

16+
1617
def enum(**enums):
17-
return type('Enum', (), enums)
18+
return type("Enum", (), enums)
1819

1920

2021
param_types = enum(Categorical=1, Numerical=2)
@@ -52,11 +53,13 @@ def _mutIndividual(individual, up, indpb, gene_type=None):
5253
for i, up, rn in zip(range(len(up)), up, [random.random() for _ in range(len(up))]):
5354
if rn < indpb:
5455
individual[i] = random.randint(0, up)
55-
return individual,
56+
return (individual,)
5657

5758

5859
def _cxIndividual(ind1, ind2, indpb, gene_type):
59-
for i, gt, rn in zip(range(len(ind1)), gene_type, [random.random() for _ in range(len(ind1))]):
60+
for i, gt, rn in zip(
61+
range(len(ind1)), gene_type, [random.random() for _ in range(len(ind1))]
62+
):
6063
if rn > indpb:
6164
continue
6265
if gt is param_types.Categorical:
@@ -74,19 +77,32 @@ def _cxIndividual(ind1, ind2, indpb, gene_type):
7477

7578

7679
def _individual_to_params(individual, name_values):
77-
return dict((name, values[gene]) for gene, (name, values) in zip(individual, name_values))
78-
79-
80-
def _evalFunction(individual, name_values, X, y, scorer, cv, iid, fit_params,
81-
verbose=0, error_score='raise', score_cache={}):
82-
""" Developer Note:
83-
--------------------
84-
score_cache was purposefully moved to parameters, and given a dict reference.
85-
It will be modified in-place by _evalFunction based on it's reference.
86-
This is to allow for a managed, paralell memoization dict,
87-
and also for different memoization per instance of EvolutionaryAlgorithmSearchCV.
88-
Remember that dicts created inside function definitions are presistent between calls,
89-
So unless it is replaced this function will be memoized each call automatically. """
80+
return dict(
81+
(name, values[gene]) for gene, (name, values) in zip(individual, name_values)
82+
)
83+
84+
85+
def _evalFunction(
86+
individual,
87+
name_values,
88+
X,
89+
y,
90+
scorer,
91+
cv,
92+
iid,
93+
fit_params,
94+
verbose=0,
95+
error_score="raise",
96+
score_cache={},
97+
):
98+
"""Developer Note:
99+
--------------------
100+
score_cache was purposefully moved to parameters, and given a dict reference.
101+
It will be modified in-place by _evalFunction based on its reference.
102+
This is to allow for a managed, parallel memoization dict,
103+
and also for different memoization per instance of EvolutionaryAlgorithmSearchCV.
104+
Remember that dicts created inside function definitions are persistent between calls,
105+
So unless it is replaced this function will be memoized each call automatically."""
90106

91107
parameters = _individual_to_params(individual, name_values)
92108
score = 0
@@ -97,11 +113,21 @@ def _evalFunction(individual, name_values, X, y, scorer, cv, iid, fit_params,
97113
score = score_cache[paramkey]
98114
else:
99115
for train, test in cv.split(X, y):
100-
assert len(train) > 0 and len(test) > 0, "Training and/or testing not long enough for evaluation."
101-
_score = _fit_and_score(estimator=individual.est, X=X, y=y, scorer=scorer,
102-
train=train, test=test, verbose=verbose,
103-
parameters=parameters, fit_params=fit_params,
104-
error_score=error_score)['test_scores']
116+
assert (
117+
len(train) > 0 and len(test) > 0
118+
), "Training and/or testing not long enough for evaluation."
119+
_score = _fit_and_score(
120+
estimator=individual.est,
121+
X=X,
122+
y=y,
123+
scorer=scorer,
124+
train=train,
125+
test=test,
126+
verbose=verbose,
127+
parameters=parameters,
128+
fit_params=fit_params,
129+
error_score=error_score,
130+
)["test_scores"]
105131

106132
if iid:
107133
score += _score * len(test)
@@ -110,7 +136,9 @@ def _evalFunction(individual, name_values, X, y, scorer, cv, iid, fit_params,
110136
score += _score
111137
n_test += 1
112138

113-
assert n_test > 0, "No fitting was accomplished, check data and cross validation method."
139+
assert (
140+
n_test > 0
141+
), "No fitting was accomplished, check data and cross validation method."
114142
score /= float(n_test)
115143
score_cache[paramkey] = score
116144

@@ -278,20 +306,37 @@ class EvolutionaryAlgorithmSearchCV(BaseSearchCV):
278306

279307
def _run_search(self, evaluate_candidates):
280308
"""
281-
scikit-learn new version introduce a new abstract function hence we have to implement an anonymous function
309+
Newer versions of scikit-learn introduce a new abstract method, hence we have to provide a placeholder implementation
282310
"""
283311
pass
284312

285-
def __init__(self, estimator, params, scoring=None, cv=4,
286-
refit=True, verbose=False, population_size=50,
287-
gene_mutation_prob=0.1, gene_crossover_prob=0.5,
288-
tournament_size=3, generations_number=10, gene_type=None,
289-
n_jobs=1, iid=True, error_score='raise',
290-
fit_params={}):
313+
def __init__(
314+
self,
315+
estimator,
316+
params,
317+
scoring=None,
318+
cv=4,
319+
refit=True,
320+
verbose=False,
321+
population_size=50,
322+
gene_mutation_prob=0.1,
323+
gene_crossover_prob=0.5,
324+
tournament_size=3,
325+
generations_number=10,
326+
gene_type=None,
327+
n_jobs=1,
328+
iid=True,
329+
error_score="raise",
330+
fit_params={},
331+
):
291332
super(EvolutionaryAlgorithmSearchCV, self).__init__(
292-
estimator=estimator, scoring=scoring,
293-
refit=refit, cv=cv, verbose=verbose,
294-
error_score=error_score)
333+
estimator=estimator,
334+
scoring=scoring,
335+
refit=refit,
336+
cv=cv,
337+
verbose=verbose,
338+
error_score=error_score,
339+
)
295340
self.iid = iid
296341
self.params = params
297342
self.population_size = population_size
@@ -309,7 +354,9 @@ def __init__(self, estimator, params, scoring=None, cv=4,
309354
self.n_jobs = n_jobs
310355
self.fit_params = fit_params
311356
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
312-
creator.create("Individual", list, est=clone(self.estimator), fitness=creator.FitnessMax)
357+
creator.create(
358+
"Individual", list, est=clone(self.estimator), fitness=creator.FitnessMax
359+
)
313360

314361
@property
315362
def possible_params(self):
@@ -321,37 +368,47 @@ def cv_results_(self):
321368
if self._cv_results is None: # This is to cache the answer until updated
322369
# Populate output and return
323370
# If not already fit, returns an empty dictionary
324-
possible_params = self.possible_params # Pre-load property for use in this function
371+
possible_params = (
372+
self.possible_params
373+
) # Pre-load property for use in this function
325374
out = defaultdict(list)
326375
for p, gen in enumerate(self.all_history_):
327376
# Get individuals and indexes, their list of scores,
328377
# and additionally the name_values for this set of parameters
329378

330-
idxs, individuals, each_scores = zip(*[(idx, indiv, np.mean(indiv.fitness.values))
331-
for idx, indiv in list(gen.genealogy_history.items())
332-
if indiv.fitness.valid and not np.all(np.isnan(indiv.fitness.values))])
379+
idxs, individuals, each_scores = zip(
380+
*[
381+
(idx, indiv, np.mean(indiv.fitness.values))
382+
for idx, indiv in list(gen.genealogy_history.items())
383+
if indiv.fitness.valid
384+
and not np.all(np.isnan(indiv.fitness.values))
385+
]
386+
)
333387

334388
name_values, _, _ = _get_param_types_maxint(possible_params[p])
335389

336390
# Add to output
337-
out['param_index'] += [p] * len(idxs)
338-
out['index'] += idxs
339-
out['params'] += [_individual_to_params(indiv, name_values)
340-
for indiv in individuals]
341-
out['mean_test_score'] += [np.nanmean(scores) for scores in each_scores]
342-
out['std_test_score'] += [np.nanstd(scores) for scores in each_scores]
343-
out['min_test_score'] += [np.nanmin(scores) for scores in each_scores]
344-
out['max_test_score'] += [np.nanmax(scores) for scores in each_scores]
345-
out['nan_test_score?'] += [np.any(np.isnan(scores)) for scores in each_scores]
391+
out["param_index"] += [p] * len(idxs)
392+
out["index"] += idxs
393+
out["params"] += [
394+
_individual_to_params(indiv, name_values) for indiv in individuals
395+
]
396+
out["mean_test_score"] += [np.nanmean(scores) for scores in each_scores]
397+
out["std_test_score"] += [np.nanstd(scores) for scores in each_scores]
398+
out["min_test_score"] += [np.nanmin(scores) for scores in each_scores]
399+
out["max_test_score"] += [np.nanmax(scores) for scores in each_scores]
400+
out["nan_test_score?"] += [
401+
np.any(np.isnan(scores)) for scores in each_scores
402+
]
346403
self._cv_results = out
347404

348405
return self._cv_results
349406

350407
@property
351408
def best_index_(self):
352-
""" Returns the absolute index (not the 'index' column) with the best max_score
353-
from cv_results_. """
354-
return np.argmax(self.cv_results_['max_test_score'])
409+
"""Returns the absolute index (not the 'index' column) with the best max_score
410+
from cv_results_."""
411+
return np.argmax(self.cv_results_["max_test_score"])
355412

356413
def fit(self, X, y=None):
357414
self.best_estimator_ = None
@@ -376,9 +433,10 @@ def _fit(self, X, y, parameter_dict):
376433

377434
if y is not None:
378435
if len(y) != n_samples:
379-
raise ValueError('Target variable (y) has a different number '
380-
'of samples (%i) than data (X: %i samples)'
381-
% (len(y), n_samples))
436+
raise ValueError(
437+
"Target variable (y) has a different number "
438+
"of samples (%i) than data (X: %i samples)" % (len(y), n_samples)
439+
)
382440
cv = check_cv(self.cv, y=y, classifier=is_classifier(self.estimator))
383441

384442
toolbox = base.Toolbox()
@@ -390,7 +448,9 @@ def _fit(self, X, y, parameter_dict):
390448
if self.verbose:
391449
print("Types %s and maxint %s detected" % (self.gene_type, maxints))
392450

393-
toolbox.register("individual", _initIndividual, creator.Individual, maxints=maxints)
451+
toolbox.register(
452+
"individual", _initIndividual, creator.Individual, maxints=maxints
453+
)
394454
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
395455

396456
# If n_jobs is an int, greater than 1 or less than 0 (indicating to use as
@@ -401,12 +461,17 @@ def _fit(self, X, y, parameter_dict):
401461
if isinstance(self.n_jobs, int):
402462
if self.n_jobs > 1 or self.n_jobs < 0:
403463
from multiprocessing import Pool # Only imports if needed
404-
if os.name == 'nt': # Checks if we are on Windows
405-
warnings.warn(("Windows requires Pools to be declared from within "
406-
"an \'if __name__==\"__main__\":\' structure. In this "
407-
"case, n_jobs will accept map functions as well to "
408-
"facilitate custom parallelism. Please check to see "
409-
"that all code is working as expected."))
464+
465+
if os.name == "nt": # Checks if we are on Windows
466+
warnings.warn(
467+
(
468+
"Windows requires Pools to be declared from within "
469+
"an 'if __name__==\"__main__\":' structure. In this "
470+
"case, n_jobs will accept map functions as well to "
471+
"facilitate custom parallelism. Please check to see "
472+
"that all code is working as expected."
473+
)
474+
)
410475
pool = Pool(self.n_jobs)
411476
toolbox.register("map", pool.map)
412477

@@ -415,17 +480,37 @@ def _fit(self, X, y, parameter_dict):
415480
try:
416481
toolbox.register("map", self.n_jobs)
417482
except Exception:
418-
raise TypeError("n_jobs must be either an integer or map function. Received: {}".format(type(self.n_jobs)))
419-
420-
toolbox.register("evaluate", _evalFunction,
421-
name_values=name_values, X=X, y=y,
422-
scorer=self.scorer_, cv=cv, iid=self.iid, verbose=self.verbose,
423-
error_score=self.error_score, fit_params=self.fit_params,
424-
score_cache=self.score_cache)
425-
426-
toolbox.register("mate", _cxIndividual, indpb=self.gene_crossover_prob, gene_type=self.gene_type)
427-
428-
toolbox.register("mutate", _mutIndividual, indpb=self.gene_mutation_prob, up=maxints)
483+
raise TypeError(
484+
"n_jobs must be either an integer or map function. Received: {}".format(
485+
type(self.n_jobs)
486+
)
487+
)
488+
489+
toolbox.register(
490+
"evaluate",
491+
_evalFunction,
492+
name_values=name_values,
493+
X=X,
494+
y=y,
495+
scorer=self.scorer_,
496+
cv=cv,
497+
iid=self.iid,
498+
verbose=self.verbose,
499+
error_score=self.error_score,
500+
fit_params=self.fit_params,
501+
score_cache=self.score_cache,
502+
)
503+
504+
toolbox.register(
505+
"mate",
506+
_cxIndividual,
507+
indpb=self.gene_crossover_prob,
508+
gene_type=self.gene_type,
509+
)
510+
511+
toolbox.register(
512+
"mutate", _mutIndividual, indpb=self.gene_mutation_prob, up=maxints
513+
)
429514
toolbox.register("select", tools.selTournament, tournsize=self.tournament_size)
430515

431516
pop = toolbox.population(n=self.population_size)
@@ -445,20 +530,33 @@ def _fit(self, X, y, parameter_dict):
445530
hist.update(pop)
446531

447532
if self.verbose:
448-
print('--- Evolve in {0} possible combinations ---'.format(np.prod(np.array(maxints) + 1)))
449-
450-
pop, logbook = algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2,
451-
ngen=self.generations_number, stats=stats,
452-
halloffame=hof, verbose=self.verbose)
533+
print(
534+
"--- Evolve in {0} possible combinations ---".format(
535+
np.prod(np.array(maxints) + 1)
536+
)
537+
)
538+
539+
pop, logbook = algorithms.eaSimple(
540+
pop,
541+
toolbox,
542+
cxpb=0.5,
543+
mutpb=0.2,
544+
ngen=self.generations_number,
545+
stats=stats,
546+
halloffame=hof,
547+
verbose=self.verbose,
548+
)
453549

454550
# Save History
455551
self.all_history_.append(hist)
456552
self.all_logbooks_.append(logbook)
457553
current_best_score_ = hof[0].fitness.values[0]
458554
current_best_params_ = _individual_to_params(hof[0], name_values)
459555
if self.verbose:
460-
print("Best individual is: %s\nwith fitness: %s" % (
461-
current_best_params_, current_best_score_))
556+
print(
557+
"Best individual is: %s\nwith fitness: %s"
558+
% (current_best_params_, current_best_score_)
559+
)
462560

463561
if current_best_score_ > self.best_mem_score_:
464562
self.best_mem_score_ = current_best_score_

0 commit comments

Comments
 (0)