13
13
from sklearn .metrics import check_scoring
14
14
from sklearn .utils .validation import _num_samples , indexable
15
15
16
+
16
17
def enum (** enums ):
17
- return type (' Enum' , (), enums )
18
+ return type (" Enum" , (), enums )
18
19
19
20
20
21
param_types = enum (Categorical = 1 , Numerical = 2 )
@@ -52,11 +53,13 @@ def _mutIndividual(individual, up, indpb, gene_type=None):
52
53
for i , up , rn in zip (range (len (up )), up , [random .random () for _ in range (len (up ))]):
53
54
if rn < indpb :
54
55
individual [i ] = random .randint (0 , up )
55
- return individual ,
56
+ return ( individual ,)
56
57
57
58
58
59
def _cxIndividual (ind1 , ind2 , indpb , gene_type ):
59
- for i , gt , rn in zip (range (len (ind1 )), gene_type , [random .random () for _ in range (len (ind1 ))]):
60
+ for i , gt , rn in zip (
61
+ range (len (ind1 )), gene_type , [random .random () for _ in range (len (ind1 ))]
62
+ ):
60
63
if rn > indpb :
61
64
continue
62
65
if gt is param_types .Categorical :
@@ -74,19 +77,32 @@ def _cxIndividual(ind1, ind2, indpb, gene_type):
74
77
75
78
76
79
def _individual_to_params (individual , name_values ):
77
- return dict ((name , values [gene ]) for gene , (name , values ) in zip (individual , name_values ))
78
-
79
-
80
- def _evalFunction (individual , name_values , X , y , scorer , cv , iid , fit_params ,
81
- verbose = 0 , error_score = 'raise' , score_cache = {}):
82
- """ Developer Note:
83
- --------------------
84
- score_cache was purposefully moved to parameters, and given a dict reference.
85
- It will be modified in-place by _evalFunction based on it's reference.
86
- This is to allow for a managed, paralell memoization dict,
87
- and also for different memoization per instance of EvolutionaryAlgorithmSearchCV.
88
- Remember that dicts created inside function definitions are presistent between calls,
89
- So unless it is replaced this function will be memoized each call automatically. """
80
+ return dict (
81
+ (name , values [gene ]) for gene , (name , values ) in zip (individual , name_values )
82
+ )
83
+
84
+
85
+ def _evalFunction (
86
+ individual ,
87
+ name_values ,
88
+ X ,
89
+ y ,
90
+ scorer ,
91
+ cv ,
92
+ iid ,
93
+ fit_params ,
94
+ verbose = 0 ,
95
+ error_score = "raise" ,
96
+ score_cache = {},
97
+ ):
98
+ """Developer Note:
99
+ --------------------
100
+ score_cache was purposefully moved to parameters, and given a dict reference.
101
+ It will be modified in-place by _evalFunction based on its reference.
102
+ This is to allow for a managed, parallel memoization dict,
103
+ and also for different memoization per instance of EvolutionaryAlgorithmSearchCV.
104
+ Remember that dicts created inside function definitions are persistent between calls,
105
+ So unless it is replaced this function will be memoized each call automatically."""
90
106
91
107
parameters = _individual_to_params (individual , name_values )
92
108
score = 0
@@ -97,11 +113,21 @@ def _evalFunction(individual, name_values, X, y, scorer, cv, iid, fit_params,
97
113
score = score_cache [paramkey ]
98
114
else :
99
115
for train , test in cv .split (X , y ):
100
- assert len (train ) > 0 and len (test ) > 0 , "Training and/or testing not long enough for evaluation."
101
- _score = _fit_and_score (estimator = individual .est , X = X , y = y , scorer = scorer ,
102
- train = train , test = test , verbose = verbose ,
103
- parameters = parameters , fit_params = fit_params ,
104
- error_score = error_score )['test_scores' ]
116
+ assert (
117
+ len (train ) > 0 and len (test ) > 0
118
+ ), "Training and/or testing not long enough for evaluation."
119
+ _score = _fit_and_score (
120
+ estimator = individual .est ,
121
+ X = X ,
122
+ y = y ,
123
+ scorer = scorer ,
124
+ train = train ,
125
+ test = test ,
126
+ verbose = verbose ,
127
+ parameters = parameters ,
128
+ fit_params = fit_params ,
129
+ error_score = error_score ,
130
+ )["test_scores" ]
105
131
106
132
if iid :
107
133
score += _score * len (test )
@@ -110,7 +136,9 @@ def _evalFunction(individual, name_values, X, y, scorer, cv, iid, fit_params,
110
136
score += _score
111
137
n_test += 1
112
138
113
- assert n_test > 0 , "No fitting was accomplished, check data and cross validation method."
139
+ assert (
140
+ n_test > 0
141
+ ), "No fitting was accomplished, check data and cross validation method."
114
142
score /= float (n_test )
115
143
score_cache [paramkey ] = score
116
144
@@ -278,20 +306,37 @@ class EvolutionaryAlgorithmSearchCV(BaseSearchCV):
278
306
279
307
def _run_search (self , evaluate_candidates ):
280
308
"""
281
- scikit-learn new version introduce a new abstract function hence we have to implement an anonymous function
309
+ Newer versions of scikit-learn introduce a new abstract method, hence we have to provide a no-op implementation
282
310
"""
283
311
pass
284
312
285
- def __init__ (self , estimator , params , scoring = None , cv = 4 ,
286
- refit = True , verbose = False , population_size = 50 ,
287
- gene_mutation_prob = 0.1 , gene_crossover_prob = 0.5 ,
288
- tournament_size = 3 , generations_number = 10 , gene_type = None ,
289
- n_jobs = 1 , iid = True , error_score = 'raise' ,
290
- fit_params = {}):
313
+ def __init__ (
314
+ self ,
315
+ estimator ,
316
+ params ,
317
+ scoring = None ,
318
+ cv = 4 ,
319
+ refit = True ,
320
+ verbose = False ,
321
+ population_size = 50 ,
322
+ gene_mutation_prob = 0.1 ,
323
+ gene_crossover_prob = 0.5 ,
324
+ tournament_size = 3 ,
325
+ generations_number = 10 ,
326
+ gene_type = None ,
327
+ n_jobs = 1 ,
328
+ iid = True ,
329
+ error_score = "raise" ,
330
+ fit_params = {},
331
+ ):
291
332
super (EvolutionaryAlgorithmSearchCV , self ).__init__ (
292
- estimator = estimator , scoring = scoring ,
293
- refit = refit , cv = cv , verbose = verbose ,
294
- error_score = error_score )
333
+ estimator = estimator ,
334
+ scoring = scoring ,
335
+ refit = refit ,
336
+ cv = cv ,
337
+ verbose = verbose ,
338
+ error_score = error_score ,
339
+ )
295
340
self .iid = iid
296
341
self .params = params
297
342
self .population_size = population_size
@@ -309,7 +354,9 @@ def __init__(self, estimator, params, scoring=None, cv=4,
309
354
self .n_jobs = n_jobs
310
355
self .fit_params = fit_params
311
356
creator .create ("FitnessMax" , base .Fitness , weights = (1.0 ,))
312
- creator .create ("Individual" , list , est = clone (self .estimator ), fitness = creator .FitnessMax )
357
+ creator .create (
358
+ "Individual" , list , est = clone (self .estimator ), fitness = creator .FitnessMax
359
+ )
313
360
314
361
@property
315
362
def possible_params (self ):
@@ -321,37 +368,47 @@ def cv_results_(self):
321
368
if self ._cv_results is None : # This is to cache the answer until updated
322
369
# Populate output and return
323
370
# If not already fit, returns an empty dictionary
324
- possible_params = self .possible_params # Pre-load property for use in this function
371
+ possible_params = (
372
+ self .possible_params
373
+ ) # Pre-load property for use in this function
325
374
out = defaultdict (list )
326
375
for p , gen in enumerate (self .all_history_ ):
327
376
# Get individuals and indexes, their list of scores,
328
377
# and additionally the name_values for this set of parameters
329
378
330
- idxs , individuals , each_scores = zip (* [(idx , indiv , np .mean (indiv .fitness .values ))
331
- for idx , indiv in list (gen .genealogy_history .items ())
332
- if indiv .fitness .valid and not np .all (np .isnan (indiv .fitness .values ))])
379
+ idxs , individuals , each_scores = zip (
380
+ * [
381
+ (idx , indiv , np .mean (indiv .fitness .values ))
382
+ for idx , indiv in list (gen .genealogy_history .items ())
383
+ if indiv .fitness .valid
384
+ and not np .all (np .isnan (indiv .fitness .values ))
385
+ ]
386
+ )
333
387
334
388
name_values , _ , _ = _get_param_types_maxint (possible_params [p ])
335
389
336
390
# Add to output
337
- out ['param_index' ] += [p ] * len (idxs )
338
- out ['index' ] += idxs
339
- out ['params' ] += [_individual_to_params (indiv , name_values )
340
- for indiv in individuals ]
341
- out ['mean_test_score' ] += [np .nanmean (scores ) for scores in each_scores ]
342
- out ['std_test_score' ] += [np .nanstd (scores ) for scores in each_scores ]
343
- out ['min_test_score' ] += [np .nanmin (scores ) for scores in each_scores ]
344
- out ['max_test_score' ] += [np .nanmax (scores ) for scores in each_scores ]
345
- out ['nan_test_score?' ] += [np .any (np .isnan (scores )) for scores in each_scores ]
391
+ out ["param_index" ] += [p ] * len (idxs )
392
+ out ["index" ] += idxs
393
+ out ["params" ] += [
394
+ _individual_to_params (indiv , name_values ) for indiv in individuals
395
+ ]
396
+ out ["mean_test_score" ] += [np .nanmean (scores ) for scores in each_scores ]
397
+ out ["std_test_score" ] += [np .nanstd (scores ) for scores in each_scores ]
398
+ out ["min_test_score" ] += [np .nanmin (scores ) for scores in each_scores ]
399
+ out ["max_test_score" ] += [np .nanmax (scores ) for scores in each_scores ]
400
+ out ["nan_test_score?" ] += [
401
+ np .any (np .isnan (scores )) for scores in each_scores
402
+ ]
346
403
self ._cv_results = out
347
404
348
405
return self ._cv_results
349
406
350
407
@property
351
408
def best_index_ (self ):
352
- """ Returns the absolute index (not the 'index' column) with the best max_score
353
- from cv_results_. """
354
- return np .argmax (self .cv_results_ [' max_test_score' ])
409
+ """Returns the absolute index (not the 'index' column) with the best max_score
410
+ from cv_results_."""
411
+ return np .argmax (self .cv_results_ [" max_test_score" ])
355
412
356
413
def fit (self , X , y = None ):
357
414
self .best_estimator_ = None
@@ -376,9 +433,10 @@ def _fit(self, X, y, parameter_dict):
376
433
377
434
if y is not None :
378
435
if len (y ) != n_samples :
379
- raise ValueError ('Target variable (y) has a different number '
380
- 'of samples (%i) than data (X: %i samples)'
381
- % (len (y ), n_samples ))
436
+ raise ValueError (
437
+ "Target variable (y) has a different number "
438
+ "of samples (%i) than data (X: %i samples)" % (len (y ), n_samples )
439
+ )
382
440
cv = check_cv (self .cv , y = y , classifier = is_classifier (self .estimator ))
383
441
384
442
toolbox = base .Toolbox ()
@@ -390,7 +448,9 @@ def _fit(self, X, y, parameter_dict):
390
448
if self .verbose :
391
449
print ("Types %s and maxint %s detected" % (self .gene_type , maxints ))
392
450
393
- toolbox .register ("individual" , _initIndividual , creator .Individual , maxints = maxints )
451
+ toolbox .register (
452
+ "individual" , _initIndividual , creator .Individual , maxints = maxints
453
+ )
394
454
toolbox .register ("population" , tools .initRepeat , list , toolbox .individual )
395
455
396
456
# If n_jobs is an int, greater than 1 or less than 0 (indicating to use as
@@ -401,12 +461,17 @@ def _fit(self, X, y, parameter_dict):
401
461
if isinstance (self .n_jobs , int ):
402
462
if self .n_jobs > 1 or self .n_jobs < 0 :
403
463
from multiprocessing import Pool # Only imports if needed
404
- if os .name == 'nt' : # Checks if we are on Windows
405
- warnings .warn (("Windows requires Pools to be declared from within "
406
- "an \' if __name__==\" __main__\" :\' structure. In this "
407
- "case, n_jobs will accept map functions as well to "
408
- "facilitate custom parallelism. Please check to see "
409
- "that all code is working as expected." ))
464
+
465
+ if os .name == "nt" : # Checks if we are on Windows
466
+ warnings .warn (
467
+ (
468
+ "Windows requires Pools to be declared from within "
469
+ "an 'if __name__==\" __main__\" :' structure. In this "
470
+ "case, n_jobs will accept map functions as well to "
471
+ "facilitate custom parallelism. Please check to see "
472
+ "that all code is working as expected."
473
+ )
474
+ )
410
475
pool = Pool (self .n_jobs )
411
476
toolbox .register ("map" , pool .map )
412
477
@@ -415,17 +480,37 @@ def _fit(self, X, y, parameter_dict):
415
480
try :
416
481
toolbox .register ("map" , self .n_jobs )
417
482
except Exception :
418
- raise TypeError ("n_jobs must be either an integer or map function. Received: {}" .format (type (self .n_jobs )))
419
-
420
- toolbox .register ("evaluate" , _evalFunction ,
421
- name_values = name_values , X = X , y = y ,
422
- scorer = self .scorer_ , cv = cv , iid = self .iid , verbose = self .verbose ,
423
- error_score = self .error_score , fit_params = self .fit_params ,
424
- score_cache = self .score_cache )
425
-
426
- toolbox .register ("mate" , _cxIndividual , indpb = self .gene_crossover_prob , gene_type = self .gene_type )
427
-
428
- toolbox .register ("mutate" , _mutIndividual , indpb = self .gene_mutation_prob , up = maxints )
483
+ raise TypeError (
484
+ "n_jobs must be either an integer or map function. Received: {}" .format (
485
+ type (self .n_jobs )
486
+ )
487
+ )
488
+
489
+ toolbox .register (
490
+ "evaluate" ,
491
+ _evalFunction ,
492
+ name_values = name_values ,
493
+ X = X ,
494
+ y = y ,
495
+ scorer = self .scorer_ ,
496
+ cv = cv ,
497
+ iid = self .iid ,
498
+ verbose = self .verbose ,
499
+ error_score = self .error_score ,
500
+ fit_params = self .fit_params ,
501
+ score_cache = self .score_cache ,
502
+ )
503
+
504
+ toolbox .register (
505
+ "mate" ,
506
+ _cxIndividual ,
507
+ indpb = self .gene_crossover_prob ,
508
+ gene_type = self .gene_type ,
509
+ )
510
+
511
+ toolbox .register (
512
+ "mutate" , _mutIndividual , indpb = self .gene_mutation_prob , up = maxints
513
+ )
429
514
toolbox .register ("select" , tools .selTournament , tournsize = self .tournament_size )
430
515
431
516
pop = toolbox .population (n = self .population_size )
@@ -445,20 +530,33 @@ def _fit(self, X, y, parameter_dict):
445
530
hist .update (pop )
446
531
447
532
if self .verbose :
448
- print ('--- Evolve in {0} possible combinations ---' .format (np .prod (np .array (maxints ) + 1 )))
449
-
450
- pop , logbook = algorithms .eaSimple (pop , toolbox , cxpb = 0.5 , mutpb = 0.2 ,
451
- ngen = self .generations_number , stats = stats ,
452
- halloffame = hof , verbose = self .verbose )
533
+ print (
534
+ "--- Evolve in {0} possible combinations ---" .format (
535
+ np .prod (np .array (maxints ) + 1 )
536
+ )
537
+ )
538
+
539
+ pop , logbook = algorithms .eaSimple (
540
+ pop ,
541
+ toolbox ,
542
+ cxpb = 0.5 ,
543
+ mutpb = 0.2 ,
544
+ ngen = self .generations_number ,
545
+ stats = stats ,
546
+ halloffame = hof ,
547
+ verbose = self .verbose ,
548
+ )
453
549
454
550
# Save History
455
551
self .all_history_ .append (hist )
456
552
self .all_logbooks_ .append (logbook )
457
553
current_best_score_ = hof [0 ].fitness .values [0 ]
458
554
current_best_params_ = _individual_to_params (hof [0 ], name_values )
459
555
if self .verbose :
460
- print ("Best individual is: %s\n with fitness: %s" % (
461
- current_best_params_ , current_best_score_ ))
556
+ print (
557
+ "Best individual is: %s\n with fitness: %s"
558
+ % (current_best_params_ , current_best_score_ )
559
+ )
462
560
463
561
if current_best_score_ > self .best_mem_score_ :
464
562
self .best_mem_score_ = current_best_score_
0 commit comments