-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathkmeans_mp.py
36 lines (23 loc) · 875 Bytes
/
kmeans_mp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from sklearn.cluster import KMeans
import numpy as np
from multiprocessing import Pool
import os
def run(ncl, max_iter, sampl, clusteringID):
    """Fit a single k-means model and report how the fit went.

    Args:
        ncl: Number of clusters, forwarded as ``n_clusters``.
        max_iter: Iteration cap, forwarded as ``max_iter``.
        sampl: Data handed to ``KMeans.fit``.
        clusteringID: Label for this run; only used in the printed report.

    Returns:
        The fitted ``KMeans`` estimator.
    """
    # n_init=1: exactly one initialisation per call; repeated restarts are
    # left to the caller.
    model = KMeans(
        n_clusters=ncl,
        max_iter=max_iter,
        n_init=1,
        verbose=False,
    )
    model.fit(sampl)
    print('Clustering ID = ', clusteringID, 'n_iter = ', model.n_iter_, 'Inertia = ', model.inertia_)
    return model
def _run_with_seed(seed, ncl, max_iter, sampl, clusteringID):
    """Seed this worker's global NumPy RNG, then delegate to ``run``."""
    # ``run`` builds KMeans without a random_state, so the initialisation is
    # drawn from NumPy's global generator; seed it explicitly for this task.
    np.random.seed(seed)
    return run(ncl, max_iter, sampl, clusteringID)


def kmeans_with_multiple_runs(ncl, max_iter, nclustering, sampl):
    """Run k-means several times in parallel and keep the best fit.

    Each run is executed by ``run`` in a worker process; the estimator with
    the lowest ``inertia_`` is returned (the earliest run wins on ties).

    Args:
        ncl: Number of clusters for every run.
        max_iter: Iteration cap for every run.
        nclustering: Number of independent runs; must be at least 1.
        sampl: Data to cluster; it is sent to every worker.

    Returns:
        The fitted estimator with the smallest inertia among all runs.

    Raises:
        ValueError: If ``nclustering`` is smaller than 1.
    """
    if nclustering < 1:
        # Fail before any worker process is started, with a message that
        # names the actual problem.
        raise ValueError('nclustering must be at least 1, got %r' % (nclustering,))

    # Forked workers start with a copy of the parent's NumPy RNG state, so
    # without distinct seeds parallel runs can repeat the same initialisation.
    # Seeds are drawn from the parent's global RNG, so a prior np.random.seed()
    # call in the caller still makes the whole procedure reproducible.
    seeds = np.random.randint(np.iinfo(np.int32).max, size=nclustering).tolist()
    args = [(seed, ncl, max_iter, sampl, i) for i, seed in enumerate(seeds)]

    # No point in starting more workers than there are runs.
    num_processors = min(nclustering, os.cpu_count() or 1)
    p = Pool(processes=num_processors)
    try:
        clusters = p.starmap(_run_with_seed, args)
    finally:
        # Always release the workers, even if a run raised. close() + join()
        # (rather than terminate()) lets the workers exit normally.
        p.close()
        p.join()

    # min() returns the first minimal element, matching list.index(min(...)).
    best = min(clusters, key=lambda c: c.inertia_)
    print('The best inertia = ', best.inertia_)
    return best