I need to cluster data using the Fuzzy C-Means algorithm, so I use `fcm` from `pyclustering.cluster.fcm`. I would like to know if there is a way to get the cluster label of each data point.
JavaScript
x
19
19
1
import numpy as np
import pandas as pd
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
from pyclustering.cluster.fcm import fcm
import random

# Generate 100 random 2-D points; each coordinate is drawn from [0, 2).
coords = []
for _ in range(100):
    coords.append((random.random() * 2.0, random.random() * 2.0))

dfcluster = pd.DataFrame(coords, columns=['x', 'y'])
sample = dfcluster.to_numpy()

# Choose 5 initial centers with the k-means++ initializer.
initializer = kmeans_plusplus_initializer(
    sample,
    5,
    kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE,
)
initial_centers = initializer.initialize()

# Create the Fuzzy C-Means instance and run the cluster analysis.
fcm_instance = fcm(sample, initial_centers)
fcm_instance.process()

# Fetch the resulting clusters and show them.
clusters = fcm_instance.get_clusters()

print(clusters)
Advertisement
Answer
I tried it this way and it works, but I do not think it is an ideal answer:
JavaScript
1
27
27
1
import pandas as pd
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
from pyclustering.cluster.fcm import fcm
import random

# Generate 100 random 2-D points; each coordinate is drawn from [0, 2).
coords = [(random.random() * 2.0, random.random() * 2.0) for _ in range(100)]
dfcluster = pd.DataFrame(coords, columns=['x', 'y'])
sample = dfcluster.to_numpy()

# Choose 5 initial centers with the k-means++ initializer.
initial_centers = kmeans_plusplus_initializer(
    sample, 5, kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE
).initialize()

# Create the Fuzzy C-Means instance, run the analysis and obtain the clusters.
fcm_instance = fcm(sample, initial_centers)
fcm_instance.process()
clusters = fcm_instance.get_clusters()

# Each entry of `clusters` is iterated as a collection of row positions into
# `dfcluster`; the position of the entry itself serves as the cluster label.
# All rows are collected first and the frame is built once: growing a
# DataFrame one row at a time copies it on every step, and
# `DataFrame.append` no longer exists in pandas 2.0 and later.
records = [
    (label, dfcluster['x'].iloc[row], dfcluster['y'].iloc[row])
    for label, members in enumerate(clusters)
    for row in members
]
dfcluster = pd.DataFrame(records, columns=['cluster', 'x', 'y'])
print(dfcluster)
However, I found another way to do that, and it is faster. Since the result contains, for every cluster, the indices of the rows that belong to it, I assign the labels directly with `dfcluster.loc[dfcluster.index[results[i]], 'cluster'] = i`.
JavaScript
1
21
21
1
import pandas as pd
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
from pyclustering.cluster.fcm import fcm
import random

# Generate 100 random 2-D points; each coordinate is drawn from [0, 2).
coords = [(random.random() * 2.0, random.random() * 2.0) for _ in range(100)]
dfcluster = pd.DataFrame(coords, columns=['x', 'y'])

# Take the feature matrix from the coordinate columns only, and do it before
# the label column exists, so the placeholder labels are never clustered as
# if they were a third feature.
sample = dfcluster[['x', 'y']].to_numpy()
dfcluster['cluster'] = 0

# Choose 5 initial centers with the k-means++ initializer.
initial_centers = kmeans_plusplus_initializer(
    sample, 5, kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE
).initialize()

# Create the Fuzzy C-Means instance and run the cluster analysis.
fcm_instance = fcm(sample, initial_centers)
fcm_instance.process()

# Each entry of `results` is used as a list of row positions belonging to one
# cluster; write that cluster's number into the matching rows in one
# vectorised assignment per cluster.
results = fcm_instance.get_clusters()
for label, members in enumerate(results):
    dfcluster.loc[dfcluster.index[members], 'cluster'] = label
print(dfcluster)