import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
import numpy as np
import math;
# Seven 2-D sample points used as the first toy data set.
X = np.array([
    [1, 2],
    [1.5, 1.8],
    [5, 8],
    [8, 8],
    [9, 9],
    [1, 0.6],
    [9, 11],
])

# Plot the raw points; X.T unpacks into the x column and the y column.
plt.scatter(*X.T, s=150)
plt.show()
class K_Means:
    """Minimal k-means clustering (Lloyd's algorithm, Euclidean distance).

    After ``fit`` the instance exposes:

    * ``centroids`` -- dict mapping cluster index to the cluster centre,
    * ``classifications`` -- dict mapping cluster index to the list of
      samples assigned to it in the last assignment step,
    * ``n_iter_`` -- number of iterations actually performed.
    """

    def __init__(self, k=2, tol=0.001, max_iter=300):
        """Store the hyper-parameters.

        Args:
            k: Number of clusters.
            tol: Convergence threshold. Iteration stops once no centroid
                moves farther than this (Euclidean distance) in one update.
            max_iter: Upper bound on the number of iterations.
        """
        self.k = k
        self.tol = tol
        self.max_iter = max_iter

    def fit(self, data):
        """Cluster ``data`` into ``self.k`` groups.

        The first ``k`` samples seed the centroids, so ``data`` must hold
        at least ``k`` samples (an ``IndexError`` is raised otherwise).

        Args:
            data: Array-like of samples, one sample per row.
        """
        data = np.asarray(data)
        self.centroids = {idx: data[idx] for idx in range(self.k)}
        # Defined up front so the attribute exists even when max_iter is 0.
        self.classifications = {idx: [] for idx in range(self.k)}
        self.n_iter_ = 0

        for _ in range(self.max_iter):
            self.n_iter_ += 1

            # Assignment step: every sample joins its nearest centroid.
            self.classifications = {idx: [] for idx in range(self.k)}
            for featureset in data:
                distances = [
                    np.linalg.norm(featureset - self.centroids[idx])
                    for idx in self.centroids
                ]
                # argmin returns the first minimum, so ties go to the
                # lowest cluster index.
                nearest = int(np.argmin(distances))
                self.classifications[nearest].append(featureset)

            # Update step: move each centroid to the mean of its members.
            largest_shift = 0.0
            for idx, members in self.classifications.items():
                if not members:
                    # Averaging an empty cluster would yield NaN and the
                    # centroid could never recover; keep the previous one.
                    continue
                updated = np.average(members, axis=0)
                shift = np.linalg.norm(updated - self.centroids[idx])
                largest_shift = max(largest_shift, shift)
                self.centroids[idx] = updated

            if largest_shift <= self.tol:
                break
# Run k-means on the current X and print the members of both clusters.
clust = K_Means()
clust.fit(X)
for label in (0, 1):
    print(clust.classifications[label])

# One scatter call per cluster; marker=None is scatter's own default.
for label, marker in ((0, None), (1, 'o')):
    members = clust.classifications[label]
    plt.scatter(
        [point[0] for point in members],
        [point[1] for point in members],
        s=150,
        marker=marker,
    )
plt.show()

# Second toy data set: same points, but the last one is moved far away
# along the y axis.
X = np.array([
    [1, 2],
    [1.5, 1.8],
    [5, 8],
    [8, 8],
    [9, 9],
    [1, 0.6],
    [9, 111],
])
plt.scatter(*X.T, s=150)
plt.show()

# Cluster the new data with a fresh k-means instance and plot the result.
clust = K_Means()
clust.fit(X)
for label, marker in ((0, None), (1, 'o')):
    members = clust.classifications[label]
    plt.scatter(
        [point[0] for point in members],
        [point[1] for point in members],
        s=150,
        marker=marker,
    )
plt.show()
class K_Medoids:
    """Minimal k-medoids clustering with Euclidean distance.

    Unlike k-means, every cluster centre (medoid) is one of the samples:
    the member with the smallest total distance to the other members.

    After ``fit`` the instance exposes:

    * ``medoids`` -- dict mapping cluster index to its medoid sample,
    * ``classifications`` -- dict mapping cluster index to the list of
      samples assigned to it in the last assignment step.
    """

    def __init__(self, k=2, tol=0.001, max_iter=300):
        """Store the hyper-parameters.

        Args:
            k: Number of clusters.
            tol: Kept for interface compatibility; medoids are actual
                samples, so convergence is detected by exact equality.
            max_iter: Upper bound on the number of iterations.
        """
        self.k = k
        self.tol = tol
        self.max_iter = max_iter

    def fit(self, data):
        """Cluster ``data`` into ``self.k`` groups.

        The first ``k`` samples seed the medoids, so ``data`` must hold at
        least ``k`` samples (an ``IndexError`` is raised otherwise).

        Args:
            data: Array-like of samples, one sample per row.
        """
        data = np.asarray(data)
        self.medoids = {idx: data[idx] for idx in range(self.k)}
        # Defined up front so the attribute exists even when max_iter is 0.
        self.classifications = {idx: [] for idx in range(self.k)}

        for _ in range(self.max_iter):
            # Assignment step: every sample joins its nearest medoid.
            self.classifications = {idx: [] for idx in range(self.k)}
            for featureset in data:
                distances = [
                    np.linalg.norm(featureset - self.medoids[idx])
                    for idx in self.medoids
                ]
                nearest = int(np.argmin(distances))
                self.classifications[nearest].append(featureset)

            # Update step: pick the cheapest member of each cluster.
            changed = False
            for idx, members in self.classifications.items():
                if not members:
                    # Nothing to choose from; keep the previous medoid.
                    continue
                best = self._best_medoid(members)
                if not np.array_equal(best, self.medoids[idx]):
                    changed = True
                self.medoids[idx] = best

            # Unchanged medoids would reproduce the same assignment, so
            # further iterations cannot alter the result.
            if not changed:
                break

    @staticmethod
    def _best_medoid(members):
        """Return the member with the least total distance to all members."""
        stacked = np.asarray(members, dtype=float).reshape(len(members), -1)
        costs = [
            np.linalg.norm(stacked - candidate, axis=1).sum()
            for candidate in stacked
        ]
        # argmin keeps the first minimum, i.e. the earliest member on ties.
        return members[int(np.argmin(costs))]
# Cluster the current X with k-medoids and print both clusters' members.
clust = K_Medoids()
clust.fit(X)
for label in (0, 1):
    print(clust.classifications[label])

# One scatter call per cluster; marker=None is scatter's own default.
for label, marker in ((0, None), (1, 'o')):
    members = clust.classifications[label]
    plt.scatter(
        [point[0] for point in members],
        [point[1] for point in members],
        s=150,
        marker=marker,
    )
plt.show()
class fuuzy_c_Means:
    """Minimal fuzzy c-means clustering with Euclidean distance.

    Every sample belongs to every cluster with a membership degree in
    ``[0, 1]``; the degrees of one sample sum to 1.

    After ``fit`` the instance exposes:

    * ``centroids`` -- dict mapping cluster index to the cluster centre,
    * ``classifications`` -- list with one entry per sample, each a list
      of ``k`` membership degrees.
    """

    def __init__(self, k=2, tol=0.001, max_iter=100):
        """Store the hyper-parameters.

        Args:
            k: Number of clusters.
            tol: Convergence threshold. Iteration stops once no centroid
                moves farther than this (Euclidean distance) in one update.
            max_iter: Upper bound on the number of iterations.
        """
        self.k = k
        self.tol = tol
        self.max_iter = max_iter
        self.m = 2  # fuzzifier; the exponent used below is 2 / (m - 1)

    def deg(self, dists, index):
        """Return the membership degree of one sample in cluster ``index``.

        Args:
            dists: Distances from the sample to each of the ``k`` centroids.
            index: Cluster whose membership degree is wanted.

        Returns:
            ``1 / sum_i (dists[index] / dists[i]) ** (2 / (m - 1))``. When
            the sample coincides with one or more centroids the formula
            would divide by zero, so the membership is instead shared
            equally among those centroids and is 0 for all others.
        """
        coincident = sum(1 for i in range(self.k) if dists[i] == 0)
        if coincident:
            return 1.0 / coincident if dists[index] == 0 else 0.0
        exponent = 2 / (self.m - 1)
        suma = sum(
            (dists[index] / dists[i]) ** exponent for i in range(self.k)
        )
        return 1 / suma

    def wavg(self, data, index):
        """Return the membership-weighted mean of ``data`` for one cluster.

        Each sample is weighted by its membership degree in cluster
        ``index`` raised to the power ``m``. If every weight is zero the
        current centroid is returned, avoiding a division by zero.
        """
        total = 0
        suma = 0
        for item, featureset in zip(self.classifications, data):
            weight = item[index] ** self.m
            total += weight
            suma += featureset * weight
        if total == 0:
            return self.centroids[index]
        return suma / total

    def _initial_centroids(self, data):
        """Seed centroids from spread-out samples, each offset by +1.

        Samples are taken every 4th row when the data is long enough for
        that; otherwise the stride shrinks so the index stays in range.
        """
        n_samples = len(data)
        if 4 * (self.k - 1) < n_samples:
            stride = 4
        else:
            stride = max(1, n_samples // self.k)
        return {
            i: data[min(stride * i, n_samples - 1)] + 1
            for i in range(self.k)
        }

    def fit(self, data):
        """Compute membership degrees and centroids for ``data``.

        Args:
            data: Array-like of samples, one sample per row; must not be
                empty.
        """
        data = np.asarray(data)
        self.classifications = [[] for _ in data]
        self.centroids = self._initial_centroids(data)

        for _ in range(self.max_iter):
            # Membership step: degrees of every sample in every cluster.
            for j, featureset in enumerate(data):
                dists = [
                    np.linalg.norm(featureset - self.centroids[centroid])
                    for centroid in self.centroids
                ]
                self.classifications[j] = [
                    self.deg(dists, cluster) for cluster in range(self.k)
                ]

            # Centroid step: weighted means; track the largest movement.
            largest_shift = 0.0
            for kk in range(self.k):
                updated = self.wavg(data, kk)
                shift = np.linalg.norm(updated - self.centroids[kk])
                largest_shift = max(largest_shift, shift)
                self.centroids[kk] = updated

            if largest_shift <= self.tol:
                break
# Fuzzy c-means on the current X: print the per-sample membership degrees
# and the resulting centroids.
clust = fuuzy_c_Means()
clust.fit(X)
print(clust.classifications)
print(clust.centroids)

# Load the first 1500 rows of the comma-separated file and plot them,
# using column 0 as x and column 1 as y.
my_data = np.genfromtxt('s1.txt', delimiter=',')[:1500, :]
plt.scatter(my_data[:, 0], my_data[:, 1], s=150)
plt.show()

# Fit the same K_Means instance with k=3, then k=4, plotting every cluster
# with its own scatter call. marker=None is scatter's own default.
clust = K_Means()
for n_clusters, markers in (
    (3, (None, 'o', 'x')),
    (4, (None, 'o', 'x', 's')),
):
    clust.k = n_clusters
    clust.fit(my_data)
    for label, marker in enumerate(markers):
        members = clust.classifications[label]
        plt.scatter(
            [point[0] for point in members],
            [point[1] for point in members],
            s=150,
            marker=marker,
        )
    plt.show()