import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets.samples_generator import make_blobs
from itertools import compress
%matplotlib notebook
centers = [[0, 1], [-1, -1], [1, -1], [-0.4,0.6], [0.4,0.6], [0,1.4]]
stds = [0.3]*3+[0.08]*3
X1, y1 = make_blobs(n_samples=200, centers=centers, cluster_std=stds,
random_state=0)
colors = np.array(['#377eb8', '#ff7f00', '#4daf4a','#e41a1c', '#a65628', '#984ea3','#999999', '#f781bf', '#dede00'])
plt.figure(figsize=(10,5))
plt.scatter(X1[:,0], X1[:,1],color=colors[y1])
X1 = np.append(X1, np.zeros((len(X1),1)), axis=1)
X1 = np.append(X1, np.full ((len(X1),1), np.inf), axis=1)
X1 = np.append(X1, np.zeros((len(X1),1)), axis=1)
X1 = [X1[i,] for i in range(len(X1))]
# DBSCAN
def dist(a,b):
return np.linalg.norm(a-b)
# epsilon sousedstvi
def N(e,x,X1):
distances = [dist(x,y[:-3]) for y in X1]
neigh_distances = [d <= e for d in distances]
return list(compress(X1, neigh_distances))
def core_distance(e,q,x,X1):
n = N(e,x,X1)
if len(n) < q:
return np.inf
else:
distances = [dist(x,y[:-3]) for y in n]
distances.sort()
return distances[q-1]
def reachability_distance(e,q,x,y,X1):
n = N(e,x,X1)
if len(n) < q:
return np.inf
else:
return max(dist(x,n[q-1,:-3]),dist(x,y))
#OPTICS
output = []
def ExpandClusterOrder(x,X1,e,q):
#mark as processed
x[-1]=1
x[-2]=np.inf
x[-3]=core_distance(e,q,x[:-3],X1)
output.append(x)
if x[-3] != np.inf:
n = N(e,x[:-3],X1)
update_order_seeds(x,n,e,X1)
while not order_seeds_empty():
y = order_seeds_next();
y[-1] = 1
y[-3] = core_distance(e,q,y[:-3],X1)
output.append(y)
if y[-3] != np.inf:
n = N(e,y[:-3],X1)
update_order_seeds(y,n,e,X1)
def update_order_seeds(x,n,e,X1):
c = core_distance(e,q,x[:-3],X1)
for y in n:
if y[-1]==0:
r = max(c,dist(x[:-3],y[:-3]))
if y[-2] == np.inf:
y[-2] = r
insert_to_order_seeds(y)
elif r < y[-2]:
y[-2] = r
decrease_in_order_seeds(y)
order_seeds = []
def insert_to_order_seeds(y):
order_seeds.append(y)
def decrease_in_order_seeds(y):
pass
def order_seeds_next():
order_seeds.sort(key=lambda a: a[-2],reverse = True)
return order_seeds.pop()
def order_seeds_empty():
return not order_seeds
for x in X1:
if x[-1]==0:
ExpandClusterOrder(x,X1,epsilon,q)
plt.figure(figsize=(10,5))
plt.plot([ min(x[-2],epsilon) for x in output])
def ExtractDBSCAN(output,eps,q):
clId = -1
for x in output:
if x[-2] > eps:
if x[-3] <= eps:
clId += 1
x[-1] = clId
else:
x[-1] = -1
else:
x[-1] = clId
ExtractDBSCAN(output,0.5,q)
plt.figure(figsize=(10,5))
plt.scatter([x[0] for x in output], [x[1] for x in output],color=[colors[int(x[-1])] for x in output])
ExtractDBSCAN(output,0.1,q)
plt.figure(figsize=(10,5))
plt.scatter([x[0] for x in output], [x[1] for x in output],color=[colors[int(x[-1])] for x in output])