In [18]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets.samples_generator import make_blobs
from itertools import compress
In [19]:
%matplotlib notebook

centers = [[0, 1], [-1, -1], [1, -1], [-0.4,0.6], [0.4,0.6], [0,1.4]]
stds = [0.3]*3+[0.08]*3

X1, y1 = make_blobs(n_samples=200, centers=centers, cluster_std=stds,
                  random_state=0)

colors = np.array(['#377eb8', '#ff7f00', '#4daf4a','#e41a1c', '#a65628', '#984ea3','#999999', '#f781bf', '#dede00'])
plt.figure(figsize=(10,5))
plt.scatter(X1[:,0], X1[:,1],color=colors[y1])
Out[19]:
<matplotlib.collections.PathCollection at 0x210a323ef28>
In [20]:
X1 = np.append(X1, np.zeros((len(X1),1)), axis=1)
X1 = np.append(X1, np.full ((len(X1),1), np.inf), axis=1)
X1 = np.append(X1, np.zeros((len(X1),1)), axis=1)


X1 = [X1[i,] for i in range(len(X1))]

# DBSCAN

def dist(a,b):
    
    return np.linalg.norm(a-b)

# epsilon sousedstvi
def N(e,x,X1):
    distances = [dist(x,y[:-3]) for y in X1]
    neigh_distances = [d <= e for d in distances]
    return list(compress(X1, neigh_distances))

def core_distance(e,q,x,X1):
    n = N(e,x,X1)
    if len(n) < q:
        return np.inf
    else:
        
        distances = [dist(x,y[:-3]) for y in n]
        distances.sort()
        
        return distances[q-1]


def reachability_distance(e,q,x,y,X1):
    n = N(e,x,X1)
    if len(n) < q:
        return np.inf
    else:
        return max(dist(x,n[q-1,:-3]),dist(x,y))

#OPTICS


output = []


def ExpandClusterOrder(x,X1,e,q):
    #mark as processed

    x[-1]=1
    x[-2]=np.inf    
    x[-3]=core_distance(e,q,x[:-3],X1)
        
    output.append(x)

    if x[-3] != np.inf:
        
        n = N(e,x[:-3],X1)
        
        update_order_seeds(x,n,e,X1)
        
        while not order_seeds_empty():
            
            y = order_seeds_next();
            
            y[-1] = 1
            y[-3] = core_distance(e,q,y[:-3],X1)
            output.append(y)
            
            if y[-3] != np.inf:
                n = N(e,y[:-3],X1)
                update_order_seeds(y,n,e,X1)
                
                
def update_order_seeds(x,n,e,X1):
    c = core_distance(e,q,x[:-3],X1)
    for y in n:
        if y[-1]==0:
            r = max(c,dist(x[:-3],y[:-3]))
            if y[-2] == np.inf:
                y[-2] = r
                insert_to_order_seeds(y)
            elif r < y[-2]:
                y[-2] = r
                decrease_in_order_seeds(y)


order_seeds = []

def insert_to_order_seeds(y):
    order_seeds.append(y)

def decrease_in_order_seeds(y):
    pass

def order_seeds_next():
    order_seeds.sort(key=lambda a: a[-2],reverse = True)
    return order_seeds.pop()
    
def order_seeds_empty():
    return not order_seeds

for x in X1:
    if x[-1]==0:
        ExpandClusterOrder(x,X1,epsilon,q)
        
In [21]:
plt.figure(figsize=(10,5))
plt.plot([ min(x[-2],epsilon) for x in output])
Out[21]:
[<matplotlib.lines.Line2D at 0x2109e79dda0>]
In [22]:
def ExtractDBSCAN(output,eps,q):
    clId = -1
    for x in output:
        if x[-2] > eps:
            if x[-3] <= eps:
                clId += 1
                x[-1] = clId
            else:
                x[-1] = -1
        else:
            x[-1] = clId
            
      
In [23]:
      
ExtractDBSCAN(output,0.5,q)

plt.figure(figsize=(10,5))
plt.scatter([x[0] for x in output], [x[1] for x in output],color=[colors[int(x[-1])] for x in output])
Out[23]:
<matplotlib.collections.PathCollection at 0x210a0ac6be0>
In [24]:
ExtractDBSCAN(output,0.1,q)

plt.figure(figsize=(10,5))
plt.scatter([x[0] for x in output], [x[1] for x in output],color=[colors[int(x[-1])] for x in output])
Out[24]:
<matplotlib.collections.PathCollection at 0x2109e1e09e8>