In [1]:
import numpy as np
In [2]:
class tree_node:
    
    def __init__(self):
        self.array = [];
        self.parent = False;
        self.childs = [];
        self.i = [];
        self.support = 0;
        
    def show(self,lvl):
        for i in range(lvl):
            print("\t",end="")
        print(list(self.array.nonzero()[0]),self.support)
        
        for child in self.childs:
            child.show(lvl+1)
            
    def level(self,lvl):
        if (lvl== 0):
            return([self])
        else:
            result = []
            for el in [child.level(lvl-1) for child in self.childs]:
                result.extend(el)
            return(result)
In [3]:
data_checked = 0
In [4]:
def frequent_set(s,data,minsupp):
    global data_checked;
    
    data_checked = data_checked + 1;
    count = 0;
    for i in range(len(data)):
        count += (s <= data[i]).all()
    if (count >= minsupp):
        return count
    else:
        return False
In [8]:
def subset_tree1(n,node):
    
    for j in range(node.i+1,n):
        new_node = tree_node()
        new_node.array = np.copy(node.array)
        new_node.array[j] = 1
        
        frequency = frequent_set(new_node.array,data,minsupp);
        if not frequency:
            continue;
        
        new_node.support = frequency
        new_node.i = j
        new_node.parent = node
        node.childs.append(new_node)
        
    for child in node.childs:
        subset_tree1(n,child)
In [9]:
#                  a b c d e 
data = np.matrix([[1,1,0,0,0],
                  [0,1,1,1,0],
                  [1,0,1,1,1],
                  [1,0,0,1,1],
                  [1,1,1,0,0],
                  [1,1,1,1,0],
                  [1,0,0,0,0],
                  [1,1,1,0,0],
                  [1,1,0,1,0],
                  [0,1,1,0,1]])
In [11]:
minsupp = 2
In [12]:
n = 5
data_checked = 0;
root = tree_node();
root.i = -1;
root.array = np.full(n,False)
root.parent = False
subset_tree1(n,root);
root.show(0)
print(data_checked)
[] 0
	[0] 8
		[0, 1] 5
			[0, 1, 2] 3
			[0, 1, 3] 2
		[0, 2] 4
			[0, 2, 3] 2
		[0, 3] 4
			[0, 3, 4] 2
		[0, 4] 2
	[1] 7
		[1, 2] 5
			[1, 2, 3] 2
		[1, 3] 3
	[2] 6
		[2, 3] 3
		[2, 4] 2
	[3] 5
		[3, 4] 2
	[4] 3
30
In [13]:
def subset_tree2(n,node):
    
    for j in range(node.i+1,n):
        new_node = tree_node()
        
        if (node.parent):
            found = False
            for sibling in node.parent.childs:
                if sibling.i == j:
                    new_node.array = np.logical_or(node.array, sibling.array)
                    found = True
            if (not found):
                continue;
            
        else:
            new_node.array = np.copy(node.array)
            new_node.array[j] = 1
        
        frequency = frequent_set(new_node.array,data,minsupp);
        if not frequency:
            continue;

        new_node.support = frequency
            
        new_node.i = j
        new_node.parent = node
        node.childs.append(new_node)
        
    for child in node.childs:
        subset_tree2(n,child)
In [14]:
data_checked = 0;
root = tree_node();
root.i = -1;
root.array = np.full(n,False)
root.parent = False
subset_tree2(n,root);
root.show(0)
print(data_checked)
[] 0
	[0] 8
		[0, 1] 5
			[0, 1, 2] 3
			[0, 1, 3] 2
		[0, 2] 4
			[0, 2, 3] 2
		[0, 3] 4
			[0, 3, 4] 2
		[0, 4] 2
	[1] 7
		[1, 2] 5
			[1, 2, 3] 2
		[1, 3] 3
	[2] 6
		[2, 3] 3
		[2, 4] 2
	[3] 5
		[3, 4] 2
	[4] 3
24
In [15]:
def subset_tree3_lvl(n,nodes,root):

    for node in nodes:
    
        for j in range(node.i+1,n):
            new_node = tree_node()
            
            if (node.parent):
    
                found = False
                for sibling in node.parent.childs:
                    if sibling.i == j:
                        new_node.array = np.logical_or(node.array, sibling.array)
                        found = True
                if (not found):
                    continue;
                                        
                # check subsets
                all_subsets_frequent = True;
                
                for k in range(n):
                    if new_node.array[k] == True:
                        new_node.array[k] = False
    
                        subset_found = False;
                        
                        for x in nodes:
                            if np.array_equal(x.array, new_node.array):
                                subset_found = True;
                                break;
    
                        new_node.array[k] = True
                                
                        if (not subset_found):
                            all_subsets_frequent = False;
                            break;
                            
                    
                if (not all_subsets_frequent):
                    continue;
                
            else:
                new_node.array = np.copy(node.array)
                new_node.array[j] = 1
            
            
            frequency = frequent_set(new_node.array,data,minsupp);
            if not frequency:
                continue;
            
            new_node.support = frequency
            new_node.i = j
            new_node.parent = node
            node.childs.append(new_node)

def subset_tree3(n,node,root,lvl):
    
    lvl = 0
    level = root.level(lvl)
    while level:
        subset_tree3_lvl(n,level,root)
        lvl += 1
        level = root.level(lvl)
In [16]:
data_checked = 0;
root = tree_node();
root.i = -1;
root.array = np.full(n,False)
subset_tree3(n,root,root,1);
root.show(0)
print(data_checked)
[] 0
	[0] 8
		[0, 1] 5
			[0, 1, 2] 3
			[0, 1, 3] 2
		[0, 2] 4
			[0, 2, 3] 2
		[0, 3] 4
			[0, 3, 4] 2
		[0, 4] 2
	[1] 7
		[1, 2] 5
			[1, 2, 3] 2
		[1, 3] 3
	[2] 6
		[2, 3] 3
		[2, 4] 2
	[3] 5
		[3, 4] 2
	[4] 3
23