import numpy as np
class tree_node:
def __init__(self):
self.array = [];
self.parent = False;
self.childs = [];
self.i = [];
self.support = 0;
def show(self,lvl):
for i in range(lvl):
print("\t",end="")
print(list(self.array.nonzero()[0]),self.support)
for child in self.childs:
child.show(lvl+1)
def level(self,lvl):
if (lvl== 0):
return([self])
else:
result = []
for el in [child.level(lvl-1) for child in self.childs]:
result.extend(el)
return(result)
data_checked = 0
def frequent_set(s,data,minsupp):
global data_checked;
data_checked = data_checked + 1;
count = 0;
for i in range(len(data)):
count += (s <= data[i]).all()
if (count >= minsupp):
return count
else:
return False
def subset_tree1(n,node):
for j in range(node.i+1,n):
new_node = tree_node()
new_node.array = np.copy(node.array)
new_node.array[j] = 1
frequency = frequent_set(new_node.array,data,minsupp);
if not frequency:
continue;
new_node.support = frequency
new_node.i = j
new_node.parent = node
node.childs.append(new_node)
for child in node.childs:
subset_tree1(n,child)
# a b c d e
data = np.matrix([[1,1,0,0,0],
[0,1,1,1,0],
[1,0,1,1,1],
[1,0,0,1,1],
[1,1,1,0,0],
[1,1,1,1,0],
[1,0,0,0,0],
[1,1,1,0,0],
[1,1,0,1,0],
[0,1,1,0,1]])
minsupp = 2
n = 5
data_checked = 0;
root = tree_node();
root.i = -1;
root.array = np.full(n,False)
root.parent = False
subset_tree1(n,root);
root.show(0)
print(data_checked)
def subset_tree2(n,node):
for j in range(node.i+1,n):
new_node = tree_node()
if (node.parent):
found = False
for sibling in node.parent.childs:
if sibling.i == j:
new_node.array = np.logical_or(node.array, sibling.array)
found = True
if (not found):
continue;
else:
new_node.array = np.copy(node.array)
new_node.array[j] = 1
frequency = frequent_set(new_node.array,data,minsupp);
if not frequency:
continue;
new_node.support = frequency
new_node.i = j
new_node.parent = node
node.childs.append(new_node)
for child in node.childs:
subset_tree2(n,child)
data_checked = 0;
root = tree_node();
root.i = -1;
root.array = np.full(n,False)
root.parent = False
subset_tree2(n,root);
root.show(0)
print(data_checked)
def subset_tree3_lvl(n,nodes,root):
for node in nodes:
for j in range(node.i+1,n):
new_node = tree_node()
if (node.parent):
found = False
for sibling in node.parent.childs:
if sibling.i == j:
new_node.array = np.logical_or(node.array, sibling.array)
found = True
if (not found):
continue;
# check subsets
all_subsets_frequent = True;
for k in range(n):
if new_node.array[k] == True:
new_node.array[k] = False
subset_found = False;
for x in nodes:
if np.array_equal(x.array, new_node.array):
subset_found = True;
break;
new_node.array[k] = True
if (not subset_found):
all_subsets_frequent = False;
break;
if (not all_subsets_frequent):
continue;
else:
new_node.array = np.copy(node.array)
new_node.array[j] = 1
frequency = frequent_set(new_node.array,data,minsupp);
if not frequency:
continue;
new_node.support = frequency
new_node.i = j
new_node.parent = node
node.childs.append(new_node)
def subset_tree3(n,node,root,lvl):
lvl = 0
level = root.level(lvl)
while level:
subset_tree3_lvl(n,level,root)
lvl += 1
level = root.level(lvl)
data_checked = 0;
root = tree_node();
root.i = -1;
root.array = np.full(n,False)
subset_tree3(n,root,root,1);
root.show(0)
print(data_checked)