5
5
# when the native code emitter is enabled, which is critical for performance...
6
6
# maybe we can move the inner part into a kmeans_cluster_step done in C
7
7
8
+
8
9
#@micropython.native
9
- def cluster (values , centroids ,
10
- channels = 3 , max_iter = 10 , stop_changes = 0 ):
10
+ def cluster_iter (values , centroids , assignments , features ,
11
+ max_iter = 10 , stop_changes = 0 ):
11
12
"""
12
13
Perform K-Means clustering of @values
13
14
@@ -16,16 +17,14 @@ def cluster(values, centroids,
16
17
NOTE: will mutate @centroids
17
18
"""
18
19
20
+ channels = features
19
21
n_clusters = len (centroids ) // channels
20
22
n_samples = len (values ) // channels
21
23
22
- assert channels == 3 , 'only support 3 channels for now'
23
-
24
24
assert channels < 255 , channels
25
25
assert n_clusters < 255 , n_clusters
26
26
assert n_samples < 65535 , n_samples
27
27
28
- assignments = array .array ('B' , (255 for _ in range (n_samples )))
29
28
cluster_sums = array .array ('L' , (0 for _ in range (n_clusters * channels )))
30
29
cluster_counts = array .array ('H' , (0 for _ in range (n_clusters )))
31
30
@@ -36,18 +35,24 @@ def cluster(values, centroids,
36
35
for s in range (n_samples ):
37
36
v = values [s * channels :(s + 1 )* channels ]
38
37
38
+ # PERF: consider taking all N points at the same time, filling indices and (optionally) distances
39
39
idx , dist = euclidean_argmin (centroids , v )
40
40
#idx, dist = 0, 0
41
41
42
42
if idx != assignments [s ]:
43
43
changes += 1
44
44
assignments [s ] = idx
45
45
46
- print ('iter' , i , changes )
46
+ # Pass control back to caller
47
+ # So one can do other work between the iterations
48
+ # or implement custom stopping criteria
49
+ yield changes
50
+
47
51
if changes <= stop_changes :
48
52
break
49
53
50
54
## update cluster centroids
55
+ # PERF: consider moving this to C, with an update_centroids() function
51
56
# reset old values
52
57
for c in range (n_clusters * channels ):
53
58
cluster_sums [c ] = 0
@@ -70,9 +75,18 @@ def cluster(values, centroids,
70
75
71
76
for i in range (channels ):
72
77
centroids [(c * channels )+ i ] = cluster_sums [(c * channels )+ i ] // count
73
-
74
- #yield assignments
75
- # TODO: make this into a generator? so other work can be done in between
76
78
77
79
80
+
81
+ def cluster (values , centroids , features , ** kwargs ):
82
+ """Convenience wrapper around cluster_iter"""
83
+
84
+ n_samples = len (values ) // features
85
+ assignments = array .array ('B' , (255 for _ in range (n_samples )))
86
+
87
+ generator = cluster_iter (values , centroids , assignments , features , ** kwargs )
88
+ for changes in generator :
89
+ pass
90
+
78
91
return assignments
92
+
0 commit comments