@@ -65,6 +65,7 @@ def extract_windows(sensordata : pandas.DataFrame,
65
65
window_length : int ,
66
66
window_hop : int ,
67
67
groupby : list [str ],
68
+ time_column = 'time' ,
68
69
):
69
70
70
71
groups = sensordata .groupby (groupby , observed = True )
@@ -75,7 +76,7 @@ def extract_windows(sensordata : pandas.DataFrame,
75
76
windows = []
76
77
77
78
# make sure order is correct
78
- group_df = group_df .reset_index ().set_index ('time' ).sort_index ()
79
+ group_df = group_df .reset_index ().set_index (time_column ).sort_index ()
79
80
80
81
# create windows
81
82
win_start = 0
@@ -167,6 +168,7 @@ def extract_features(sensordata : pandas.DataFrame,
167
168
quant_div = 4 ,
168
169
quant_depth = 6 ,
169
170
label_column = 'activity' ,
171
+ time_column = 'time' ,
170
172
) -> pandas .DataFrame :
171
173
"""
172
174
Convert sensor data into fixed-sized time windows and extract features
@@ -181,7 +183,7 @@ def extract_features(sensordata : pandas.DataFrame,
181
183
182
184
# Split into fixed-length windows
183
185
features_values = []
184
- generator = extract_windows (sensordata , window_length , window_hop , groupby = groupby )
186
+ generator = extract_windows (sensordata , window_length , window_hop , groupby = groupby , time_column = time_column )
185
187
for windows in generator :
186
188
187
189
# drop invalid data
@@ -262,8 +264,21 @@ def run_pipeline(run, hyperparameters, dataset,
262
264
'squat' , 'jumpingjack' , 'lunge' , 'other' ,
263
265
],
264
266
),
267
+ 'toothbrush_hussain2021' : dict (
268
+ groups = ['subject' ],
269
+ label_column = 'is_brushing' ,
270
+ time_column = 'elapsed' ,
271
+ data_columns = ['acc_x' , 'acc_y' , 'acc_z' ],
272
+ classes = [
273
+ #'mixed',
274
+ 'True' , 'False' ,
275
+ ],
276
+ ),
265
277
}
266
278
279
+ if not dataset in dataset_config .keys ():
280
+ raise ValueError (f"Unknown dataset { dataset } " )
281
+
267
282
if not os .path .exists (out_dir ):
268
283
os .makedirs (out_dir )
269
284
@@ -278,21 +293,25 @@ def run_pipeline(run, hyperparameters, dataset,
278
293
groups = dataset_config [dataset ]['groups' ]
279
294
data_columns = dataset_config [dataset ]['data_columns' ]
280
295
enabled_classes = dataset_config [dataset ]['classes' ]
296
+ label_column = dataset_config [dataset ].get ('label_column' , 'activity' )
297
+ time_column = dataset_config [dataset ].get ('time_column' , 'time' )
298
+
299
+ data [label_column ] = data [label_column ].astype (str )
281
300
282
301
data_load_duration = time .time () - data_load_start
283
302
log .info ('data-loaded' , dataset = dataset , samples = len (data ), duration = data_load_duration )
284
303
285
-
286
-
287
304
feature_extraction_start = time .time ()
288
305
features = extract_features (data ,
289
306
columns = data_columns ,
290
307
groupby = groups ,
291
308
features = features ,
292
309
window_length = model_settings ['window_length' ],
293
310
window_hop = model_settings ['window_hop' ],
311
+ label_column = label_column ,
312
+ time_column = time_column ,
294
313
)
295
- labeled = numpy .count_nonzero (features ['activity' ].notna ())
314
+ labeled = numpy .count_nonzero (features [label_column ].notna ())
296
315
297
316
feature_extraction_duration = time .time () - feature_extraction_start
298
317
log .info ('feature-extraction-done' ,
@@ -303,19 +322,20 @@ def run_pipeline(run, hyperparameters, dataset,
303
322
)
304
323
305
324
# Drop windows without labels
306
- features = features [features . activity .notna ()]
325
+ features = features [features [ label_column ] .notna ()]
307
326
308
327
# Keep only windows with enabled classes
309
- features = features [features . activity .isin (enabled_classes )]
328
+ features = features [features [ label_column ] .isin (enabled_classes )]
310
329
311
- print ('Class distribution\n ' , features ['activity' ].value_counts (dropna = False ))
330
+ print ('Class distribution\n ' , features [label_column ].value_counts (dropna = False ))
312
331
313
332
# Run train-evaluate
314
333
evaluate_groupby = groups [0 ]
315
334
results , estimator = evaluate (features ,
316
335
hyperparameters = hyperparameters ,
317
336
groupby = evaluate_groupby ,
318
337
n_splits = n_splits ,
338
+ label_column = label_column ,
319
339
)
320
340
321
341
# Save a model
@@ -328,7 +348,6 @@ def run_pipeline(run, hyperparameters, dataset,
328
348
export_model (estimator_path , model_path )
329
349
330
350
# Save testdata
331
- label_column = 'activity'
332
351
classes = estimator .classes_
333
352
class_mapping = dict (zip (classes , range (len (classes ))))
334
353
meta_path = os .path .join (out_dir , f'{ dataset } .meta.json' )
0 commit comments