-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpreprocess_soli.py
114 lines (89 loc) · 3.5 KB
/
preprocess_soli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
################################################################################
# Title: preprocess_soli.py #
# Description: Perform preprocessing on Google's soli gesture data #
# Author: Aidin Attar #
# Date: 2024-10-29 #
# Version: 0.1 #
# Usage: None #
# Notes: None #
# Python version: 3.11.7 #
################################################################################
import os
import h5py
import numpy as np
from tqdm import tqdm
def generate_mdoppler(data):
    """
    Build a binarized micro-Doppler map from a sequence of range-doppler frames.

    Parameters
    ----------
    data : numpy.ndarray
        Range-doppler data whose first axis indexes frames. Every frame
        must contain exactly 32 * 32 values, because it is reshaped to
        a (32, 32) map.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_frames, 32) holding, for each frame, the mean over
        axis 1 of the reshaped map, with every strictly positive entry
        replaced by 1. Non-positive entries are left as they are.
    """
    n_frames = data.shape[0]
    frames = data.reshape(n_frames, 32, 32)

    # Collapse axis 1 of each (32, 32) map (treated as the range axis) so
    # that one 32-value velocity profile remains per frame. The mean creates
    # a new array, so the caller's data is not modified below.
    profile = frames.mean(axis=1)

    # Binarize: anything above zero counts as signal and becomes 1.
    is_positive = profile > 0
    profile[is_positive] = 1.

    return profile
def unroll_stack(data):
    """
    Return a binarized copy of the range-doppler stack, truncated to 384 bins.

    The input array is left untouched: the first 384 columns are copied
    and the thresholding is applied to that copy only.

    Parameters
    ----------
    data : numpy.ndarray
        The input range-doppler stack. It must be at least 2-D, because it
        is sliced along its second axis.

    Returns
    -------
    numpy.ndarray
        A new array with the same dtype as ``data`` containing at most the
        first 384 columns, where every strictly positive entry has been
        replaced by 1. Non-positive entries are kept unchanged.
    """
    # Cut off everything after 384 bins first, then copy, so the caller's
    # array is never modified and the discarded columns are never processed.
    unrolled = data[:, :384].copy()

    # Binarize: anything above zero becomes 1.
    unrolled[unrolled > 0] = 1.

    return unrolled
def preprocess_soli(data_dir, output_dir, channel=3):
    """
    Preprocess the Google Soli gesture data.

    Every ``.h5`` file found directly inside ``data_dir`` is read and
    converted into a micro-Doppler map and an unrolled range-doppler stack.
    The results are written, together with the label, into a single HDF5
    file named ``soli_Preprocessed.h5`` inside ``output_dir``. Each input
    file gets its own group, named after the file without its extension,
    holding the datasets ``mdoppler``, ``unrolled`` and ``label``.

    Parameters
    ----------
    data_dir : str
        The directory containing the raw data.
    output_dir : str
        The directory to save the preprocessed data. It is created if it
        does not exist.
    channel : int
        Index of the channel to read; the dataset ``ch<channel>`` is loaded
        from every input file.
    """
    # Create the output directory if it does not exist (no check-then-create race)
    os.makedirs(output_dir, exist_ok=True)

    # Consider only the HDF5 files; sort them so the processing order is
    # deterministic regardless of the order returned by the filesystem.
    files = sorted(f for f in os.listdir(data_dir) if f.endswith('.h5'))

    output_path = os.path.join(output_dir, 'soli_Preprocessed.h5')
    with h5py.File(output_path, 'w') as h5f:
        # Loop through each file in the data directory
        for file in tqdm(files):
            # Load the data into memory; the input file can be closed right after
            with h5py.File(os.path.join(data_dir, file), 'r') as f:
                data = f[f'ch{channel}'][:]
                # Only the first entry of the label dataset is kept
                label = f['label'][0]

            # Preprocess the data
            mdoppler = generate_mdoppler(data)
            unrolled = unroll_stack(data)

            # One group per input file. Strip only the final extension so
            # names containing extra dots (e.g. 'a.b.h5') stay distinct.
            grp = h5f.require_group(os.path.splitext(file)[0])

            # Save the preprocessed data and label
            grp.create_dataset('mdoppler', data=mdoppler)
            grp.create_dataset('unrolled', data=unrolled)
            grp.create_dataset('label', data=label)
if __name__ == '__main__':
    # Script entry point: preprocess the raw Soli recordings using the
    # default input and output locations, relative to the working directory.
    data_dir, output_dir = 'data/SoliData', 'data/Soli_Preprocessed'
    preprocess_soli(data_dir=data_dir, output_dir=output_dir)