-
Notifications
You must be signed in to change notification settings - Fork 0
/
DataProcessor.py
49 lines (39 loc) · 1.64 KB
/
DataProcessor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import xmltodict
import numpy as np
import glob
import os
# Side length, in pixels, that every image is resized to (images become square).
image_dim = 228
# Directory holding the source .jpg images and their .xml annotations.
images_dir = 'wildfire_training_images'
# Directory that receives the generated .npy arrays.
output_dir = 'wildfire_processed_data'


def find_annotated_pairs(directory):
    """Return aligned annotation and image paths found in *directory*.

    glob.glob() returns paths in arbitrary order, so two independent globs
    give no guarantee that the i-th image belongs to the i-th annotation.
    Files are therefore matched on their base name (``fire1.xml`` with
    ``fire1.jpg``) and returned sorted by that name.

    Returns:
        A ``(xml_paths, jpg_paths)`` tuple of equally long lists in which
        position ``i`` of both lists refers to the same sample. Files that
        have no counterpart are left out and reported on stdout.
    """
    def index_by_stem(extension):
        pattern = os.path.join(directory, '*' + extension)
        return {
            os.path.splitext(os.path.basename(path))[0]: path
            for path in glob.glob(pattern)
        }

    xml_by_stem = index_by_stem('.xml')
    jpg_by_stem = index_by_stem('.jpg')
    shared_stems = sorted(xml_by_stem.keys() & jpg_by_stem.keys())

    unpaired = len(xml_by_stem) + len(jpg_by_stem) - 2 * len(shared_stems)
    if unpaired:
        # Skipping keeps features and labels aligned instead of silently
        # shifting every later sample by one position.
        print('Skipping %d file(s) without a matching image/annotation' % unpaired)

    return (
        [xml_by_stem[stem] for stem in shared_stems],
        [jpg_by_stem[stem] for stem in shared_stems],
    )


xml_filepaths, jpg_filepaths = find_annotated_pairs(images_dir)
# Load every JPEG, resize it to image_dim x image_dim and scale the pixel
# values from 0..255 down to 0..1.
images = []
for filepath in jpg_filepaths:
    # The context manager releases the file handle once the pixels are read.
    with Image.open( filepath ) as source_image:
        # Force three channels so grayscale or CMYK files yield the same
        # array shape as ordinary RGB ones.
        resized_image = source_image.convert( 'RGB' ).resize( ( image_dim , image_dim ) )
    images.append( np.asarray( resized_image ) / 255 )
# Read one class name and one bounding box from each XML annotation.
# Each box is stored as [xmin, ymin, xmax, ymax], expressed as a fraction of
# the image width/height so it stays valid after the image is resized.
bboxes = []
classes = []
for filepath in xml_filepaths:
    # Close the annotation file as soon as it has been parsed.
    with open( filepath , 'rb' ) as xml_file:
        annotation = xmltodict.parse( xml_file )[ 'annotation' ]

    # xmltodict returns a list when <object> is repeated; only one box per
    # image is stored, so the first object is used.
    annotated_object = annotation[ 'object' ]
    if isinstance( annotated_object , list ):
        annotated_object = annotated_object[0]
    classes.append( annotated_object[ 'name' ] )

    # Box coordinates are given in the pixel frame of the original image, so
    # they are divided by that image's size. When the annotation carries no
    # usable <size>, fall back to image_dim (the previous behaviour).
    # NOTE(review): assumes <size> holds <width>/<height> in pixels - confirm
    # against the annotation tool that produced these files.
    size = annotation.get( 'size' ) or {}
    source_width = float( size.get( 'width' ) or 0 ) or image_dim
    source_height = float( size.get( 'height' ) or 0 ) or image_dim

    bndbox = annotated_object[ 'bndbox' ]
    # float() accepts both "12" and "12.0" coordinate strings.
    bboxes.append( [
        float( bndbox[ 'xmin' ] ) / source_width ,
        float( bndbox[ 'ymin' ] ) / source_height ,
        float( bndbox[ 'xmax' ] ) / source_width ,
        float( bndbox[ 'ymax' ] ) / source_height ,
    ] )
bboxes = np.array( bboxes )
classes = np.array( classes )
# Turn the class names into binary indicator columns.
# NOTE(review): LabelBinarizer is documented to emit a single 0/1 column when
# only two classes are present - confirm that the consumer of y.npy expects
# that width.
encoder = LabelBinarizer().fit( classes )
classes_onehot = encoder.transform( classes )

# Features are the images; each target row is the four box values followed by
# the encoded class.
X = images
Y = np.concatenate( ( bboxes , classes_onehot ) , axis=1 )
# Split the samples into a training half and a test half.
train_features , test_features , train_labels , test_labels = train_test_split(
    X , Y , test_size=0.5  # was previously 0.4
)

# np.save does not create missing directories, so make sure the target exists.
os.makedirs( output_dir , exist_ok=True )

# Write each array to its own .npy file inside output_dir.
for filename , payload in (
    ( 'x.npy' , train_features ) ,
    ( 'y.npy' , train_labels ) ,
    ( 'test_x.npy' , test_features ) ,
    ( 'test_y.npy' , test_labels ) ,
):
    np.save( os.path.join( output_dir , filename ) , payload )