@@ -216,20 +216,22 @@ def _map_text_to_scalar(self, label_type, value):
216
216
"""
217
217
map_dic = {'occlusion' : {'none' : 0 , 'part' : 1 , 'full' : 2 },
218
218
'action' : {'standing' : 0 , 'walking' : 1 },
219
- 'nod' : {'__undefined__' : 0 , 'nodding' : 1 },
220
219
'look' : {'not-looking' : 0 , 'looking' : 1 },
221
- 'hand_gesture' : {'__undefined__' : 0 , 'greet' : 1 , 'yield' : 2 ,
222
- 'rightofway' : 3 , 'other' : 4 },
223
- 'reaction' : {'__undefined__' : 0 , 'clear_path' : 1 , 'speed_up' : 2 ,
224
- 'slow_down' : 3 },
225
- 'cross' : {'not-crossing' : 0 , 'crossing' : 1 },
220
+ 'gesture' : {'__undefined__' : 0 , 'hand_ack' : 1 , 'hand_yield' : 2 ,
221
+ 'hand_rightofway' : 3 , 'nod' : 4 , 'other' : 5 },
222
+ 'cross' : {'not-crossing' : 0 , 'crossing' : 1 , 'crossing-irrelevant' : - 1 },
226
223
'age' : {'child' : 0 , 'young' : 1 , 'adult' : 2 , 'senior' : 3 },
227
224
'designated' : {'ND' : 0 , 'D' : 1 },
228
225
'gender' : {'n/a' : 0 , 'female' : 1 , 'male' : 2 },
229
226
'intersection' : {'midblock' : 0 , 'T' : 1 , 'T-left' : 2 , 'T-right' : 3 , 'four-way' : 4 },
230
227
'motion_direction' : {'n/a' : 0 , 'LAT' : 1 , 'LONG' : 2 },
231
228
'traffic_direction' : {'OW' : 0 , 'TW' : 1 },
232
- 'signalized' : {'n/a' : 0 , 'C' : 1 , 'S' : 2 , 'CS' : 3 }}
229
+ 'signalized' : {'n/a' : 0 , 'C' : 1 , 'S' : 2 , 'CS' : 3 },
230
+ 'vehicle' : {'car' : 0 , 'truck' : 1 , 'bus' : 2 , 'train' : 3 , 'bicycle' : 4 , 'bike' : 5 },
231
+ 'sign' : {'ped_blue' : 0 , 'ped_yellow' : 1 , 'ped_white' : 2 , 'ped_text' : 3 , 'stop_sign' : 4 ,
232
+ 'bus_stop' : 5 , 'train_stop' : 6 , 'construction' : 7 , 'other' : 8 },
233
+ 'traffic_light' : {'regular' : 0 , 'transit' : 1 , 'pedestrian' : 2 },
234
+ 'state' : {'__undefined__' : 0 , 'red' : 1 , 'yellow' : 2 , 'green' : 3 }}
233
235
234
236
return map_dic [label_type ][value ]
235
237
@@ -242,21 +244,23 @@ def _map_scalar_to_text(self, label_type, value):
242
244
"""
243
245
map_dic = {'occlusion' : {0 : 'none' , 1 : 'part' , 2 : 'full' },
244
246
'action' : {0 : 'standing' , 1 : 'walking' },
245
- 'nod' : {0 : '__undefined__' , 1 : 'nodding' },
246
247
'look' : {0 : 'not-looking' , 1 : 'looking' },
247
- 'hand_gesture' : {0 : '__undefined__' , 1 : 'greet' ,
248
- 2 : 'yield' , 3 : 'rightofway' ,
249
- 4 : 'other' },
250
- 'reaction' : {0 : '__undefined__' , 1 : 'clear_path' ,
251
- 2 : 'speed_up' , 3 : 'slow_down' },
252
- 'cross' : {0 : 'not-crossing' , 1 : 'crossing' },
248
+ 'hand_gesture' : {0 : '__undefined__' , 1 : 'hand_ack' ,
249
+ 2 : 'hand_yield' , 3 : 'hand_rightofway' ,
250
+ 4 : 'nod' , 5 : 'other' },
251
+ 'cross' : {0 : 'not-crossing' , 1 : 'crossing' , - 1 : 'crossing-irrelevant' },
253
252
'age' : {0 : 'child' , 1 : 'young' , 2 : 'adult' , 3 : 'senior' },
254
253
'designated' : {0 : 'ND' , 1 : 'D' },
255
254
'gender' : {0 : 'n/a' , 1 : 'female' , 2 : 'male' },
256
255
'intersection' : {0 : 'midblock' , 1 : 'T' , 2 : 'T-left' , 3 : 'T-right' , 4 : 'four-way' },
257
256
'motion_direction' : {0 : 'n/a' , 1 : 'LAT' , 2 : 'LONG' },
258
257
'traffic_direction' : {0 : 'OW' , 1 : 'TW' },
259
- 'signalized' : {0 : 'n/a' , 1 : 'C' , 2 : 'S' , 3 : 'CS' }}
258
+ 'signalized' : {0 : 'n/a' , 1 : 'C' , 2 : 'S' , 3 : 'CS' },
259
+ 'vehicle' : {0 : 'car' , 1 : 'truck' , 2 : 'bus' , 3 : 'train' , 4 : 'bicycle' , 5 : 'bike' },
260
+ 'sign' : {0 : 'ped_blue' , 1 : 'ped_yellow' , 2 : 'ped_white' , 3 : 'ped_text' , 4 : 'stop_sign' ,
261
+ 5 : 'bus_stop' , 6 : 'train_stop' , 7 : 'construction' , 8 : 'other' },
262
+ 'traffic_light' : {0 : 'regular' , 1 : 'transit' , 2 : 'pedestrian' },
263
+ 'state' : {0 : '__undefined__' , 1 : 'red' , 2 : 'yellow' , 3 : 'green' }}
260
264
261
265
return map_dic [label_type ][value ]
262
266
@@ -272,37 +276,64 @@ def _get_annotations(self, setid, vid):
272
276
273
277
tree = ET .parse (path_to_file )
274
278
ped_annt = 'ped_annotations'
279
+ traffic_annt = 'traffic_annotations'
275
280
276
281
annotations = {}
277
282
annotations ['num_frames' ] = int (tree .find ("./meta/task/size" ).text )
278
283
annotations ['width' ] = int (tree .find ("./meta/task/original_size/width" ).text )
279
284
annotations ['height' ] = int (tree .find ("./meta/task/original_size/height" ).text )
280
285
annotations [ped_annt ] = {}
286
+ annotations [traffic_annt ] = {}
281
287
282
- ped_tracks = tree .findall (" ./track" )
288
+ tracks = tree .findall (' ./track' )
283
289
284
- for t in ped_tracks :
290
+ for t in tracks :
285
291
boxes = t .findall ('./box' )
286
- ped_id = boxes [0 ].find ('./attribute[@name=\" id\" ]' ).text
287
- annotations [ped_annt ][ped_id ] = {'frames' : [], 'bbox' : [], 'occlusion' : []}
288
- annotations ['ped_annotations' ][ped_id ]['behavior' ] = {'hand_gesture' : [],
289
- 'look' : [],
290
- 'action' : [],
291
- 'nod' : []}
292
-
293
- for b in boxes :
294
- annotations [ped_annt ][ped_id ]['bbox' ].append (
295
- [float (b .get ('xtl' )), float (b .get ('ytl' )),
296
- float (b .get ('xbr' )), float (b .get ('ybr' ))])
297
- occ = self ._map_text_to_scalar ('occlusion' ,
298
- b .find ('./attribute[@name=\" occlusion\" ]' ).text )
299
- annotations [ped_annt ][ped_id ]['occlusion' ].append (occ )
300
- annotations [ped_annt ][ped_id ]['frames' ].append (int (b .get ('frame' )))
301
- for beh in annotations ['ped_annotations' ][ped_id ]['behavior' ].keys ():
302
- annotations [ped_annt ][ped_id ]['behavior' ][beh ].append (
303
- self ._map_text_to_scalar (beh ,
304
- b .find ('./attribute[@name=\" ' + beh + '\" ]' ).text ))
292
+ obj_label = t .get ('label' )
293
+ obj_id = boxes [0 ].find ('./attribute[@name=\" id\" ]' ).text
294
+
295
+ if obj_label == 'pedestrian' :
296
+ annotations [ped_annt ][obj_id ] = {'frames' : [], 'bbox' : [], 'occlusion' : []}
297
+ annotations [ped_annt ][obj_id ]['behavior' ] = {'gesture' : [], 'look' : [], 'action' : [], 'cross' : []}
298
+ for b in boxes :
299
+ # Exclude the annotations that are outside of the frame
300
+ if int (b .get ('outside' )) == 1 :
301
+ continue
302
+ annotations [ped_annt ][obj_id ]['bbox' ].append (
303
+ [float (b .get ('xtl' )), float (b .get ('ytl' )),
304
+ float (b .get ('xbr' )), float (b .get ('ybr' ))])
305
+ occ = self ._map_text_to_scalar ('occlusion' , b .find ('./attribute[@name=\" occlusion\" ]' ).text )
306
+ annotations [ped_annt ][obj_id ]['occlusion' ].append (occ )
307
+ annotations [ped_annt ][obj_id ]['frames' ].append (int (b .get ('frame' )))
308
+ for beh in annotations ['ped_annotations' ][obj_id ]['behavior' ]:
309
+ # Read behavior tags for each frame and add to the database
310
+ annotations [ped_annt ][obj_id ]['behavior' ][beh ].append (
311
+ self ._map_text_to_scalar (beh , b .find ('./attribute[@name=\" ' + beh + '\" ]' ).text ))
305
312
313
+ else :
314
+ obj_type = boxes [0 ].find ('./attribute[@name=\" type\" ]' )
315
+ if obj_type is not None :
316
+ obj_type = self ._map_text_to_scalar (obj_label ,
317
+ boxes [0 ].find ('./attribute[@name=\" type\" ]' ).text )
318
+
319
+ annotations [traffic_annt ][obj_id ] = {'frames' : [], 'bbox' : [], 'occlusion' : [],
320
+ 'obj_class' : obj_label ,
321
+ 'obj_type' : obj_type ,
322
+ 'state' : []}
323
+
324
+ for b in boxes :
325
+ # Exclude the annotations that are outside of the frame
326
+ if int (b .get ('outside' )) == 1 :
327
+ continue
328
+ annotations [traffic_annt ][obj_id ]['bbox' ].append (
329
+ [float (b .get ('xtl' )), float (b .get ('ytl' )),
330
+ float (b .get ('xbr' )), float (b .get ('ybr' ))])
331
+ annotations [traffic_annt ][obj_id ]['occlusion' ].append (int (b .get ('occluded' )))
332
+ annotations [traffic_annt ][obj_id ]['frames' ].append (int (b .get ('frame' )))
333
+ annotations [traffic_annt ][obj_id ]['frames' ].append (int (b .get ('frame' )))
334
+ if obj_label == 'traffic_light' :
335
+ annotations [traffic_annt ][obj_id ]['frames' ].append (self ._map_text_to_scalar ('state' ,
336
+ b .find ('./attribute[@name=\" state\" ]' ).text ))
306
337
return annotations
307
338
308
339
def _get_ped_attributes (self , setid , vid ):
@@ -361,16 +392,23 @@ def generate_database(self):
361
392
'num_frames': int
362
393
'width': int
363
394
'height': int
395
+ 'traffic_annotations'(str): {
396
+ 'obj_id'(str): {
397
+ 'frames': list(int)
398
+ 'occlusion': list(int)
399
+ 'bbox': list([x1, y1, x2, y2]) (float)
400
+ 'obj_class': str,
401
+ 'obj_type': int or None, # scalar type code; None when the track has no 'type' attribute (set only for traffic lights, vehicles, signs)
402
+ 'state': list(int) # only for traffic lights
364
403
'ped_annotations'(str): {
365
404
'ped_id'(str): {
366
405
'frames': list(int)
367
406
'occlusion': list(int)
368
407
'bbox': list([x1, y1, x2, y2]) (float)
369
408
'behavior'(str): {
370
409
'action': list(int)
371
- 'reaction': list(int)
372
- 'nod': list(int)
373
- 'hand_gesture': list(int)
410
+ 'gesture': list(int)
411
+ 'cross': list(int)
374
412
'look': list(int)
375
413
'attributes'(str): {
376
414
'age': int
@@ -422,7 +460,6 @@ def generate_database(self):
422
460
video_ids = [v .split ('_annt.xml' )[0 ] for v in sorted (listdir (join (self ._annotation_path ,
423
461
setid ))) if v .endswith ("annt.xml" )]
424
462
database [setid ] = {}
425
-
426
463
for vid in video_ids :
427
464
print ('Getting annotations for %s, %s' % (setid , vid ))
428
465
database [setid ][vid ] = self ._get_annotations (setid , vid )
@@ -856,34 +893,9 @@ def _get_crossing(self, image_set, annotations, **params):
856
893
num_pedestrians += 1
857
894
858
895
frame_ids = pid_annots [pid ]['frames' ]
859
- # avoid bounding boxes with width 1 or less
860
- correction_offset = 3
861
-
862
- event_frame = pid_annots [pid ]['attributes' ]['crossing_point' ] - correction_offset
863
-
864
- # # if pid_annots[pid]['attributes']['crossing_point'] < pid_annots[pid]['attributes']['critical_point']:
865
- # if pid_annots[pid]['attributes']['crossing'] < 1 and abs(event_frame - frame_ids[-1]) > 20:
866
- #
867
- # # and \
868
- # # pid_annots[pid]['attributes']['crossing'] < 1:
869
- # print(pid)
870
- # # count += 1
871
- # # print(event_frame)
872
- # # print(frame_ids[-1])
873
- # # print(abs(event_frame - frame_ids[-1]))
874
- # elif pid_annots[pid]['attributes']['crossing'] < 1 and \
875
- # abs(event_frame - frame_ids[-1]) > 20 :
876
- # continue
877
- # # print(pid)
878
- try :
879
- end_idx = frame_ids .index (event_frame )
880
- except :
881
- continue
882
- # print(pid)
883
- # print(event_frame)
884
- # print(frame_ids[0])
885
- # print(frame_ids[-1])
896
+ event_frame = pid_annots [pid ]['attributes' ]['crossing_point' ]
886
897
898
+ end_idx = frame_ids .index (event_frame )
887
899
boxes = pid_annots [pid ]['bbox' ][:end_idx + 1 ]
888
900
frame_ids = frame_ids [: end_idx + 1 ]
889
901
images = [self ._get_image_path (sid , vid , f ) for f in frame_ids ]
0 commit comments