-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathdata_loader.py
executable file
·115 lines (96 loc) · 4.21 KB
/
data_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import torch
from torchvision import transforms
from lib.config import cfg
from datasets.coco_dataset import CocoDataset
import samplers.distributed
import numpy as np
def sample_collate(batch, *, num_grid_feats=12 * 12):
    """Collate training samples into one batch.

    Args:
        batch: Sequence of 5-tuples ``(indices, input_seq, target_seq,
            gv_feat, att_feats)``. ``input_seq``, ``target_seq`` and
            ``gv_feat`` must be NumPy arrays (they are converted with
            ``torch.from_numpy``). ``att_feats`` must be a ``torch.Tensor``
            of identical shape for every sample (they are combined with
            ``torch.stack``).
        num_grid_feats: Number of columns of the all-ones attention mask.
            Defaults to ``12 * 12``, the grid size the original code
            hard-coded for 384x384 input images. Override it (for example
            with ``functools.partial``) when the grid size differs.

    Returns:
        A 6-tuple ``(indices, input_seq, target_seq, gv_feat, att_feats,
        att_mask)`` where ``indices`` is a flattened NumPy array, the next
        three entries are tensors concatenated along dim 0, ``att_feats`` is
        the per-sample tensors stacked along a new leading dim, and
        ``att_mask`` is a ``(batch_size, num_grid_feats)`` tensor of ones.
    """
    indices, input_seq, target_seq, gv_feat, att_feats = zip(*batch)

    indices = np.stack(indices, axis=0).reshape(-1)
    input_seq = torch.cat([torch.from_numpy(b) for b in input_seq], 0)
    target_seq = torch.cat([torch.from_numpy(b) for b in target_seq], 0)
    gv_feat = torch.cat([torch.from_numpy(b) for b in gv_feat], 0)

    # NOTE: if pre-extracted features of shape [L, D] with a varying L (e.g.
    # object features) were loaded instead of raw images, each sample would
    # have to be zero-padded to the largest L in the batch and att_mask built
    # with ones only over the valid positions. Raw images all share one
    # shape, so they are simply stacked.
    att_feats = torch.stack(att_feats, 0)  # [B, 3, 384, 384] per the original note

    # The mask only has to match the final grid-feature count; grid features
    # do not strictly need a mask, so it is all ones.
    att_mask = torch.ones(att_feats.size(0), num_grid_feats)

    return indices, input_seq, target_seq, gv_feat, att_feats, att_mask
def sample_collate_val(batch, *, num_grid_feats=12 * 12):
    """Collate validation/test samples into one batch.

    Args:
        batch: Sequence of 3-tuples ``(indices, gv_feat, att_feats)``.
            ``gv_feat`` must be a NumPy array (it is converted with
            ``torch.from_numpy``). ``att_feats`` must be a ``torch.Tensor``
            of identical shape for every sample (they are combined with
            ``torch.stack``).
        num_grid_feats: Number of columns of the all-ones attention mask.
            Defaults to ``12 * 12``, the grid size the original code
            hard-coded for 384x384 input images. Override it (for example
            with ``functools.partial``) when the grid size differs.

    Returns:
        A 4-tuple ``(indices, gv_feat, att_feats, att_mask)`` where
        ``indices`` is a flattened NumPy array, ``gv_feat`` is a tensor
        concatenated along dim 0, ``att_feats`` is the per-sample tensors
        stacked along a new leading dim, and ``att_mask`` is a
        ``(batch_size, num_grid_feats)`` tensor of ones.
    """
    indices, gv_feat, att_feats = zip(*batch)

    indices = np.stack(indices, axis=0).reshape(-1)
    gv_feat = torch.cat([torch.from_numpy(b) for b in gv_feat], 0)

    # NOTE: if pre-extracted features of shape [L, D] with a varying L (e.g.
    # object features) were loaded instead of raw images, each sample would
    # have to be zero-padded to the largest L in the batch and att_mask built
    # with ones only over the valid positions. Raw images all share one
    # shape, so they are simply stacked.
    att_feats = torch.stack(att_feats, 0)  # [B, 3, 384, 384] per the original note

    # The mask only has to match the final grid-feature count; grid features
    # do not strictly need a mask, so it is all ones.
    att_mask = torch.ones(att_feats.size(0), num_grid_feats)

    return indices, gv_feat, att_feats, att_mask
def load_train(distributed, epoch, coco_set):
    """Build the training ``DataLoader`` for ``coco_set``.

    Args:
        distributed: When truthy, batches are drawn through the project's
            ``samplers.distributed.DistributedSampler``.
        epoch: Forwarded to the distributed sampler as ``epoch``; unused
            when ``distributed`` is falsy.
        coco_set: Dataset to iterate over.

    Returns:
        A ``torch.utils.data.DataLoader`` that batches samples with
        ``sample_collate`` and takes its remaining settings from ``cfg``.
    """
    if distributed:
        sampler = samplers.distributed.DistributedSampler(coco_set, epoch=epoch)
        # Ordering is delegated to the sampler, so DataLoader-level
        # shuffling is switched off (the two options are mutually exclusive).
        shuffle = False
    else:
        sampler = None
        shuffle = cfg.DATA_LOADER.SHUFFLE

    return torch.utils.data.DataLoader(
        coco_set,
        batch_size=cfg.TRAIN.BATCH_SIZE,
        shuffle=shuffle,
        num_workers=cfg.DATA_LOADER.NUM_WORKERS,
        drop_last=cfg.DATA_LOADER.DROP_LAST,
        pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
        sampler=sampler,
        collate_fn=sample_collate,
    )
def load_val(image_ids_path, gv_feat_path, att_feats_folder):
    """Build the evaluation ``DataLoader``.

    A ``CocoDataset`` is created without caption sequences (``input_seq``
    and ``target_seq`` are ``None``) and with ``seq_per_img=1``, then wrapped
    in a loader that neither shuffles nor drops the last incomplete batch.

    Args:
        image_ids_path: Passed to ``CocoDataset`` as ``image_ids_path``.
        gv_feat_path: Passed to ``CocoDataset`` as ``gv_feat_path``.
        att_feats_folder: Passed to ``CocoDataset`` as ``att_feats_folder``.

    Returns:
        A ``torch.utils.data.DataLoader`` that batches samples with
        ``sample_collate_val`` using ``cfg.TEST.BATCH_SIZE``.
    """
    val_set = CocoDataset(
        image_ids_path=image_ids_path,
        input_seq=None,
        target_seq=None,
        gv_feat_path=gv_feat_path,
        att_feats_folder=att_feats_folder,
        seq_per_img=1,
        max_feat_num=cfg.DATA_LOADER.MAX_FEAT,
    )

    # Evaluation keeps the dataset order and every sample.
    return torch.utils.data.DataLoader(
        val_set,
        batch_size=cfg.TEST.BATCH_SIZE,
        shuffle=False,
        num_workers=cfg.DATA_LOADER.NUM_WORKERS,
        drop_last=False,
        pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
        collate_fn=sample_collate_val,
    )