import datetime
import logging
import os
import time

import cv2
import numpy as np
import torch

import detectron2.utils.comm as comm
from detectron2.data import DatasetMapper, build_detection_test_loader
from detectron2.data.datasets import coco
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.engine.hooks import HookBase
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.utils.logger import log_every_n_seconds
from detectron2.utils.visualizer import Visualizer

from data import load_data


class LossEvalHook(HookBase):
    """Periodically computes the loss on a validation set during training."""

    def __init__(self, eval_period, model, data_loader):
        self._model = model
        self._period = eval_period
        self._data_loader = data_loader

    def _do_loss_eval(self):
        # Adapted from inference_on_dataset in detectron2's evaluator.py.
        total = len(self._data_loader)
        num_warmup = min(5, total - 1)
        start_time = time.perf_counter()
        total_compute_time = 0
        losses = []
        for idx, inputs in enumerate(self._data_loader):
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0
            start_compute_time = time.perf_counter()
            # Run the forward pass inside the timed region so that
            # seconds-per-image actually measures the computation.
            loss_batch = self._get_loss(inputs)
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time
            losses.append(loss_batch)
            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    "Loss on Validation done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)
                    ),
                    n=5,
                )
        mean_loss = np.mean(losses)
        self.trainer.storage.put_scalar('validation_loss', mean_loss)
        comm.synchronize()
        return losses

    def _get_loss(self, data):
        # Mirrors how the loss is computed in the training loop; the model is
        # still in training mode here, so it returns a dict of losses.
        with torch.no_grad():  # no gradients needed for validation loss
            metrics_dict = self._model(data)
        metrics_dict = {
            k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
            for k, v in metrics_dict.items()
        }
        total_losses_reduced = sum(metrics_dict.values())
        return total_losses_reduced

    def after_step(self):
        next_iter = self.trainer.iter + 1
        is_final = next_iter == self.trainer.max_iter
        if is_final or (self._period > 0 and next_iter % self._period == 0):
            self._do_loss_eval()
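
# The hook above stores `validation_loss` in the trainer's EventStorage; with
# DefaultTrainer's default writers it is flushed to OUTPUT_DIR/metrics.json
# (one JSON object per line). Below is a minimal helper sketch for reading it
# back, assuming that default writer setup; this helper is illustrative and
# not part of the original training flow.
def load_validation_loss(metrics_path):
    import json  # local import: only needed by this illustrative helper
    with open(metrics_path) as f:
        entries = [json.loads(line) for line in f if line.strip()]
    return [(e["iteration"], e["validation_loss"])
            for e in entries if "validation_loss" in e]

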
class MyTrainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        return COCOEvaluator(dataset_name, cfg, True, output_folder)

    def build_hooks(self):
        hooks = super().build_hooks()
        # Insert before the last default hook (the PeriodicWriter), so the
        # validation loss is written out together with the other metrics.
        hooks.insert(-1, LossEvalHook(
            self.cfg.TEST.EVAL_PERIOD,
            self.model,
            build_detection_test_loader(
                self.cfg,
                self.cfg.DATASETS.TEST[0],
                # is_train=True so the mapper keeps the ground-truth
                # annotations that the loss computation needs.
                mapper=DatasetMapper(self.cfg, True),
            ),
        ))
        return hooks

    def evaluate(self):
        # These cfg edits do not affect the in-memory model (which already
        # holds the final weights after training); they configure the
        # DefaultPredictor built later in predict().
        self.cfg.MODEL.WEIGHTS = os.path.join(self.cfg.OUTPUT_DIR, "model_final.pth")
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set the testing threshold for this model
        # Evaluate AP and save results
        evaluator = COCOEvaluator("my_dataset_test", self.cfg, False, output_dir=self.cfg.OUTPUT_DIR)
        data_loader = build_detection_test_loader(self.cfg, "my_dataset_test")
        inference_on_dataset(self.model, data_loader, evaluator)

    def predict(self, data_dir, img_dir, test_metadata, output_img_dir):
        predictor = DefaultPredictor(self.cfg)
        dataset_dicts = coco.load_coco_json(data_dir + "test_dataset.json", img_dir + "test/", "my_dataset_test")
        for d in dataset_dicts:
            im = cv2.imread(d["file_name"])
            outputs = predictor(im)
            # OpenCV loads BGR; Visualizer expects RGB, hence the channel flips.
            v = Visualizer(im[:, :, ::-1], metadata=test_metadata)
            v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
            cv2.imwrite(output_img_dir + d["file_name"].split('/')[-1], v.get_image()[:, :, ::-1])
            print("saving test image: {}".format(d["file_name"]))


def count_parameters(model):
    # Count only the trainable parameters.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def train(cfg, data_dir, img_dir, multiple, output_img_dir, load_ox=False, vis=False):
    mode = 'train'
    _, _, test_metadata = load_data(mode, data_dir, img_dir, multiple)
    trainer = MyTrainer(cfg)
    trainer.resume_or_load(resume=load_ox)
    print('--------------------------count_parameters : ', count_parameters(trainer.model))
    trainer.train()
    trainer.evaluate()
    if vis:
        trainer.predict(data_dir, img_dir, test_metadata, output_img_dir)
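

# Example entry point: a minimal sketch of how `train` might be invoked.
# The model zoo config, dataset names, paths, and the `multiple` value below
# are assumptions for illustration; adjust them to the actual project setup.
if __name__ == "__main__":
    from detectron2 import model_zoo
    from detectron2.config import get_cfg

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
    cfg.DATASETS.TRAIN = ("my_dataset_train",)  # assumed to be registered by load_data
    cfg.DATASETS.TEST = ("my_dataset_test",)
    cfg.TEST.EVAL_PERIOD = 500  # how often LossEvalHook runs
    train(cfg, "data/", "images/", multiple=1, output_img_dir="output/", vis=True)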