diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..74a733e --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,23 @@ +name: lint + +on: [push, pull_request] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install pre-commit hook + run: | + pip install pre-commit + pre-commit install + - name: Linting + run: pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..d9a0479 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,51 @@ +exclude: ^(tests/data/|requirements.txt) +repos: + - repo: https://gitee.com/openmmlab/mirrors-flake8 + rev: 5.0.4 + hooks: + - id: flake8 + args: ["--max-line-length=119"] + - repo: https://gitee.com/openmmlab/mirrors-isort + rev: 5.11.5 + hooks: + - id: isort + - repo: https://gitee.com/openmmlab/mirrors-yapf + rev: v0.32.0 + hooks: + - id: yapf + - repo: https://gitee.com/openmmlab/mirrors-pre-commit-hooks + rev: v4.3.0 + hooks: + - id: trailing-whitespace + - id: check-yaml + - id: end-of-file-fixer + - id: requirements-txt-fixer + - id: double-quote-string-fixer + - id: check-merge-conflict + - id: fix-encoding-pragma + args: ["--remove"] + - id: mixed-line-ending + args: ["--fix=lf"] + - repo: https://gitee.com/openmmlab/mirrors-codespell + rev: v2.2.1 + hooks: + - id: codespell + - repo: https://gitee.com/openmmlab/mirrors-mdformat + rev: 0.7.9 + hooks: + - id: mdformat + args: ["--number"] + additional_dependencies: + - mdformat-openmmlab + - mdformat_frontmatter + - linkify-it-py + - repo: https://gitee.com/openmmlab/mirrors-docformatter + rev: v1.3.1 + hooks: + - id: docformatter + args: ["--in-place", "--wrap-descriptions", "119"] + - repo: https://gitee.com/openmmlab/mirrors-pyupgrade + rev: v3.0.0 + hooks: + - id: pyupgrade + args: ["--py36-plus"] diff --git a/README.md b/README.md index dc6af16..c2fb536 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DynamicDet [[arXiv]](https://arxiv.org/abs/2304.05552) +# DynamicDet [\[arXiv\]](https://arxiv.org/abs/2304.05552) This repo contains the official implementation of [**"DynamicDet: A Unified Dynamic Architecture for Object Detection"**](https://arxiv.org/abs/2304.05552). 
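For reference, the CI job above only wraps pre-commit, so the same checks can be reproduced locally before pushing. A minimal sketch using the commands from `lint.yml` and the hooks declared in `.pre-commit-config.yaml`:

```bash
# One-time setup: install pre-commit and register it as a git pre-commit hook
pip install pre-commit
pre-commit install

# Run every configured hook (flake8, isort, yapf, codespell, mdformat,
# docformatter, pyupgrade, ...) against the whole repository,
# exactly what the CI "Linting" step does
pre-commit run --all-files
```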
@@ -12,23 +12,22 @@ This repo contains the official implementation of [**"DynamicDet: A Unified Dyna MS COCO -| Model | Easy / Hard | Size | FLOPs | FPS | APval | APtest | -| :----------------------------------------------------------- | :---------: | :--: | :----: | :--: | :--------------: | :---------------: | -| [**Dy-YOLOv7**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7.pt) | 90% / 10% | 640 | 112.4G | 110 | 51.4% | 52.1% | -| | 50% / 50% | 640 | 143.2G | 96 | 52.7% | 53.3% | -| | 10% / 90% | 640 | 174.0G | 85 | 53.3% | 53.8% | -| | 0% / 100% | 640 | 181.7G | 83 | 53.5% | 53.9% | -| | | | | | | | -| [**Dy-YOLOv7-X**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7x.pt) | 90% / 10% | 640 | 201.7G | 98 | 53.0% | 53.3% | -| | 50% / 50% | 640 | 248.9G | 78 | 54.2% | 54.4% | -| | 10% / 90% | 640 | 296.1G | 65 | 54.7% | 55.0% | -| | 0% / 100% | 640 | 307.9G | 64 | 54.8% | 55.0% | -| | | | | | | | -| [**Dy-YOLOv7-W6**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7-w6.pt) | 90% / 10% | 1280 | 384.2G | 74 | 54.9% | 55.2% | -| | 50% / 50% | 1280 | 480.8G | 58 | 55.9% | 56.1% | -| | 10% / 90% | 1280 | 577.4G | 48 | 56.4% | 56.7% | -| | 0% / 100% | 1280 | 601.6G | 46 | 56.5% | 56.8% | - +| Model | Easy / Hard | Size | FLOPs | FPS | APval | APtest | +| :----------------------------------------------------------------------------------------------- | :---------: | :--: | :----: | :-: | :--------------: | :---------------: | +| [**Dy-YOLOv7**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7.pt) | 90% / 10% | 640 | 112.4G | 110 | 51.4% | 52.1% | +| | 50% / 50% | 640 | 143.2G | 96 | 52.7% | 53.3% | +| | 10% / 90% | 640 | 174.0G | 85 | 53.3% | 53.8% | +| | 0% / 100% | 640 | 181.7G | 83 | 53.5% | 53.9% | +| | | | | | | | +| [**Dy-YOLOv7-X**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7x.pt) | 90% / 10% | 640 | 201.7G | 98 | 53.0% | 53.3% | +| | 50% / 50% | 640 | 248.9G | 78 | 54.2% | 54.4% | +| | 10% / 90% | 640 | 296.1G | 65 | 54.7% | 55.0% | +| | 0% / 100% | 640 | 307.9G | 64 | 54.8% | 55.0% | +| | | | | | | | +| [**Dy-YOLOv7-W6**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7-w6.pt) | 90% / 10% | 1280 | 384.2G | 74 | 54.9% | 55.2% | +| | 50% / 50% | 1280 | 480.8G | 58 | 55.9% | 56.1% | +| | 10% / 90% | 1280 | 577.4G | 48 | 56.4% | 56.7% | +| | 0% / 100% | 1280 | 601.6G | 46 | 56.5% | 56.8% |
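Each Easy / Hard operating point in the table maps to one dynamic threshold reported by `get_dynamic_thres.py`. As a usage sketch (the commands are the Quick Start ones below; `<DY_THRES>` is a placeholder for the threshold printed for the desired split):

```bash
# Print candidate thresholds, one per 10% step of the easy/hard ratio
python get_dynamic_thres.py --device 0 --batch-size 1 --img-size 640 --cfg cfg/dy-yolov7-step2.yaml --weight weights/dy-yolov7.pt --data data/coco.yaml --task val

# Evaluate at a chosen operating point, e.g. the "First: 90% / Second: 10%" row
python test.py --img-size 640 --batch-size 1 --conf 0.001 --iou 0.65 --device 0 --cfg cfg/dy-yolov7-step2.yaml --weight weights/dy-yolov7.pt --data data/coco.yaml --dy-thres <DY_THRES>
```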
Table Notes @@ -39,7 +38,6 @@ MS COCO
- ## Quick Start ### Installation @@ -50,7 +48,6 @@ conda install pytorch=1.11 cudatoolkit=11.3 torchvision -c pytorch pip install -r requirements.txt ``` - ### Data preparation Download MS COCO dataset images ([train](http://images.cocodataset.org/zips/train2017.zip), [val](http://images.cocodataset.org/zips/val2017.zip), [test](http://images.cocodataset.org/zips/test2017.zip)) and [labels](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/coco2017labels-segments.zip). @@ -95,21 +92,21 @@ python train_step2.py --workers 4 --device 0 --batch-size 1 --epochs 2 --img 640 ### Getting the dynamic thresholds for variable-speed inference - ```bash +```bash python get_dynamic_thres.py --device 0 --batch-size 1 --img-size 640 --cfg cfg/dy-yolov7-step2.yaml --weight weights/dy-yolov7.pt --data data/coco.yaml --task val - ``` +``` ### Testing - ```bash +```bash python test.py --img-size 640 --batch-size 1 --conf 0.001 --iou 0.65 --device 0 --cfg cfg/dy-yolov7-step2.yaml --weight weights/dy-yolov7.pt --data data/coco.yaml --dy-thres <DY_THRES> - ``` +``` ### Inference - ```bash +```bash python detect.py --cfg cfg/dy-yolov7-step2.yaml --weight weights/dy-yolov7.pt --num-classes 80 --source <IMAGE/VIDEO> --device 0 --dy-thres <DY_THRES> - ``` +``` ## Citation diff --git a/detect.py b/detect.py index 49eba4b..49ec7e1 100644 --- a/detect.py +++ b/detect.py @@ -1,34 +1,40 @@ import argparse -import time import logging +import time from pathlib import Path import cv2 import torch -import torch.nn as nn import torch.backends.cudnn as cudnn +import torch.nn as nn from numpy import random from models.yolo import Model -from utils.datasets import LoadStreams, LoadImages -from utils.general import check_img_size, check_imshow, non_max_suppression, \ - scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path +from utils.datasets import LoadImages, LoadStreams +from utils.general import (check_img_size, check_imshow, increment_path, + non_max_suppression, scale_coords, set_logging, + xyxy2xywh) from utils.plots import plot_one_box -from utils.torch_utils import select_device, time_synchronized, intersect_dicts - +from utils.torch_utils import intersect_dicts, select_device, time_synchronized logger = logging.getLogger(__name__) + def detect(save_img=False): source, cfg, weight, view_img, save_txt, nc, imgsz = opt.source, opt.cfg, opt.weight, \ opt.view_img, opt.save_txt, opt.num_classes, opt.img_size - save_img = not opt.nosave and not source.endswith('.txt') # save inference images - webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) + save_img = not opt.nosave and not source.endswith( '.txt') # save inference images + webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) # Directories - save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run - (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + save_dir = Path( increment_path(Path(opt.project) / opt.name, + exist_ok=opt.exist_ok)) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir( + parents=True, exist_ok=True) # make dir # Initialize set_logging() @@ -41,7 +47,8 @@ def detect(save_img=False): state_dict = intersect_dicts(state_dict, model.state_dict()) # intersect model.load_state_dict(state_dict, strict=False) # load model.to(device) - logger.info('Transferred %g/%g items from %s' %
(len(state_dict), len(model.state_dict()), weight)) # report + logger.info('Transferred %g/%g items from %s' % + (len(state_dict), len(model.state_dict()), weight)) # report for p in model.parameters(): p.requires_grad = False model.float().fuse().eval() @@ -77,7 +84,9 @@ def detect(save_img=False): # Run inference if device.type != 'cpu': - model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once + model( + torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( + next(model.parameters()))) # run once old_img_w = old_img_h = imgsz old_img_b = 1 @@ -89,8 +98,10 @@ def detect(save_img=False): if img.ndimension() == 3: img = img.unsqueeze(0) - # Warmup - if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]): + # warm up + if device.type != 'cpu' and (old_img_b != img.shape[0] + or old_img_h != img.shape[2] + or old_img_w != img.shape[3]): old_img_b = img.shape[0] old_img_h = img.shape[2] old_img_w = img.shape[3] @@ -99,28 +110,37 @@ def detect(save_img=False): # Inference t1 = time_synchronized() - with torch.no_grad(): # Calculating gradients would cause a GPU memory leak + with torch.no_grad( + ): # Calculating gradients would cause a GPU memory leak pred = model(img, augment=opt.augment)[0] t2 = time_synchronized() # Apply NMS - pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) + pred = non_max_suppression(pred, + opt.conf_thres, + opt.iou_thres, + classes=opt.classes, + agnostic=opt.agnostic_nms) t3 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 - p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count + p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( + ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg - txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt - gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh + txt_path = str(save_dir / 'labels' / p.stem) + ( + '' if dataset.mode == 'image' else f'_{frame}') # img.txt + gn = torch.tensor(im0.shape)[[1, 0, 1, + 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size - det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() + det[:, :4] = scale_coords(img.shape[2:], det[:, :4], + im0.shape).round() # Print results for c in det[:, -1].unique(): @@ -130,17 +150,25 @@ def detect(save_img=False): # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / + gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if opt.save_conf else ( + cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or view_img: # Add bbox to image label = f'{names[int(cls)]} {conf:.2f}' - plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=1) + plot_one_box(xyxy, + im0, + label=label, + color=colors[int(cls)], + line_thickness=1) # Print time (inference + NMS) - print(f'{s}Done. 
({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS') + print( + f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS' + ) # Stream results if view_img: @@ -151,12 +179,14 @@ def detect(save_img=False): if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) - print(f" The image with the result is saved in: {save_path}") + print( + f' The image with the result is saved in: {save_path}') else: # 'video' or 'stream' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): - vid_writer.release() # release previous video writer + vid_writer.release( + ) # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) @@ -164,12 +194,14 @@ def detect(save_img=False): else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path += '.mp4' - vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, + (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' - #print(f"Results saved to {save_dir}{s}") + # print(f"Results saved to {save_dir}{s}") print(f'Done. ({time.time() - t0:.3f}s)') @@ -177,24 +209,68 @@ def detect(save_img=False): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--weight', type=str, default='', help='model.pt path(s)') - parser.add_argument('--num-classes', type=int, default=80, help='number of classes') - parser.add_argument('--source', type=str, default='inference/images', help='source') # file/folder, 0 for webcam - parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') - parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold') - parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS') - parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') - parser.add_argument('--view-img', action='store_true', help='display results') - parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') - parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') - parser.add_argument('--nosave', action='store_true', help='do not save images/videos') - parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') - parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') - parser.add_argument('--augment', action='store_true', help='augmented inference') - parser.add_argument('--project', default='runs/detect', help='save results to project/name') - parser.add_argument('--name', default='exp', help='save results to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') - parser.add_argument('--dy-thres', type=float, default=0.5, help='dynamic thres') + parser.add_argument('--weight', + type=str, + default='', + help='model.pt path(s)') + parser.add_argument('--num-classes', + type=int, + default=80, + help='number of classes') + parser.add_argument('--source', + type=str, + default='inference/images', + help='source') # file/folder, 0 for webcam + parser.add_argument('--img-size', + type=int, + default=640, + help='inference size (pixels)') + parser.add_argument('--conf-thres', + type=float, + default=0.25, + help='object confidence threshold') + parser.add_argument('--iou-thres', + type=float, + default=0.45, + help='IOU threshold for NMS') + parser.add_argument('--device', + default='', + help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--view-img', + action='store_true', + help='display results') + parser.add_argument('--save-txt', + action='store_true', + help='save results to *.txt') + parser.add_argument('--save-conf', + action='store_true', + help='save confidences in --save-txt labels') + parser.add_argument('--nosave', + action='store_true', + help='do not save images/videos') + parser.add_argument('--classes', + nargs='+', + type=int, + help='filter by class: --class 0, or --class 0 2 3') + parser.add_argument('--agnostic-nms', + action='store_true', + help='class-agnostic NMS') + parser.add_argument('--augment', + action='store_true', + help='augmented inference') + parser.add_argument('--project', + default='runs/detect', + help='save results to project/name') + parser.add_argument('--name', + default='exp', + help='save results to project/name') + parser.add_argument('--exist-ok', + action='store_true', + help='existing project/name ok, do not increment') + parser.add_argument('--dy-thres', + type=float, + default=0.5, + help='dynamic thres') opt = parser.parse_args() print(opt) diff --git a/get_dynamic_thres.py b/get_dynamic_thres.py index b8f445b..29e4a79 100644 --- a/get_dynamic_thres.py +++ b/get_dynamic_thres.py @@ -1,23 +1,21 @@ import argparse -import os import logging -from pathlib import Path -from threading import Thread -import yaml -from tqdm import tqdm import numpy as np import torch import torch.nn as nn +import yaml +from tqdm import tqdm from models.yolo import Model from utils.datasets import create_dataloader -from utils.general import check_dataset, check_file, check_img_size, set_logging, colorstr +from utils.general import (check_dataset, check_file, check_img_size, colorstr, + set_logging) from utils.torch_utils import select_device - logger = 
logging.getLogger(__name__) + def get_thres(data, cfg=None, weight=None, @@ -28,20 +26,22 @@ def get_thres(data, set_logging() device = select_device(opt.device, batch_size=batch_size) if isinstance(data, str): - is_coco = data.endswith('coco.yaml') + # is_coco = data.endswith('coco.yaml') with open(data) as f: data = yaml.load(f, Loader=yaml.SafeLoader) check_dataset(data) # check nc = int(data['nc']) # number of classes - iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 - niou = iouv.numel() + # iouv = torch.linspace(0.5, 0.95, + # 10).to(device) # iou vector for mAP@0.5:0.95 + # niou = iouv.numel() # Load model model = Model(cfg, ch=3, nc=nc) # create state_dict = torch.load(weight, map_location='cpu')['model'] model.load_state_dict(state_dict, strict=True) # load model.to(device) - logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weight)) # report + logger.info('Transferred %g/%g items from %s' % + (len(state_dict), len(model.state_dict()), weight)) # report for p in model.parameters(): p.requires_grad = False model.float().fuse().eval() @@ -63,10 +63,19 @@ def get_thres(data, # Dataloader if device.type != 'cpu': - model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once - task = opt.task if opt.task in ('train', 'val', 'test') else 'val' # path to train/val/test images - dataloader = create_dataloader(data[task], imgsz, batch_size, gs, opt, pad=0.5, rect=True, - prefix=colorstr(f'{task}: '))[0] + model( + torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( + next(model.parameters()))) # run once + task = opt.task if opt.task in ( + 'train', 'val', 'test') else 'val' # path to train/val/test images + dataloader = create_dataloader(data[task], + imgsz, + batch_size, + gs, + opt, + pad=0.5, + rect=True, + prefix=colorstr(f'{task}: '))[0] score_list = [] for batch_i, (img, _, _, _) in enumerate(tqdm(dataloader)): @@ -75,7 +84,8 @@ def get_thres(data, img /= 255.0 # 0 - 255 to 0.0 - 1.0 with torch.no_grad(): # Run model - cur_score = model(img, augment=augment) # inference and training outputs + cur_score = model( + img, augment=augment) # inference and training outputs score_list.append(cur_score.item()) thres = ['0'] @@ -88,28 +98,50 @@ def get_thres(data, if __name__ == '__main__': parser = argparse.ArgumentParser(prog='test.py') parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--weight', type=str, default='', help='model.pt path(s)') - parser.add_argument('--data', type=str, default='data/coco.yaml', help='*.data path') - parser.add_argument('--batch-size', type=int, default=1, help='size of each image batch') - parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') + parser.add_argument('--weight', + type=str, + default='', + help='model.pt path(s)') + parser.add_argument('--data', + type=str, + default='data/coco.yaml', + help='*.data path') + parser.add_argument('--batch-size', + type=int, + default=1, + help='size of each image batch') + parser.add_argument('--img-size', + type=int, + default=640, + help='inference size (pixels)') parser.add_argument('--task', default='val', help='train, val, test') - parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') - parser.add_argument('--augment', action='store_true', help='augmented inference') - parser.add_argument('--project', default='runs/test', help='save to project/name') + parser.add_argument('--device', + default='', + help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--augment', + action='store_true', + help='augmented inference') + parser.add_argument('--project', + default='runs/test', + help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--exist-ok', + action='store_true', + help='existing project/name ok, do not increment') opt = parser.parse_args() opt.single_cls = False opt.data = check_file(opt.data) # check file print(opt) if opt.task in ('train', 'val', 'test'): # run normally - thres = get_thres(opt.data, opt.cfg, opt.weight, opt.batch_size, opt.img_size, opt.augment) + thres = get_thres(opt.data, opt.cfg, opt.weight, opt.batch_size, + opt.img_size, opt.augment) print() print('***************************************************') print(' '.join(thres)) for idx, thr in enumerate(thres): - print('First: {}%\tSecond: {}%\tThreshold: {}'.format(100 - idx * 10, idx * 10, thr)) + print('First: {}%\tSecond: {}%\tThreshold: {}'.format( + 100 - idx * 10, idx * 10, thr)) print('***************************************************') else: raise NotImplementedError diff --git a/hyp/hyp.finetune.dynamic.adam.yaml b/hyp/hyp.finetune.dynamic.adam.yaml index 56c175a..d57c51d 100644 --- a/hyp/hyp.finetune.dynamic.adam.yaml +++ b/hyp/hyp.finetune.dynamic.adam.yaml @@ -2,9 +2,9 @@ lr0: 0.00001 # initial learning rate lrf: 1 # final OneCycleLR learning rate (lr0 * lrf) momentum: 0.937 # SGD momentum/Adam beta1 weight_decay: 0.005 # optimizer weight decay 5e-4 -warmup_epochs: 0.01 # warmup epochs (fractions ok) -warmup_momentum: 0.8 # warmup initial momentum -warmup_bias_lr: 0.01 # warmup initial bias lr +warmup_epochs: 0.01 # warm up epochs (fractions ok) +warmup_momentum: 0.8 # warm up initial momentum +warmup_bias_lr: 0.01 # warm up initial bias lr box: 0.05 # box loss gain cls: 0.3 # cls loss gain cls_pw: 1.0 # cls BCELoss positive_weight diff --git a/hyp/hyp.scratch.p5.yaml b/hyp/hyp.scratch.p5.yaml index d046ae4..de2246e 100644 --- a/hyp/hyp.scratch.p5.yaml +++ b/hyp/hyp.scratch.p5.yaml @@ -2,9 +2,9 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) momentum: 0.937 # SGD momentum/Adam beta1 weight_decay: 0.0005 # optimizer weight decay 5e-4 -warmup_epochs: 3.0 # warmup epochs (fractions ok) -warmup_momentum: 0.8 # warmup initial momentum -warmup_bias_lr: 0.1 # warmup initial bias lr +warmup_epochs: 3.0 # warm up epochs (fractions ok) +warmup_momentum: 0.8 # warm up initial momentum +warmup_bias_lr: 0.1 # warm up initial bias lr box: 0.05 # box loss gain cls: 0.3 # cls loss gain cls_pw: 1.0 # cls BCELoss positive_weight diff --git a/hyp/hyp.scratch.p6.yaml b/hyp/hyp.scratch.p6.yaml index bc2c5ca..ffd067a 100644 --- a/hyp/hyp.scratch.p6.yaml +++ b/hyp/hyp.scratch.p6.yaml @@ -2,9 +2,9 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) momentum: 0.937 # SGD momentum/Adam beta1 weight_decay: 0.0005 # optimizer weight decay 5e-4 -warmup_epochs: 3.0 # warmup epochs (fractions ok) -warmup_momentum: 0.8 # warmup initial momentum -warmup_bias_lr: 0.1 # warmup 
initial bias lr +warmup_epochs: 3.0 # warm up epochs (fractions ok) +warmup_momentum: 0.8 # warm up initial momentum +warmup_bias_lr: 0.1 # warm up initial bias lr box: 0.05 # box loss gain cls: 0.3 # cls loss gain cls_pw: 1.0 # cls BCELoss positive_weight diff --git a/models/__init__.py b/models/__init__.py index 84952a8..a6131c1 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -1 +1 @@ -# init \ No newline at end of file +# init diff --git a/models/common.py b/models/common.py index 7b370e4..d405617 100644 --- a/models/common.py +++ b/models/common.py @@ -6,8 +6,8 @@ from utils.general import non_max_suppression +# basic -##### basic #### def autopad(k, p=None): # kernel, padding # Pad to 'same' @@ -17,8 +17,9 @@ def autopad(k, p=None): # kernel, padding class MP(nn.Module): + def __init__(self, k=2): - super(MP, self).__init__() + super().__init__() self.m = nn.MaxPool2d(kernel_size=k, stride=k) def forward(self, x): @@ -26,16 +27,21 @@ def forward(self, x): class ReOrg(nn.Module): + def __init__(self): - super(ReOrg, self).__init__() + super().__init__() def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) - return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) + return torch.cat([ + x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], + x[..., 1::2, 1::2] + ], 1) class Concat(nn.Module): + def __init__(self, dimension=1): - super(Concat, self).__init__() + super().__init__() self.d = dimension def forward(self, x): @@ -43,21 +49,36 @@ def forward(self, x): class Shortcut(nn.Module): + def __init__(self, dimension=0): - super(Shortcut, self).__init__() + super().__init__() self.d = dimension def forward(self, x): - return x[0]+x[1] + return x[0] + x[1] class Conv(nn.Module): # Standard convolution - def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups - super(Conv, self).__init__() - self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) + def __init__(self, + c1, + c2, + k=1, + s=1, + p=None, + g=1, + act=True): # ch_in, ch_out, kernel, stride, padding, groups + super().__init__() + self.conv = nn.Conv2d(c1, + c2, + k, + s, + autopad(k, p), + groups=g, + bias=False) self.bn = nn.BatchNorm2d(c2) - self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + self.act = nn.SiLU() if act is True else ( + act if isinstance(act, nn.Module) else nn.Identity()) def forward(self, x): return self.act(self.bn(self.conv(x))) @@ -68,29 +89,36 @@ def fuseforward(self, x): class ConvCheckpoint(nn.Module): # Standard convolution - def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups - super(ConvCheckpoint, self).__init__() + def __init__(self, + c1, + c2, + k=1, + s=1, + p=None, + g=1, + act=True): # ch_in, ch_out, kernel, stride, padding, groups + super().__init__() self.conv = Conv(c1, c2, k, s, p, g, act) def forward(self, x): x = checkpoint.checkpoint(self.conv, x) return x -##### end of basic ##### +# cspnet -##### cspnet ##### class SPPCSPC(nn.Module): # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)): - super(SPPCSPC, self).__init__() + super().__init__() c_ = int(2 * c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c1, c_, 1, 1) self.cv3 = Conv(c_, c_, 3, 1) self.cv4 = Conv(c_, c_, 1, 1) - self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x 
// 2) for x in k]) + self.m = nn.ModuleList( + [nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) self.cv5 = Conv(4 * c_, c_, 1, 1) self.cv6 = Conv(c_, c_, 3, 1) self.cv7 = Conv(2 * c_, c2, 1, 1) @@ -101,14 +129,14 @@ def forward(self, x): y2 = self.cv2(x) return self.cv7(torch.cat((y1, y2), dim=1)) -##### end of cspnet ##### +# yolor -##### yolor ##### class ImplicitA(nn.Module): + def __init__(self, channel, mean=0., std=.02): - super(ImplicitA, self).__init__() + super().__init__() self.channel = channel self.mean = mean self.std = std @@ -120,8 +148,9 @@ def forward(self, x): class ImplicitM(nn.Module): + def __init__(self, channel, mean=1., std=.02): - super(ImplicitM, self).__init__() + super().__init__() self.channel = channel self.mean = mean self.std = std @@ -131,17 +160,16 @@ def __init__(self, channel, mean=1., std=.02): def forward(self, x): return self.implicit * x -##### end of yolor ##### +# repvgg -##### repvgg ##### class RepConv(nn.Module): # Represented convolution # https://arxiv.org/abs/2101.03697 def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, deploy=False): - super(RepConv, self).__init__() + super().__init__() self.deploy = deploy self.groups = g @@ -153,13 +181,21 @@ def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, deploy=False): padding_11 = autopad(k, p) - k // 2 - self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + self.act = nn.SiLU() if act is True else ( + act if isinstance(act, nn.Module) else nn.Identity()) if deploy: - self.rbr_reparam = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=True) + self.rbr_reparam = nn.Conv2d(c1, + c2, + k, + s, + autopad(k, p), + groups=g, + bias=True) else: - self.rbr_identity = (nn.BatchNorm2d(num_features=c1) if c2 == c1 and s == 1 else None) + self.rbr_identity = (nn.BatchNorm2d( + num_features=c1) if c2 == c1 and s == 1 else None) self.rbr_dense = nn.Sequential( nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False), @@ -167,12 +203,12 @@ def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, deploy=False): ) self.rbr_1x1 = nn.Sequential( - nn.Conv2d( c1, c2, 1, s, padding_11, groups=g, bias=False), + nn.Conv2d(c1, c2, 1, s, padding_11, groups=g, bias=False), nn.BatchNorm2d(num_features=c2), ) def forward(self, inputs): - if hasattr(self, "rbr_reparam"): + if hasattr(self, 'rbr_reparam'): return self.act(self.rbr_reparam(inputs)) if self.rbr_identity is None: @@ -209,14 +245,14 @@ def _fuse_bn_tensor(self, branch): eps = branch[1].eps else: assert isinstance(branch, nn.BatchNorm2d) - if not hasattr(self, "id_tensor"): + if not hasattr(self, 'id_tensor'): input_dim = self.in_channels // self.groups - kernel_value = np.zeros( - (self.in_channels, input_dim, 3, 3), dtype=np.float32 - ) + kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), + dtype=np.float32) for i in range(self.in_channels): kernel_value[i, i % input_dim, 1, 1] = 1 - self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) + self.id_tensor = torch.from_numpy(kernel_value).to( + branch.weight.device) kernel = self.id_tensor running_mean = branch.running_mean running_var = branch.running_var @@ -243,15 +279,15 @@ def fuse_conv_bn(self, conv, bn): weights = conv.weight * t bn = nn.Identity() - conv = nn.Conv2d(in_channels = conv.in_channels, - out_channels = conv.out_channels, - kernel_size = conv.kernel_size, - stride=conv.stride, - padding = conv.padding, - dilation = conv.dilation, - groups = conv.groups, - bias = True, - 
padding_mode = conv.padding_mode) + conv = nn.Conv2d(in_channels=conv.in_channels, + out_channels=conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + dilation=conv.dilation, + groups=conv.groups, + bias=True, + padding_mode=conv.padding_mode) conv.weight = torch.nn.Parameter(weights) conv.bias = torch.nn.Parameter(bias) @@ -260,48 +296,60 @@ def fuse_conv_bn(self, conv, bn): def fuse_repvgg_block(self): if self.deploy: return - print(f"RepConv.fuse_repvgg_block") + print('RepConv.fuse_repvgg_block') - self.rbr_dense = self.fuse_conv_bn(self.rbr_dense[0], self.rbr_dense[1]) + self.rbr_dense = self.fuse_conv_bn(self.rbr_dense[0], + self.rbr_dense[1]) self.rbr_1x1 = self.fuse_conv_bn(self.rbr_1x1[0], self.rbr_1x1[1]) rbr_1x1_bias = self.rbr_1x1.bias - weight_1x1_expanded = torch.nn.functional.pad(self.rbr_1x1.weight, [1, 1, 1, 1]) + weight_1x1_expanded = torch.nn.functional.pad(self.rbr_1x1.weight, + [1, 1, 1, 1]) # Fuse self.rbr_identity - if (isinstance(self.rbr_identity, nn.BatchNorm2d) or isinstance(self.rbr_identity, nn.modules.batchnorm.SyncBatchNorm)): + if (isinstance(self.rbr_identity, nn.BatchNorm2d) or isinstance( + self.rbr_identity, nn.modules.batchnorm.SyncBatchNorm)): # print(f"fuse: rbr_identity == BatchNorm2d or SyncBatchNorm") - identity_conv_1x1 = nn.Conv2d( - in_channels=self.in_channels, - out_channels=self.out_channels, - kernel_size=1, - stride=1, - padding=0, - groups=self.groups, - bias=False) - identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.to(self.rbr_1x1.weight.data.device) - identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.squeeze().squeeze() + identity_conv_1x1 = nn.Conv2d(in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=1, + stride=1, + padding=0, + groups=self.groups, + bias=False) + identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.to( + self.rbr_1x1.weight.data.device) + identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.squeeze( + ).squeeze() # print(f" identity_conv_1x1.weight = {identity_conv_1x1.weight.shape}") identity_conv_1x1.weight.data.fill_(0.0) identity_conv_1x1.weight.data.fill_diagonal_(1.0) - identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.unsqueeze(2).unsqueeze(3) + identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.unsqueeze( + 2).unsqueeze(3) # print(f" identity_conv_1x1.weight = {identity_conv_1x1.weight.shape}") - identity_conv_1x1 = self.fuse_conv_bn(identity_conv_1x1, self.rbr_identity) + identity_conv_1x1 = self.fuse_conv_bn(identity_conv_1x1, + self.rbr_identity) bias_identity_expanded = identity_conv_1x1.bias - weight_identity_expanded = torch.nn.functional.pad(identity_conv_1x1.weight, [1, 1, 1, 1]) + weight_identity_expanded = torch.nn.functional.pad( + identity_conv_1x1.weight, [1, 1, 1, 1]) else: # print(f"fuse: rbr_identity != BatchNorm2d, rbr_identity = {self.rbr_identity}") - bias_identity_expanded = torch.nn.Parameter( torch.zeros_like(rbr_1x1_bias) ) - weight_identity_expanded = torch.nn.Parameter( torch.zeros_like(weight_1x1_expanded) ) - - - #print(f"self.rbr_1x1.weight = {self.rbr_1x1.weight.shape}, ") - #print(f"weight_1x1_expanded = {weight_1x1_expanded.shape}, ") - #print(f"self.rbr_dense.weight = {self.rbr_dense.weight.shape}, ") - - self.rbr_dense.weight = torch.nn.Parameter(self.rbr_dense.weight + weight_1x1_expanded + weight_identity_expanded) - self.rbr_dense.bias = torch.nn.Parameter(self.rbr_dense.bias + rbr_1x1_bias + bias_identity_expanded) + 
bias_identity_expanded = torch.nn.Parameter( + torch.zeros_like(rbr_1x1_bias)) + weight_identity_expanded = torch.nn.Parameter( + torch.zeros_like(weight_1x1_expanded)) + + # print(f"self.rbr_1x1.weight = {self.rbr_1x1.weight.shape}, ") + # print(f"weight_1x1_expanded = {weight_1x1_expanded.shape}, ") + # print(f"self.rbr_dense.weight = {self.rbr_dense.weight.shape}, ") + + self.rbr_dense.weight = torch.nn.Parameter(self.rbr_dense.weight + + weight_1x1_expanded + + weight_identity_expanded) + self.rbr_dense.bias = torch.nn.Parameter(self.rbr_dense.bias + + rbr_1x1_bias + + bias_identity_expanded) self.rbr_reparam = self.rbr_dense self.deploy = True @@ -318,10 +366,9 @@ def fuse_repvgg_block(self): del self.rbr_dense self.rbr_dense = None -##### end of repvgg ##### +# yolov5 -##### yolov5 ##### class NMS(nn.Module): # Non-Maximum Suppression (NMS) module @@ -330,47 +377,67 @@ class NMS(nn.Module): classes = None # (optional list) filter by class def __init__(self): - super(NMS, self).__init__() + super().__init__() def forward(self, x): - return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) + return non_max_suppression(x[0], + conf_thres=self.conf, + iou_thres=self.iou, + classes=self.classes) -##### end of yolov5 ###### +# CBNet -##### CBNet ##### class CBLinear(nn.Module): - def __init__(self, c1, c2s, k=1, s=1, p=None, g=1): # ch_in, ch_outs, kernel, stride, padding, groups - super(CBLinear, self).__init__() + + def __init__(self, + c1, + c2s, + k=1, + s=1, + p=None, + g=1): # ch_in, ch_outs, kernel, stride, padding, groups + super().__init__() self.c2s = c2s - self.conv = nn.Conv2d(c1, sum(c2s), k, s, autopad(k, p), groups=g, bias=True) + self.conv = nn.Conv2d(c1, + sum(c2s), + k, + s, + autopad(k, p), + groups=g, + bias=True) def forward(self, x): outs = self.conv(x).split(self.c2s, dim=1) return outs + class CBFuse(nn.Module): + def __init__(self, idx): - super(CBFuse, self).__init__() + super().__init__() self.idx = idx def forward(self, xs): target_size = xs[-1].shape[2:] - res = [F.interpolate(x[self.idx[i]], size=target_size, mode='nearest') for i, x in enumerate(xs[:-1])] + res = [ + F.interpolate(x[self.idx[i]], size=target_size, mode='nearest') + for i, x in enumerate(xs[:-1]) + ] out = torch.sum(torch.stack(res + xs[-1:]), dim=0) return out -##### end of CBNet ##### +# DynamicDet -##### DynamicDet ##### def sigmoid(logits, hard=False, threshold=0.5): y_soft = logits.sigmoid() if hard: indices = (y_soft < threshold).nonzero(as_tuple=True) - y_hard = torch.zeros_like(logits, memory_format=torch.legacy_contiguous_format) + y_hard = torch.zeros_like(logits, + memory_format=torch.legacy_contiguous_format) y_hard[indices[0], indices[1]] = 1.0 ret = y_hard - y_soft.detach() + y_soft else: @@ -379,13 +446,20 @@ def sigmoid(logits, hard=False, threshold=0.5): class AdaptiveRouter(nn.Module): + def __init__(self, features_channels, out_channels, reduction=4): - super(AdaptiveRouter, self).__init__() + super().__init__() self.inp = sum(features_channels) self.oup = out_channels self.reduction = reduction - self.layer1 = nn.Conv2d(self.inp, self.inp//self.reduction, kernel_size=1, bias=True) - self.layer2 = nn.Conv2d(self.inp//self.reduction, self.oup, kernel_size=1, bias=True) + self.layer1 = nn.Conv2d(self.inp, + self.inp // self.reduction, + kernel_size=1, + bias=True) + self.layer2 = nn.Conv2d(self.inp // self.reduction, + self.oup, + kernel_size=1, + bias=True) def forward(self, xs, thres=0.5): xs = [x.mean(dim=(2, 3), keepdim=True) for x 
in xs] @@ -398,5 +472,3 @@ def forward(self, xs, thres=0.5): else: xs = xs.sigmoid() return xs - -##### end of DynamicDet ##### diff --git a/models/yolo.py b/models/yolo.py index 52f3b52..9f60643 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -1,26 +1,30 @@ -import math -import argparse import logging -from pathlib import Path -import yaml +import math import sys from copy import deepcopy +from pathlib import Path -sys.path.append('./') # to run '$ python *.py' files in subdirectories -logger = logging.getLogger(__name__) import torch +import torch.nn as nn +import yaml from torch.nn.modules.batchnorm import _BatchNorm -from models.common import * + +from models.common import (NMS, SPPCSPC, AdaptiveRouter, CBFuse, CBLinear, + Concat, Conv, ConvCheckpoint, ImplicitA, ImplicitM, + ReOrg, RepConv, Shortcut, autoShape) from utils.autoanchor import check_anchor_order -from utils.general import make_divisible, check_file, set_logging -from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ - select_device, copy_attr +from utils.general import make_divisible +from utils.torch_utils import (copy_attr, fuse_conv_and_bn, initialize_weights, + model_info, scale_img, time_synchronized) try: import thop # for FLOPS computation except ImportError: thop = None +sys.path.append('./') # to run '$ python *.py' files in subdirectories +logger = logging.getLogger(__name__) + class IDetect(nn.Module): stride = None # strides computed during build @@ -30,7 +34,7 @@ class IDetect(nn.Module): concat = False def __init__(self, nc=80, anchors=(), ch=()): # detection layer - super(IDetect, self).__init__() + super().__init__() self.nc = nc # number of classes self.no = nc + 5 # number of outputs per anchor self.nl = len(anchors) # number of detection layers @@ -38,8 +42,11 @@ def __init__(self, nc=80, anchors=(), ch=()): # detection layer self.grid = [torch.zeros(1)] * self.nl # init grid a = torch.tensor(anchors).float().view(self.nl, -1, 2) self.register_buffer('anchors', a) # shape(nl,na,2) - self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) - self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv + self.register_buffer('anchor_grid', + a.clone().view(self.nl, 1, -1, 1, 1, + 2)) # shape(nl,1,na,1,1,2) + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) + for x in ch) # output conv self.ia = nn.ModuleList(ImplicitA(x) for x in ch) self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch) @@ -52,15 +59,17 @@ def forward(self, x): x[i] = self.m[i](self.ia[i](x[i])) # conv x[i] = self.im[i](x[i]) bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) - x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() + x[i] = x[i].view(bs, self.na, self.no, ny, + nx).permute(0, 1, 3, 4, 2).contiguous() if not self.training: # inference if self.grid[i].shape[2:4] != x[i].shape[2:4]: self.grid[i] = self._make_grid(nx, ny).to(x[i].device) y = x[i].sigmoid() - y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy - y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + + self.grid[i]) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2)**2 * self.anchor_grid[i] # wh z.append(y.view(bs, -1, self.no)) return x if self.training else (torch.cat(z, 1), x) @@ -72,7 +81,8 @@ def fuseforward(self, x): for i in range(self.nl): x[i] = self.m[i](x[i]) # conv bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) - x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() + x[i] = x[i].view(bs, self.na, self.no, ny, + nx).permute(0, 1, 3, 4, 2).contiguous() if not self.training: # inference if self.grid[i].shape[2:4] != x[i].shape[2:4]: @@ -80,12 +90,17 @@ def fuseforward(self, x): y = x[i].sigmoid() if not torch.onnx.is_in_onnx_export(): - y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy - y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + + self.grid[i]) * self.stride[i] # xy + y[..., + 2:4] = (y[..., 2:4] * 2)**2 * self.anchor_grid[i] # wh else: - xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 - xy = xy * (2. * self.stride[i]) + (self.stride[i] * (self.grid[i] - 0.5)) # new xy - wh = wh ** 2 * (4 * self.anchor_grid[i].data) # new wh + xy, wh, conf = y.split( + (2, 2, self.nc + 1), + 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 + xy = xy * (2. * self.stride[i]) + ( + self.stride[i] * (self.grid[i] - 0.5)) # new xy + wh = wh**2 * (4 * self.anchor_grid[i].data) # new wh y = torch.cat((xy, wh, conf), 4) z.append(y.view(bs, -1, self.no)) @@ -104,18 +119,20 @@ def fuseforward(self, x): return out def fuse(self): - print("IDetect.fuse") + print('IDetect.fuse') # fuse ImplicitA and Convolution for i in range(len(self.m)): - c1,c2,_,_ = self.m[i].weight.shape - c1_,c2_, _,_ = self.ia[i].implicit.shape - self.m[i].bias += torch.matmul(self.m[i].weight.reshape(c1,c2),self.ia[i].implicit.reshape(c2_,c1_)).squeeze(1) + c1, c2, _, _ = self.m[i].weight.shape + c1_, c2_, _, _ = self.ia[i].implicit.shape + self.m[i].bias += torch.matmul( + self.m[i].weight.reshape(c1, c2), + self.ia[i].implicit.reshape(c2_, c1_)).squeeze(1) # fuse ImplicitM and Convolution for i in range(len(self.m)): - c1,c2, _,_ = self.im[i].implicit.shape + c1, c2, _, _ = self.im[i].implicit.shape self.m[i].bias *= self.im[i].implicit.reshape(c2) - self.m[i].weight *= self.im[i].implicit.transpose(0,1) + self.m[i].weight *= self.im[i].implicit.transpose(0, 1) @staticmethod def _make_grid(nx=20, ny=20): @@ -128,16 +145,21 @@ def convert(self, z): conf = z[:, :, 4:5] score = z[:, :, 5:] score *= conf - convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], - dtype=torch.float32, - device=z.device) + convert_matrix = torch.tensor( + [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], + dtype=torch.float32, + device=z.device) box @= convert_matrix return (box, score) class Model(nn.Module): - def __init__(self, cfg, ch=3, nc=None): # model, input channels, number of classes - super(Model, self).__init__() + + def __init__(self, + cfg, + ch=3, + nc=None): # model, input channels, number of classes + super().__init__() assert isinstance(cfg, str) self.yaml_file = Path(cfg).name with open(cfg) as f: @@ -146,7 +168,9 @@ def __init__(self, cfg, ch=3, nc=None): # model, input channels, number of clas if self.dynamic: router_channels = self.yaml['router_channels'] reduction = self.yaml.get('router_reduction', 4) - self.router = AdaptiveRouter(router_channels, 1, reduction=reduction) + 
self.router = AdaptiveRouter(router_channels, + 1, + reduction=reduction) self.router_ins = self.yaml['router_ins'] self.dy_thres = 0.5 self.get_score = False @@ -154,9 +178,13 @@ def __init__(self, cfg, ch=3, nc=None): # model, input channels, number of clas # Define model ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels if nc and nc != self.yaml['nc']: - logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") + logger.info( + f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") self.yaml['nc'] = nc # override yaml value - self.model_b, self.save_b, self.model_b2, self.save_b2, self.model_h, self.save_h, self.model_h2, self.save_h2 = parse_model(deepcopy(self.yaml), ch_b=[ch]) # model, savelist + (self.model_b, self.save_b, self.model_b2, self.save_b2, self.model_h, + self.save_h, self.model_h2, + self.save_h2) = parse_model(deepcopy(self.yaml), + ch_b=[ch]) # model, savelist self.keep_input = self.yaml.get('keep_input', False) self.names = [str(i) for i in range(self.yaml['nc'])] # default names # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) @@ -167,15 +195,27 @@ def __init__(self, cfg, ch=3, nc=None): # model, input channels, number of clas if isinstance(m, IDetect): s = 256 # 2x min stride if self.dynamic: - m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0][:m.anchors.shape[0]]]) # forward + m.stride = torch.tensor([ + s / x.shape[-2] for x in self.forward( + torch.zeros(1, ch, s, s))[0][:m.anchors.shape[0]] + ]) # forward else: - m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[:m.anchors.shape[0]]]) # forward + m.stride = torch.tensor([ + s / x.shape[-2] for x in self.forward( + torch.zeros(1, ch, s, s))[:m.anchors.shape[0]] + ]) # forward check_anchor_order(m) m.anchors /= m.stride.view(-1, 1, 1) if self.dynamic: - m2.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0][:m2.anchors.shape[0]]]) # forward + m2.stride = torch.tensor([ + s / x.shape[-2] for x in self.forward( + torch.zeros(1, ch, s, s))[0][:m2.anchors.shape[0]] + ]) # forward else: - m2.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[:m2.anchors.shape[0]]]) # forward + m2.stride = torch.tensor([ + s / x.shape[-2] for x in self.forward( + torch.zeros(1, ch, s, s))[:m2.anchors.shape[0]] + ]) # forward check_anchor_order(m2) m2.anchors /= m2.stride.view(-1, 1, 1) self.stride = m.stride @@ -194,7 +234,9 @@ def forward(self, x, augment=False, profile=False): f = [None, 3, None] # flips (2-ud, 3-lr) y = [] # outputs for si, fi in zip(s, f): - xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) + xi = scale_img(x.flip(fi) if fi else x, + si, + gs=int(self.stride.max())) yi = self.forward_once(xi)[0] # forward # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save yi[..., :4] /= si # de-scale @@ -205,7 +247,8 @@ def forward(self, x, augment=False, profile=False): y.append(yi) return torch.cat(y, 1), None # augmented inference, train else: - return self.forward_once(x, profile) # single-scale inference, train + return self.forward_once(x, + profile) # single-scale inference, train def forward_once(self, x, profile=False): if self.keep_input: @@ -215,11 +258,15 @@ def forward_once(self, x, profile=False): outs = [] for m in self.model_b: if m.f != -1: # if not from previous layer - x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in 
m.f] # from earlier layers + x = y[m.f] if isinstance( + m.f, int) else [x if j == -1 else y[j] + for j in m.f] # from earlier layers if profile: c = isinstance(m, IDetect) - o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS + o = thop.profile( + m, inputs=(x.copy() if c else x, ), + verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS for _ in range(10): m(x.copy() if c else x) t = time_synchronized() @@ -234,33 +281,42 @@ def forward_once(self, x, profile=False): assert len(y) == self.yaml['n_first_layers'] if self.dynamic: - score = self.router([y[i] for i in self.router_ins]) # 'score' denotes the (1 - difficulty score) + score = self.router([ + y[i] for i in self.router_ins + ]) # 'score' denotes the (1 - difficulty score) if not hasattr(self, 'get_score'): self.get_score = False if self.get_score: return score - need_second = self.training or (not self.dynamic) or score[:, 0] < self.dy_thres - need_first_head = self.training or (self.dynamic and score[:, 0] >= self.dy_thres) + need_second = self.training or ( + not self.dynamic) or score[:, 0] < self.dy_thres + need_first_head = self.training or (self.dynamic + and score[:, 0] >= self.dy_thres) if need_second: for m in self.model_b2: if m.f == 'input': x = input_x elif m.f != -1: # if not from previous layer - x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers + x = y[m.f] if isinstance( + m.f, int) else [x if j == -1 else y[j] + for j in m.f] # from earlier layers if profile: c = isinstance(m, IDetect) - o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS + o = thop.profile( + m, inputs=(x.copy() if c else x, ), + verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS for _ in range(10): m(x.copy() if c else x) t = time_synchronized() for _ in range(10): m(x.copy() if c else x) dt.append((time_synchronized() - t) * 100) - print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) + print('%10.1f%10.0f%10.1fms %-40s' % + (o, m.np, dt[-1], m.type)) x = m(x) # run @@ -269,18 +325,23 @@ def forward_once(self, x, profile=False): if need_first_head: for m in self.model_h: if m.f != -1: # if not from previous layer - x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers + x = y[m.f] if isinstance( + m.f, int) else [x if j == -1 else y[j] + for j in m.f] # from earlier layers if profile: c = isinstance(m, IDetect) - o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS + o = thop.profile( + m, inputs=(x.copy() if c else x, ), + verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS for _ in range(10): m(x.copy() if c else x) t = time_synchronized() for _ in range(10): m(x.copy() if c else x) dt.append((time_synchronized() - t) * 100) - print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) + print('%10.1f%10.0f%10.1fms %-40s' % + (o, m.np, dt[-1], m.type)) x = m(x) # run @@ -295,18 +356,23 @@ def forward_once(self, x, profile=False): else: cur_f = m.f if cur_f != -1: # if not from previous layer - x = y[cur_f] if isinstance(cur_f, int) else [x if j == -1 else y[j] for j in cur_f] # from earlier layers + x = y[cur_f] if isinstance(cur_f, int) else [ + x if j == -1 else y[j] for j in cur_f + ] # from earlier layers if profile: c = isinstance(m, IDetect) - o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS + o = thop.profile( + m, 
inputs=(x.copy() if c else x, ), + verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS for _ in range(10): m(x.copy() if c else x) t = time_synchronized() for _ in range(10): m(x.copy() if c else x) dt.append((time_synchronized() - t) * 100) - print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) + print('%10.1f%10.0f%10.1fms %-40s' % + (o, m.np, dt[-1], m.type)) x = m(x) # run @@ -326,31 +392,43 @@ def close_all_bn(self): if isinstance(m, _BatchNorm): m.eval() - def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + def _initialize_biases( + self, + cf=None): # initialize biases into Detect(), cf is class frequency # https://arxiv.org/abs/1708.02002 section 3.3 # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. m = self.model_h[-1] # Detect() module for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) - b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + b.data[:, 4] += math.log( + 8 / (640 / s)**2) # obj (8 objects per 640 image) + b.data[:, + 5:] += math.log(0.6 / + (m.nc - 0.99)) if cf is None else torch.log( + cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) m = self.model_h2[-1] # Detect() module for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) - b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + b.data[:, 4] += math.log( + 8 / (640 / s)**2) # obj (8 objects per 640 image) + b.data[:, + 5:] += math.log(0.6 / + (m.nc - 0.99)) if cf is None else torch.log( + cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) def _print_biases(self): m = self.model_h[-1] # Detect() module for mi in m.m: # from b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) - print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) + print(('%6g Conv2d.bias:' + '%10.3g' * 6) % + (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) m = self.model_h2[-1] # Detect() module for mi in m.m: # from b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) - print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) + print(('%6g Conv2d.bias:' + '%10.3g' * 6) % + (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) # def _print_weights(self): # for m in self.model.modules(): @@ -359,10 +437,12 @@ def _print_biases(self): def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers print('Fusing layers... ') - for model in [self.model_b, self.model_b2, self.model_h, self.model_h2]: + for model in [ + self.model_b, self.model_b2, self.model_h, self.model_h2 + ]: for m in model.modules(): if isinstance(m, RepConv): - #print(f" fuse_repvgg_block") + # print(f" fuse_repvgg_block") m.fuse_repvgg_block() elif type(m) is Conv and hasattr(m, 'bn'): m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv @@ -402,7 +482,10 @@ def nms(self, mode=True): # add or remove NMS module def autoshape(self): # add autoShape module print('Adding autoShape... 
') m = autoShape(self) # wrap model - copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes + copy_attr(m, + self, + include=('yaml', 'nc', 'hyp', 'names', 'stride'), + exclude=()) # copy attributes return m def info(self, verbose=False, img_size=640): # print model information @@ -410,18 +493,22 @@ def info(self, verbose=False, img_size=640): # print model information def parse_model(d, ch_b): # model_dict, input_channels(3) - logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) - anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] - na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors + logger.info('\n%3s%18s%3s%10s %-40s%-30s' % + ('', 'from', 'n', 'params', 'module', 'arguments')) + anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d[ + 'width_multiple'] + na = (len(anchors[0]) // + 2) if isinstance(anchors, list) else anchors # number of anchors no = na * (nc + 5) # number of outputs = anchors * (classes + 5) layers_b, save_b, c2 = [], [], ch_b[-1] # layers, savelist, ch_b out - for i, (f, n, m, args) in enumerate(d['backbone']): # from, number, module, args + for i, (f, n, m, + args) in enumerate(d['backbone']): # from, number, module, args m = eval(m) if isinstance(m, str) else m # eval strings for j, a in enumerate(args): try: args[j] = eval(a) if isinstance(a, str) else a # eval strings - except: + except Exception: pass n = max(round(n * gd), 1) if n > 1 else n # depth gain @@ -449,12 +536,15 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) else: c2 = ch_b[f] - m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module + m_ = nn.Sequential( + *[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module t = str(m)[8:-2].replace('__main__.', '') # module type np = sum([x.numel() for x in m_.parameters()]) # number params m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params - logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print - save_b.extend(x % i for x in ([f] if isinstance(f, (int, str)) else f) if x != -1) # append to savelist + logger.info('%3s%18s%3s%10.0f %-40s%-30s' % + (i, f, n, np, t, args)) # print + save_b.extend(x % i for x in ([f] if isinstance(f, (int, str)) else f) + if x != -1) # append to savelist layers_b.append(m_) if i == 0: ch_b = [] @@ -463,12 +553,13 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) layers_b2, save_b2 = [], [] # layers, savelist ch_b2 = [] - for i, (f, n, m, args) in enumerate(d['dual_backbone']): # from, number, module, args + for i, (f, n, m, args) in enumerate( + d['dual_backbone']): # from, number, module, args m = eval(m) if isinstance(m, str) else m # eval strings for j, a in enumerate(args): try: args[j] = eval(a) if isinstance(a, str) else a # eval strings - except: + except Exception: pass chs = [] @@ -520,12 +611,15 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) assert len(chs) == 1 c2 = chs[0][f] - m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module + m_ = nn.Sequential( + *[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module t = str(m)[8:-2].replace('__main__.', '') # module type np = sum([x.numel() for x in m_.parameters()]) # number params m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params - logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) 
# print - for x in ([f] if isinstance(f, (int, str)) else f): # append to savelist + logger.info('%3s%18s%3s%10.0f %-40s%-30s' % + (i, f, n, np, t, args)) # print + for x in ([f] if isinstance(f, + (int, str)) else f): # append to savelist if isinstance(x, str): continue if x >= 0: @@ -536,12 +630,13 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) ch_b2.append(c2) layers_h, save_h, ch_h = [], [], [] - for i, (f, n, m, args) in enumerate(d['head']): # from, number, module, args + for i, (f, n, m, + args) in enumerate(d['head']): # from, number, module, args m = eval(m) if isinstance(m, str) else m # eval strings for j, a in enumerate(args): try: args[j] = eval(a) if isinstance(a, str) else a # eval strings - except: + except Exception: pass chs = [] for x in ([f] if isinstance(f, (int, str)) else f): @@ -582,12 +677,15 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) assert len(chs) == 1 c2 = chs[0][f] - m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module + m_ = nn.Sequential( + *[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module t = str(m)[8:-2].replace('__main__.', '') # module type np = sum([x.numel() for x in m_.parameters()]) # number params m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params - logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print - for x in ([f] if isinstance(f, (int, str)) else f): # append to savelist + logger.info('%3s%18s%3s%10.0f %-40s%-30s' % + (i, f, n, np, t, args)) # print + for x in ([f] if isinstance(f, + (int, str)) else f): # append to savelist if isinstance(x, str): continue if x >= 0: @@ -598,12 +696,13 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) ch_h.append(c2) layers_h2, save_h2, ch_h2 = [], [], [] - for i, (f, n, m, args) in enumerate(d['head2']): # from, number, module, args + for i, (f, n, m, + args) in enumerate(d['head2']): # from, number, module, args m = eval(m) if isinstance(m, str) else m # eval strings for j, a in enumerate(args): try: args[j] = eval(a) if isinstance(a, str) else a # eval strings - except: + except Exception: pass chs = [] for x in ([f] if isinstance(f, (int, str)) else f): @@ -644,12 +743,15 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) assert len(chs) == 1 c2 = chs[0][f] - m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module + m_ = nn.Sequential( + *[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module t = str(m)[8:-2].replace('__main__.', '') # module type np = sum([x.numel() for x in m_.parameters()]) # number params m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params - logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print - for x in ([f] if isinstance(f, (int, str)) else f): # append to savelist + logger.info('%3s%18s%3s%10.0f %-40s%-30s' % + (i, f, n, np, t, args)) # print + for x in ([f] if isinstance(f, + (int, str)) else f): # append to savelist if isinstance(x, str): continue if x >= 0: @@ -662,5 +764,7 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) save_b.extend(d['b1_save']) save_b2.extend(d['b2_save']) - return nn.Sequential(*layers_b), sorted(save_b), nn.Sequential(*layers_b2), sorted(save_b2), \ - nn.Sequential(*layers_h), sorted(save_h), nn.Sequential(*layers_h2), sorted(save_h2) + return (nn.Sequential(*layers_b), + sorted(save_b), nn.Sequential(*layers_b2), sorted(save_b2), + nn.Sequential(*layers_h), sorted(save_h), + 
nn.Sequential(*layers_h2), sorted(save_h2)) diff --git a/test.py b/test.py index 8e3bcbc..76c60e9 100644 --- a/test.py +++ b/test.py @@ -3,49 +3,52 @@ import logging from pathlib import Path from threading import Thread -import yaml -from tqdm import tqdm import numpy as np import torch import torch.nn as nn +import yaml +from tqdm import tqdm from models.yolo import Model from utils.datasets import create_dataloader -from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, \ - box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, set_logging, increment_path, colorstr -from utils.metrics import ap_per_class, ConfusionMatrix -from utils.plots import plot_images, output_to_target -from utils.torch_utils import select_device, time_synchronized, intersect_dicts - +from utils.general import (box_iou, check_dataset, check_file, check_img_size, + coco80_to_coco91_class, colorstr, increment_path, + non_max_suppression, scale_coords, set_logging, + xywh2xyxy, xyxy2xywh) +from utils.metrics import ConfusionMatrix, ap_per_class +from utils.plots import output_to_target, plot_images +from utils.torch_utils import intersect_dicts, select_device, time_synchronized logger = logging.getLogger(__name__) -def test(data, - cfg=None, - weight=None, - batch_size=32, - imgsz=640, - conf_thres=0.001, - iou_thres=0.6, # for NMS - save_json=False, - single_cls=False, - augment=False, - verbose=False, - model=None, - dataloader=None, - save_dir=Path(''), # for saving images - save_txt=False, # for auto-labelling - save_hybrid=False, # for hybrid auto-labelling - save_conf=False, # save auto-label confidences - plots=True, - wandb_logger=None, - compute_loss=None, - half_precision=True, - is_coco=False, - v5_metric=False, - dy_thres=0.5, - save_results=False): + +def test( + data, + cfg=None, + weight=None, + batch_size=32, + imgsz=640, + conf_thres=0.001, + iou_thres=0.6, # for NMS + save_json=False, + single_cls=False, + augment=False, + verbose=False, + model=None, + dataloader=None, + save_dir=Path(''), # for saving images + save_txt=False, # for auto-labelling + save_hybrid=False, # for hybrid auto-labelling + save_conf=False, # save auto-label confidences + plots=True, + wandb_logger=None, + compute_loss=None, + half_precision=True, + is_coco=False, + v5_metric=False, + dy_thres=0.5, + save_results=False): # Initialize/load model and set device training = model is not None @@ -61,27 +64,36 @@ def test(data, data = yaml.load(f, Loader=yaml.SafeLoader) check_dataset(data) # check nc = 1 if single_cls else int(data['nc']) # number of classes - iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 + iouv = torch.linspace(0.5, 0.95, + 10).to(device) # iou vector for mAP@0.5:0.95 niou = iouv.numel() if not training: # Directories - save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run - (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + save_dir = Path( + increment_path(Path(opt.project) / opt.name, + exist_ok=opt.exist_ok)) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir( + parents=True, exist_ok=True) # make dir # Load model model = Model(cfg, ch=3, nc=nc) # create state_dict = torch.load(weight, map_location='cpu')['model'] - state_dict = intersect_dicts(state_dict, model.state_dict()) # intersect + state_dict = intersect_dicts(state_dict, + model.state_dict()) # intersect model.load_state_dict(state_dict, strict=False) # load 
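
An aside on the hunk above: checkpoint loading in `test.py` is deliberately permissive. `intersect_dicts` keeps only the keys that also exist in the freshly built `Model` with matching tensor shapes, and `strict=False` tolerates whatever remains. A minimal sketch of that pattern (illustrative only; the repo's real helper is `utils.torch_utils.intersect_dicts`, and the tensor names below are made up):

```python
# Illustrative re-implementation of the shape-aware intersection used above;
# the repo's actual helper is utils.torch_utils.intersect_dicts.
import torch

def intersect_state_dicts(da, db, exclude=()):
    # keep keys present in both dicts, not excluded, with identical shapes
    return {k: v for k, v in da.items()
            if k in db and not any(x in k for x in exclude)
            and v.shape == db[k].shape}

ckpt = {'conv.weight': torch.zeros(16, 3, 3, 3), 'head.weight': torch.zeros(85, 16)}
model_sd = {'conv.weight': torch.zeros(16, 3, 3, 3), 'head.weight': torch.zeros(25, 16)}
kept = intersect_state_dicts(ckpt, model_sd)
print(list(kept))  # ['conv.weight'] -- the shape-mismatched head is dropped
# model.load_state_dict(kept, strict=False) then tolerates the missing key
```

The payoff is that a checkpoint saved with a slightly different head (for example a different `nc`) still transfers every compatible weight instead of failing outright, which is why the log line right after reports how many items were transferred.
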
model.to(device) - logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weight)) # report + logger.info( + 'Transferred %g/%g items from %s' % + (len(state_dict), len(model.state_dict()), weight)) # report for p in model.parameters(): p.requires_grad = False model.float().fuse().eval() # Compatibility updates for m in model.modules(): - if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + if type(m) in [ + nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU + ]: m.inplace = True # pytorch 1.7.0 compatibility elif type(m) is nn.Upsample: m.recompute_scale_factor = None # torch 1.11.0 compatibility @@ -107,23 +119,38 @@ def test(data, # Dataloader if not training: if device.type != 'cpu': - model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once - task = opt.task if opt.task in ('train', 'val', 'test') else 'val' # path to train/val/test images - dataloader = create_dataloader(data[task], imgsz, batch_size, gs, opt, pad=0.5, rect=True, + model( + torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( + next(model.parameters()))) # run once + task = opt.task if opt.task in ( + 'train', 'val', 'test') else 'val' # path to train/val/test images + dataloader = create_dataloader(data[task], + imgsz, + batch_size, + gs, + opt, + pad=0.5, + rect=True, prefix=colorstr(f'{task}: '))[0] if v5_metric: - print("Testing with YOLOv5 AP metric...") + print('Testing with YOLOv5 AP metric...') seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) - names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} + names = { + k: v + for k, v in enumerate( + model.names if hasattr(model, 'names') else model.module.names) + } coco91class = coco80_to_coco91_class() - s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') + s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', + 'mAP@.5', 'mAP@.5:.95') p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. 
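
Before the evaluation loop that follows, it helps to keep the `iouv` bookkeeping in mind: each matched prediction is scored against all ten IoU thresholds at once, producing one boolean row per detection that later averages into mAP@0.5:0.95. A toy illustration with an assumed best-match IoU, mirroring the `correct[pi[j]] = ious[j] > iouv` assignment further down:

```python
# Toy illustration (values assumed) of the per-threshold bookkeeping in the
# evaluation loop below.
import torch

iouv = torch.linspace(0.5, 0.95, 10)  # thresholds 0.50, 0.55, ..., 0.95
best_iou = torch.tensor(0.72)         # hypothetical best IoU for one detection
row = best_iou > iouv                 # True through 0.70, False from 0.75 up
print(row.tolist())
# [True, True, True, True, True, False, False, False, False, False]
```

Accumulating such rows per class over confidence-ranked detections is what `ap_per_class` later turns into AP@0.5 (column 0 of `ap`) and AP@0.5:0.95 (the row mean).
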
loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class, wandb_images = [], [], [], [], [] - for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): + for batch_i, (img, targets, paths, + shapes) in enumerate(tqdm(dataloader, desc=s)): img = img.to(device, non_blocking=True) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 @@ -133,18 +160,26 @@ def test(data, with torch.no_grad(): # Run model t = time_synchronized() - out, train_out = model(img, augment=augment) # inference and training outputs + out, train_out = model( + img, augment=augment) # inference and training outputs t0 += time_synchronized() - t # Compute loss if compute_loss: - loss += compute_loss([x.float() for x in train_out], targets)[1][:3] # box, obj, cls + loss += compute_loss([x.float() for x in train_out], + targets)[1][:3] # box, obj, cls # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels - lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling + targets[:, 2:] *= torch.Tensor([width, height, width, + height]).to(device) # to pixels + lb = [targets[targets[:, 0] == i, 1:] for i in range(nb) + ] if save_hybrid else [] # for autolabelling t = time_synchronized() - out = non_max_suppression(out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb, multi_label=True) + out = non_max_suppression(out, + conf_thres=conf_thres, + iou_thres=iou_thres, + labels=lb, + multi_label=True) t1 += time_synchronized() - t # Statistics per image @@ -157,67 +192,109 @@ def test(data, if len(pred) == 0: if nl: - stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) + stats.append((torch.zeros(0, niou, dtype=torch.bool), + torch.Tensor(), torch.Tensor(), tcls)) continue # Predictions predn = pred.clone() - scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred + scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], + shapes[si][1]) # native-space pred # Append to text file if save_txt: - gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh + gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0 + ]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f: + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / + gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, + conf) if save_conf else (cls, + *xywh) # label format + with open(save_dir / 'labels' / (path.stem + '.txt'), + 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') # W&B logging - Media Panel Plots - if len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0: # Check for test operation + if len( + wandb_images + ) < log_imgs and wandb_logger.current_epoch > 0: # Check for test operation if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0: - box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, - "class_id": int(cls), - "box_caption": "%s %.3f" % (names[cls], conf), - "scores": {"class_score": conf}, - "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()] - boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space - 
wandb_images.append(wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name)) - wandb_logger.log_training_progress(predn, path, names) if wandb_logger and wandb_logger.wandb_run else None + box_data = [{ + 'position': { + 'minX': xyxy[0], + 'minY': xyxy[1], + 'maxX': xyxy[2], + 'maxY': xyxy[3] + }, + 'class_id': int(cls), + 'box_caption': f'{names[cls]} {conf:.3f}', + 'scores': { + 'class_score': conf + }, + 'domain': 'pixel' + } for *xyxy, conf, cls in pred.tolist()] + boxes = { + 'predictions': { + 'box_data': box_data, + 'class_labels': names + } + } # inference-space + wandb_images.append( + wandb_logger.wandb.Image(img[si], + boxes=boxes, + caption=path.name)) + wandb_logger.log_training_progress( + predn, path, + names) if wandb_logger and wandb_logger.wandb_run else None # Append to pycocotools JSON dictionary if save_json: # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... - image_id = int(path.stem) if path.stem.isnumeric() else path.stem + image_id = int( + path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for p, b in zip(pred.tolist(), box.tolist()): - jdict.append({'image_id': image_id, - 'category_id': coco91class[int(p[5])] if is_coco else int(p[5]), - 'bbox': [round(x, 3) for x in b], - 'score': round(p[4], 5)}) + jdict.append({ + 'image_id': + image_id, + 'category_id': + coco91class[int(p[5])] if is_coco else int(p[5]), + 'bbox': [round(x, 3) for x in b], + 'score': + round(p[4], 5) + }) # Assign all predictions as incorrect - correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) + correct = torch.zeros(pred.shape[0], + niou, + dtype=torch.bool, + device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) - scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels + scale_coords(img[si].shape[1:], tbox, shapes[si][0], + shapes[si][1]) # native-space labels if plots: - confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1)) + confusion_matrix.process_batch( + predn, torch.cat((labels[:, 0:1], tbox), 1)) # Per target class for cls in torch.unique(tcls_tensor): - ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices - pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices + ti = (cls == tcls_tensor).nonzero(as_tuple=False).view( + -1) # prediction indices + pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view( + -1) # target indices # Search for detections if pi.shape[0]: # Prediction to target ious - ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1) # best ious, indices + ious, i = box_iou(predn[pi, :4], tbox[ti]).max( + 1) # best ious, indices # Append detections detected_set = set() @@ -226,27 +303,40 @@ def test(data, if d.item() not in detected_set: detected_set.add(d.item()) detected.append(d) - correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn - if len(detected) == nl: # all targets already located in image + correct[ + pi[j]] = ious[j] > iouv # iou_thres is 1xn + if len( + detected + ) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) - stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) + stats.append( + (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if plots and batch_i < 3: f = save_dir / f'test_batch{batch_i}_labels.jpg' # 
labels - Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start() + Thread(target=plot_images, + args=(img, targets, paths, f, names), + daemon=True).start() f = save_dir / f'test_batch{batch_i}_pred.jpg' # predictions - Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start() + Thread(target=plot_images, + args=(img, output_to_target(out), paths, f, names), + daemon=True).start() # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): - p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, v5_metric=v5_metric, save_dir=save_dir, names=names) + p, r, ap, f1, ap_class = ap_per_class(*stats, + plot=plots, + v5_metric=v5_metric, + save_dir=save_dir, + names=names) ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95 mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() - nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class + nt = np.bincount(stats[3].astype(np.int64), + minlength=nc) # number of targets per class else: nt = torch.zeros(1) @@ -260,24 +350,30 @@ def test(data, print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds - t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple + t = tuple(x / seen * 1E3 + for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple if not training: - print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) + print( + 'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' + % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) if wandb_logger and wandb_logger.wandb: - val_batches = [wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))] - wandb_logger.log({"Validation": val_batches}) + val_batches = [ + wandb_logger.wandb.Image(str(f), caption=f.name) + for f in sorted(save_dir.glob('test*.jpg')) + ] + wandb_logger.log({'Validation': val_batches}) if wandb_images: - wandb_logger.log({"Bounding Box Debugger/Images": wandb_images}) + wandb_logger.log({'Bounding Box Debugger/Images': wandb_images}) # Save JSON if save_json and len(jdict): w = Path(weight).stem if weight is not None else '' # weight anno_json = './coco/annotations/instances_val2017.json' # annotations json - pred_json = str(save_dir / f"{w}_predictions.json") # predictions json + pred_json = str(save_dir / f'{w}_predictions.json') # predictions json print('\nEvaluating pycocotools mAP... saving %s...' 
% pred_json) with open(pred_json, 'w') as f: json.dump(jdict, f) @@ -290,17 +386,26 @@ def test(data, pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, 'bbox') if is_coco: - eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate + eval.params.imgIds = [ + int(Path(x).stem) for x in dataloader.dataset.img_files + ] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize() - map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + map, map50 = eval.stats[: + 2] # update results (mAP@0.5:0.95, mAP@0.5) if save_results: - results_txt = str(save_dir / f"{w}_results.txt") + results_txt = str(save_dir / f'{w}_results.txt') with open(results_txt, 'a') as f: - f.write(f'map: {round(map, 3)}, map50: {round(map50, 3)}, dy_thres: {dy_thres}\n') - f.write('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g\n' % t) - f.write('***********************************************************************************\n') + f.write( + f'map: {round(map, 3)}, map50: {round(map50, 3)}, dy_thres: {dy_thres}\n' + ) + f.write( + 'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g\n' + % t) + f.write( + '***********************************************************************************\n' + ) except Exception as e: print(f'pycocotools unable to run: {e}') @@ -308,37 +413,85 @@ def test(data, model.float() # for training if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' - print(f"Results saved to {save_dir}{s}") + print(f'Results saved to {save_dir}{s}') maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] - return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t + return (mp, mr, map50, map, + *(loss.cpu() / len(dataloader)).tolist()), maps, t if __name__ == '__main__': parser = argparse.ArgumentParser(prog='test.py') parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--weight', type=str, default='', help='model.pt path(s)') - parser.add_argument('--data', type=str, default='data/coco.yaml', help='*.data path') - parser.add_argument('--batch-size', type=int, default=1, help='size of each image batch') - parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') - parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') - parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS') - parser.add_argument('--task', default='val', help='train, val, test, speed or study') - parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') - parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') - parser.add_argument('--augment', action='store_true', help='augmented inference') - parser.add_argument('--verbose', action='store_true', help='report mAP by class') - parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') - parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') - parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') - parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') - parser.add_argument('--project', default='runs/test', help='save to project/name') + parser.add_argument('--weight', + type=str, + default='', + help='model.pt path(s)') + parser.add_argument('--data', + type=str, + default='data/coco.yaml', + help='*.data path') + parser.add_argument('--batch-size', + type=int, + default=1, + help='size of each image batch') + parser.add_argument('--img-size', + type=int, + default=640, + help='inference size (pixels)') + parser.add_argument('--conf-thres', + type=float, + default=0.001, + help='object confidence threshold') + parser.add_argument('--iou-thres', + type=float, + default=0.65, + help='IOU threshold for NMS') + parser.add_argument('--task', + default='val', + help='train, val, test, speed or study') + parser.add_argument('--device', + default='', + help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--single-cls', + action='store_true', + help='treat as single-class dataset') + parser.add_argument('--augment', + action='store_true', + help='augmented inference') + parser.add_argument('--verbose', + action='store_true', + help='report mAP by class') + parser.add_argument('--save-txt', + action='store_true', + help='save results to *.txt') + parser.add_argument('--save-hybrid', + action='store_true', + help='save label+prediction hybrid results to *.txt') + parser.add_argument('--save-conf', + action='store_true', + help='save confidences in --save-txt labels') + parser.add_argument('--save-json', + action='store_true', + help='save a cocoapi-compatible JSON results file') + parser.add_argument('--project', + default='runs/test', + help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') - parser.add_argument('--v5-metric', action='store_true', help='assume maximum recall as 1.0 in AP calculation') - parser.add_argument('--dy-thres', type=float, default=0.5, help='dynamic threshold') - parser.add_argument('--save-results', action='store_true', help='save results') + parser.add_argument('--exist-ok', + action='store_true', + help='existing project/name ok, do not increment') + parser.add_argument('--v5-metric', + action='store_true', + help='assume maximum recall as 1.0 in AP calculation') + parser.add_argument('--dy-thres', + type=float, + default=0.5, + help='dynamic threshold') + parser.add_argument('--save-results', + action='store_true', + help='save results') opt = parser.parse_args() opt.save_json |= opt.data.endswith('coco.yaml') opt.data = check_file(opt.data) # check file @@ -361,8 +514,7 @@ def test(data, save_conf=opt.save_conf, v5_metric=opt.v5_metric, dy_thres=opt.dy_thres, - save_results=opt.save_results - ) + save_results=opt.save_results) else: - raise 
NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/train_step1.py b/train_step1.py index 486181b..4b1249d 100644 --- a/train_step1.py +++ b/train_step1.py @@ -3,12 +3,11 @@ import math import os import random +import test # import test.py to get mAP after each epoch import time from copy import deepcopy from pathlib import Path from threading import Thread -import yaml -from tqdm import tqdm import numpy as np import torch @@ -17,28 +16,33 @@ import torch.nn.functional as F import torch.optim as optim import torch.optim.lr_scheduler as lr_scheduler +import yaml from torch.cuda import amp from torch.nn.parallel import DistributedDataParallel as DDP from torch.utils.tensorboard import SummaryWriter +from tqdm import tqdm -import test # import test.py to get mAP after each epoch from models.yolo import Model from utils.autoanchor import check_anchors +from utils.checkpoint import get_state_dict from utils.datasets import create_dataloader -from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \ - fitness, strip_optimizer, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \ - set_logging, one_cycle, colorstr +from utils.general import (check_dataset, check_file, check_img_size, colorstr, + fitness, get_latest_run, increment_path, init_seeds, + labels_to_class_weights, labels_to_image_weights, + one_cycle, set_logging, strip_optimizer) from utils.loss import ComputeLoss, ComputeLossOTA, ComputeLossOTADual -from utils.plots import plot_images, plot_results, plot_lr_scheduler -from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, is_parallel +from utils.plots import plot_images, plot_lr_scheduler, plot_results +from utils.torch_utils import (ModelEMA, intersect_dicts, is_parallel, + select_device, torch_distributed_zero_first) from utils.wandb_logging.wandb_utils import WandbLogger, check_wandb_resume -from utils.checkpoint import get_state_dict - logger = logging.getLogger(__name__) + def train(hyp, opt, device, tb_writer=None): - logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) + logger.info( + colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' + for k, v in hyp.items())) save_dir, epochs, batch_size, total_batch_size, weight, rank, freeze = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weight, opt.global_rank, opt.freeze @@ -72,16 +76,23 @@ def train(hyp, opt, device, tb_writer=None): loggers = {'wandb': None} # loggers dict if rank in [-1, 0]: opt.hyp = hyp # add hyperparameters - run_id = torch.load(weight, map_location='cpu')['wandb_id'] if weight.endswith('.pt') and os.path.isfile(weight) else None - wandb_logger = WandbLogger(opt, Path(opt.save_dir).stem, run_id, data_dict) + run_id = torch.load(weight, + map_location='cpu')['wandb_id'] if weight.endswith( + '.pt') and os.path.isfile(weight) else None + wandb_logger = WandbLogger(opt, + Path(opt.save_dir).stem, run_id, data_dict) loggers['wandb'] = wandb_logger.wandb data_dict = wandb_logger.data_dict if wandb_logger.wandb: - weight, epochs, hyp = opt.weight, opt.epochs, opt.hyp # WandbLogger might update weights, epochs if resuming + # WandbLogger might update weights, epochs if resuming + weight, epochs, hyp = opt.weight, opt.epochs, opt.hyp nc = 1 if opt.single_cls else int(data_dict['nc']) # number of classes - names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names'] 
# class names - assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check + names = ['item'] if opt.single_cls and len( + data_dict['names']) != 1 else data_dict['names'] # class names + assert len( + names) == nc, '{:g} names found for nc={:g} dataset in {}'.format( + len(names), nc, opt.data) # check # Model pretrained = weight.endswith('.pt') @@ -89,11 +100,18 @@ def train(hyp, opt, device, tb_writer=None): ckpt = torch.load(weight, map_location='cpu') # load checkpoint state_dict = ckpt['model'] model = Model(opt.cfg, ch=3, nc=nc) # create - exclude = ['anchor'] if (opt.cfg or hyp.get('anchors')) and not opt.resume else [] # exclude keys - state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect + exclude = [ + 'anchor' + ] if (opt.cfg or hyp.get('anchors')) and not opt.resume else [ + ] # exclude keys + state_dict = intersect_dicts(state_dict, + model.state_dict(), + exclude=exclude) # intersect model.load_state_dict(state_dict, strict=False) # load model.to(device) - logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weight)) # report + logger.info( + 'Transferred %g/%g items from %s' % + (len(state_dict), len(model.state_dict()), weight)) # report else: model = Model(opt.cfg, ch=3, nc=nc).to(device) # create with torch_distributed_zero_first(rank): @@ -102,7 +120,10 @@ def train(hyp, opt, device, tb_writer=None): test_path = data_dict['val'] # Freeze - freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # parameter names to freeze (full or partial) + freeze = [ + f'model.{x}.' + for x in (freeze if len(freeze) > 1 else range(freeze[0])) + ] # parameter names to freeze (full or partial) for k, v in model.named_parameters(): v.requires_grad = True # train all layers if any(x in k for x in freeze): @@ -111,7 +132,8 @@ def train(hyp, opt, device, tb_writer=None): # Optimizer nbs = 64 # nominal batch size - accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing + accumulate = max(round(nbs / total_batch_size), + 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay logger.info(f"Scaled weight_decay = {hyp['weight_decay']}") @@ -181,19 +203,33 @@ def train(hyp, opt, device, tb_writer=None): pg0.append(v.rbr_dense.vector) if opt.adam: - optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + optimizer = optim.Adam(pg0, + lr=hyp['lr0'], + betas=(hyp['momentum'], + 0.999)) # adjust beta1 to momentum else: - optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) - - optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay + optimizer = optim.SGD(pg0, + lr=hyp['lr0'], + momentum=hyp['momentum'], + nesterov=True) + + optimizer.add_param_group({ + 'params': pg1, + 'weight_decay': hyp['weight_decay'] + }) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) - logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) + logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % + (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR if opt.linear_lr: - lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + 
hyp['lrf'] # linear + + def get_linear_lr(x): + return (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] + + lf = get_linear_lr else: lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) @@ -217,23 +253,28 @@ def train(hyp, opt, device, tb_writer=None): # Results if ckpt.get('training_results') is not None: - results_file.write_text(ckpt['training_results']) # write results.txt + results_file.write_text( + ckpt['training_results']) # write results.txt # Epochs start_epoch = ckpt['epoch'] + 1 if opt.resume: - assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weight, epochs) + assert start_epoch > 0, '{} training to {:g} epochs is finished, nothing to resume.'.format( + weight, epochs) if epochs < start_epoch: - logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % - (weight, ckpt['epoch'], epochs)) + logger.info( + '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' + % (weight, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt # Image sizes gs = max(int(model.stride.max()), 32) # grid size (max stride) - nl = model.model_h2[-1].nl # number of detection layers (used for scaling hyp['obj']) - imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples + nl = model.model_h2[ + -1].nl # number of detection layers (used for scaling hyp['obj']) + imgsz, imgsz_test = (check_img_size(x, gs) for x in opt.img_size + ) # verify imgsz are gs-multiples # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: @@ -245,20 +286,42 @@ def train(hyp, opt, device, tb_writer=None): logger.info('Using SyncBatchNorm()') # Trainloader - dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, - hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, - world_size=opt.world_size, workers=opt.workers, - image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: ')) + dataloader, dataset = create_dataloader(train_path, + imgsz, + batch_size, + gs, + opt, + hyp=hyp, + augment=True, + cache=opt.cache_images, + rect=opt.rect, + rank=rank, + world_size=opt.world_size, + workers=opt.workers, + image_weights=opt.image_weights, + quad=opt.quad, + prefix=colorstr('train: ')) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches - assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1) + assert mlc < nc, 'Label class {:g} exceeds nc={:g} in {}. 
Possible class labels are 0-{:g}'.format( + mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: - testloader = create_dataloader(test_path, imgsz_test, batch_size * 2, gs, opt, # testloader - hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, - world_size=opt.world_size, workers=opt.workers, - pad=0.5, prefix=colorstr('val: '))[0] + testloader = create_dataloader( + test_path, + imgsz_test, + batch_size * 2, + gs, + opt, # testloader + hyp=hyp, + cache=opt.cache_images and not opt.notest, + rect=True, + rank=-1, + world_size=opt.world_size, + workers=opt.workers, + pad=0.5, + prefix=colorstr('val: '))[0] if not opt.resume: labels = np.concatenate(dataset.labels, 0) @@ -271,32 +334,44 @@ def train(hyp, opt, device, tb_writer=None): # Anchors if not opt.noautoanchor: - check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) + check_anchors(dataset, + model=model, + thr=hyp['anchor_t'], + imgsz=imgsz) model.half().float() # pre-reduce anchor precision # DDP mode if cuda and rank != -1: - model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank, - # nn.MultiheadAttention incompatibility with DDP https://github.com/pytorch/pytorch/issues/26698 - find_unused_parameters=any(isinstance(layer, nn.MultiheadAttention) for layer in model.modules())) + model = DDP( + model, + device_ids=[opt.local_rank], + output_device=opt.local_rank, + # nn.MultiheadAttention incompatibility with DDP https://github.com/pytorch/pytorch/issues/26698 + find_unused_parameters=any( + isinstance(layer, nn.MultiheadAttention) + for layer in model.modules())) # Model parameters hyp['box'] *= 3. / nl # scale to layers hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers - hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers + hyp['obj'] *= (imgsz / 640)**2 * 3. 
/ nl # scale to image size and layers hyp['label_smoothing'] = opt.label_smoothing model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) - model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights + model.class_weights = labels_to_class_weights( + dataset.labels, nc).to(device) * nc # attach class weights model.names = names # Start training t0 = time.time() - nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations) - # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training + nw = max( + round(hyp['warmup_epochs'] * nb), + 1000) # number of warm up iterations, max(3 epochs, 1k iterations) + # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warm up to < 1/2 of training maps = np.zeros(nc) # mAP per class - results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + results = (0, 0, 0, 0, 0, 0, 0 + ) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) if opt.single_backbone: @@ -308,19 +383,27 @@ def train(hyp, opt, device, tb_writer=None): f'Using {dataloader.num_workers} dataloader workers\n' f'Logging results to {save_dir}\n' f'Starting training for {epochs} epochs...') - for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ + for epoch in range( + start_epoch, epochs + ): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if opt.image_weights: # Generate indices if rank in [-1, 0]: - cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights - iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights - dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx + cw = model.class_weights.cpu().numpy() * ( + 1 - maps)**2 / nc # class weights + iw = labels_to_image_weights(dataset.labels, + nc=nc, + class_weights=cw) # image weights + dataset.indices = random.choices( + range(dataset.n), weights=iw, + k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: - indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() + indices = (torch.tensor(dataset.indices) + if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() @@ -333,37 +416,55 @@ def train(hyp, opt, device, tb_writer=None): if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) - logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'labels', 'img_size')) + logger.info( + ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', + 'total', 'labels', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() - for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- + for i, ( + imgs, targets, paths, _ + ) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) - imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 + imgs = imgs.to(device, non_blocking=True).float( + ) / 255.0 # 
uint8 to float32, 0-255 to 0.0-1.0 - # Warmup + # Warm up if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) - accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) + accumulate = max( + 1, + np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 - x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) + x['lr'] = np.interp(ni, xi, [ + hyp['warmup_bias_lr'] if j == 2 else 0.0, + x['initial_lr'] * lf(epoch) + ]) if 'momentum' in x: - x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) + x['momentum'] = np.interp( + ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: - sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size + sz = random.randrange(imgsz * 0.5, + imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: - ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) - imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) + ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:] + ] # new shape (stretched to gs-multiple) + imgs = F.interpolate(imgs, + size=ns, + mode='bilinear', + align_corners=False) # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward - loss, loss_items = compute_loss_ota(pred, targets.to(device), imgs) # loss scaled by batch_size + loss, loss_items = compute_loss_ota( + pred, targets.to(device), + imgs) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode if opt.quad: @@ -382,25 +483,34 @@ def train(hyp, opt, device, tb_writer=None): # Print if rank in [-1, 0]: - mloss = (mloss * i + loss_items) / (i + 1) # update mean losses - mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) - s = ('%10s' * 2 + '%10.4g' * 6) % ( - '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) + mloss = (mloss * i + loss_items) / (i + 1 + ) # update mean losses + mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 + if torch.cuda.is_available() else 0) # (GB) + s = ('%10s' * 2 + '%10.4g' * 6) % ('{:g}/{:g}'.format( + epoch, + epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if plots and ni < 10: f = save_dir / f'train_batch{ni}.jpg' # filename - Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() + Thread(target=plot_images, + args=(imgs, targets, paths, f), + daemon=True).start() # if tb_writer: # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(torch.jit.trace(model, imgs, strict=False), []) # add model graph elif plots and ni == 10 and wandb_logger.wandb: - wandb_logger.log({"Mosaics": [wandb_logger.wandb.Image(str(x), caption=x.name) for x in - save_dir.glob('train*.jpg') if x.exists()]}) + wandb_logger.log({ + 'Mosaics': [ + wandb_logger.wandb.Image(str(x), caption=x.name) + for x in save_dir.glob('train*.jpg') if x.exists() + ] + }) - # end batch ------------------------------------------------------------------------------------------------ - # end epoch ---------------------------------------------------------------------------------------------------- + # end batch 
---------------------------------------------------------------------------------------------- + # end epoch -------------------------------------------------------------------------------------------------- # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard @@ -409,7 +519,11 @@ def train(hyp, opt, device, tb_writer=None): # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP - ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights']) + ema.update_attr(model, + include=[ + 'yaml', 'nc', 'hyp', 'gr', 'names', 'stride', + 'class_weights' + ]) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP wandb_logger.current_epoch = epoch + 1 @@ -420,7 +534,8 @@ def train(hyp, opt, device, tb_writer=None): single_cls=opt.single_cls, dataloader=testloader, save_dir=save_dir, - verbose=nc < 50 and final_epoch, + verbose=nc < 50 + and final_epoch, plots=plots and final_epoch, wandb_logger=wandb_logger, compute_loss=compute_loss, @@ -429,15 +544,28 @@ def train(hyp, opt, device, tb_writer=None): # Write with open(results_file, 'a') as f: - f.write(s + '%10.4g' * 7 % results + '\n') # append metrics, val_loss + f.write(s + '%10.4g' * 7 % results + + '\n') # append metrics, val_loss if len(opt.name) and opt.bucket: - os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) + os.system('gsutil cp %s gs://%s/results/results%s.txt' % + (results_file, opt.bucket, opt.name)) # Log - tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss - 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', - 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss - 'x/lr0', 'x/lr1', 'x/lr2'] # params + tags = [ + 'train/box_loss', + 'train/obj_loss', + 'train/cls_loss', # train loss + 'metrics/precision', + 'metrics/recall', + 'metrics/mAP_0.5', + 'metrics/mAP_0.5:0.95', + 'val/box_loss', + 'val/obj_loss', + 'val/cls_loss', # val loss + 'x/lr0', + 'x/lr1', + 'x/lr2' + ] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): if tb_writer: tb_writer.add_scalar(tag, x, epoch) # tensorboard @@ -445,52 +573,77 @@ def train(hyp, opt, device, tb_writer=None): wandb_logger.log({tag: x}) # W&B # Update best mAP - fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + fi = fitness(np.array(results).reshape( + 1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] if fi > best_fitness: best_fitness = fi wandb_logger.end_epoch(best_result=best_fitness == fi) # Save model if (not opt.nosave) or final_epoch: # if save - ckpt = {'epoch': epoch, - 'best_fitness': best_fitness, - 'training_results': results_file.read_text(), - 'model': get_state_dict(deepcopy(model.module if is_parallel(model) else model).half()), - 'ema': get_state_dict(deepcopy(ema.ema).half()), - 'updates': ema.updates, - 'optimizer': optimizer.state_dict(), - 'wandb_id': wandb_logger.wandb_run.id if wandb_logger.wandb else None} + ckpt = { + 'epoch': + epoch, + 'best_fitness': + best_fitness, + 'training_results': + results_file.read_text(), + 'model': + get_state_dict( + deepcopy(model.module if is_parallel(model) else model + ).half()), + 'ema': + get_state_dict(deepcopy(ema.ema).half()), + 'updates': + ema.updates, + 'optimizer': + optimizer.state_dict(), + 'wandb_id': + wandb_logger.wandb_run.id if wandb_logger.wandb else None + } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: 
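
One note on the selection criterion in this save block: `best.pt` is refreshed whenever the scalar `fi = fitness(...)` improves. The helper itself is not shown in this diff; a hedged sketch that mirrors the YOLOv5/YOLOv7-family `utils.general.fitness`, whose default weights favor mAP@0.5:0.95 (0.9) over mAP@0.5 (0.1) and ignore P and R:

```python
# Hedged sketch of the fitness criterion used above to pick best.pt;
# weights are the YOLOv5/YOLOv7-lineage defaults, not confirmed by this diff.
import numpy as np

def fitness_sketch(x):
    # x has shape (1, 4): [P, R, mAP@0.5, mAP@0.5:0.95]
    w = np.array([0.0, 0.0, 0.1, 0.9])  # weights per metric column
    return (x[:, :4] * w).sum(1)

results = np.array([[0.70, 0.60, 0.55, 0.35]])  # hypothetical epoch metrics
print(fitness_sketch(results))  # [0.37] -> compared against best_fitness
```

Under those weights a run is "best" almost entirely on mAP@0.5:0.95; precision and recall affect the choice only indirectly, through the mAP columns.
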
torch.save(ckpt, best) if (best_fitness == fi) and (epoch >= 200): - torch.save(ckpt, wdir / 'best_{:03d}.pt'.format(epoch)) + torch.save(ckpt, wdir / f'best_{epoch:03d}.pt') if epoch == 0: - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) - elif ((epoch+1) % 25) == 0: - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) - elif epoch >= (epochs-5): - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') + elif ((epoch + 1) % 25) == 0: + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') + elif epoch >= (epochs - 5): + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') if wandb_logger.wandb: - if ((epoch + 1) % opt.save_period == 0 and not final_epoch) and opt.save_period != -1: - wandb_logger.log_model( - last.parent, opt, epoch, fi, best_model=best_fitness == fi) + if ((epoch + 1) % opt.save_period == 0 + and not final_epoch) and opt.save_period != -1: + wandb_logger.log_model(last.parent, + opt, + epoch, + fi, + best_model=best_fitness == fi) del ckpt - # end epoch ---------------------------------------------------------------------------------------------------- + # end epoch -------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Plots if plots: plot_results(save_dir=save_dir) # save as results.png if wandb_logger.wandb: - files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]] - wandb_logger.log({"Results": [wandb_logger.wandb.Image(str(save_dir / f), caption=f) for f in files - if (save_dir / f).exists()]}) + files = [ + 'results.png', 'confusion_matrix.png', + *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')] + ] + wandb_logger.log({ + 'Results': [ + wandb_logger.wandb.Image(str(save_dir / f), caption=f) + for f in files if (save_dir / f).exists() + ] + }) # Test best.pt - logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) + logger.info('%g epochs completed in %.3f hours.\n' % + (epoch - start_epoch + 1, (time.time() - t0) / 3600)) # Strip optimizers final = best if best.exists() else last # final model for f in last, best: @@ -499,9 +652,11 @@ def train(hyp, opt, device, tb_writer=None): if opt.bucket: os.system(f'gsutil cp {final} gs://{opt.bucket}/weights') # upload if wandb_logger.wandb: # Log the stripped model - wandb_logger.wandb.log_artifact(str(final), type='model', - name='run_' + wandb_logger.wandb_run.id + '_model', - aliases=['last', 'best', 'stripped']) + wandb_logger.wandb.log_artifact( + str(final), + type='model', + name='run_' + wandb_logger.wandb_run.id + '_model', + aliases=['last', 'best', 'stripped']) wandb_logger.finish_run() else: dist.destroy_process_group() @@ -512,66 +667,152 @@ def train(hyp, opt, device, tb_writer=None): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--weight', type=str, default='', help='initial weights path') - parser.add_argument('--data', type=str, default='data/coco.yaml', help='data.yaml path') - parser.add_argument('--hyp', type=str, default='hyp/hyp.scratch.p5.yaml', help='hyperparameters path') + parser.add_argument('--weight', + type=str, + default='', + help='initial weights path') + parser.add_argument('--data', + type=str, + default='data/coco.yaml', + help='data.yaml path') + parser.add_argument('--hyp', + type=str, + default='hyp/hyp.scratch.p5.yaml', + 
help='hyperparameters path') parser.add_argument('--epochs', type=int, default=300) - parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs') - parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes') - parser.add_argument('--rect', action='store_true', help='rectangular training') - parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') - parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') - parser.add_argument('--notest', action='store_true', help='only test final epoch') - parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check') + parser.add_argument('--batch-size', + type=int, + default=16, + help='total batch size for all GPUs') + parser.add_argument('--img-size', + nargs='+', + type=int, + default=[640, 640], + help='[train, test] image sizes') + parser.add_argument('--rect', + action='store_true', + help='rectangular training') + parser.add_argument('--resume', + nargs='?', + const=True, + default=False, + help='resume most recent training') + parser.add_argument('--nosave', + action='store_true', + help='only save final checkpoint') + parser.add_argument('--notest', + action='store_true', + help='only test final epoch') + parser.add_argument('--noautoanchor', + action='store_true', + help='disable autoanchor check') parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') - parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') - parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') - parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') - parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') - parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') - parser.add_argument('--single-backbone', action='store_true', help='train single backbone model') - parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer') - parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') - parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') - parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers') - parser.add_argument('--project', default='runs/train', help='save to project/name') + parser.add_argument('--cache-images', + action='store_true', + help='cache images for faster training') + parser.add_argument('--image-weights', + action='store_true', + help='use weighted image selection for training') + parser.add_argument('--device', + default='', + help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') + parser.add_argument('--multi-scale', + action='store_true', + help='vary img-size +/- 50%%') + parser.add_argument('--single-cls', + action='store_true', + help='train multi-class data as single-class') + parser.add_argument('--single-backbone', + action='store_true', + help='train single backbone model') + parser.add_argument('--adam', + action='store_true', + help='use torch.optim.Adam() optimizer') + parser.add_argument('--sync-bn', + action='store_true', + help='use SyncBatchNorm, only available in DDP mode') + parser.add_argument('--local_rank', + type=int, + default=-1, + help='DDP parameter, do not modify') + parser.add_argument('--workers', + type=int, + default=8, + help='maximum number of dataloader workers') + parser.add_argument('--project', + default='runs/train', + help='save to project/name') parser.add_argument('--entity', default=None, help='W&B entity') parser.add_argument('--name', default='exp', help='save to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--exist-ok', + action='store_true', + help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') parser.add_argument('--linear-lr', action='store_true', help='linear LR') - parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') - parser.add_argument('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table') - parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B') - parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch') - parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used') - parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone of yolov7=50, first3=0 1 2') - parser.add_argument('--v5-metric', action='store_true', help='assume maximum recall as 1.0 in AP calculation') + parser.add_argument('--label-smoothing', + type=float, + default=0.0, + help='Label smoothing epsilon') + parser.add_argument('--upload_dataset', + action='store_true', + help='Upload dataset as W&B artifact table') + parser.add_argument('--bbox_interval', + type=int, + default=-1, + help='Set bounding-box image logging interval for W&B') + parser.add_argument('--save_period', + type=int, + default=-1, + help='Log model after every "save_period" epoch') + parser.add_argument('--artifact_alias', + type=str, + default='latest', + help='version of dataset artifact to be used') + parser.add_argument( + '--freeze', + nargs='+', + type=int, + default=[0], + help='Freeze layers: backbone of yolov7=50, first3=0 1 2') + parser.add_argument('--v5-metric', + action='store_true', + help='assume maximum recall as 1.0 in AP calculation') opt = parser.parse_args() # Set DDP variables - opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 + opt.world_size = int( + os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1 set_logging(opt.global_rank) # Resume wandb_run = check_wandb_resume(opt) if opt.resume and not wandb_run: # resume an interrupted run - ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path - assert os.path.isfile(ckpt), 
'ERROR: --resume checkpoint does not exist' + ckpt = opt.resume if isinstance( + opt.resume, + str) else get_latest_run() # specified or most recent path + assert os.path.isfile( + ckpt), 'ERROR: --resume checkpoint does not exist' apriori = opt.global_rank, opt.local_rank with open(Path(ckpt).parent.parent / 'opt.yaml') as f: - opt = argparse.Namespace(**yaml.load(f, Loader=yaml.SafeLoader)) # replace - opt.cfg, opt.weight, opt.resume = os.path.relpath(Path(ckpt).parent.parent / 'cfg.yaml'), ckpt, True - opt.batch_size, opt.global_rank, opt.local_rank = opt.total_batch_size, *apriori # reinstate + opt = argparse.Namespace(**yaml.load( + f, Loader=yaml.SafeLoader)) # replace + opt.cfg, opt.weight, opt.resume = os.path.relpath( + Path(ckpt).parent.parent / 'cfg.yaml'), ckpt, True + opt.batch_size, opt.global_rank, opt.local_rank = \ + opt.total_batch_size, *apriori # reinstate opt.save_dir = os.path.relpath(Path(ckpt).parent.parent) logger.info('Resuming training from %s' % ckpt) else: # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml') - opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files + opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file( + opt.cfg), check_file(opt.hyp) # check files assert len(opt.cfg), 'cfg must be specified' - opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) - opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok) # increment run + opt.img_size.extend( + [opt.img_size[-1]] * + (2 - len(opt.img_size))) # extend to 2 sizes (train, test) + opt.save_dir = increment_path(Path(opt.project) / opt.name, + exist_ok=opt.exist_ok) # increment run # DDP mode opt.total_batch_size = opt.batch_size @@ -580,7 +821,8 @@ def train(hyp, opt, device, tb_writer=None): assert torch.cuda.device_count() > opt.local_rank torch.cuda.set_device(opt.local_rank) device = torch.device('cuda', opt.local_rank) - dist.init_process_group(backend='nccl', init_method='env://') # distributed backend + dist.init_process_group(backend='nccl', + init_method='env://') # distributed backend assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count' opt.batch_size = opt.total_batch_size // opt.world_size @@ -593,6 +835,8 @@ def train(hyp, opt, device, tb_writer=None): tb_writer = None # init loggers if opt.global_rank in [-1, 0]: prefix = colorstr('tensorboard: ') - logger.info(f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/") + logger.info( + f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/" + ) tb_writer = SummaryWriter(opt.save_dir) # Tensorboard train(hyp, opt, device, tb_writer) diff --git a/train_step2.py b/train_step2.py index 912e16f..0c82463 100644 --- a/train_step2.py +++ b/train_step2.py @@ -3,12 +3,11 @@ import math import os import random +import test # import test.py to get mAP after each epoch import time from copy import deepcopy from pathlib import Path from threading import Thread -import yaml -from tqdm import tqdm import numpy as np import torch @@ -17,28 +16,33 @@ import torch.nn.functional as F import torch.optim as optim import torch.optim.lr_scheduler as lr_scheduler +import yaml from torch.cuda import amp from torch.nn.parallel import DistributedDataParallel as DDP from torch.utils.tensorboard import SummaryWriter +from tqdm import tqdm -import test # import test.py to get mAP after each 
epoch from models.yolo import Model from utils.autoanchor import check_anchors +from utils.checkpoint import get_state_dict from utils.datasets import create_dataloader -from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \ - fitness, strip_optimizer, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \ - set_logging, colorstr +from utils.general import (check_dataset, check_file, check_img_size, colorstr, + fitness, get_latest_run, increment_path, init_seeds, + labels_to_class_weights, labels_to_image_weights, + set_logging, strip_optimizer) from utils.loss import ComputeLoss, ComputeLossOTADy -from utils.plots import plot_images, plot_results, plot_lr_scheduler -from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, is_parallel +from utils.plots import plot_images, plot_lr_scheduler, plot_results +from utils.torch_utils import (ModelEMA, intersect_dicts, is_parallel, + select_device, torch_distributed_zero_first) from utils.wandb_logging.wandb_utils import WandbLogger, check_wandb_resume -from utils.checkpoint import get_state_dict - logger = logging.getLogger(__name__) + def train(hyp, opt, device, tb_writer=None): - logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) + logger.info( + colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' + for k, v in hyp.items())) save_dir, epochs, batch_size, total_batch_size, weight, rank, freeze = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weight, opt.global_rank, opt.freeze @@ -72,16 +76,23 @@ def train(hyp, opt, device, tb_writer=None): loggers = {'wandb': None} # loggers dict if rank in [-1, 0]: opt.hyp = hyp # add hyperparameters - run_id = torch.load(weight, map_location='cpu')['wandb_id'] if weight.endswith('.pt') and os.path.isfile(weight) else None - wandb_logger = WandbLogger(opt, Path(opt.save_dir).stem, run_id, data_dict) + run_id = torch.load(weight, + map_location='cpu')['wandb_id'] if weight.endswith( + '.pt') and os.path.isfile(weight) else None + wandb_logger = WandbLogger(opt, + Path(opt.save_dir).stem, run_id, data_dict) loggers['wandb'] = wandb_logger.wandb data_dict = wandb_logger.data_dict if wandb_logger.wandb: - weight, epochs, hyp = opt.weight, opt.epochs, opt.hyp # WandbLogger might update weights, epochs if resuming + # WandbLogger might update weights, epochs if resuming + weight, epochs, hyp = opt.weight, opt.epochs, opt.hyp nc = 1 if opt.single_cls else int(data_dict['nc']) # number of classes - names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names - assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check + names = ['item'] if opt.single_cls and len( + data_dict['names']) != 1 else data_dict['names'] # class names + assert len( + names) == nc, '{:g} names found for nc={:g} dataset in {}'.format( + len(names), nc, opt.data) # check # Model pretrained = weight.endswith('.pt') @@ -89,11 +100,18 @@ def train(hyp, opt, device, tb_writer=None): ckpt = torch.load(weight, map_location='cpu') # load checkpoint state_dict = ckpt['model'] model = Model(opt.cfg, ch=3, nc=nc) # create - exclude = ['anchor'] if (opt.cfg or hyp.get('anchors')) and not opt.resume else [] # exclude keys - state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect + exclude = [ + 'anchor' + ] if (opt.cfg or hyp.get('anchors')) and not 
opt.resume else [ + ] # exclude keys + state_dict = intersect_dicts(state_dict, + model.state_dict(), + exclude=exclude) # intersect model.load_state_dict(state_dict, strict=False) # load model.to(device) - logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weight)) # report + logger.info( + 'Transferred %g/%g items from %s' % + (len(state_dict), len(model.state_dict()), weight)) # report else: model = Model(opt.cfg, ch=3, nc=nc).to(device) # create with torch_distributed_zero_first(rank): @@ -102,7 +120,10 @@ def train(hyp, opt, device, tb_writer=None): test_path = data_dict['val'] # Freeze - freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # parameter names to freeze (full or partial) + freeze = [ + f'model.{x}.' + for x in (freeze if len(freeze) > 1 else range(freeze[0])) + ] # parameter names to freeze (full or partial) for k, v in model.named_parameters(): v.requires_grad = True # train all layers if any(x in k for x in freeze): @@ -117,7 +138,8 @@ def train(hyp, opt, device, tb_writer=None): # Optimizer nbs = 64 # nominal batch size - accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing + accumulate = max(round(nbs / total_batch_size), + 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay logger.info(f"Scaled weight_decay = {hyp['weight_decay']}") @@ -132,19 +154,37 @@ def train(hyp, opt, device, tb_writer=None): pg1.append(v.weight) # apply decay if opt.adam: - optimizer = optim.AdamW(pg1, lr=hyp['lr0'], weight_decay=hyp['weight_decay'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + optimizer = optim.AdamW(pg1, + lr=hyp['lr0'], + weight_decay=hyp['weight_decay'], + betas=(hyp['momentum'], + 0.999)) # adjust beta1 to momentum else: - optimizer = optim.SGD(pg1, lr=hyp['lr0'], weight_decay=hyp['weight_decay'], momentum=hyp['momentum'], nesterov=True) + optimizer = optim.SGD(pg1, + lr=hyp['lr0'], + weight_decay=hyp['weight_decay'], + momentum=hyp['momentum'], + nesterov=True) if len(pg0): - optimizer.add_param_group({'params': pg0, 'weight_decay': 0}) # add pg0 without weight_decay + optimizer.add_param_group({ + 'params': pg0, + 'weight_decay': 0 + }) # add pg0 without weight_decay if len(pg2): - optimizer.add_param_group({'params': pg2, 'weight_decay': 0}) # add pg2 (biases) - logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) + optimizer.add_param_group({ + 'params': pg2, + 'weight_decay': 0 + }) # add pg2 (biases) + logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % + (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR - lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear + def get_linear_lr(x): + return (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] + + lf = get_linear_lr scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) plot_lr_scheduler(optimizer, scheduler, epochs, save_dir) @@ -167,23 +207,28 @@ def train(hyp, opt, device, tb_writer=None): # Results if ckpt.get('training_results') is not None: - results_file.write_text(ckpt['training_results']) # write results.txt + results_file.write_text( + ckpt['training_results']) # write results.txt # Epochs start_epoch = ckpt['epoch'] + 1 if opt.resume: - assert start_epoch > 0, '%s training to %g epochs is 
finished, nothing to resume.' % (weight, epochs) + assert start_epoch > 0, '{} training to {:g} epochs is finished, nothing to resume.'.format( + weight, epochs) if epochs < start_epoch: - logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % - (weight, ckpt['epoch'], epochs)) + logger.info( + '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' + % (weight, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt # Image sizes gs = max(int(model.stride.max()), 32) # grid size (max stride) - nl = model.model_h2[-1].nl # number of detection layers (used for scaling hyp['obj']) - imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples + nl = model.model_h2[ + -1].nl # number of detection layers (used for scaling hyp['obj']) + imgsz, imgsz_test = (check_img_size(x, gs) for x in opt.img_size + ) # verify imgsz are gs-multiples # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: @@ -195,21 +240,43 @@ def train(hyp, opt, device, tb_writer=None): logger.info('Using SyncBatchNorm()') # Trainloader - dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, - hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, - world_size=opt.world_size, workers=opt.workers, - image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: ')) + dataloader, dataset = create_dataloader(train_path, + imgsz, + batch_size, + gs, + opt, + hyp=hyp, + augment=True, + cache=opt.cache_images, + rect=opt.rect, + rank=rank, + world_size=opt.world_size, + workers=opt.workers, + image_weights=opt.image_weights, + quad=opt.quad, + prefix=colorstr('train: ')) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches - assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1) + assert mlc < nc, 'Label class {:g} exceeds nc={:g} in {}. 
Possible class labels are 0-{:g}'.format( + mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: test_batch_size = 1 - testloader = create_dataloader(test_path, imgsz_test, test_batch_size, gs, opt, # testloader - hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, - world_size=opt.world_size, workers=opt.workers, - pad=0.5, prefix=colorstr('val: '))[0] + testloader = create_dataloader( + test_path, + imgsz_test, + test_batch_size, + gs, + opt, # testloader + hyp=hyp, + cache=opt.cache_images and not opt.notest, + rect=True, + rank=-1, + world_size=opt.world_size, + workers=opt.workers, + pad=0.5, + prefix=colorstr('val: '))[0] if not opt.resume: labels = np.concatenate(dataset.labels, 0) @@ -222,32 +289,44 @@ def train(hyp, opt, device, tb_writer=None): # Anchors if not opt.noautoanchor: - check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) + check_anchors(dataset, + model=model, + thr=hyp['anchor_t'], + imgsz=imgsz) model.half().float() # pre-reduce anchor precision # DDP mode if cuda and rank != -1: - model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank, - # nn.MultiheadAttention incompatibility with DDP https://github.com/pytorch/pytorch/issues/26698 - find_unused_parameters=any(isinstance(layer, nn.MultiheadAttention) for layer in model.modules())) + model = DDP( + model, + device_ids=[opt.local_rank], + output_device=opt.local_rank, + # nn.MultiheadAttention incompatibility with DDP https://github.com/pytorch/pytorch/issues/26698 + find_unused_parameters=any( + isinstance(layer, nn.MultiheadAttention) + for layer in model.modules())) # Model parameters hyp['box'] *= 3. / nl # scale to layers hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers - hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers + hyp['obj'] *= (imgsz / 640)**2 * 3. 
/ nl # scale to image size and layers hyp['label_smoothing'] = opt.label_smoothing model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) - model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights + model.class_weights = labels_to_class_weights( + dataset.labels, nc).to(device) * nc # attach class weights model.names = names # Start training t0 = time.time() - nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations) - # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training + nw = max( + round(hyp['warmup_epochs'] * nb), + 1000) # number of warm up iterations, max(3 epochs, 1k iterations) + # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warm up to < 1/2 of training maps = np.zeros(nc) # mAP per class - results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + results = (0, 0, 0, 0, 0, 0, 0 + ) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) compute_loss_ota_dy = ComputeLossOTADy(model) @@ -256,7 +335,9 @@ def train(hyp, opt, device, tb_writer=None): f'Using {dataloader.num_workers} dataloader workers\n' f'Logging results to {save_dir}\n' f'Starting training for {epochs} epochs...') - for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ + for epoch in range( + start_epoch, epochs + ): # epoch ------------------------------------------------------------------ model.train() model.close_all_bn() @@ -264,12 +345,18 @@ def train(hyp, opt, device, tb_writer=None): if opt.image_weights: # Generate indices if rank in [-1, 0]: - cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights - iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights - dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx + cw = model.class_weights.cpu().numpy() * ( + 1 - maps)**2 / nc # class weights + iw = labels_to_image_weights(dataset.labels, + nc=nc, + class_weights=cw) # image weights + dataset.indices = random.choices( + range(dataset.n), weights=iw, + k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: - indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() + indices = (torch.tensor(dataset.indices) + if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() @@ -282,37 +369,55 @@ def train(hyp, opt, device, tb_writer=None): if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) - logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'diff', 'score.0', 'score.1', 'total', 'labels', 'img_size')) + logger.info( + ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'diff', 'score.0', + 'score.1', 'total', 'labels', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() - for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- + for i, ( + imgs, targets, paths, _ + ) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) - imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to 
float32, 0-255 to 0.0-1.0 + imgs = imgs.to(device, non_blocking=True).float( + ) / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 - # Warmup + # Warm up if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) - accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) + accumulate = max( + 1, + np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 - x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) + x['lr'] = np.interp(ni, xi, [ + hyp['warmup_bias_lr'] if j == 2 else 0.0, + x['initial_lr'] * lf(epoch) + ]) if 'momentum' in x: - x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) + x['momentum'] = np.interp( + ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: - sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size + sz = random.randrange(imgsz * 0.5, + imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: - ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) - imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) + ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:] + ] # new shape (stretched to gs-multiple) + imgs = F.interpolate(imgs, + size=ns, + mode='bilinear', + align_corners=False) # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward - loss, loss_items = compute_loss_ota_dy(pred, targets.to(device), imgs) # loss scaled by batch_size + loss, loss_items = compute_loss_ota_dy( + pred, targets.to(device), + imgs) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode if opt.quad: @@ -331,25 +436,34 @@ def train(hyp, opt, device, tb_writer=None): # Print if rank in [-1, 0]: - mloss = (mloss * i + loss_items) / (i + 1) # update mean losses - mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) - s = ('%10s' * 2 + '%10.4g' * 6) % ( - '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) + mloss = (mloss * i + loss_items) / (i + 1 + ) # update mean losses + mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 + if torch.cuda.is_available() else 0) # (GB) + s = ('%10s' * 2 + '%10.4g' * 6) % ('{:g}/{:g}'.format( + epoch, + epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if plots and ni < 10: f = save_dir / f'train_batch{ni}.jpg' # filename - Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() + Thread(target=plot_images, + args=(imgs, targets, paths, f), + daemon=True).start() # if tb_writer: # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(torch.jit.trace(model, imgs, strict=False), []) # add model graph elif plots and ni == 10 and wandb_logger.wandb: - wandb_logger.log({"Mosaics": [wandb_logger.wandb.Image(str(x), caption=x.name) for x in - save_dir.glob('train*.jpg') if x.exists()]}) + wandb_logger.log({ + 'Mosaics': [ + wandb_logger.wandb.Image(str(x), caption=x.name) + for x in save_dir.glob('train*.jpg') if x.exists() + ] + }) - # end batch ------------------------------------------------------------------------------------------------ - # end epoch 
---------------------------------------------------------------------------------------------------- + # end batch ---------------------------------------------------------------------------------------------- + # end epoch -------------------------------------------------------------------------------------------------- # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard @@ -358,7 +472,11 @@ def train(hyp, opt, device, tb_writer=None): # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP - ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights']) + ema.update_attr(model, + include=[ + 'yaml', 'nc', 'hyp', 'gr', 'names', 'stride', + 'class_weights' + ]) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP wandb_logger.current_epoch = epoch + 1 @@ -370,7 +488,8 @@ def train(hyp, opt, device, tb_writer=None): single_cls=opt.single_cls, dataloader=testloader, save_dir=save_dir, - verbose=nc < 50 and final_epoch, + verbose=nc < 50 + and final_epoch, plots=plots and final_epoch, wandb_logger=wandb_logger, compute_loss=compute_loss, @@ -379,15 +498,28 @@ def train(hyp, opt, device, tb_writer=None): # Write with open(results_file, 'a') as f: - f.write(s + '%10.4g' * 7 % results + '\n') # append metrics, val_loss + f.write(s + '%10.4g' * 7 % results + + '\n') # append metrics, val_loss if len(opt.name) and opt.bucket: - os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) + os.system('gsutil cp %s gs://%s/results/results%s.txt' % + (results_file, opt.bucket, opt.name)) # Log - tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss - 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', - 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss - 'x/lr0', 'x/lr1', 'x/lr2'] # params + tags = [ + 'train/box_loss', + 'train/obj_loss', + 'train/cls_loss', # train loss + 'metrics/precision', + 'metrics/recall', + 'metrics/mAP_0.5', + 'metrics/mAP_0.5:0.95', + 'val/box_loss', + 'val/obj_loss', + 'val/cls_loss', # val loss + 'x/lr0', + 'x/lr1', + 'x/lr2' + ] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): if tb_writer: tb_writer.add_scalar(tag, x, epoch) # tensorboard @@ -395,52 +527,77 @@ def train(hyp, opt, device, tb_writer=None): wandb_logger.log({tag: x}) # W&B # Update best mAP - fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + fi = fitness(np.array(results).reshape( + 1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] if fi > best_fitness: best_fitness = fi wandb_logger.end_epoch(best_result=best_fitness == fi) # Save model if (not opt.nosave) or final_epoch: # if save - ckpt = {'epoch': epoch, - 'best_fitness': best_fitness, - 'training_results': results_file.read_text(), - 'model': get_state_dict(deepcopy(model.module if is_parallel(model) else model).half()), - 'ema': get_state_dict(deepcopy(ema.ema).half()), - 'updates': ema.updates, - 'optimizer': optimizer.state_dict(), - 'wandb_id': wandb_logger.wandb_run.id if wandb_logger.wandb else None} + ckpt = { + 'epoch': + epoch, + 'best_fitness': + best_fitness, + 'training_results': + results_file.read_text(), + 'model': + get_state_dict( + deepcopy(model.module if is_parallel(model) else model + ).half()), + 'ema': + get_state_dict(deepcopy(ema.ema).half()), + 'updates': + ema.updates, + 'optimizer': + optimizer.state_dict(), + 'wandb_id': + wandb_logger.wandb_run.id if 
wandb_logger.wandb else None + } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) if (best_fitness == fi) and (epoch >= 200): - torch.save(ckpt, wdir / 'best_{:03d}.pt'.format(epoch)) + torch.save(ckpt, wdir / f'best_{epoch:03d}.pt') if epoch == 0: - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) - elif ((epoch+1) % 25) == 0: - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) - elif epoch >= (epochs-5): - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') + elif ((epoch + 1) % 25) == 0: + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') + elif epoch >= (epochs - 5): + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') if wandb_logger.wandb: - if ((epoch + 1) % opt.save_period == 0 and not final_epoch) and opt.save_period != -1: - wandb_logger.log_model( - last.parent, opt, epoch, fi, best_model=best_fitness == fi) + if ((epoch + 1) % opt.save_period == 0 + and not final_epoch) and opt.save_period != -1: + wandb_logger.log_model(last.parent, + opt, + epoch, + fi, + best_model=best_fitness == fi) del ckpt - # end epoch ---------------------------------------------------------------------------------------------------- + # end epoch -------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Plots if plots: plot_results(save_dir=save_dir) # save as results.png if wandb_logger.wandb: - files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]] - wandb_logger.log({"Results": [wandb_logger.wandb.Image(str(save_dir / f), caption=f) for f in files - if (save_dir / f).exists()]}) + files = [ + 'results.png', 'confusion_matrix.png', + *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')] + ] + wandb_logger.log({ + 'Results': [ + wandb_logger.wandb.Image(str(save_dir / f), caption=f) + for f in files if (save_dir / f).exists() + ] + }) # Test best.pt - logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) + logger.info('%g epochs completed in %.3f hours.\n' % + (epoch - start_epoch + 1, (time.time() - t0) / 3600)) # Strip optimizers final = best if best.exists() else last # final model for f in last, best: @@ -449,9 +606,11 @@ def train(hyp, opt, device, tb_writer=None): if opt.bucket: os.system(f'gsutil cp {final} gs://{opt.bucket}/weights') # upload if wandb_logger.wandb: # Log the stripped model - wandb_logger.wandb.log_artifact(str(final), type='model', - name='run_' + wandb_logger.wandb_run.id + '_model', - aliases=['last', 'best', 'stripped']) + wandb_logger.wandb.log_artifact( + str(final), + type='model', + name='run_' + wandb_logger.wandb_run.id + '_model', + aliases=['last', 'best', 'stripped']) wandb_logger.finish_run() else: dist.destroy_process_group() @@ -462,64 +621,147 @@ def train(hyp, opt, device, tb_writer=None): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--weight', type=str, default='', help='initial weights path') - parser.add_argument('--data', type=str, default='data/coco.yaml', help='data.yaml path') - parser.add_argument('--hyp', type=str, default='hyp/hyp.scratch.p5.yaml', help='hyperparameters path') + parser.add_argument('--weight', + type=str, + default='', + help='initial weights path') + parser.add_argument('--data', + type=str, + default='data/coco.yaml', + 
help='data.yaml path') + parser.add_argument('--hyp', + type=str, + default='hyp/hyp.scratch.p5.yaml', + help='hyperparameters path') parser.add_argument('--epochs', type=int, default=2) - parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs') - parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes') - parser.add_argument('--rect', action='store_true', help='rectangular training') - parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') - parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') - parser.add_argument('--notest', action='store_true', help='only test final epoch') - parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check') + parser.add_argument('--batch-size', + type=int, + default=1, + help='total batch size for all GPUs') + parser.add_argument('--img-size', + nargs='+', + type=int, + default=[640, 640], + help='[train, test] image sizes') + parser.add_argument('--rect', + action='store_true', + help='rectangular training') + parser.add_argument('--resume', + nargs='?', + const=True, + default=False, + help='resume most recent training') + parser.add_argument('--nosave', + action='store_true', + help='only save final checkpoint') + parser.add_argument('--notest', + action='store_true', + help='only test final epoch') + parser.add_argument('--noautoanchor', + action='store_true', + help='disable autoanchor check') parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') - parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') - parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') - parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') - parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') - parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') - parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer') - parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') - parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') - parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers') - parser.add_argument('--project', default='runs/train', help='save to project/name') + parser.add_argument('--cache-images', + action='store_true', + help='cache images for faster training') + parser.add_argument('--image-weights', + action='store_true', + help='use weighted image selection for training') + parser.add_argument('--device', + default='', + help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') + parser.add_argument('--multi-scale', + action='store_true', + help='vary img-size +/- 50%%') + parser.add_argument('--single-cls', + action='store_true', + help='train multi-class data as single-class') + parser.add_argument('--adam', + action='store_true', + help='use torch.optim.Adam() optimizer') + parser.add_argument('--sync-bn', + action='store_true', + help='use SyncBatchNorm, only available in DDP mode') + parser.add_argument('--local_rank', + type=int, + default=-1, + help='DDP parameter, do not modify') + parser.add_argument('--workers', + type=int, + default=8, + help='maximum number of dataloader workers') + parser.add_argument('--project', + default='runs/train', + help='save to project/name') parser.add_argument('--entity', default=None, help='W&B entity') parser.add_argument('--name', default='exp', help='save to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--exist-ok', + action='store_true', + help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') - parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') - parser.add_argument('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table') - parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B') - parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch') - parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used') - parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone of yolov7=50, first3=0 1 2') - parser.add_argument('--v5-metric', action='store_true', help='assume maximum recall as 1.0 in AP calculation') + parser.add_argument('--label-smoothing', + type=float, + default=0.0, + help='Label smoothing epsilon') + parser.add_argument('--upload_dataset', + action='store_true', + help='Upload dataset as W&B artifact table') + parser.add_argument('--bbox_interval', + type=int, + default=-1, + help='Set bounding-box image logging interval for W&B') + parser.add_argument('--save_period', + type=int, + default=-1, + help='Log model after every "save_period" epoch') + parser.add_argument('--artifact_alias', + type=str, + default='latest', + help='version of dataset artifact to be used') + parser.add_argument( + '--freeze', + nargs='+', + type=int, + default=[0], + help='Freeze layers: backbone of yolov7=50, first3=0 1 2') + parser.add_argument('--v5-metric', + action='store_true', + help='assume maximum recall as 1.0 in AP calculation') opt = parser.parse_args() # Set DDP variables - opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 + opt.world_size = int( + os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1 set_logging(opt.global_rank) # Resume wandb_run = check_wandb_resume(opt) if opt.resume and not wandb_run: # resume an interrupted run - ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path - assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' + ckpt = opt.resume if isinstance( + opt.resume, + str) else get_latest_run() # specified or most recent path + assert os.path.isfile( + 
ckpt), 'ERROR: --resume checkpoint does not exist' apriori = opt.global_rank, opt.local_rank with open(Path(ckpt).parent.parent / 'opt.yaml') as f: - opt = argparse.Namespace(**yaml.load(f, Loader=yaml.SafeLoader)) # replace - opt.cfg, opt.weight, opt.resume = os.path.relpath(Path(ckpt).parent.parent / 'cfg.yaml'), ckpt, True + opt = argparse.Namespace(**yaml.load( + f, Loader=yaml.SafeLoader)) # replace + opt.cfg, opt.weight, opt.resume = os.path.relpath( + Path(ckpt).parent.parent / 'cfg.yaml'), ckpt, True opt.batch_size, opt.global_rank, opt.local_rank = opt.total_batch_size, *apriori # reinstate opt.save_dir = os.path.relpath(Path(ckpt).parent.parent) logger.info('Resuming training from %s' % ckpt) else: # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml') - opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files + opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file( + opt.cfg), check_file(opt.hyp) # check files assert len(opt.cfg), 'cfg must be specified' - opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) - opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok) # increment run + opt.img_size.extend( + [opt.img_size[-1]] * + (2 - len(opt.img_size))) # extend to 2 sizes (train, test) + opt.save_dir = increment_path(Path(opt.project) / opt.name, + exist_ok=opt.exist_ok) # increment run # DDP mode opt.total_batch_size = opt.batch_size @@ -530,7 +772,8 @@ def train(hyp, opt, device, tb_writer=None): assert torch.cuda.device_count() > opt.local_rank torch.cuda.set_device(opt.local_rank) device = torch.device('cuda', opt.local_rank) - dist.init_process_group(backend='nccl', init_method='env://') # distributed backend + dist.init_process_group(backend='nccl', + init_method='env://') # distributed backend assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count' opt.batch_size = opt.total_batch_size // opt.world_size @@ -543,6 +786,8 @@ def train(hyp, opt, device, tb_writer=None): tb_writer = None # init loggers if opt.global_rank in [-1, 0]: prefix = colorstr('tensorboard: ') - logger.info(f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/") + logger.info( + f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/" + ) tb_writer = SummaryWriter(opt.save_dir) # Tensorboard train(hyp, opt, device, tb_writer) diff --git a/utils/__init__.py b/utils/__init__.py index 84952a8..a6131c1 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -1 +1 @@ -# init \ No newline at end of file +# init diff --git a/utils/autoanchor.py b/utils/autoanchor.py index 18a6049..53decdd 100644 --- a/utils/autoanchor.py +++ b/utils/autoanchor.py @@ -24,10 +24,15 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640): # Check anchor fit to data, recompute if necessary prefix = colorstr('autoanchor: ') print(f'\n{prefix}Analyzing anchors... 
', end='') - m = model.module.model_h2[-1] if hasattr(model, 'module') else model.model_h2[-1] # Detect() + m = model.module.model_h2[-1] if hasattr( + model, 'module') else model.model_h2[-1] # Detect() shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) - scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale - wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh + scale = np.random.uniform(0.9, 1.1, + size=(shapes.shape[0], 1)) # augment scale + wh = torch.tensor( + np.concatenate([ + l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels) + ])).float() # wh def metric(k): # compute metric r = wh[:, None] / k[None] @@ -39,42 +44,61 @@ def metric(k): # compute metric anchors = m.anchor_grid.clone().cpu().view(-1, 2) # current anchors bpr, aat = metric(anchors) - print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') + print( + f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', + end='') if bpr < 0.98: # threshold to recompute print('. Attempting to improve anchors, please wait...') na = m.anchor_grid.numel() // 2 # number of anchors try: - anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) + anchors = kmean_anchors(dataset, + n=na, + img_size=imgsz, + thr=thr, + gen=1000, + verbose=False) except Exception as e: print(f'{prefix}ERROR: {e}') new_bpr = metric(anchors)[0] if new_bpr > bpr: # replace anchors - anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) - m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference + anchors = torch.tensor(anchors, + device=m.anchors.device).type_as(m.anchors) + m.anchor_grid[:] = anchors.clone().view_as( + m.anchor_grid) # for inference check_anchor_order(m) - m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss - print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') + m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to( + m.anchors.device).view(-1, 1, 1) # loss + print( + f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.' + ) else: - print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.') + print( + f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.' + ) print('') # newline -def kmean_anchors(path='./data/coco.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): - """ Creates kmeans-evolved anchors from training dataset +def kmean_anchors(path='./data/coco.yaml', + n=9, + img_size=640, + thr=4.0, + gen=1000, + verbose=True): + """Creates kmeans-evolved anchors from training dataset. 
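Aside: the `check_anchors` hunk above gates anchor re-estimation on best possible recall (BPR < 0.98). A minimal sketch of that metric, reusing the `wh` (per-label width/height tensor) and `thr` (`hyp['anchor_t']`) names from the surrounding code; this is an illustration only, not part of the diff:

```python
import torch

def bpr_metric(k, wh, thr=4.0):
    """Score an anchor set k (n, 2) against label sizes wh (m, 2). Sketch only."""
    r = wh[:, None] / k[None]                   # (m, n, 2) label-to-anchor ratios
    x = torch.min(r, 1. / r).min(2)[0]          # symmetric ratio, worst dimension
    best = x.max(1)[0]                          # best anchor per label
    aat = (x > 1. / thr).float().sum(1).mean()  # anchors above threshold per target
    bpr = (best > 1. / thr).float().mean()      # best possible recall
    return bpr, aat
```

With the 0.98 cutoff above, `kmean_anchors` only runs when more than about 2% of labels lack an anchor within the ratio threshold.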
- Arguments: - path: path to dataset *.yaml, or a loaded dataset - n: number of anchors - img_size: image size used for training - thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 - gen: generations to evolve anchors using genetic algorithm - verbose: print all results + Arguments: + path: path to dataset *.yaml, or a loaded dataset + n: number of anchors + img_size: image size used for training + thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 + gen: generations to evolve anchors using genetic algorithm + verbose: print all results - Return: - k: kmeans evolved anchors + Return: + k: kmeans evolved anchors - Usage: - from utils.autoanchor import *; _ = kmean_anchors() + Usage: + from utils.autoanchor import *; _ = kmean_anchors() """ thr = 1. / thr prefix = colorstr('autoanchor: ') @@ -92,30 +116,41 @@ def anchor_fitness(k): # mutation fitness def print_results(k): k = k[np.argsort(k.prod(1))] # sort small to large x, best = metric(k, wh0) - bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr - print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') - print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' - f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') + bpr, aat = (best > thr).float().mean(), ( + x > thr).float().mean() * n # best possible recall, anch > thr + print( + f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr' + ) + print( + f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' + f'past_thr={x[x > thr].mean():.3f}-mean: ', + end='') for i, x in enumerate(k): - print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg + print('%i,%i' % (round(x[0]), round(x[1])), + end=', ' if i < len(k) - 1 else '\n') # use in *.cfg return k if isinstance(path, str): # *.yaml file with open(path) as f: data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict from utils.datasets import LoadImagesAndLabels - dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) + dataset = LoadImagesAndLabels(data_dict['train'], + augment=True, + rect=True) else: dataset = path # dataset # Get label wh shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) - wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh + wh0 = np.concatenate( + [l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh # Filter i = (wh0 < 3.0).any(1).sum() if i: - print(f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.') + print( + f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.' 
+ ) wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 @@ -123,7 +158,9 @@ def print_results(k): print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') s = wh.std(0) # sigmas for whitening k, dist = kmeans(wh / s, n, iter=30) # points, mean distance - assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}') + assert len(k) == n, print( + f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}' + ) k *= s wh = torch.tensor(wh, dtype=torch.float32) # filtered wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered @@ -143,12 +180,17 @@ def print_results(k): # Evolve npr = np.random - f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma - pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar + f, sh, mp, s = anchor_fitness( + k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma + pbar = tqdm(range(gen), + desc=f'{prefix}Evolving anchors with Genetic Algorithm:' + ) # progress bar for _ in pbar: v = np.ones(sh) - while (v == 1).all(): # mutate until a change occurs (prevent duplicates) - v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) + while (v == 1 + ).all(): # mutate until a change occurs (prevent duplicates) + v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + + 1).clip(0.3, 3.0) kg = (k.copy() * v).clip(min=2.0) fg = anchor_fitness(kg) if fg > f: diff --git a/utils/checkpoint.py b/utils/checkpoint.py index 2b7fd82..b054659 100644 --- a/utils/checkpoint.py +++ b/utils/checkpoint.py @@ -1,7 +1,9 @@ from collections import OrderedDict + def weights_to_cpu(state_dict): """Copy a model state_dict to cpu. + Args: state_dict (OrderedDict): Model weights on GPU. 
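For context on the `utils/checkpoint.py` hunk: the diff only reflows the docstring, but per that docstring the helper copies a GPU state_dict to CPU so checkpoints can be loaded without a GPU present. A hedged sketch of a plausible body (the implementation itself is not shown in this hunk):

```python
from collections import OrderedDict

def weights_to_cpu(state_dict):
    """Copy a model state_dict to CPU (sketch only; real body not in this hunk)."""
    state_dict_cpu = OrderedDict()
    for key, val in state_dict.items():
        state_dict_cpu[key] = val.cpu()  # move each tensor off the GPU
    return state_dict_cpu
```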
Returns: diff --git a/utils/datasets.py b/utils/datasets.py index 940e668..798bfed 100644 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -16,24 +16,23 @@ import numpy as np import torch import torch.nn.functional as F -from PIL import Image, ExifTags +from PIL import ExifTags, Image from torch.utils.data import Dataset +# from pycocotools import mask as maskUtils from tqdm import tqdm -import pickle -from copy import deepcopy -#from pycocotools import mask as maskUtils -from torchvision.utils import save_image -from torchvision.ops import roi_pool, roi_align, ps_roi_pool, ps_roi_align - -from utils.general import check_requirements, xyxy2xywh, xywh2xyxy, xywhn2xyxy, xyn2xy, segment2box, segments2boxes, \ - resample_segments, clean_str +from utils.general import (check_requirements, clean_str, resample_segments, + segment2box, segments2boxes, xyn2xy, xywh2xyxy, + xywhn2xyxy, xyxy2xywh) from utils.torch_utils import torch_distributed_zero_first # Parameters help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' -img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo'] # acceptable image suffixes -vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes +img_formats = [ + 'bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo' +] # acceptable image suffixes +vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', + 'mkv'] # acceptable video suffixes logger = logging.getLogger(__name__) # Get orientation exif tag @@ -56,30 +55,51 @@ def exif_size(img): s = (s[1], s[0]) elif rotation == 8: # rotation 90 s = (s[1], s[0]) - except: + except Exception: pass return s -def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False, - rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix=''): +def create_dataloader(path, + imgsz, + batch_size, + stride, + opt, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + world_size=1, + workers=8, + image_weights=False, + quad=False, + prefix=''): # Make sure only the first process in DDP process the dataset first, and the following others can use the cache with torch_distributed_zero_first(rank): - dataset = LoadImagesAndLabels(path, imgsz, batch_size, - augment=augment, # augment images - hyp=hyp, # augmentation hyperparameters - rect=rect, # rectangular training - cache_images=cache, - single_cls=opt.single_cls, - stride=int(stride), - pad=pad, - image_weights=image_weights, - prefix=prefix) + dataset = LoadImagesAndLabels( + path, + imgsz, + batch_size, + augment=augment, # augment images + hyp=hyp, # augmentation hyperparameters + rect=rect, # rectangular training + cache_images=cache, + single_cls=opt.single_cls, + stride=int(stride), + pad=pad, + image_weights=image_weights, + prefix=prefix) batch_size = min(batch_size, len(dataset)) - nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers - sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None + nw = min([ + os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, + workers + ]) # number of workers + sampler = torch.utils.data.distributed.DistributedSampler( + dataset) if rank != -1 else None loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader() dataloader = 
loader(dataset, @@ -87,19 +107,21 @@ def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=Fa num_workers=nw, sampler=sampler, pin_memory=True, - collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn) + collate_fn=LoadImagesAndLabels.collate_fn4 + if quad else LoadImagesAndLabels.collate_fn) return dataloader, dataset class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader): - """ Dataloader that reuses workers + """Dataloader that reuses workers. Uses same syntax as vanilla DataLoader """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler)) + object.__setattr__(self, 'batch_sampler', + _RepeatSampler(self.batch_sampler)) self.iterator = super().__iter__() def __len__(self): @@ -110,8 +132,8 @@ def __iter__(self): yield next(self.iterator) -class _RepeatSampler(object): - """ Sampler that repeats forever +class _RepeatSampler: + """Sampler that repeats forever. Args: sampler (Sampler) @@ -126,6 +148,7 @@ def __iter__(self): class LoadImages: # for inference + def __init__(self, path, img_size=640, stride=32): p = str(Path(path).absolute()) # os-agnostic absolute path if '*' in p: @@ -178,14 +201,16 @@ def __next__(self): ret_val, img0 = self.cap.read() self.frame += 1 - print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.nframes}) {path}: ', end='') + print( + f'video {self.count + 1}/{self.nf} ({self.frame}/{self.nframes}) {path}: ', + end='') else: # Read image self.count += 1 img0 = cv2.imread(path) # BGR assert img0 is not None, 'Image Not Found ' + path - #print(f'image {self.count}/{self.nf} {path}: ', end='') + # print(f'image {self.count}/{self.nf} {path}: ', end='') # Padded resize img = letterbox(img0, self.img_size, stride=self.stride)[0] @@ -206,6 +231,7 @@ def __len__(self): class LoadWebcam: # for inference + def __init__(self, pipe='0', img_size=640, stride=32): self.img_size = img_size self.stride = stride @@ -264,28 +290,34 @@ def __len__(self): class LoadStreams: # multiple IP or RTSP cameras + def __init__(self, sources='streams.txt', img_size=640, stride=32): self.mode = 'stream' self.img_size = img_size self.stride = stride if os.path.isfile(sources): - with open(sources, 'r') as f: - sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())] + with open(sources) as f: + sources = [ + x.strip() for x in f.read().strip().splitlines() + if len(x.strip()) + ] else: sources = [sources] n = len(sources) self.imgs = [None] * n - self.sources = [clean_str(x) for x in sources] # clean source names for later + self.sources = [clean_str(x) + for x in sources] # clean source names for later for i, s in enumerate(sources): # Start the thread to read frames from the video stream print(f'{i + 1}/{n}: {s}... 
', end='') url = eval(s) if s.isnumeric() else s - if 'youtube.com/' in str(url) or 'youtu.be/' in str(url): # if source is YouTube video + if 'youtube.com/' in str(url) or 'youtu.be/' in str( + url): # if source is YouTube video check_requirements(('pafy', 'youtube_dl')) import pafy - url = pafy.new(url).getbest(preftype="mp4").url + url = pafy.new(url).getbest(preftype='mp4').url cap = cv2.VideoCapture(url) assert cap.isOpened(), f'Failed to open {s}' w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) @@ -299,10 +331,16 @@ def __init__(self, sources='streams.txt', img_size=640, stride=32): print('') # newline # check for common shapes - s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0) # shapes - self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal + s = np.stack([ + letterbox(x, self.img_size, stride=self.stride)[0].shape + for x in self.imgs + ], 0) # shapes + self.rect = np.unique( + s, axis=0).shape[0] == 1 # rect inference if all shapes equal if not self.rect: - print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.') + print( + 'WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.' + ) def update(self, index, cap): # Read next stream frame in a daemon thread @@ -329,13 +367,17 @@ def __next__(self): raise StopIteration # Letterbox - img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0] + img = [ + letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] + for x in img0 + ] # Stack img = np.stack(img, 0) # Convert - img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416 + img = img[:, :, :, ::-1].transpose(0, 3, 1, + 2) # BGR to RGB, to bsx3x416x416 img = np.ascontiguousarray(img) return self.sources, img, img0, None @@ -347,12 +389,27 @@ def __len__(self): def img2label_paths(img_paths): # Define label paths as a function of image paths sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings - return ['txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths] + return [ + 'txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) + for x in img_paths + ] class LoadImagesAndLabels(Dataset): # for training/testing - def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''): + + def __init__(self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0.0, + prefix=''): self.img_size = img_size self.augment = augment self.hyp = hyp @@ -362,7 +419,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r self.mosaic_border = [-img_size // 2, -img_size // 2] self.stride = stride self.path = path - #self.albumentations = Albumentations() if augment else None + # self.albumentations = Albumentations() if augment else None try: f = [] # image files @@ -372,35 +429,48 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r f += glob.glob(str(p / '**' / '*.*'), recursive=True) # f = list(p.rglob('**/*.*')) # pathlib elif p.is_file(): # file - with open(p, 'r') as t: + with open(p) as t: t = t.read().strip().splitlines() parent = str(p.parent) + os.sep - f += [x.replace('./', parent) if x.startswith('./') else x 
for x in t] # local to global path + f += [ + x.replace('./', parent) + if x.startswith('./') else x for x in t + ] # local to global path # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) else: raise Exception(f'{prefix}{p} does not exist') - self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats]) + self.img_files = sorted([ + x.replace('/', os.sep) for x in f + if x.split('.')[-1].lower() in img_formats + ]) # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats]) # pathlib assert self.img_files, f'{prefix}No images found' except Exception as e: - raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}') + raise Exception( + f'{prefix}Error loading data from {path}: {e}\nSee {help_url}') # Check cache self.label_files = img2label_paths(self.img_files) # labels - cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels + cache_path = (p if p.is_file() else Path( + self.label_files[0]).parent).with_suffix('.cache') # cached labels if cache_path.is_file(): cache, exists = torch.load(cache_path), True # load - #if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed - # cache, exists = self.cache_labels(cache_path, prefix), False # re-cache + # if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed + # cache, exists = self.cache_labels(cache_path, prefix), False # re-cache else: - cache, exists = self.cache_labels(cache_path, prefix), False # cache + cache, exists = self.cache_labels(cache_path, + prefix), False # cache # Display cache - nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupted, total + nf, nm, ne, nc, n = cache.pop( + 'results') # found, missing, empty, corrupted, total if exists: d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted" - tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results - assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}' + tqdm(None, desc=prefix + d, total=n, + initial=n) # display cache results + assert nf > 0 or not augment, ( + f'{prefix}No labels in {cache_path}. Can not train without labels. 
See {help_url}' + ) # Read cache cache.pop('hash') # remove hash @@ -443,18 +513,25 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r elif mini > 1: shapes[i] = [1, 1 / mini] - self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride + self.batch_shapes = np.ceil( + np.array(shapes) * img_size / stride + pad).astype( + np.int) * stride # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) self.imgs = [None] * n if cache_images: if cache_images == 'disk': - self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy') - self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files] + self.im_cache_dir = Path( + Path(self.img_files[0]).parent.as_posix() + '_npy') + self.img_npy = [ + self.im_cache_dir / Path(f).with_suffix('.npy').name + for f in self.img_files + ] self.im_cache_dir.mkdir(parents=True, exist_ok=True) gb = 0 # Gigabytes of cached images self.img_hw0, self.img_hw = [None] * n, [None] * n - results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) + results = ThreadPool(8).imap(lambda x: load_image(*x), + zip(repeat(self), range(n))) pbar = tqdm(enumerate(results), total=n) for i, x in pbar: if cache_images == 'disk': @@ -471,7 +548,9 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''): # Cache dataset labels, check images and read shapes x = {} # dict nm, nf, ne, nc = 0, 0, 0, 0 # number missing, found, empty, duplicate - pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files)) + pbar = tqdm(zip(self.img_files, self.label_files), + desc='Scanning images', + total=len(self.img_files)) for i, (im_file, lb_file) in enumerate(pbar): try: # verify images @@ -479,41 +558,59 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''): im.verify() # PIL verify shape = exif_size(im) # image size segments = [] # instance segments - assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels' - assert im.format.lower() in img_formats, f'invalid image format {im.format}' + assert (shape[0] > 9) & (shape[1] > + 9), f'image size {shape} <10 pixels' + assert im.format.lower( + ) in img_formats, f'invalid image format {im.format}' # verify labels if os.path.isfile(lb_file): nf += 1 # label found - with open(lb_file, 'r') as f: - l = [x.split() for x in f.read().strip().splitlines()] - if any([len(x) > 8 for x in l]): # is segment - classes = np.array([x[0] for x in l], dtype=np.float32) - segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...) - l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) - l = np.array(l, dtype=np.float32) - if len(l): - assert l.shape[1] == 5, 'labels require 5 columns each' - assert (l >= 0).all(), 'negative labels' - assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels' - assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels' + with open(lb_file) as f: + cur_l = [ + x.split() for x in f.read().strip().splitlines() + ] + if any([len(x) > 8 for x in cur_l]): # is segment + classes = np.array([x[0] for x in cur_l], + dtype=np.float32) + segments = [ + np.array(x[1:], + dtype=np.float32).reshape(-1, 2) + for x in cur_l + ] # (cls, xy1...) 
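+                        # segments2boxes() below collapses each polygon row
+                        # (cls x1 y1 x2 y2 ...) into a (cls, xywh) box, so
+                        # segment labels pass the same 5-column checks as
+                        # plain box labels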
+ cur_l = np.concatenate((classes.reshape( + -1, 1), segments2boxes(segments)), + 1) # (cls, xywh) + cur_l = np.array(cur_l, dtype=np.float32) + if len(cur_l): + assert cur_l.shape[ + 1] == 5, 'labels require 5 columns each' + assert (cur_l >= 0).all(), 'negative labels' + assert (cur_l[:, 1:] <= 1).all( + ), 'non-normalized or out of bounds coordinate labels' + assert np.unique( + cur_l, axis=0 + ).shape[0] == cur_l.shape[0], 'duplicate labels' else: ne += 1 # label empty - l = np.zeros((0, 5), dtype=np.float32) + cur_l = np.zeros((0, 5), dtype=np.float32) else: nm += 1 # label missing - l = np.zeros((0, 5), dtype=np.float32) - x[im_file] = [l, shape, segments] + cur_l = np.zeros((0, 5), dtype=np.float32) + x[im_file] = [cur_l, shape, segments] except Exception as e: nc += 1 - print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}') + print( + f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}' + ) pbar.desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels... " \ - f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted" + f'{nf} found, {nm} missing, {ne} empty, {nc} corrupted' pbar.close() if nf == 0: - print(f'{prefix}WARNING: No labels found in {path}. See {help_url}') + print( + f'{prefix}WARNING: No labels found in {path}. See {help_url}') x['hash'] = get_hash(self.label_files + self.img_files) x['results'] = nf, nm, ne, nc, i + 1 @@ -547,9 +644,13 @@ def __getitem__(self, index): # MixUp https://arxiv.org/pdf/1710.09412.pdf if random.random() < hyp['mixup']: if random.random() < 0.8: - img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1)) + img2, labels2 = load_mosaic( + self, random.randint(0, + len(self.labels) - 1)) else: - img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1)) + img2, labels2 = load_mosaic9( + self, random.randint(0, + len(self.labels) - 1)) r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0 img = (img * r + img2 * (1 - r)).astype(np.uint8) labels = np.concatenate((labels, labels2), 0) @@ -559,29 +660,42 @@ def __getitem__(self, index): img, (h0, w0), (h, w) = load_image(self, index) # Letterbox - shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape - img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) - shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + shape = self.batch_shapes[self.batch[ + index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, + shape, + auto=False, + scaleup=self.augment) + shapes = (h0, w0), ( + (h / h0, w / w0), pad) # for COCO mAP rescaling labels = self.labels[index].copy() if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], + ratio[0] * w, + ratio[1] * h, + padw=pad[0], + padh=pad[1]) if self.augment: # Augment imagespace if not mosaic: - img, labels = random_perspective(img, labels, - degrees=hyp['degrees'], - translate=hyp['translate'], - scale=hyp['scale'], - shear=hyp['shear'], - perspective=hyp['perspective']) - + img, labels = random_perspective( + img, + labels, + degrees=hyp['degrees'], + translate=hyp['translate'], + scale=hyp['scale'], + shear=hyp['shear'], + perspective=hyp['perspective']) - #img, labels = self.albumentations(img, labels) + # img, labels = self.albumentations(img, labels) # Augment colorspace - augment_hsv(img, 
hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) + augment_hsv(img, + hgain=hyp['hsv_h'], + sgain=hyp['hsv_s'], + vgain=hyp['hsv_v']) # Apply cutouts # if random.random() < 0.9: @@ -590,14 +704,17 @@ def __getitem__(self, index): if random.random() < hyp['paste_in']: sample_labels, sample_images, sample_masks = [], [], [] while len(sample_labels) < 30: - sample_labels_, sample_images_, sample_masks_ = load_samples(self, random.randint(0, len(self.labels) - 1)) + sample_labels_, sample_images_, sample_masks_ = load_samples( + self, random.randint(0, + len(self.labels) - 1)) sample_labels += sample_labels_ sample_images += sample_images_ sample_masks += sample_masks_ - #print(len(sample_labels)) + # print(len(sample_labels)) if len(sample_labels) == 0: break - labels = pastein(img, labels, sample_labels, sample_images, sample_masks) + labels = pastein(img, labels, sample_labels, sample_images, + sample_masks) nL = len(labels) # number of labels if nL: @@ -631,8 +748,8 @@ def __getitem__(self, index): @staticmethod def collate_fn(batch): img, label, path, shapes = zip(*batch) # transposed - for i, l in enumerate(label): - l[:, 0] = i # add target image index for build_targets() + for i, cur_l in enumerate(label): + cur_l[:, 0] = i # add target image index for build_targets() return torch.stack(img, 0), torch.cat(label, 0), path, shapes @staticmethod @@ -647,22 +764,28 @@ def collate_fn4(batch): for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW i *= 4 if random.random() < 0.5: - im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[ - 0].type(img[i].type()) - l = label[i] + im = F.interpolate(img[i].unsqueeze(0).float(), + scale_factor=2., + mode='bilinear', + align_corners=False)[0].type(img[i].type()) + cur_l = label[i] else: - im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2) - l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s + im = torch.cat( + (torch.cat((img[i], img[i + 1]), + 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2) + cur_l = torch.cat( + (label[i], label[i + 1] + ho, label[i + 2] + wo, + label[i + 3] + ho + wo), 0) * s img4.append(im) - label4.append(l) + label4.append(cur_l) - for i, l in enumerate(label4): - l[:, 0] = i # add target image index for build_targets() + for i, cur_l in enumerate(label4): + cur_l[:, 0] = i # add target image index for build_targets() return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4 -# Ancillary functions -------------------------------------------------------------------------------------------------- +# Ancillary functions ------------------------------------------------------------------------------------------------ def load_image(self, index): # loads 1 image from dataset, returns img, original hw, resized hw img = self.imgs[index] @@ -674,10 +797,12 @@ def load_image(self, index): r = self.img_size / max(h0, w0) # resize image to img_size if r != 1: # always resize down, only resize up if training with augmentation interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR - img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) + img = cv2.resize(img, (int(w0 * r), int(h0 * r)), + interpolation=interp) return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized else: - return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized + return self.imgs[index], self.img_hw0[index], self.img_hw[ + 
index] # img, hw_original, hw_resized def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): @@ -690,7 +815,8 @@ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) lut_val = np.clip(x * r[2], 0, 255).astype(dtype) - img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype) + img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), + cv2.LUT(val, lut_val))).astype(dtype) cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed @@ -701,8 +827,10 @@ def hist_equalize(img, clahe=True, bgr=False): c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) yuv[:, :, 0] = c.apply(yuv[:, :, 0]) else: - yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram - return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB + yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, + 0]) # equalize Y channel histogram + return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else + cv2.COLOR_YUV2RGB) # convert YUV image to RGB def load_mosaic(self, index): @@ -710,17 +838,22 @@ def load_mosaic(self, index): labels4, segments4 = [], [] s = self.img_size - yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y - indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + yc, xc = (int(random.uniform(-x, 2 * s + x)) + for x in self.mosaic_border) # mosaic center x, y + indices = [index] + random.choices(self.indices, + k=3) # 3 additional image indices for i, index in enumerate(indices): # Load image img, _, (h, w) = load_image(self, index) # place img in img4 if i == 0: # top left - img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, + dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max( + yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - ( + y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h @@ -731,14 +864,18 @@ def load_mosaic(self, index): x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) - img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, + x1b:x2b] # img4[ymin:ymax, xmin:xmax] padw = x1a - x1b padh = y1a - y1b # Labels - labels, segments = self.labels[index].copy(), self.segments[index].copy() + labels, segments = self.labels[index].copy( + ), self.segments[index].copy() if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], w, h, padw, + padh) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padw, padh) for x in segments] labels4.append(labels) segments4.extend(segments) @@ -750,16 +887,22 @@ def load_mosaic(self, index): # img4, labels4 = replicate(img4, labels4) # replicate # Augment - #img4, labels4, segments4 = remove_background(img4, labels4, segments4) - #sample_segments(img4, 
labels4, segments4, probability=self.hyp['copy_paste']) - img4, labels4, segments4 = copy_paste(img4, labels4, segments4, probability=self.hyp['copy_paste']) - img4, labels4 = random_perspective(img4, labels4, segments4, - degrees=self.hyp['degrees'], - translate=self.hyp['translate'], - scale=self.hyp['scale'], - shear=self.hyp['shear'], - perspective=self.hyp['perspective'], - border=self.mosaic_border) # border to remove + # img4, labels4, segments4 = remove_background(img4, labels4, segments4) + # sample_segments(img4, labels4, segments4, probability=self.hyp['copy_paste']) + img4, labels4, segments4 = copy_paste(img4, + labels4, + segments4, + probability=self.hyp['copy_paste']) + img4, labels4 = random_perspective( + img4, + labels4, + segments4, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove return img4, labels4 @@ -769,14 +912,17 @@ def load_mosaic9(self, index): labels9, segments9 = [], [] s = self.img_size - indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices + indices = [index] + random.choices(self.indices, + k=8) # 8 additional image indices + wp = hp = None for i, index in enumerate(indices): # Load image img, _, (h, w) = load_image(self, index) # place img in img9 if i == 0: # center - img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + img9 = np.full((s * 3, s * 3, img.shape[2]), 114, + dtype=np.uint8) # base image with 4 tiles h0, w0 = h, w c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates elif i == 1: # top @@ -797,22 +943,27 @@ def load_mosaic9(self, index): c = s - w, s + h0 - hp - h, s, s + h0 - hp padx, pady = c[:2] - x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords + x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords # Labels - labels, segments = self.labels[index].copy(), self.segments[index].copy() + labels, segments = self.labels[index].copy( + ), self.segments[index].copy() if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], w, h, padx, + pady) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padx, pady) for x in segments] labels9.append(labels) segments9.extend(segments) # Image - img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] + img9[y1:y2, x1:x2] = img[y1 - pady:, + x1 - padx:] # img9[ymin:ymax, xmin:xmax] hp, wp = h, w # height, width previous # Offset - yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y + yc, xc = (int(random.uniform(0, s)) + for _ in self.mosaic_border) # mosaic center x, y img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s] # Concat/clip labels @@ -827,15 +978,21 @@ def load_mosaic9(self, index): # img9, labels9 = replicate(img9, labels9) # replicate # Augment - #img9, labels9, segments9 = remove_background(img9, labels9, segments9) - img9, labels9, segments9 = copy_paste(img9, labels9, segments9, probability=self.hyp['copy_paste']) - img9, labels9 = random_perspective(img9, labels9, segments9, - degrees=self.hyp['degrees'], - translate=self.hyp['translate'], - scale=self.hyp['scale'], - shear=self.hyp['shear'], - perspective=self.hyp['perspective'], - border=self.mosaic_border) # border to remove + # img9, labels9, segments9 = remove_background(img9, labels9, segments9) + img9, labels9, segments9 
= copy_paste(img9, + labels9, + segments9, + probability=self.hyp['copy_paste']) + img9, labels9 = random_perspective( + img9, + labels9, + segments9, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove return img9, labels9 @@ -845,17 +1002,22 @@ def load_samples(self, index): labels4, segments4 = [], [] s = self.img_size - yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y - indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + yc, xc = (int(random.uniform(-x, 2 * s + x)) + for x in self.mosaic_border) # mosaic center x, y + indices = [index] + random.choices(self.indices, + k=3) # 3 additional image indices for i, index in enumerate(indices): # Load image img, _, (h, w) = load_image(self, index) # place img in img4 if i == 0: # top left - img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, + dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max( + yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - ( + y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h @@ -866,14 +1028,18 @@ def load_samples(self, index): x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) - img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, + x1b:x2b] # img4[ymin:ymax, xmin:xmax] padw = x1a - x1b padh = y1a - y1b # Labels - labels, segments = self.labels[index].copy(), self.segments[index].copy() + labels, segments = self.labels[index].copy( + ), self.segments[index].copy() if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], w, h, padw, + padh) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padw, padh) for x in segments] labels4.append(labels) segments4.extend(segments) @@ -885,8 +1051,9 @@ def load_samples(self, index): # img4, labels4 = replicate(img4, labels4) # replicate # Augment - #img4, labels4, segments4 = remove_background(img4, labels4, segments4) - sample_labels, sample_images, sample_masks = sample_segments(img4, labels4, segments4, probability=0.5) + # img4, labels4, segments4 = remove_background(img4, labels4, segments4) + sample_labels, sample_images, sample_masks = sample_segments( + img4, labels4, segments4, probability=0.5) return sample_labels, sample_images, sample_masks @@ -898,13 +1065,14 @@ def copy_paste(img, labels, segments, probability=0.5): h, w, c = img.shape # height, width, channels im_new = np.zeros(img.shape, np.uint8) for j in random.sample(range(n), k=round(probability * n)): - l, s = labels[j], segments[j] - box = w - l[3], l[2], w - l[1], l[4] + cur_l, s = labels[j], segments[j] + box = w - cur_l[3], cur_l[2], w - cur_l[1], cur_l[4] ioa = bbox_ioa(box, labels[:, 
1:5]) # intersection over area if (ioa < 0.30).all(): # allow 30% obscuration of existing labels - labels = np.concatenate((labels, [[l[0], *box]]), 0) + labels = np.concatenate((labels, [[cur_l[0], *box]]), 0) segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) - cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED) + cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, + (255, 255, 255), cv2.FILLED) result = cv2.bitwise_and(src1=img, src2=im_new) result = cv2.flip(result, 1) # augment segments (flip left-right) @@ -922,7 +1090,8 @@ def remove_background(img, labels, segments): im_new = np.zeros(img.shape, np.uint8) img_new = np.ones(img.shape, np.uint8) * 114 for j in range(n): - cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED) + cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, + (255, 255, 255), cv2.FILLED) result = cv2.bitwise_and(src1=img, src2=im_new) @@ -941,25 +1110,29 @@ def sample_segments(img, labels, segments, probability=0.5): if probability and n: h, w, c = img.shape # height, width, channels for j in random.sample(range(n), k=round(probability * n)): - l, s = labels[j], segments[j] - box = l[1].astype(int).clip(0,w-1), l[2].astype(int).clip(0,h-1), l[3].astype(int).clip(0,w-1), l[4].astype(int).clip(0,h-1) + cur_l, _ = labels[j], segments[j] + box = cur_l[1].astype(int).clip( + 0, w - 1), cur_l[2].astype(int).clip( + 0, h - 1), cur_l[3].astype(int).clip( + 0, w - 1), cur_l[4].astype(int).clip(0, h - 1) - #print(box) + # print(box) if (box[2] <= box[0]) or (box[3] <= box[1]): continue - sample_labels.append(l[0]) + sample_labels.append(cur_l[0]) mask = np.zeros(img.shape, np.uint8) - cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED) - sample_masks.append(mask[box[1]:box[3],box[0]:box[2],:]) + cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, + (255, 255, 255), cv2.FILLED) + sample_masks.append(mask[box[1]:box[3], box[0]:box[2], :]) result = cv2.bitwise_and(src1=img, src2=mask) i = result > 0 # pixels to replace mask[i] = result[i] # cv2.imwrite('debug.jpg', img) # debug - #print(box) - sample_images.append(mask[box[1]:box[3],box[0]:box[2],:]) + # print(box) + sample_images.append(mask[box[1]:box[3], box[0]:box[2], :]) return sample_labels, sample_images, sample_masks @@ -973,15 +1146,24 @@ def replicate(img, labels): for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices x1b, y1b, x2b, y2b = boxes[i] bh, bw = y2b - y1b, x2b - x1b - yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y + yc, xc = int(random.uniform(0, h - bh)), int(random.uniform( + 0, w - bw)) # offset x, y x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] - img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] - labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) + img[y1a:y2a, x1a:x2a] = img[y1b:y2b, + x1b:x2b] # img4[ymin:ymax, xmin:xmax] + labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], + axis=0) return img, labels -def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): +def letterbox(img, + new_shape=(640, 640), + color=(114, 114, 114), + auto=True, + scaleFill=False, + scaleup=True, + stride=32): # Resize and pad image while meeting stride-multiple constraints shape = img.shape[:2] # current shape [height, width] if isinstance(new_shape, int): @@ -995,13 +1177,15 @@ def letterbox(img, 
new_shape=(640, 640), color=(114, 114, 114), auto=True, scale # Compute padding ratio = r, r # width, height ratios new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) - dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[ + 1] # wh padding if auto: # minimum rectangle dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding elif scaleFill: # stretch dw, dh = 0.0, 0.0 new_unpad = (new_shape[1], new_shape[0]) - ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + ratio = new_shape[1] / shape[1], new_shape[0] / shape[ + 0] # width, height ratios dw /= 2 # divide padding into 2 sides dh /= 2 @@ -1010,11 +1194,24 @@ def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scale img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) - img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + img = cv2.copyMakeBorder(img, + top, + bottom, + left, + right, + cv2.BORDER_CONSTANT, + value=color) # add border return img, ratio, (dw, dh) -def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, +def random_perspective(img, + targets=(), + segments=(), + degrees=10, + translate=.1, + scale=.1, + shear=10, + perspective=0.0, border=(0, 0)): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] @@ -1029,8 +1226,10 @@ def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, s # Perspective P = np.eye(3) - P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) - P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + P[2, 0] = random.uniform(-perspective, + perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, + perspective) # y perspective (about x) # Rotation and Scale R = np.eye(3) @@ -1042,21 +1241,32 @@ def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, s # Shear S = np.eye(3) - S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) - S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / + 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / + 180) # y shear (deg) # Translation T = np.eye(3) - T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels) - T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) + T[0, 2] = random.uniform(0.5 - translate, + 0.5 + translate) * width # x translation (pixels) + T[1, 2] = random.uniform( + 0.5 - translate, 0.5 + translate) * height # y translation (pixels) # Combined rotation matrix M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT - if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if (border[0] != 0) or (border[1] != + 0) or (M != np.eye(3)).any(): # image changed if perspective: - img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114)) + img = cv2.warpPerspective(img, + M, + dsize=(width, height), + borderValue=(114, 114, 114)) else: # affine - img = cv2.warpAffine(img, M[:2], 
dsize=(width, height), borderValue=(114, 114, 114)) + img = cv2.warpAffine(img, + M[:2], + dsize=(width, height), + borderValue=(114, 114, 114)) # Visualize # import matplotlib.pyplot as plt @@ -1075,40 +1285,56 @@ def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, s xy = np.ones((len(segment), 3)) xy[:, :2] = segment xy = xy @ M.T # transform - xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine + xy = xy[:, : + 2] / xy[:, 2: + 3] if perspective else xy[:, : + 2] # perspective rescale or affine # clip new[i] = segment2box(xy, width, height) else: # warp boxes xy = np.ones((n * 4, 3)) - xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape( + n * 4, 2) # x1y1, x2y2, x1y2, x2y1 xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine + xy = (xy[:, :2] / + xy[:, 2:3] if perspective else xy[:, :2]).reshape( + n, 8) # perspective rescale or affine # create new boxes x = xy[:, [0, 2, 4, 6]] y = xy[:, [1, 3, 5, 7]] - new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T + new = np.concatenate( + (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T # clip new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) # filter candidates - i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) + i = box_candidates(box1=targets[:, 1:5].T * s, + box2=new.T, + area_thr=0.01 if use_segments else 0.10) targets = targets[i] targets[:, 1:5] = new[i] return img, targets -def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) +def box_candidates(box1, + box2, + wh_thr=2, + ar_thr=20, + area_thr=0.1, + eps=1e-16): # box1(4,n), box2(4,n) # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio w1, h1 = box1[2] - box1[0], box1[3] - box1[1] w2, h2 = box2[2] - box2[0], box2[3] - box2[1] ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio - return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates + return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / + (w1 * h1 + eps) > area_thr) & ( + ar < ar_thr) # candidates def bbox_ioa(box1, box2): @@ -1135,7 +1361,9 @@ def cutout(image, labels): h, w = image.shape[:2] # create random masks - scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction + scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [ + 0.03125 + ] * 16 # image size fraction for s in scales: mask_h = random.randint(1, int(h * s)) mask_w = random.randint(1, int(w * s)) @@ -1147,7 +1375,8 @@ def cutout(image, labels): ymax = min(h, ymin + mask_h) # apply random color mask - image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] + image[ymin:ymax, + xmin:xmax] = [random.randint(64, 191) for _ in range(3)] # return unobscured labels if len(labels) and s > 0.03: @@ -1163,7 +1392,9 @@ def pastein(image, labels, sample_labels, sample_images, sample_masks): h, w = image.shape[:2] # create random masks - scales = [0.75] * 2 + [0.5] * 4 + [0.25] * 4 + [0.125] * 4 + [0.0625] * 6 # image size fraction + scales = [0.75] * 2 + [0.5] * 4 + [0.25] * 4 + [0.125] * 4 + [ + 0.0625 + ] * 6 # image size fraction for s in scales: if random.random() < 0.2: 
continue @@ -1182,39 +1413,44 @@ def pastein(image, labels, sample_labels, sample_images, sample_masks): else: ioa = np.zeros(1) - if (ioa < 0.30).all() and len(sample_labels) and (xmax > xmin+20) and (ymax > ymin+20): # allow 30% obscuration of existing labels - sel_ind = random.randint(0, len(sample_labels)-1) - #print(len(sample_labels)) - #print(sel_ind) - #print((xmax-xmin, ymax-ymin)) - #print(image[ymin:ymax, xmin:xmax].shape) - #print([[sample_labels[sel_ind], *box]]) - #print(labels.shape) + if (ioa < 0.30 + ).all() and len(sample_labels) and (xmax > xmin + 20) and ( + ymax > ymin + 20): # allow 30% obscuration of existing labels + sel_ind = random.randint(0, len(sample_labels) - 1) + # print(len(sample_labels)) + # print(sel_ind) + # print((xmax-xmin, ymax-ymin)) + # print(image[ymin:ymax, xmin:xmax].shape) + # print([[sample_labels[sel_ind], *box]]) + # print(labels.shape) hs, ws, cs = sample_images[sel_ind].shape - r_scale = min((ymax-ymin)/hs, (xmax-xmin)/ws) - r_w = int(ws*r_scale) - r_h = int(hs*r_scale) + r_scale = min((ymax - ymin) / hs, (xmax - xmin) / ws) + r_w = int(ws * r_scale) + r_h = int(hs * r_scale) if (r_w > 10) and (r_h > 10): r_mask = cv2.resize(sample_masks[sel_ind], (r_w, r_h)) r_image = cv2.resize(sample_images[sel_ind], (r_w, r_h)) - temp_crop = image[ymin:ymin+r_h, xmin:xmin+r_w] + temp_crop = image[ymin:ymin + r_h, xmin:xmin + r_w] m_ind = r_mask > 0 if m_ind.astype(np.int).sum() > 60: temp_crop[m_ind] = r_image[m_ind] - #print(sample_labels[sel_ind]) - #print(sample_images[sel_ind].shape) - #print(temp_crop.shape) - box = np.array([xmin, ymin, xmin+r_w, ymin+r_h], dtype=np.float32) + # print(sample_labels[sel_ind]) + # print(sample_images[sel_ind].shape) + # print(temp_crop.shape) + box = np.array([xmin, ymin, xmin + r_w, ymin + r_h], + dtype=np.float32) if len(labels): - labels = np.concatenate((labels, [[sample_labels[sel_ind], *box]]), 0) + labels = np.concatenate( + (labels, [[sample_labels[sel_ind], *box]]), 0) else: labels = np.array([[sample_labels[sel_ind], *box]]) - image[ymin:ymin+r_h, xmin:xmin+r_w] = temp_crop + image[ymin:ymin + r_h, xmin:xmin + r_w] = temp_crop return labels + class Albumentations: # YOLOv5 Albumentations class (optional, only used if package is installed) def __init__(self): @@ -1223,20 +1459,27 @@ def __init__(self): self.transform = A.Compose([ A.CLAHE(p=0.01), - A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.01), + A.RandomBrightnessContrast( + brightness_limit=0.2, contrast_limit=0.2, p=0.01), A.RandomGamma(gamma_limit=[80, 120], p=0.01), A.Blur(p=0.01), A.MedianBlur(p=0.01), A.ToGray(p=0.01), - A.ImageCompression(quality_lower=75, p=0.01),], - bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels'])) + A.ImageCompression(quality_lower=75, p=0.01), + ], + bbox_params=A.BboxParams( + format='pascal_voc', + label_fields=['class_labels'])) - #logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p)) + # logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p)) def __call__(self, im, labels, p=1.0): if self.transform and random.random() < p: - new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed - im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])]) + new = self.transform(image=im, + bboxes=labels[:, 1:], + class_labels=labels[:, 0]) # transformed + im, labels = new['image'], np.array( + [[c, *b] for 
c, b in zip(new['class_labels'], new['bboxes'])]) return im, labels @@ -1255,11 +1498,14 @@ def flatten_recursive(path='../coco'): shutil.copyfile(file, new_path / Path(file).name) -def extract_boxes(path='../coco/'): # from utils.datasets import *; extract_boxes('../coco128') +def extract_boxes( + path='../coco/' +): # from utils.datasets import *; extract_boxes('../coco128') # Convert detection dataset into classification dataset, with one directory per class path = Path(path) # images dir - shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing + shutil.rmtree(path / 'classifier') if ( + path / 'classifier').is_dir() else None # remove existing files = list(path.rglob('*.*')) n = len(files) # number of files for im_file in tqdm(files, total=n): @@ -1271,12 +1517,16 @@ def extract_boxes(path='../coco/'): # from utils.datasets import *; extract_box # labels lb_file = Path(img2label_paths([str(im_file)])[0]) if Path(lb_file).exists(): - with open(lb_file, 'r') as f: - lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels + with open(lb_file) as f: + lb = np.array( + [x.split() for x in f.read().strip().splitlines()], + dtype=np.float32) # labels for j, x in enumerate(lb): c = int(x[0]) # class - f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename + f = ( + path / 'classifier' + ) / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename if not f.parent.is_dir(): f.parent.mkdir(parents=True) @@ -1285,9 +1535,12 @@ def extract_boxes(path='../coco/'): # from utils.datasets import *; extract_box b[2:] = b[2:] * 1.2 + 3 # pad b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) - b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image + b[[0, 2]] = np.clip(b[[0, 2]], 0, + w) # clip boxes outside of image b[[1, 3]] = np.clip(b[[1, 3]], 0, h) - assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}' + assert cv2.imwrite(str(f), + im[b[1]:b[3], + b[0]:b[2]]), f'box failure in {f}' def autosplit(path='../coco', weights=(0.9, 0.1, 0.0), annotated_only=False): @@ -1299,22 +1552,28 @@ def autosplit(path='../coco', weights=(0.9, 0.1, 0.0), annotated_only=False): annotated_only: Only use images with an annotated txt file """ path = Path(path) # images dir - files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in img_formats], []) # image files only + files = sum([list(path.rglob(f'*.{img_ext}')) for img_ext in img_formats], + []) # image files only n = len(files) # number of files - indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split + indices = random.choices([0, 1, 2], weights=weights, + k=n) # assign each image to a split - txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files - [(path / x).unlink() for x in txt if (path / x).exists()] # remove existing + txt = ['autosplit_train.txt', 'autosplit_val.txt', + 'autosplit_test.txt'] # 3 txt files + [(path / x).unlink() for x in txt + if (path / x).exists()] # remove existing - print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only) + print(f'Autosplitting images from {path}' + + ', using *.txt labeled images only' * annotated_only) for i, img in tqdm(zip(indices, files), total=n): - if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label + # check label + if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): with open(path / txt[i], 'a') 
as f:
                 f.write(str(img) + '\n')  # add image to txt file


 def load_segmentations(self, index):
     key = '/work/handsomejw66/coco17/' + self.img_files[index]
-    #print(key)
+    # print(key)
     # /work/handsomejw66/coco17/
     return self.segs[key]
diff --git a/utils/general.py b/utils/general.py
index 1f13fe6..5af0043 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -14,22 +14,25 @@ import pandas as pd
 import torch
 import torchvision
-import yaml

-from utils.metrics import fitness
 from utils.torch_utils import init_torch_seeds

 # Settings
 torch.set_printoptions(linewidth=320, precision=5, profile='long')
-np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
+np.set_printoptions(linewidth=320,
+                    formatter={'float_kind': '{:11.5g}'.format
+                               })  # format short g, %precision=5
 pd.options.display.max_columns = 10
-cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
+cv2.setNumThreads(
+    0
+)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
-os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8))  # NumExpr max threads
+os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(),
+                                            8))  # NumExpr max threads


 def set_logging(rank=-1):
     logging.basicConfig(
-        format="%(message)s",
+        format='%(message)s',
         level=logging.INFO if rank in [-1, 0] else logging.WARN)
@@ -53,14 +56,16 @@ def isdocker():

 def emojis(str=''):
     # Return platform-dependent emoji-safe version of string
-    return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
+    return str.encode().decode(
+        'ascii', 'ignore') if platform.system() == 'Windows' else str


 def check_online():
     # Check internet connectivity
     import socket
     try:
-        socket.create_connection(("1.1.1.1", 443), 5)  # check host accesability
+        socket.create_connection(('1.1.1.1', 443),
+                                 5)  # check host accessibility
         return True
     except OSError:
         return False
@@ -75,9 +80,15 @@ def check_git_status():
         assert check_online(), 'skipping check (offline)'

         cmd = 'git fetch && git config --get remote.origin.url'
-        url = subprocess.check_output(cmd, shell=True).decode().strip().rstrip('.git')  # github repo url
-        branch = subprocess.check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip()  # checked out
-        n = int(subprocess.check_output(f'git rev-list {branch}..origin/master --count', shell=True))  # commits behind
+        url = subprocess.check_output(cmd, shell=True).decode().strip().rstrip(
+            '.git')  # github repo url
+        branch = subprocess.check_output(
+            'git rev-parse --abbrev-ref HEAD',
+            shell=True).decode().strip()  # checked out
+        n = int(
+            subprocess.check_output(
+                f'git rev-list {branch}..origin/master --count',
+                shell=True))  # commits behind
         if n > 0:
             s = f"⚠️ WARNING: code is out of date by {n} commit{'s' * (n > 1)}. " \
                 f"Use 'git pull' to update or 'git clone {url}' to download latest."
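
For readers untangling the reflowed check_git_status() hunk above: the logic amounts to three shell calls. A minimal standalone sketch, assuming a git checkout with an 'origin' remote (the helper name commits_behind is illustrative, not part of the repo):

    import subprocess

    def commits_behind(remote_branch='origin/master'):
        # branch currently checked out
        local = subprocess.check_output('git rev-parse --abbrev-ref HEAD',
                                        shell=True).decode().strip()
        # commits reachable from the remote branch but not from local HEAD
        count = subprocess.check_output(
            f'git rev-list {local}..{remote_branch} --count', shell=True)
        return int(count)

    print(commits_behind())  # 0 when the checkout is up to date
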
@@ -95,9 +106,13 @@ def check_requirements(requirements='requirements.txt', exclude=()): if isinstance(requirements, (str, Path)): # requirements.txt file file = Path(requirements) if not file.exists(): - print(f"{prefix} {file.resolve()} not found, check failed.") + print(f'{prefix} {file.resolve()} not found, check failed.') return - requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(file.open()) if x.name not in exclude] + requirements = [ + f'{x.name}{x.specifier}' + for x in pkg.parse_requirements(file.open()) + if x.name not in exclude + ] else: # list or tuple of packages requirements = [x for x in requirements if x not in exclude] @@ -107,8 +122,10 @@ def check_requirements(requirements='requirements.txt', exclude=()): pkg.require(r) except Exception as e: # DistributionNotFound or VersionConflict if requirements not met n += 1 - print(f"{prefix} {e.req} not found, attempting auto-update...") - print(subprocess.check_output(f"pip install '{e.req}'", shell=True).decode()) + print(f'{prefix} {e.req} not found, attempting auto-update...') + print( + subprocess.check_output(f"pip install '{e.req}'", + shell=True).decode()) if n: # if packages updated source = file.resolve() if 'file' in locals() else requirements @@ -121,21 +138,26 @@ def check_img_size(img_size, s=32): # Verify img_size is a multiple of stride s new_size = make_divisible(img_size, int(s)) # ceil gs-multiple if new_size != img_size: - print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size)) + print( + 'WARNING: --img-size %g must be multiple of max stride %g, updating to %g' + % (img_size, s, new_size)) return new_size def check_imshow(): # Check if environment supports image displays try: - assert not isdocker(), 'cv2.imshow() is disabled in Docker environments' + assert not isdocker( + ), 'cv2.imshow() is disabled in Docker environments' cv2.imshow('test', np.zeros((1, 1, 3))) cv2.waitKey(1) cv2.destroyAllWindows() cv2.waitKey(1) return True except Exception as e: - print(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}') + print( + f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}' + ) return False @@ -146,7 +168,9 @@ def check_file(file): else: files = glob.glob('./**/' + file, recursive=True) # find file assert len(files), f'File Not Found: {file}' # assert file was found - assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique + assert len( + files + ) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique return files[0] # return file @@ -154,18 +178,25 @@ def check_dataset(dict): # Download dataset if not found locally val, s = dict.get('val'), dict.get('download') if val and len(val): - val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path + val = [ + Path(x).resolve() + for x in (val if isinstance(val, list) else [val]) + ] # val path if not all(x.exists() for x in val): - print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()]) + print('\nWARNING: Dataset not found, nonexistent paths: %s' % + [str(x) for x in val if not x.exists()]) if s and len(s): # download script print('Downloading %s ...' 
% s) if s.startswith('http') and s.endswith('.zip'): # URL f = Path(s).name # filename torch.hub.download_url_to_file(s, f) - r = os.system('unzip -q %s -d ../ && rm %s' % (f, f)) # unzip + r = os.system('unzip -q %s -d ../ && rm %s' % + (f, f)) # unzip else: # bash script r = os.system(s) - print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure')) # analyze return value + print('Dataset autodownload %s\n' % + ('success' + if r == 0 else 'failure')) # analyze return value else: raise Exception('Dataset not found.') @@ -177,7 +208,7 @@ def make_divisible(x, divisor): def clean_str(s): # Cleans a string by replacing special characters with underscore _ - return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s) + return re.sub(pattern='[|@#!¡·$€%&()=?¿^*;:,¨´><+]', repl='_', string=s) def one_cycle(y1=0.0, y2=1.0, steps=100): @@ -187,26 +218,29 @@ def one_cycle(y1=0.0, y2=1.0, steps=100): def colorstr(*input): # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world') - *args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string - colors = {'black': '\033[30m', # basic colors - 'red': '\033[31m', - 'green': '\033[32m', - 'yellow': '\033[33m', - 'blue': '\033[34m', - 'magenta': '\033[35m', - 'cyan': '\033[36m', - 'white': '\033[37m', - 'bright_black': '\033[90m', # bright colors - 'bright_red': '\033[91m', - 'bright_green': '\033[92m', - 'bright_yellow': '\033[93m', - 'bright_blue': '\033[94m', - 'bright_magenta': '\033[95m', - 'bright_cyan': '\033[96m', - 'bright_white': '\033[97m', - 'end': '\033[0m', # misc - 'bold': '\033[1m', - 'underline': '\033[4m'} + *args, string = input if len(input) > 1 else ('blue', 'bold', input[0] + ) # color arguments, string + colors = { + 'black': '\033[30m', # basic colors + 'red': '\033[31m', + 'green': '\033[32m', + 'yellow': '\033[33m', + 'blue': '\033[34m', + 'magenta': '\033[35m', + 'cyan': '\033[36m', + 'white': '\033[37m', + 'bright_black': '\033[90m', # bright colors + 'bright_red': '\033[91m', + 'bright_green': '\033[92m', + 'bright_yellow': '\033[93m', + 'bright_blue': '\033[94m', + 'bright_magenta': '\033[95m', + 'bright_cyan': '\033[96m', + 'bright_white': '\033[97m', + 'end': '\033[0m', # misc + 'bold': '\033[1m', + 'underline': '\033[4m' + } return ''.join(colors[x] for x in args) + f'{string}' + colors['end'] @@ -231,21 +265,27 @@ def labels_to_class_weights(labels, nc=80): def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): # Produces image weights based on class_weights and image contents - class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels]) + class_counts = np.array( + [np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels]) image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1) # index = random.choices(range(n), weights=image_weights, k=1) # weight image sample return image_weights -def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) +def coco80_to_coco91_class( +): # converts 80-index (val2014) to 91-index (paper) # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet - x 
= [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + x = [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, + 85, 86, 87, 88, 89, 90 + ] return x @@ -292,7 +332,8 @@ def segment2box(segment, width=640, height=640): x, y = segment.T # segment xy inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) x, y, = x[inside], y[inside] - return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy + return np.array([x.min(), y.min(), x.max(), + y.max()]) if any(x) else np.zeros((1, 4)) # xyxy def segments2boxes(segments): @@ -310,15 +351,19 @@ def resample_segments(segments, n=1000): s = np.concatenate((s, s[0:1, :]), axis=0) x = np.linspace(0, len(s) - 1, n) xp = np.arange(len(s)) - segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy + segments[i] = np.concatenate([ + np.interp(x, xp, s[:, i]) for i in range(2) + ]).reshape(2, -1).T # segment xy return segments def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): # Rescale coords (xyxy) from img1_shape to img0_shape if ratio_pad is None: # calculate from img0_shape - gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new - pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + gain = min(img1_shape[0] / img0_shape[0], + img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, ( + img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding else: gain = ratio_pad[0][0] pad = ratio_pad[1] @@ -338,7 +383,13 @@ def clip_coords(boxes, img_shape): boxes[:, 3].clamp_(0, img_shape[0]) # y2 -def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): +def bbox_iou(box1, + box2, + x1y1x2y2=True, + GIoU=False, + DIoU=False, + CIoU=False, + eps=1e-7): # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 box2 = box2.T @@ -364,16 +415,20 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= iou = inter / union if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width + cw = torch.max(b1_x2, b2_x2) - torch.min( + b1_x1, b2_x1) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 - c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + - (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared + c2 = cw**2 + ch**2 + eps # convex diagonal squared + # center distance squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2)**2 + + (b2_y1 + b2_y2 - b1_y1 - b1_y2)**2) / 4 if DIoU: return iou - rho2 / c2 # DIoU elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2) + v = (4 / math.pi**2) * torch.pow( + torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / + (h1 + eps)), 2) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) # CIoU @@ -384,7 +439,14 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= return iou # IoU -def bbox_alpha_iou(box1, box2, x1y1x2y2=False, GIoU=False, DIoU=False, CIoU=False, alpha=2, eps=1e-9): +def bbox_alpha_iou(box1, + box2, + x1y1x2y2=False, + GIoU=False, + DIoU=False, + CIoU=False, + alpha=2, + eps=1e-9): # Returns tsqrt_he IoU of box1 to box2. box1 is 4, box2 is nx4 box2 = box2.T @@ -409,37 +471,41 @@ def bbox_alpha_iou(box1, box2, x1y1x2y2=False, GIoU=False, DIoU=False, CIoU=Fals # change iou into pow(iou+eps) # iou = inter / union - iou = torch.pow(inter/union + eps, alpha) + iou = torch.pow(inter / union + eps, alpha) # beta = 2 * alpha if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width + cw = torch.max(b1_x2, b2_x2) - torch.min( + b1_x1, b2_x1) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 - c2 = (cw ** 2 + ch ** 2) ** alpha + eps # convex diagonal + c2 = (cw**2 + ch**2)**alpha + eps # convex diagonal rho_x = torch.abs(b2_x1 + b2_x2 - b1_x1 - b1_x2) rho_y = torch.abs(b2_y1 + b2_y2 - b1_y1 - b1_y2) - rho2 = ((rho_x ** 2 + rho_y ** 2) / 4) ** alpha # center distance + rho2 = ((rho_x**2 + rho_y**2) / 4)**alpha # center distance if DIoU: return iou - rho2 / c2 # DIoU elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + v = (4 / math.pi**2) * torch.pow( + torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) with torch.no_grad(): alpha_ciou = v / ((1 + eps) - inter / union + v) # return iou - (rho2 / c2 + v * alpha_ciou) # CIoU - return iou - (rho2 / c2 + torch.pow(v * alpha_ciou + eps, alpha)) # CIoU + return iou - ( + rho2 / c2 + torch.pow(v * alpha_ciou + eps, alpha)) # CIoU else: # GIoU https://arxiv.org/pdf/1902.09630.pdf # c_area = cw * ch + eps # convex area # return iou - (c_area - union) / c_area # GIoU - c_area = torch.max(cw * ch + eps, union) # convex area - return iou - torch.pow((c_area - union) / c_area + eps, alpha) # GIoU 
+ c_area = torch.max(cw * ch + eps, union) # convex area + return iou - torch.pow( + (c_area - union) / c_area + eps, alpha) # GIoU else: - return iou # torch.log(iou+eps) or iou + return iou # torch.log(iou+eps) or iou def box_iou(box1, box2): # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py - """ - Return intersection-over-union (Jaccard index) of boxes. + """Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. Arguments: box1 (Tensor[N, 4]) @@ -457,8 +523,10 @@ def box_area(box): area2 = box_area(box2.T) # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) - return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) + inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - + torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) + return inter / (area1[:, None] + area2 - inter + ) # iou = inter / (area1 + area2 - inter) def wh_iou(wh1, wh2): @@ -466,12 +534,14 @@ def wh_iou(wh1, wh2): wh1 = wh1[:, None] # [N,1,2] wh2 = wh2[None] # [1,M,2] inter = torch.min(wh1, wh2).prod(2) # [N,M] - return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) + return inter / (wh1.prod(2) + wh2.prod(2) - inter + ) # iou = inter / (area1 + area2 - inter) def box_giou(box1, box2): - """ - Return generalized intersection-over-union (Jaccard index) between two sets of boxes. + """Return generalized intersection-over-union (Jaccard index) between two + sets of boxes. + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and ``0 <= y1 < y2``. Args: @@ -489,7 +559,8 @@ def box_area(box): area1 = box_area(box1.T) area2 = box_area(box2.T) - inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) + inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - + torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) union = (area1[:, None] + area2 - inter) iou = inter / union @@ -504,8 +575,9 @@ def box_area(box): def box_ciou(box1, box2, eps: float = 1e-7): - """ - Return complete intersection-over-union (Jaccard index) between two sets of boxes. + """Return complete intersection-over-union (Jaccard index) between two sets + of boxes. + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and ``0 <= y1 < y2``. Args: @@ -524,7 +596,8 @@ def box_area(box): area1 = box_area(box1.T) area2 = box_area(box2.T) - inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) + inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - + torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) union = (area1[:, None] + area2 - inter) iou = inter / union @@ -533,7 +606,7 @@ def box_area(box): rbi = torch.max(box1[:, None, 2:], box2[:, 2:]) whi = (rbi - lti).clamp(min=0) # [N,M,2] - diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps + diagonal_distance_squared = (whi[:, :, 0]**2) + (whi[:, :, 1]**2) + eps # centers of boxes x_p = (box1[:, None, 0] + box1[:, None, 2]) / 2 @@ -541,7 +614,7 @@ def box_area(box): x_g = (box2[:, 0] + box2[:, 2]) / 2 y_g = (box2[:, 1] + box2[:, 3]) / 2 # The distance between boxes' centers squared. 
- centers_distance_squared = (x_p - x_g) ** 2 + (y_p - y_g) ** 2 + centers_distance_squared = (x_p - x_g)**2 + (y_p - y_g)**2 w_pred = box1[:, None, 2] - box1[:, None, 0] h_pred = box1[:, None, 3] - box1[:, None, 1] @@ -549,15 +622,18 @@ def box_area(box): w_gt = box2[:, 2] - box2[:, 0] h_gt = box2[:, 3] - box2[:, 1] - v = (4 / (torch.pi ** 2)) * torch.pow((torch.atan(w_gt / h_gt) - torch.atan(w_pred / h_pred)), 2) + v = (4 / (torch.pi**2)) * torch.pow( + (torch.atan(w_gt / h_gt) - torch.atan(w_pred / h_pred)), 2) with torch.no_grad(): alpha = v / (1 - iou + v + eps) - return iou - (centers_distance_squared / diagonal_distance_squared) - alpha * v + return iou - (centers_distance_squared / + diagonal_distance_squared) - alpha * v def box_diou(box1, box2, eps: float = 1e-7): - """ - Return distance intersection-over-union (Jaccard index) between two sets of boxes. + """Return distance intersection-over-union (Jaccard index) between two sets + of boxes. + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and ``0 <= y1 < y2``. Args: @@ -576,7 +652,8 @@ def box_area(box): area1 = box_area(box1.T) area2 = box_area(box2.T) - inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) + inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - + torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) union = (area1[:, None] + area2 - inter) iou = inter / union @@ -585,7 +662,7 @@ def box_area(box): rbi = torch.max(box1[:, None, 2:], box2[:, 2:]) whi = (rbi - lti).clamp(min=0) # [N,M,2] - diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps + diagonal_distance_squared = (whi[:, :, 0]**2) + (whi[:, :, 1]**2) + eps # centers of boxes x_p = (box1[:, None, 0] + box1[:, None, 2]) / 2 @@ -593,16 +670,21 @@ def box_area(box): x_g = (box2[:, 0] + box2[:, 2]) / 2 y_g = (box2[:, 1] + box2[:, 3]) / 2 # The distance between boxes' centers squared. - centers_distance_squared = (x_p - x_g) ** 2 + (y_p - y_g) ** 2 + centers_distance_squared = (x_p - x_g)**2 + (y_p - y_g)**2 # The distance IoU is the IoU penalized by a normalized # distance between boxes' centers squared. return iou - (centers_distance_squared / diagonal_distance_squared) -def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, +def non_max_suppression(prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, labels=()): - """Runs Non-Maximum Suppression (NMS) on inference results + """Runs Non-Maximum Suppression (NMS) on inference results. 
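Expects prediction shaped (batch, boxes, 5 + num_classes): xywh, objectness and per-class scores (inferred from the candidate mask and class slicing below).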
Returns: list of detections, one (n,6) tensor per image [xyxy, conf, cls] @@ -612,7 +694,9 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non xc = prediction[..., 4] > conf_thres # candidates # Settings - min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height + # (pixels) minimum and maximum box width and height + # min_wh = 2 + max_wh = 4096 max_det = 300 # maximum number of detections per image max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() time_limit = 10.0 # seconds to quit after @@ -621,7 +705,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non merge = False # use merge-NMS t = time.time() - output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] + output = [torch.zeros( + (0, 6), device=prediction.device)] * prediction.shape[0] for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height @@ -629,11 +714,11 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non # Cat apriori labels if autolabelling if labels and len(labels[xi]): - l = labels[xi] - v = torch.zeros((len(l), nc + 5), device=x.device) - v[:, :4] = l[:, 1:5] # box + cur_l = labels[xi] + v = torch.zeros((len(cur_l), nc + 5), device=x.device) + v[:, :4] = cur_l[:, 1:5] # box v[:, 4] = 1.0 # conf - v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls + v[range(len(cur_l)), cur_l[:, 0].long() + 5] = 1.0 # cls x = torch.cat((x, v), 0) # If none remain process next image @@ -642,8 +727,10 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non # Compute conf if nc == 1: - x[:, 5:] = x[:, 4:5] # for models with one class, cls_loss is 0 and cls_conf is always 0.5, - # so there is no need to multiplicate. + x[:, + 5:] = x[:, 4: + 5] # for models with one class, cls_loss is 0 and cls_conf is always 0.5, + # so there is no need to multiply.
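# Illustrative: with a single class the class logit is never trained, so
# cls_conf sits at sigmoid(0) = 0.5; copying obj_conf instead of multiplying
# by that constant 0.5 leaves the ranking of candidates unchanged.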
else: x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf @@ -656,7 +743,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) else: # best class only conf, j = x[:, 5:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + x = torch.cat((box, conf, j.float()), + 1)[conf.view(-1) > conf_thres] # Filter by class if classes is not None: @@ -671,19 +759,23 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non if not n: # no boxes continue elif n > max_nms: # excess boxes - x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence + x = x[x[:, 4].argsort( + descending=True)[:max_nms]] # sort by confidence # Batched NMS c = x[:, 5:6] * (0 if agnostic else max_wh) # classes - boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + boxes, scores = x[:, :4] + c, x[:, + 4] # boxes (offset by class), scores i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS if i.shape[0] > max_det: # limit detections i = i[:max_det] - if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) + if merge and (1 < n < + 3E3): # Merge NMS (boxes merged using weighted mean) # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix weights = iou * scores[None] # box weights - x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum( + 1, keepdim=True) # merged boxes if redundant: i = i[iou.sum(1) > 1] # require redundancy @@ -695,7 +787,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non return output -def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer() +def strip_optimizer(f='best.pt', + s=''): # from utils.general import *; strip_optimizer() # Strip optimizer from 'f' to finalize training, optionally save as 's' x = torch.load(f, map_location=torch.device('cpu')) if x.get('ema'): @@ -705,7 +798,9 @@ def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_op x['epoch'] = -1 torch.save(x, s or f) mb = os.path.getsize(s or f) / 1E6 # filesize - print(f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB") + print( + f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB" + ) def increment_path(path, exist_ok=True, sep=''): @@ -714,8 +809,8 @@ def increment_path(path, exist_ok=True, sep=''): if (path.exists() and exist_ok) or (not path.exists()): return str(path) else: - dirs = glob.glob(f"{path}{sep}*") # similar paths - matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs] + dirs = glob.glob(f'{path}{sep}*') # similar paths + matches = [re.search(rf'%s{sep}(\d+)' % path.stem, d) for d in dirs] i = [int(m.groups()[0]) for m in matches if m] # indices n = max(i) + 1 if i else 2 # increment number - return f"{path}{sep}{n}" # update path + return f'{path}{sep}{n}' # update path diff --git a/utils/loss.py b/utils/loss.py index 6283e77..d2fa65c 100644 --- a/utils/loss.py +++ b/utils/loss.py @@ -7,7 +7,9 @@ from utils.torch_utils import is_parallel -def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 +def smooth_BCE( + eps=0.1 +): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 # return positive, negative label smoothing BCE targets 
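# e.g. eps=0.1 yields targets (0.95, 0.05) in the return below, and eps=0.0
# recovers the hard (1.0, 0.0) targets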
return 1.0 - 0.5 * eps, 0.5 * eps @@ -15,7 +17,7 @@ def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#iss class FocalLoss(nn.Module): # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): - super(FocalLoss, self).__init__() + super().__init__() self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() self.gamma = gamma self.alpha = alpha @@ -31,7 +33,7 @@ def forward(self, pred, true): pred_prob = torch.sigmoid(pred) # prob from logits p_t = true * pred_prob + (1 - true) * (1 - pred_prob) alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) - modulating_factor = (1.0 - p_t) ** self.gamma + modulating_factor = (1.0 - p_t)**self.gamma loss *= alpha_factor * modulating_factor if self.reduction == 'mean': @@ -45,35 +47,44 @@ def forward(self, pred, true): class ComputeLoss: # Compute losses def __init__(self, model, autobalance=False): - super(ComputeLoss, self).__init__() + super().__init__() device = next(model.parameters()).device # get model device h = model.hyp # hyperparameters # Define criteria - BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) - BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) + BCEcls = nn.BCEWithLogitsLoss( + pos_weight=torch.tensor([h['cls_pw']], device=device)) + BCEobj = nn.BCEWithLogitsLoss( + pos_weight=torch.tensor([h['obj_pw']], device=device)) # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 - self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets + self.cp, self.cn = smooth_BCE(eps=h.get( + 'label_smoothing', 0.0)) # positive, negative BCE targets # Focal loss g = h['fl_gamma'] # focal loss gamma if g > 0: BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) - det = model.module.model_h2[-1] if is_parallel(model) else model.model_h2[-1] # Detect() module - self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7 - #self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.1, .05]) # P3-P7 - #self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.5, 0.4, .1]) # P3-P7 - self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index + det = model.module.model_h2[-1] if is_parallel( + model) else model.model_h2[-1] # Detect() module + self.balance = { + 3: [4.0, 1.0, 0.4] + }.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7 + # self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.1, .05]) # P3-P7 + # self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.5, 0.4, .1]) # P3-P7 + self.ssi = list( + det.stride).index(16) if autobalance else 0 # stride 16 index self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance for k in 'na', 'nc', 'nl', 'anchors': setattr(self, k, getattr(det, k)) def __call__(self, p, targets): # predictions, targets, model device = targets.device - lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) - tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets + lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros( + 1, device=device), torch.zeros(1, device=device) + tcls, tbox, indices, anchors = self.build_targets(p, + targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions @@ -82,23 +93,29 @@ def __call__(self, p, targets): 
# predictions, targets, model n = b.shape[0] # number of targets if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + ps = pi[b, a, gj, + gi] # prediction subset corresponding to targets # Regression pxy = ps[:, :2].sigmoid() * 2. - 0.5 - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + pwh = (ps[:, 2:4].sigmoid() * 2)**2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box - iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness - tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio + tobj[b, a, gj, + gi] = (1.0 - + self.gr) + self.gr * iou.detach().clamp(0).type( + tobj.dtype) # iou ratio # Classification if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets + t = torch.full_like(ps[:, 5:], self.cn, + device=device) # targets t[range(n), tcls[i]] = self.cp - #t[t==self.cp] = iou.detach().clamp(0).type(t.dtype) + # t[t==self.cp] = iou.detach().clamp(0).type(t.dtype) lcls += self.BCEcls(ps[:, 5:], t) # BCE # Append targets to text file @@ -108,7 +125,8 @@ def __call__(self, p, targets): # predictions, targets, model obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss if self.autobalance: - self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + self.balance[i] = self.balance[ + i] * 0.9999 + 0.0001 / obji.detach().item() if self.autobalance: self.balance = [x / self.balance[self.ssi] for x in self.balance] @@ -124,15 +142,25 @@ def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch = [], [], [], [] - gain = torch.ones(7, device=targets.device).long() # normalized to gridspace gain - ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) - targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices + gain = torch.ones( + 7, device=targets.device).long() # normalized to gridspace gain + ai = torch.arange(na, + device=targets.device).float().view(na, 1).repeat( + 1, nt) # same as .repeat_interleave(nt) + targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), + 2) # append anchor indices g = 0.5 # bias - off = torch.tensor([[0, 0], - [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], device=targets.device).float() * g # offsets + off = torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=targets.device).float() * g # offsets for i in range(self.nl): anchors = self.anchors[i] @@ -143,7 +171,8 @@ def build_targets(self, p, targets): if nt: # Matches r = t[:, :, 4:6] / anchors[:, None] # wh ratio - j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t'] # compare + j = torch.max( + r, 1. 
/ r).max(2)[0] < self.hyp['anchor_t'] # compare # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) t = t[j] # filter @@ -168,7 +197,9 @@ def build_targets(self, p, targets): # Append a = t[:, 6].long() # anchor indices - indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices + indices.append( + (b, a, gj.clamp_(0, gain[3] - 1), + gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class @@ -179,63 +210,82 @@ def build_targets(self, p, targets): class ComputeLossOTA: # Compute losses def __init__(self, model, autobalance=False): - super(ComputeLossOTA, self).__init__() + super().__init__() device = next(model.parameters()).device # get model device h = model.hyp # hyperparameters # Define criteria - BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) - BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) + BCEcls = nn.BCEWithLogitsLoss( + pos_weight=torch.tensor([h['cls_pw']], device=device)) + BCEobj = nn.BCEWithLogitsLoss( + pos_weight=torch.tensor([h['obj_pw']], device=device)) # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 - self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets + self.cp, self.cn = smooth_BCE(eps=h.get( + 'label_smoothing', 0.0)) # positive, negative BCE targets # Focal loss g = h['fl_gamma'] # focal loss gamma if g > 0: BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) - det = model.module.model_h2[-1] if is_parallel(model) else model.model_h2[-1] # Detect() module - self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7 - self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index + det = model.module.model_h2[-1] if is_parallel( + model) else model.model_h2[-1] # Detect() module + self.balance = { + 3: [4.0, 1.0, 0.4] + }.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7 + self.ssi = list( + det.stride).index(16) if autobalance else 0 # stride 16 index self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance for k in 'na', 'nc', 'nl', 'anchors', 'stride': setattr(self, k, getattr(det, k)) def __call__(self, p, targets, imgs): # predictions, targets, model device = targets.device - lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) - bs, as_, gjs, gis, targets, anchors = self.build_targets(p, targets, imgs) - pre_gen_gains = [torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p] - + lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros( + 1, device=device), torch.zeros(1, device=device) + bs, as_, gjs, gis, targets, anchors = self.build_targets( + p, targets, imgs) + pre_gen_gains = [ + torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p + ] # Losses for i, pi in enumerate(p): # layer index, layer predictions - b, a, gj, gi = bs[i], as_[i], gjs[i], gis[i] # image, anchor, gridy, gridx + b, a, gj, gi = bs[i], as_[i], gjs[i], gis[ + i] # image, anchor, gridy, gridx tobj = torch.zeros_like(pi[..., 0], device=device) # target obj n = b.shape[0] # number of targets if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + ps = pi[b, a, gj, + gi] # prediction subset corresponding to targets # Regression grid = torch.stack([gi, gj], 
dim=1) pxy = ps[:, :2].sigmoid() * 2. - 0.5 - #pxy = ps[:, :2].sigmoid() * 3. - 1. - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + # pxy = ps[:, :2].sigmoid() * 3. - 1. + pwh = (ps[:, 2:4].sigmoid() * 2)**2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i] selected_tbox[:, :2] -= grid - iou = bbox_iou(pbox.T, selected_tbox, x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou = bbox_iou(pbox.T, + selected_tbox, + x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness - tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio + tobj[b, a, gj, + gi] = (1.0 - + self.gr) + self.gr * iou.detach().clamp(0).type( + tobj.dtype) # iou ratio # Classification selected_tcls = targets[i][:, 1].long() if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets + t = torch.full_like(ps[:, 5:], self.cn, + device=device) # targets t[range(n), selected_tcls] = self.cp lcls += self.BCEcls(ps[:, 5:], t) # BCE @@ -246,7 +296,8 @@ def __call__(self, p, targets, imgs): # predictions, targets, model obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss if self.autobalance: - self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + self.balance[i] = self.balance[ + i] * 0.9999 + 0.0001 / obji.detach().item() if self.autobalance: self.balance = [x / self.balance[self.ssi] for x in self.balance] @@ -260,11 +311,11 @@ def __call__(self, p, targets, imgs): # predictions, targets, model def build_targets(self, p, targets, imgs): - #indices, anch = self.find_positive(p, targets) + # indices, anch = self.find_positive(p, targets) indices, anch = self.find_3_positive(p, targets) - #indices, anch = self.find_4_positive(p, targets) - #indices, anch = self.find_5_positive(p, targets) - #indices, anch = self.find_9_positive(p, targets) + # indices, anch = self.find_4_positive(p, targets) + # indices, anch = self.find_5_positive(p, targets) + # indices, anch = self.find_9_positive(p, targets) matching_bs = [[] for pp in p] matching_as = [[] for pp in p] @@ -277,7 +328,7 @@ def build_targets(self, p, targets, imgs): for batch_idx in range(p[0].shape[0]): - b_idx = targets[:, 0]==batch_idx + b_idx = targets[:, 0] == batch_idx this_target = targets[b_idx] if this_target.shape[0] == 0: continue @@ -305,16 +356,18 @@ def build_targets(self, p, targets, imgs): all_gj.append(gj) all_gi.append(gi) all_anch.append(anch[i][idx]) - from_which_layer.append(torch.ones(size=(len(b),)) * i) + from_which_layer.append(torch.ones(size=(len(b), )) * i) fg_pred = pi[b, a, gj, gi] p_obj.append(fg_pred[:, 4:5]) p_cls.append(fg_pred[:, 5:]) grid = torch.stack([gi, gj], dim=1) - pxy = (fg_pred[:, :2].sigmoid() * 2. - 0.5 + grid) * self.stride[i] #/ 8. - #pxy = (fg_pred[:, :2].sigmoid() * 3. - 1. + grid) * self.stride[i] - pwh = (fg_pred[:, 2:4].sigmoid() * 2) ** 2 * anch[i][idx] * self.stride[i] #/ 8. + pxy = (fg_pred[:, :2].sigmoid() * 2. - 0.5 + + grid) * self.stride[i] # / 8. + # pxy = (fg_pred[:, :2].sigmoid() * 3. - 1. + grid) * self.stride[i] + pwh = (fg_pred[:, 2:4].sigmoid() * + 2)**2 * anch[i][idx] * self.stride[i] # / 8. 
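# The pxy/pwh lines above undo the YOLO head parameterization (sigmoid
# offsets re-centered on the matched grid cell, anchor-scaled wh) and scale
# by the layer stride, so the SimOTA cost below compares candidates and
# targets in common image coordinates.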
pxywh = torch.cat([pxy, pwh], dim=-1) pxyxy = xywh2xyxy(pxywh) pxyxys.append(pxyxy) @@ -335,45 +388,41 @@ def build_targets(self, p, targets, imgs): pair_wise_iou_loss = -torch.log(pair_wise_iou + 1e-8) - top_k, _ = torch.topk(pair_wise_iou, min(10, pair_wise_iou.shape[1]), dim=1) + top_k, _ = torch.topk(pair_wise_iou, + min(10, pair_wise_iou.shape[1]), + dim=1) dynamic_ks = torch.clamp(top_k.sum(1).int(), min=1) - gt_cls_per_image = ( - F.one_hot(this_target[:, 1].to(torch.int64), self.nc) - .float() - .unsqueeze(1) - .repeat(1, pxyxys.shape[0], 1) - ) + gt_cls_per_image = (F.one_hot(this_target[:, 1].to(torch.int64), + self.nc).float().unsqueeze(1).repeat( + 1, pxyxys.shape[0], 1)) num_gt = this_target.shape[0] cls_preds_ = ( - p_cls.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() - * p_obj.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() - ) + p_cls.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() * + p_obj.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()) y = cls_preds_.sqrt_() pair_wise_cls_loss = F.binary_cross_entropy_with_logits( - torch.log(y/(1-y)) , gt_cls_per_image, reduction="none" - ).sum(-1) + torch.log(y / (1 - y)), gt_cls_per_image, + reduction='none').sum(-1) del cls_preds_ - cost = ( - pair_wise_cls_loss - + 3.0 * pair_wise_iou_loss - ) + cost = (pair_wise_cls_loss + 3.0 * pair_wise_iou_loss) matching_matrix = torch.zeros_like(cost) for gt_idx in range(num_gt): - _, pos_idx = torch.topk( - cost[gt_idx], k=dynamic_ks[gt_idx].item(), largest=False - ) + _, pos_idx = torch.topk(cost[gt_idx], + k=dynamic_ks[gt_idx].item(), + largest=False) matching_matrix[gt_idx][pos_idx] = 1.0 del top_k, dynamic_ks anchor_matching_gt = matching_matrix.sum(0) if (anchor_matching_gt > 1).sum() > 0: - _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0) + _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], + dim=0) matching_matrix[:, anchor_matching_gt > 1] *= 0.0 matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1.0 fg_mask_inboxes = matching_matrix.sum(0) > 0.0 @@ -406,12 +455,24 @@ def build_targets(self, p, targets, imgs): matching_targets[i] = torch.cat(matching_targets[i], dim=0) matching_anchs[i] = torch.cat(matching_anchs[i], dim=0) else: - matching_bs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) - matching_as[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) - matching_gjs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) - matching_gis[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) - matching_targets[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) - matching_anchs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) + matching_bs[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) + matching_as[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) + matching_gjs[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) + matching_gis[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) + matching_targets[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) + matching_anchs[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs @@ -419,15 +480,25 @@ def find_3_positive(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets indices, anch = [], [] - gain = torch.ones(7, device=targets.device).long() # normalized to gridspace gain - ai = torch.arange(na, 
device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) - targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices + gain = torch.ones( + 7, device=targets.device).long() # normalized to gridspace gain + ai = torch.arange(na, + device=targets.device).float().view(na, 1).repeat( + 1, nt) # same as .repeat_interleave(nt) + targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), + 2) # append anchor indices g = 0.5 # bias - off = torch.tensor([[0, 0], - [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], device=targets.device).float() * g # offsets + off = torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=targets.device).float() * g # offsets for i in range(self.nl): anchors = self.anchors[i] @@ -438,7 +509,8 @@ def find_3_positive(self, p, targets): if nt: # Matches r = t[:, :, 4:6] / anchors[:, None] # wh ratio - j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t'] # compare + j = torch.max( + r, 1. / r).max(2)[0] < self.hyp['anchor_t'] # compare # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) t = t[j] # filter @@ -457,13 +529,15 @@ def find_3_positive(self, p, targets): # Define b, c = t[:, :2].long().T # image, class gxy = t[:, 2:4] # grid xy - gwh = t[:, 4:6] # grid wh + # gwh = t[:, 4:6] # grid wh gij = (gxy - offsets).long() gi, gj = gij.T # grid xy indices # Append a = t[:, 6].long() # anchor indices - indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices + indices.append( + (b, a, gj.clamp_(0, gain[3] - 1), + gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices anch.append(anchors[a]) # anchors return indices, anch @@ -472,48 +546,68 @@ def find_3_positive(self, p, targets): class ComputeLossOTADual(ComputeLossOTA): # Compute losses def __init__(self, model, autobalance=False): - super(ComputeLossOTADual, self).__init__(model, autobalance) + super().__init__(model, autobalance) def __call__(self, p, targets, imgs): # predictions, targets, model device = targets.device - lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) - - bs_2, as_2_, gjs_2, gis_2, targets_2, anchors_2 = self.build_targets(p[self.nl:], targets, imgs) - bs, as_, gjs, gis, targets, anchors = self.build_targets(p[:self.nl], targets, imgs) - pre_gen_gains = [torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p[:self.nl]] - pre_gen_gains_2 = [torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p[self.nl:]] + lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros( + 1, device=device), torch.zeros(1, device=device) + + bs_2, as_2_, gjs_2, gis_2, targets_2, anchors_2 = self.build_targets( + p[self.nl:], targets, imgs) + bs, as_, gjs, gis, targets, anchors = self.build_targets( + p[:self.nl], targets, imgs) + pre_gen_gains = [ + torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] + for pp in p[:self.nl] + ] + pre_gen_gains_2 = [ + torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] + for pp in p[self.nl:] + ] # Losses for i in range(self.nl): # layer index, layer predictions pi = p[i] - pi_2 = p[i+self.nl] - b, a, gj, gi = bs[i], as_[i], gjs[i], gis[i] # image, anchor, gridy, gridx - b_2, a_2, gj_2, gi_2 = bs_2[i], as_2_[i], gjs_2[i], gis_2[i] # image, anchor, gridy, gridx + pi_2 = p[i + self.nl] + b, a, gj, 
gi = bs[i], as_[i], gjs[i], gis[ + i] # image, anchor, gridy, gridx + b_2, a_2, gj_2, gi_2 = bs_2[i], as_2_[i], gjs_2[i], gis_2[ + i] # image, anchor, gridy, gridx tobj = torch.zeros_like(pi[..., 0], device=device) # target obj - tobj_2 = torch.zeros_like(pi_2[..., 0], device=device) # target obj + tobj_2 = torch.zeros_like(pi_2[..., 0], + device=device) # target obj n = b.shape[0] # number of targets if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + ps = pi[b, a, gj, + gi] # prediction subset corresponding to targets # Regression grid = torch.stack([gi, gj], dim=1) pxy = ps[:, :2].sigmoid() * 2. - 0.5 - #pxy = ps[:, :2].sigmoid() * 3. - 1. - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + # pxy = ps[:, :2].sigmoid() * 3. - 1. + pwh = (ps[:, 2:4].sigmoid() * 2)**2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i] selected_tbox[:, :2] -= grid - iou = bbox_iou(pbox.T, selected_tbox, x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou = bbox_iou(pbox.T, + selected_tbox, + x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness - tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio + tobj[b, a, gj, + gi] = (1.0 - + self.gr) + self.gr * iou.detach().clamp(0).type( + tobj.dtype) # iou ratio # Classification selected_tcls = targets[i][:, 1].long() if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets + t = torch.full_like(ps[:, 5:], self.cn, + device=device) # targets t[range(n), selected_tcls] = self.cp lcls += self.BCEcls(ps[:, 5:], t) # BCE @@ -523,26 +617,33 @@ def __call__(self, p, targets, imgs): # predictions, targets, model n_2 = b_2.shape[0] # number of targets if n_2: - ps_2 = pi_2[b_2, a_2, gj_2, gi_2] # prediction subset corresponding to targets + ps_2 = pi_2[b_2, a_2, gj_2, + gi_2] # prediction subset corresponding to targets # Regression grid_2 = torch.stack([gi_2, gj_2], dim=1) pxy_2 = ps_2[:, :2].sigmoid() * 2. - 0.5 - #pxy = ps[:, :2].sigmoid() * 3. - 1. - pwh_2 = (ps_2[:, 2:4].sigmoid() * 2) ** 2 * anchors_2[i] + # pxy = ps[:, :2].sigmoid() * 3. - 1. 
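# The second head's boxes are decoded exactly as for the first head above;
# note both heads accumulate into the same lbox/lobj/lcls totals, which is
# what makes this the joint Dual loss rather than the adaptive Dy variant
# defined further down.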
+ pwh_2 = (ps_2[:, 2:4].sigmoid() * 2)**2 * anchors_2[i] pbox_2 = torch.cat((pxy_2, pwh_2), 1) # predicted box selected_tbox_2 = targets_2[i][:, 2:6] * pre_gen_gains_2[i] selected_tbox_2[:, :2] -= grid_2 - iou_2 = bbox_iou(pbox_2.T, selected_tbox_2, x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou_2 = bbox_iou(pbox_2.T, + selected_tbox_2, + x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox += (1.0 - iou_2).mean() # iou loss # Objectness - tobj_2[b_2, a_2, gj_2, gi_2] = (1.0 - self.gr) + self.gr * iou_2.detach().clamp(0).type(tobj_2.dtype) # iou ratio + tobj_2[b_2, a_2, gj_2, gi_2] = ( + 1.0 - self.gr) + self.gr * iou_2.detach().clamp(0).type( + tobj_2.dtype) # iou ratio # Classification selected_tcls_2 = targets_2[i][:, 1].long() if self.nc > 1: # cls loss (only if multiple classes) - t_2 = torch.full_like(ps_2[:, 5:], self.cn, device=device) # targets + t_2 = torch.full_like(ps_2[:, 5:], self.cn, + device=device) # targets t_2[range(n_2), selected_tcls_2] = self.cp lcls += self.BCEcls(ps_2[:, 5:], t_2) # BCE @@ -550,13 +651,13 @@ def __call__(self, p, targets, imgs): # predictions, targets, model # with open('targets.txt', 'a') as file: # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] - obji = self.BCEobj(pi[..., 4], tobj) obji_2 = self.BCEobj(pi_2[..., 4], tobj_2) lobj += obji * self.balance[i] # obj loss lobj += obji_2 * self.balance[i] # obj loss if self.autobalance: - self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + self.balance[i] = self.balance[ + i] * 0.9999 + 0.0001 / obji.detach().item() if self.autobalance: self.balance = [x / self.balance[self.ssi] for x in self.balance] @@ -572,7 +673,7 @@ def __call__(self, p, targets, imgs): # predictions, targets, model class ComputeLossOTADy(ComputeLossOTA): # Compute losses def __init__(self, model, autobalance=False): - super(ComputeLossOTADy, self).__init__(model, autobalance) + super().__init__(model, autobalance) self.tracked_diff = 0 self.iter_count = 0 self.diff_list = [] @@ -580,45 +681,66 @@ def __init__(self, model, autobalance=False): def __call__(self, ps, targets, imgs): # predictions, targets, model p, score = ps device = targets.device - lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) - lcls_2, lbox_2, lobj_2 = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) - - bs_2, as_2_, gjs_2, gis_2, targets_2, anchors_2 = self.build_targets(p[self.nl:], targets, imgs) - bs, as_, gjs, gis, targets, anchors = self.build_targets(p[:self.nl], targets, imgs) - pre_gen_gains = [torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p[:self.nl]] - pre_gen_gains_2 = [torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p[self.nl:]] + lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros( + 1, device=device), torch.zeros(1, device=device) + lcls_2, lbox_2, lobj_2 = torch.zeros(1, device=device), torch.zeros( + 1, device=device), torch.zeros(1, device=device) + + bs_2, as_2_, gjs_2, gis_2, targets_2, anchors_2 = self.build_targets( + p[self.nl:], targets, imgs) + bs, as_, gjs, gis, targets, anchors = self.build_targets( + p[:self.nl], targets, imgs) + pre_gen_gains = [ + torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] + for pp in p[:self.nl] + ] + pre_gen_gains_2 = [ + torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] + for pp in p[self.nl:] + ] # Losses for i in range(self.nl): # layer index, layer predictions pi = p[i] - 
pi_2 = p[i+self.nl] - b, a, gj, gi = bs[i], as_[i], gjs[i], gis[i] # image, anchor, gridy, gridx - b_2, a_2, gj_2, gi_2 = bs_2[i], as_2_[i], gjs_2[i], gis_2[i] # image, anchor, gridy, gridx + pi_2 = p[i + self.nl] + b, a, gj, gi = bs[i], as_[i], gjs[i], gis[ + i] # image, anchor, gridy, gridx + b_2, a_2, gj_2, gi_2 = bs_2[i], as_2_[i], gjs_2[i], gis_2[ + i] # image, anchor, gridy, gridx tobj = torch.zeros_like(pi[..., 0], device=device) # target obj - tobj_2 = torch.zeros_like(pi_2[..., 0], device=device) # target obj + tobj_2 = torch.zeros_like(pi_2[..., 0], + device=device) # target obj n = b.shape[0] # number of targets if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + ps = pi[b, a, gj, + gi] # prediction subset corresponding to targets # Regression grid = torch.stack([gi, gj], dim=1) pxy = ps[:, :2].sigmoid() * 2. - 0.5 - #pxy = ps[:, :2].sigmoid() * 3. - 1. - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + # pxy = ps[:, :2].sigmoid() * 3. - 1. + pwh = (ps[:, 2:4].sigmoid() * 2)**2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i] selected_tbox[:, :2] -= grid - iou = bbox_iou(pbox.T, selected_tbox, x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou = bbox_iou(pbox.T, + selected_tbox, + x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness - tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio + tobj[b, a, gj, + gi] = (1.0 - + self.gr) + self.gr * iou.detach().clamp(0).type( + tobj.dtype) # iou ratio # Classification selected_tcls = targets[i][:, 1].long() if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets + t = torch.full_like(ps[:, 5:], self.cn, + device=device) # targets t[range(n), selected_tcls] = self.cp lcls += self.BCEcls(ps[:, 5:], t) # BCE @@ -628,26 +750,33 @@ def __call__(self, ps, targets, imgs): # predictions, targets, model n_2 = b_2.shape[0] # number of targets if n_2: - ps_2 = pi_2[b_2, a_2, gj_2, gi_2] # prediction subset corresponding to targets + ps_2 = pi_2[b_2, a_2, gj_2, + gi_2] # prediction subset corresponding to targets # Regression grid_2 = torch.stack([gi_2, gj_2], dim=1) pxy_2 = ps_2[:, :2].sigmoid() * 2. - 0.5 - #pxy = ps[:, :2].sigmoid() * 3. - 1. - pwh_2 = (ps_2[:, 2:4].sigmoid() * 2) ** 2 * anchors_2[i] + # pxy = ps[:, :2].sigmoid() * 3. - 1. 
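# From here the Dy loss mirrors the Dual one, but the second head's terms
# are kept in separate lcls_2/lbox_2/lobj_2 accumulators so the router score
# can blend the two per-head losses at the end of __call__.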
+ pwh_2 = (ps_2[:, 2:4].sigmoid() * 2)**2 * anchors_2[i] pbox_2 = torch.cat((pxy_2, pwh_2), 1) # predicted box selected_tbox_2 = targets_2[i][:, 2:6] * pre_gen_gains_2[i] selected_tbox_2[:, :2] -= grid_2 - iou_2 = bbox_iou(pbox_2.T, selected_tbox_2, x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou_2 = bbox_iou(pbox_2.T, + selected_tbox_2, + x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox_2 += (1.0 - iou_2).mean() # iou loss # Objectness - tobj_2[b_2, a_2, gj_2, gi_2] = (1.0 - self.gr) + self.gr * iou_2.detach().clamp(0).type(tobj_2.dtype) # iou ratio + tobj_2[b_2, a_2, gj_2, gi_2] = ( + 1.0 - self.gr) + self.gr * iou_2.detach().clamp(0).type( + tobj_2.dtype) # iou ratio # Classification selected_tcls_2 = targets_2[i][:, 1].long() if self.nc > 1: # cls loss (only if multiple classes) - t_2 = torch.full_like(ps_2[:, 5:], self.cn, device=device) # targets + t_2 = torch.full_like(ps_2[:, 5:], self.cn, + device=device) # targets t_2[range(n_2), selected_tcls_2] = self.cp lcls_2 += self.BCEcls(ps_2[:, 5:], t_2) # BCE @@ -655,13 +784,13 @@ def __call__(self, ps, targets, imgs): # predictions, targets, model # with open('targets.txt', 'a') as file: # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] - obji = self.BCEobj(pi[..., 4], tobj) obji_2 = self.BCEobj(pi_2[..., 4], tobj_2) lobj += obji * self.balance[i] # obj loss lobj_2 += obji_2 * self.balance[i] # obj loss if self.autobalance: - self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + self.balance[i] = self.balance[ + i] * 0.9999 + 0.0001 / obji.detach().item() if self.autobalance: self.balance = [x / self.balance[self.ssi] for x in self.balance] @@ -689,4 +818,6 @@ def __call__(self, ps, targets, imgs): # predictions, targets, model loss = loss - self.tracked_diff / 2 loss_2 = loss_2 + self.tracked_diff / 2 adaptive_loss = score[:, 0] * loss + (1 - score[:, 0]) * loss_2 - return adaptive_loss * bs, torch.cat((current_diff, score[:, 0], 1 - score[:, 0], adaptive_loss)).detach() + return adaptive_loss * bs, torch.cat( + (current_diff, score[:, + 0], 1 - score[:, 0], adaptive_loss)).detach() diff --git a/utils/metrics.py b/utils/metrics.py index 6d2f536..f16fe79 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -15,7 +15,14 @@ def fitness(x): return (x[:, :4] * w).sum(1) -def ap_per_class(tp, conf, pred_cls, target_cls, v5_metric=False, plot=False, save_dir='.', names=()): +def ap_per_class(tp, + conf, + pred_cls, + target_cls, + v5_metric=False, + plot=False, + save_dir='.', + names=()): """ Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 
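(Shapes, inferred from the loop below: tp is an (n_detections, n_iou_thresholds) array, and AP is computed once per IoU column.)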
# Arguments @@ -39,7 +46,8 @@ def ap_per_class(tp, conf, pred_cls, target_cls, v5_metric=False, plot=False, sa # Create Precision-Recall curve and compute AP for each class px, py = np.linspace(0, 1, 1000), [] # for plotting - ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) + ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros( + (nc, 1000)) for ci, c in enumerate(unique_classes): i = pred_cls == c n_l = (target_cls == c).sum() # number of labels @@ -54,32 +62,50 @@ def ap_per_class(tp, conf, pred_cls, target_cls, v5_metric=False, plot=False, sa # Recall recall = tpc / (n_l + 1e-16) # recall curve - r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases + r[ci] = np.interp(-px, -conf[i], recall[:, 0], + left=0) # negative x, xp because xp decreases # Precision precision = tpc / (tpc + fpc) # precision curve - p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score + p[ci] = np.interp(-px, -conf[i], precision[:, 0], + left=1) # p at pr_score # AP from recall-precision curve for j in range(tp.shape[1]): - ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j], v5_metric=v5_metric) + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], + precision[:, j], + v5_metric=v5_metric) if plot and j == 0: - py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + py.append(np.interp(px, mrec, + mpre)) # precision at mAP@0.5 # Compute F1 (harmonic mean of precision and recall) f1 = 2 * p * r / (p + r + 1e-16) if plot: plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) - plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') - plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') - plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') + plot_mc_curve(px, + f1, + Path(save_dir) / 'F1_curve.png', + names, + ylabel='F1') + plot_mc_curve(px, + p, + Path(save_dir) / 'P_curve.png', + names, + ylabel='Precision') + plot_mc_curve(px, + r, + Path(save_dir) / 'R_curve.png', + names, + ylabel='Recall') i = f1.mean(0).argmax() # max F1 index return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32') def compute_ap(recall, precision, v5_metric=False): - """ Compute the average precision, given the recall and precision curves + """Compute the average precision, given the recall and precision curves. + # Arguments recall: The recall curve (list) precision: The precision curve (list) @@ -104,7 +130,8 @@ def compute_ap(recall, precision, v5_metric=False): x = np.linspace(0, 1, 101) # 101-point interp (COCO) ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate else: # 'continuous' - i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes + i = np.where( + mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve return ap, mpre, mrec @@ -119,8 +146,8 @@ def __init__(self, nc, conf=0.25, iou_thres=0.45): self.iou_thres = iou_thres def process_batch(self, detections, labels): - """ - Return intersection-over-union (Jaccard index) of boxes. + """Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
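(Despite the inherited summary line, the net effect here is to accumulate per-class matches into the running confusion matrix; the IoU is only an intermediate quantity.)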
Arguments: detections (Array[N, 6]), x1, y1, x2, y2, conf, class @@ -135,12 +162,15 @@ def process_batch(self, detections, labels): x = torch.where(iou > self.iou_thres) if x[0].shape[0]: - matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), + 1).cpu().numpy() if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + matches = matches[np.unique(matches[:, 1], + return_index=True)[1]] matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + matches = matches[np.unique(matches[:, 0], + return_index=True)[1]] else: matches = np.zeros((0, 3)) @@ -165,19 +195,32 @@ def plot(self, save_dir='', names=()): try: import seaborn as sn - array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize - array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) + array = self.matrix / ( + self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize + array[array < + 0.005] = np.nan # don't annotate (would appear as 0.00) fig = plt.figure(figsize=(12, 9), tight_layout=True) sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size - labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels - sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, - xticklabels=names + ['background FP'] if labels else "auto", - yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1)) + labels = (0 < len(names) < 99 + ) and len(names) == self.nc # apply names to ticklabels + sn.heatmap(array, + annot=self.nc < 30, + annot_kws={ + 'size': 8 + }, + cmap='Blues', + fmt='.2f', + square=True, + xticklabels=names + + ['background FP'] if labels else 'auto', + yticklabels=names + + ['background FN'] if labels else 'auto').set_facecolor( + (1, 1, 1)) fig.axes[0].set_xlabel('True') fig.axes[0].set_ylabel('Predicted') fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) - except Exception as e: + except Exception: pass def print(self): @@ -185,7 +228,8 @@ def print(self): print(' '.join(map(str, self.matrix[i]))) -# Plots ---------------------------------------------------------------------------------------------------------------- +# Plots -------------------------------------------------------------------------------------------------------------- + def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()): # Precision-recall curve @@ -194,34 +238,51 @@ def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()): if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py.T): - ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) + ax.plot( + px, y, linewidth=1, + label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) else: ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) - ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) + ax.plot(px, + py.mean(1), + linewidth=3, + color='blue', + label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) ax.set_xlabel('Recall') ax.set_ylabel('Precision') ax.set_xlim(0, 1) ax.set_ylim(0, 1) - plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + plt.legend(bbox_to_anchor=(1.04, 1), loc='upper left') fig.savefig(Path(save_dir), dpi=250) -def 
plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', ylabel='Metric'): +def plot_mc_curve(px, + py, + save_dir='mc_curve.png', + names=(), + xlabel='Confidence', + ylabel='Metric'): # Metric-confidence curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py): - ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) + ax.plot(px, y, linewidth=1, + label=f'{names[i]}') # plot(confidence, metric) else: - ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) + ax.plot(px, py.T, linewidth=1, + color='grey') # plot(confidence, metric) y = py.mean(0) - ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') + ax.plot(px, + y, + linewidth=3, + color='blue', + label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_xlim(0, 1) ax.set_ylim(0, 1) - plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + plt.legend(bbox_to_anchor=(1.04, 1), loc='upper left') fig.savefig(Path(save_dir), dpi=250) diff --git a/utils/plots.py b/utils/plots.py index e6c3aaf..c7f76ef 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -1,6 +1,5 @@ # Plotting utils -import glob import math import os import random @@ -11,15 +10,10 @@ import matplotlib import matplotlib.pyplot as plt import numpy as np -import pandas as pd -import seaborn as sns import torch -import yaml -from PIL import Image, ImageDraw, ImageFont -from scipy.signal import butter, filtfilt +from PIL import Image from utils.general import xywh2xyxy, xyxy2xywh -from utils.metrics import fitness # Settings matplotlib.rc('font', **{'size': 11}) @@ -31,12 +25,14 @@ def color_list(): def hex2rgb(h): return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) - return [hex2rgb(h) for h in matplotlib.colors.TABLEAU_COLORS.values()] # or BASE_ (8), CSS4_ (148), XKCD_ (949) + return [hex2rgb(h) for h in matplotlib.colors.TABLEAU_COLORS.values() + ] # or BASE_ (8), CSS4_ (148), XKCD_ (949) def plot_one_box(x, img, color=None, label=None, line_thickness=3): # Plots one bounding box on image img - tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness + tl = line_thickness or round( + 0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness color = color or [random.randint(0, 255) for _ in range(3)] c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) @@ -45,7 +41,12 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=3): t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled - cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) + cv2.putText(img, + label, (c1[0], c1[1] - 2), + 0, + tl / 3, [225, 255, 255], + thickness=tf, + lineType=cv2.LINE_AA) def output_to_target(output): @@ -53,11 +54,18 @@ def output_to_target(output): targets = [] for i, o in enumerate(output): for *box, conf, cls in o.cpu().numpy(): - targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) + targets.append( + [i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) return np.array(targets) -def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16): +def 
plot_images(images, + targets, + paths=None, + fname='images.jpg', + names=None, + max_size=640, + max_subplots=16): # Plot image grid with labels if isinstance(images, torch.Tensor): @@ -73,7 +81,7 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max tf = max(tl - 1, 1) # font thickness bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs ** 0.5) # number of subplots (square) + ns = np.ceil(bs**0.5) # number of subplots (square) # Check if we should resize scale_factor = max_size / max(h, w) @@ -82,7 +90,8 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max w = math.ceil(scale_factor * w) colors = color_list() # list of colors - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, + dtype=np.uint8) # init for i, img in enumerate(images): if i == max_subplots: # if last batch has fewer images than we expect break @@ -100,7 +109,8 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max boxes = xywh2xyxy(image_targets[:, 2:6]).T classes = image_targets[:, 1].astype('int') labels = image_targets.shape[1] == 6 # labels if no conf column - conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred) + conf = None if labels else image_targets[:, + 6] # check for confidence presence (label vs pred) if boxes.shape[1]: if boxes.max() <= 1.01: # if normalized with tolerance 0.01 @@ -115,22 +125,35 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max color = colors[cls % len(colors)] cls = names[cls] if names else cls if labels or conf[j] > 0.25: # 0.25 conf thresh - label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j]) - plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) + label = '%s' % cls if labels else '{} {:.1f}'.format( + cls, conf[j]) + plot_one_box(box, + mosaic, + label=label, + color=color, + line_thickness=tl) # Draw image filename labels if paths: label = Path(paths[i]).name[:40] # trim to 40 char - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, + thickness=tf)[0] + cv2.putText(mosaic, + label, (block_x + 5, block_y + t_size[1] + 5), + 0, + tl / 3, [220, 220, 220], + thickness=tf, lineType=cv2.LINE_AA) # Image border - cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3) + cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), + (255, 255, 255), + thickness=3) if fname: r = min(1280. 
/ max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) + mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), + interpolation=cv2.INTER_AREA) # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save Image.fromarray(mosaic).save(fname) # PIL save return mosaic @@ -138,7 +161,8 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''): # Plot LR simulating training for full epochs - optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals + optimizer, scheduler = copy(optimizer), copy( + scheduler) # do not modify originals y = [] for _ in range(epochs): scheduler.step() @@ -157,19 +181,28 @@ def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''): # Plot training 'results*.txt'. from utils.plots import *; plot_results(save_dir='runs/train/exp') fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) ax = ax.ravel() - s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall', - 'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95'] + s = [ + 'Box', 'Objectness', 'Classification', 'Precision', 'Recall', + 'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', + 'mAP@0.5:0.95' + ] if bucket: # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id] files = ['results%g.txt' % x for x in id] - c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/results%g.txt' % (bucket, x) for x in id) + c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple( + f'gs://{bucket}/results{x:g}.txt' for x in id) os.system(c) else: files = list(Path(save_dir).glob('results*.txt')) - assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir) + assert len( + files + ), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath( + save_dir) for fi, f in enumerate(files): try: - results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T + results = np.loadtxt(f, + usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], + ndmin=2).T n = results.shape[1] # number of rows x = range(start, min(stop, n) if stop else n) for i in range(10): @@ -178,12 +211,17 @@ def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''): y[y == 0] = np.nan # don't show zero loss values # y /= y[0] # normalize label = labels[fi] if len(labels) else f.stem - ax[i].plot(x, y, marker='.', label=label, linewidth=2, markersize=8) + ax[i].plot(x, + y, + marker='.', + label=label, + linewidth=2, + markersize=8) ax[i].set_title(s[i]) # if i in [5, 6, 7]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) except Exception as e: - print('Warning: Plotting error for %s; %s' % (f, e)) + print(f'Warning: Plotting error for {f}; {e}') ax[1].legend() fig.savefig(Path(save_dir) / 'results.png', dpi=200) diff --git a/utils/torch_utils.py b/utils/torch_utils.py index 70b1bf5..8b207a7 100644 --- a/utils/torch_utils.py +++ b/utils/torch_utils.py @@ -13,7 +13,6 @@ import torch.backends.cudnn as cudnn import torch.nn as nn import torch.nn.functional as F -import torchvision try: import thop # for FLOPS computation @@ -24,9 +23,8 @@ @contextmanager def torch_distributed_zero_first(local_rank: int): - """ - Decorator to make all processes in distributed training wait for each local_master to do something. 
- """ + """Decorator to make all processes in distributed training wait for each + local_master to do something.""" if local_rank not in [-1, 0]: torch.distributed.barrier() yield @@ -53,8 +51,9 @@ def git_describe(path=Path(__file__).parent): # path must be a directory # return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe s = f'git -C {path} describe --tags --long --always' try: - return subprocess.check_output(s, shell=True, stderr=subprocess.STDOUT).decode()[:-1] - except subprocess.CalledProcessError as e: + return subprocess.check_output(s, shell=True, + stderr=subprocess.STDOUT).decode()[:-1] + except subprocess.CalledProcessError: return '' # not a git repository @@ -63,10 +62,12 @@ def select_device(device='', batch_size=None): s = f'DynamicDet 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string cpu = device.lower() == 'cpu' if cpu: - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False + os.environ[ + 'CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False elif device: # non-cpu device requested os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable - assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability + assert torch.cuda.is_available( + ), f'CUDA unavailable, invalid device {device} requested' # check availability cuda = not cpu and torch.cuda.is_available() if cuda: @@ -80,7 +81,8 @@ def select_device(device='', batch_size=None): else: s += 'CPU\n' - logger.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe + logger.info(s.encode().decode('ascii', 'ignore') + if platform.system() == 'Windows' else s) # emoji-safe return torch.device('cuda:0' if cuda else 'cpu') @@ -98,18 +100,24 @@ def profile(x, ops, n=100, device=None): # m2 = nn.SiLU() # profile(x, [m1, m2], n=100) # profile speed over 100 iterations - device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + device = device or torch.device( + 'cuda:0' if torch.cuda.is_available() else 'cpu') x = x.to(device) x.requires_grad = True - print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') - print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") + print(torch.__version__, device.type, + torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') + print( + f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}" + ) for m in ops if isinstance(ops, list) else [ops]: m = m.to(device) if hasattr(m, 'to') else m # device - m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type + m = m.half() if hasattr(m, 'half') and isinstance( + x, torch.Tensor) and x.dtype is torch.float16 else m # type dtf, dtb, t = 0., 0., [0., 0., 0.] 
# dt forward, backward try: - flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS - except: + flops = thop.profile(m, inputs=(x, ), + verbose=False)[0] / 1E9 * 2 # GFLOPS + except Exception: flops = 0 for _ in range(n): @@ -119,24 +127,33 @@ def profile(x, ops, n=100, device=None): try: _ = y.sum().backward() t[2] = time_synchronized() - except: # no backward method + except Exception: # no backward method t[2] = float('nan') dtf += (t[1] - t[0]) * 1000 / n # ms per op forward dtb += (t[2] - t[1]) * 1000 / n # ms per op backward s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' - p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters - print(f'{p:12}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') + p = sum(list(x.numel() for x in m.parameters())) if isinstance( + m, nn.Module) else 0 # parameters + print( + f'{p:12}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}' + ) def is_parallel(model): - return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) + return type(model) in (nn.parallel.DataParallel, + nn.parallel.DistributedDataParallel) def intersect_dicts(da, db, exclude=()): # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values - return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} + return { + k: v + for k, v in da.items() + if k in db and not any(x in k + for x in exclude) and v.shape == db[k].shape + } def initialize_weights(model): @@ -153,7 +170,9 @@ def initialize_weights(model): def find_modules(model, mclass=nn.Conv2d): # Finds layer indices matching module class 'mclass' - return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] + return [ + i for i, m in enumerate(model.module_list) if isinstance(m, mclass) + ] def sparsity(model): @@ -184,7 +203,8 @@ def fuse_conv_and_bn(conv, bn): stride=conv.stride, padding=conv.padding, groups=conv.groups, - bias=True).requires_grad_(False).to(conv.weight.device) + bias=True).requires_grad_(False).to( + conv.weight.device) # prepare filters w_conv = conv.weight.clone().view(conv.out_channels, -1) @@ -192,9 +212,13 @@ def fuse_conv_and_bn(conv, bn): fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) # prepare spatial bias - b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias - b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) - fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + b_conv = torch.zeros( + conv.weight.size(0), + device=conv.weight.device) if conv.bias is None else conv.bias + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div( + torch.sqrt(bn.running_var + bn.eps)) + fusedconv.bias.copy_( + torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) return fusedconv @@ -202,25 +226,36 @@ def fuse_conv_and_bn(conv, bn): def model_info(model, verbose=False, img_size=640): # Model information. img_size may be int or list, i.e. 
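A quick way to sanity-check the fuse_conv_and_bn folding above is to compare fused and unfused outputs on random data; a minimal sketch, using an eval-mode BatchNorm so its running statistics are the ones folded in:

```python
import torch
import torch.nn as nn

from utils.torch_utils import fuse_conv_and_bn

conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(8).eval()  # fusion assumes inference-mode statistics
x = torch.randn(1, 3, 32, 32)
with torch.no_grad():
    y_ref = bn(conv(x))
    y_fused = fuse_conv_and_bn(conv, bn)(x)
print(torch.allclose(y_ref, y_fused, atol=1e-5))  # expect True
```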
img_size=640 or img_size=[640, 320] n_p = sum(x.numel() for x in model.parameters()) # number parameters - n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients + n_g = sum(x.numel() for x in model.parameters() + if x.requires_grad) # number gradients if verbose: - print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) + print('%5s %40s %9s %12s %20s %10s %10s' % + ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', + 'sigma')) for i, (name, p) in enumerate(model.named_parameters()): name = name.replace('module_list.', '') print('%5g %40s %9s %12g %20s %10.3g %10.3g' % - (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) + (i, name, p.requires_grad, p.numel(), list( + p.shape), p.mean(), p.std())) try: # FLOPS from thop import profile - stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32 - img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input - flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS - img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float - fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPS + stride = max(int(model.stride.max()), 32) if hasattr(model, + 'stride') else 32 + img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), + device=next(model.parameters()).device) # input + flops = profile(deepcopy(model), inputs=(img, ), + verbose=False)[0] / 1E9 * 2 # stride GFLOPS + img_size = img_size if isinstance( + img_size, list) else [img_size, img_size] # expand if int/float + fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / + stride) # 640x640 GFLOPS except (ImportError, Exception): fs = '' - logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") + logger.info( + f'Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}' + ) def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) @@ -230,24 +265,29 @@ def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) else: h, w = img.shape[2:] s = (int(h * ratio), int(w * ratio)) # new size - img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize + img = F.interpolate(img, size=s, mode='bilinear', + align_corners=False) # resize if not same_shape: # pad/crop img - h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] - return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean + h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w)) + return F.pad(img, [0, w - s[1], 0, h - s[0]], + value=0.447) # value = imagenet mean def copy_attr(a, b, include=(), exclude=()): # Copy attributes from b to a, options to only include [...] and to exclude [...] for k, v in b.__dict__.items(): - if (len(include) and k not in include) or k.startswith('_') or k in exclude: + if (len(include) + and k not in include) or k.startswith('_') or k in exclude: continue else: setattr(a, k, v) class ModelEMA: - """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models - Keep a moving average of everything in the model state_dict (parameters and buffers). 
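The ModelEMA class reformatted here implements the usual shadow-weights pattern; a minimal sketch of how it slots into a training loop (the toy model, optimizer, and step count are illustrative, not from this repo's configs):

```python
import torch
import torch.nn as nn

from utils.torch_utils import ModelEMA

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
ema = ModelEMA(model, decay=0.9999)
opt = torch.optim.SGD(model.parameters(), lr=0.01)
for _ in range(3):
    loss = model(torch.randn(2, 3, 16, 16)).square().mean()
    loss.backward()
    opt.step()
    opt.zero_grad()
    ema.update(model)  # blend current weights into the shadow copy
# evaluate / checkpoint from ema.ema rather than the raw model
```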
+ """Model Exponential Moving Average from + https://github.com/rwightman/pytorch-image-models Keep a moving average of + everything in the model state_dict (parameters and buffers). + This is intended to allow functionality like https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage A smoothed version of the weights is necessary for some training schemes to perform well. @@ -257,11 +297,13 @@ class ModelEMA: def __init__(self, model, decay=0.9999, updates=0): # Create EMA - self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA + self.ema = deepcopy( + model.module if is_parallel(model) else model).eval() # FP32 EMA # if next(model.parameters()).device.type != 'cpu': # self.ema.half() # FP16 EMA self.updates = updates # number of EMA updates - self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) + self.decay = lambda x: decay * (1 - math.exp( + -x / 2000)) # decay exponential ramp (to help early epochs) for p in self.ema.parameters(): p.requires_grad_(False) @@ -271,18 +313,23 @@ def update(self, model): self.updates += 1 d = self.decay(self.updates) - msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict + msd = model.module.state_dict() if is_parallel( + model) else model.state_dict() # model state_dict for k, v in self.ema.state_dict().items(): if v.dtype.is_floating_point: v *= d v += (1. - d) * msd[k].detach() - def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): + def update_attr(self, + model, + include=(), + exclude=('process_group', 'reducer')): # Update EMA attributes copy_attr(self.ema, model, include, exclude) class BatchNormXd(torch.nn.modules.batchnorm._BatchNorm): + def _check_input_dim(self, input): # The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc # is this method that is overwritten by the sub-class @@ -294,16 +341,16 @@ def _check_input_dim(self, input): # we could return the one that was originally created) return + def revert_sync_batchnorm(module): # this is very similar to the function that it is trying to revert: # https://github.com/pytorch/pytorch/blob/c8b3686a3e4ba63dc59e5dcfe5db3430df256833/torch/nn/modules/batchnorm.py#L679 module_output = module if isinstance(module, torch.nn.modules.batchnorm.SyncBatchNorm): - new_cls = BatchNormXd - module_output = BatchNormXd(module.num_features, - module.eps, module.momentum, - module.affine, - module.track_running_stats) + # new_cls = BatchNormXd + module_output = BatchNormXd(module.num_features, module.eps, + module.momentum, module.affine, + module.track_running_stats) if module.affine: with torch.no_grad(): module_output.weight = module.weight @@ -311,7 +358,7 @@ def revert_sync_batchnorm(module): module_output.running_mean = module.running_mean module_output.running_var = module.running_var module_output.num_batches_tracked = module.num_batches_tracked - if hasattr(module, "qconfig"): + if hasattr(module, 'qconfig'): module_output.qconfig = module.qconfig for name, child in module.named_children(): module_output.add_module(name, revert_sync_batchnorm(child)) diff --git a/utils/wandb_logging/__init__.py b/utils/wandb_logging/__init__.py index 84952a8..a6131c1 100644 --- a/utils/wandb_logging/__init__.py +++ b/utils/wandb_logging/__init__.py @@ -1 +1 @@ -# init \ No newline at end of file +# init diff --git a/utils/wandb_logging/log_dataset.py b/utils/wandb_logging/log_dataset.py deleted file mode 100644 index 
438e64e..0000000 --- a/utils/wandb_logging/log_dataset.py +++ /dev/null @@ -1,13 +0,0 @@ -import argparse - -import yaml - -from wandb_utils import WandbLogger - -WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' - - -def create_dataset_artifact(opt): - with open(opt.data) as f: - data = yaml.load(f, Loader=yaml.SafeLoader) # data dict - logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation') diff --git a/utils/wandb_logging/wandb_utils.py b/utils/wandb_logging/wandb_utils.py index 869b514..e67014c 100644 --- a/utils/wandb_logging/wandb_utils.py +++ b/utils/wandb_logging/wandb_utils.py @@ -6,17 +6,16 @@ import yaml from tqdm import tqdm -sys.path.append(str(Path(__file__).parent.parent.parent)) # add utils/ to path -from utils.datasets import LoadImagesAndLabels -from utils.datasets import img2label_paths -from utils.general import colorstr, xywh2xyxy, check_dataset +from utils.datasets import LoadImagesAndLabels, img2label_paths +from utils.general import check_dataset, colorstr, xywh2xyxy try: import wandb - from wandb import init, finish except ImportError: wandb = None +sys.path.append(str(Path(__file__).parent.parent.parent)) # add utils/ to path + WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' @@ -25,7 +24,8 @@ def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX): def check_wandb_config_file(data_config_file): - wandb_config = '_wandb.'.join(data_config_file.rsplit('.', 1)) # updated data.yaml path + wandb_config = '_wandb.'.join(data_config_file.rsplit( + '.', 1)) # updated data.yaml path if Path(wandb_config).is_file(): return wandb_config return data_config_file @@ -40,15 +40,17 @@ def get_run_info(run_path): def check_wandb_resume(opt): - process_wandb_config_ddp_mode(opt) if opt.global_rank not in [-1, 0] else None + process_wandb_config_ddp_mode(opt) if opt.global_rank not in [-1, 0 + ] else None if isinstance(opt.resume, str): if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): if opt.global_rank not in [-1, 0]: # For resuming DDP runs run_id, project, model_artifact_name = get_run_info(opt.resume) api = wandb.Api() - artifact = api.artifact(project + '/' + model_artifact_name + ':latest') + artifact = api.artifact(project + '/' + model_artifact_name + + ':latest') modeldir = artifact.download() - opt.weights = str(Path(modeldir) / "last.pt") + opt.weights = str(Path(modeldir) / 'last.pt') return True return None @@ -57,16 +59,21 @@ def process_wandb_config_ddp_mode(opt): with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.SafeLoader) # data dict train_dir, val_dir = None, None - if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX): + if isinstance( + data_dict['train'], + str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX): api = wandb.Api() - train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias) + train_artifact = api.artifact( + remove_prefix(data_dict['train']) + ':' + opt.artifact_alias) train_dir = train_artifact.download() train_path = Path(train_dir) / 'data/images/' data_dict['train'] = str(train_path) - if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX): + if isinstance(data_dict['val'], + str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX): api = wandb.Api() - val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias) + val_artifact = api.artifact( + remove_prefix(data_dict['val']) + ':' + opt.artifact_alias) val_dir = val_artifact.download() val_path = Path(val_dir) / 
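All of the resume helpers above key off the wandb-artifact:// prefix; a small illustration of the convention (remove_prefix's body is not shown in this hunk, so the one-liner below is an assumption based on its call sites):

```python
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'


def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX):
    return from_string[len(prefix):]  # assumed implementation


resume = 'wandb-artifact://my-project/run_1abc_model'
if resume.startswith(WANDB_ARTIFACT_PREFIX):
    print(remove_prefix(resume))  # -> my-project/run_1abc_model
```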
'data/images/' data_dict['val'] = str(val_path) @@ -78,30 +85,37 @@ def process_wandb_config_ddp_mode(opt): class WandbLogger(): + def __init__(self, opt, name, run_id, data_dict, job_type='Training'): # Pre-training routine -- self.job_type = job_type self.wandb, self.wandb_run, self.data_dict = wandb, None if not wandb else wandb.run, data_dict - # It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call + # It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the + # WandbLogger's wandb.init call if isinstance(opt.resume, str): # checks resume from artifact if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): run_id, project, model_artifact_name = get_run_info(opt.resume) model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name assert wandb, 'install wandb to resume wandb runs' # Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config - self.wandb_run = wandb.init(id=run_id, project=project, resume='allow') + self.wandb_run = wandb.init(id=run_id, + project=project, + resume='allow') opt.resume = model_artifact_name elif self.wandb: - self.wandb_run = wandb.init(config=opt, - resume="allow", - project='DynamicDet' if opt.project == 'runs/train' else Path(opt.project).stem, - name=name, - job_type=job_type, - id=run_id) if not wandb.run else wandb.run + self.wandb_run = wandb.init( + config=opt, + resume='allow', + project='DynamicDet' + if opt.project == 'runs/train' else Path(opt.project).stem, + name=name, + job_type=job_type, + id=run_id) if not wandb.run else wandb.run if self.wandb_run: if self.job_type == 'Training': if not opt.resume: - wandb_data_dict = self.check_and_upload_dataset(opt) if opt.upload_dataset else data_dict + wandb_data_dict = self.check_and_upload_dataset( + opt) if opt.upload_dataset else data_dict # Info useful for resuming from artifacts self.wandb_run.config.opt = vars(opt) self.wandb_run.config.data_dict = wandb_data_dict @@ -110,15 +124,17 @@ def __init__(self, opt, name, run_id, data_dict, job_type='Training'): self.data_dict = self.check_and_upload_dataset(opt) else: prefix = colorstr('wandb: ') - print(f"{prefix}Install Weights & Biases for DynamicDet logging with 'pip install wandb' (recommended)") + print( + f"{prefix}Install Weights & Biases for DynamicDet logging with 'pip install wandb' (recommended)" + ) def check_and_upload_dataset(self, opt): assert wandb, 'Install wandb to upload dataset' check_dataset(self.data_dict) - config_path = self.log_dataset_artifact(opt.data, - opt.single_cls, - 'DynamicDet' if opt.project == 'runs/train' else Path(opt.project).stem) - print("Created dataset config file ", config_path) + config_path = self.log_dataset_artifact( + opt.data, opt.single_cls, 'DynamicDet' + if opt.project == 'runs/train' else Path(opt.project).stem) + print('Created dataset config file ', config_path) with open(config_path) as f: wandb_data_dict = yaml.load(f, Loader=yaml.SafeLoader) return wandb_data_dict @@ -129,17 +145,21 @@ def setup_training(self, opt, data_dict): if isinstance(opt.resume, str): modeldir, _ = self.download_model_artifact(opt) if modeldir: - self.weights = Path(modeldir) / "last.pt" + self.weights = Path(modeldir) / 'last.pt' config = self.wandb_run.config - opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp = str( - self.weights), config.save_period, config.total_batch_size, config.bbox_interval, config.epochs, \ - config.opt['hyp'] - data_dict = 
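A hypothetical instantiation of WandbLogger during training setup, matching the __init__ signature above; opt and data_dict stand in for train.py's argparse namespace and parsed data yaml and are not defined here:

```python
from pathlib import Path

from utils.wandb_logging.wandb_utils import WandbLogger

# opt and data_dict are assumed training-setup locals.
wandb_logger = WandbLogger(opt, Path(opt.save_dir).stem, None, data_dict)
if wandb_logger.wandb_run:
    wandb_logger.log({'train/box_loss': 0.05})  # buffered until end_epoch()
    wandb_logger.end_epoch()
```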
dict(self.wandb_run.config.data_dict) # eliminates the need for config file to resume - if 'val_artifact' not in self.__dict__: # If --upload_dataset is set, use the existing artifact, don't download - self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(data_dict.get('train'), - opt.artifact_alias) - self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(data_dict.get('val'), - opt.artifact_alias) + (opt.weights, opt.save_period, opt.batch_size, + opt.bbox_interval, opt.epochs, + opt.hyp) = (str(self.weights), config.save_period, + config.total_batch_size, config.bbox_interval, + config.epochs, config.opt['hyp']) + data_dict = dict(self.wandb_run.config.data_dict + ) # eliminates the need for config file to resume + # If --upload_dataset is set, use the existing artifact, don't download + if 'val_artifact' not in self.__dict__: + self.train_artifact_path, self.train_artifact = self.download_dataset_artifact( + data_dict.get('train'), opt.artifact_alias) + self.val_artifact_path, self.val_artifact = self.download_dataset_artifact( + data_dict.get('val'), opt.artifact_alias) self.result_artifact, self.result_table, self.val_table, self.weights = None, None, None, None if self.train_artifact_path is not None: train_path = Path(self.train_artifact_path) / 'data/images/' @@ -147,18 +167,22 @@ def setup_training(self, opt, data_dict): if self.val_artifact_path is not None: val_path = Path(self.val_artifact_path) / 'data/images/' data_dict['val'] = str(val_path) - self.val_table = self.val_artifact.get("val") + self.val_table = self.val_artifact.get('val') self.map_val_table_path() if self.val_artifact is not None: - self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation") - self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"]) + self.result_artifact = wandb.Artifact( + 'run_' + wandb.run.id + '_progress', 'evaluation') + self.result_table = wandb.Table( + ['epoch', 'id', 'prediction', 'avg_confidence']) if opt.bbox_interval == -1: - self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1 + self.bbox_interval = opt.bbox_interval = ( + opt.epochs // 10) if opt.epochs > 10 else 1 return data_dict def download_dataset_artifact(self, path, alias): if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX): - dataset_artifact = wandb.use_artifact(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias) + dataset_artifact = wandb.use_artifact( + remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ':' + alias) assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'" datadir = dataset_artifact.download() return datadir, dataset_artifact @@ -166,7 +190,8 @@ def download_dataset_artifact(self, path, alias): def download_model_artifact(self, opt): if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): - model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest") + model_artifact = wandb.use_artifact( + remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ':latest') assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist' modeldir = model_artifact.download() epochs_trained = model_artifact.metadata.get('epochs_trained') @@ -177,33 +202,47 @@ def download_model_artifact(self, opt): return None, None def log_model(self, path, opt, epoch, fitness_score, best_model=False): - model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model', type='model', metadata={ - 'original_url': str(path), - 
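log_model, whose signature is being rewrapped here, expects a directory containing last.pt and tags the artifact with epoch and fitness metadata; a sketch of a plausible call site (wandb_logger, save_dir, opt, epoch, fi, and best_fitness are assumed training-loop locals):

```python
# Assumed call site inside the epoch loop; all names are loop locals.
if wandb_logger.wandb_run and (epoch + 1) % opt.save_period == 0:
    wandb_logger.log_model(save_dir, opt, epoch, fitness_score=fi,
                           best_model=(fi == best_fitness))
```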
'epochs_trained': epoch + 1, -            'save period': opt.save_period, -            'project': opt.project, -            'total_epochs': opt.epochs, -            'fitness_score': fitness_score -        }) +        model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model', +                                        type='model', +                                        metadata={ +                                            'original_url': str(path), +                                            'epochs_trained': epoch + 1, +                                            'save period': opt.save_period, +                                            'project': opt.project, +                                            'total_epochs': opt.epochs, +                                            'fitness_score': fitness_score +                                        }) model_artifact.add_file(str(path / 'last.pt'), name='last.pt') wandb.log_artifact(model_artifact, -                           aliases=['latest', 'epoch ' + str(self.current_epoch), 'best' if best_model else '']) -        print("Saving model artifact on epoch ", epoch + 1) +                           aliases=[ +                               'latest', 'epoch ' + str(self.current_epoch), +                               'best' if best_model else '' +                           ]) +        print('Saving model artifact on epoch ', epoch + 1) -    def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False): +    def log_dataset_artifact(self, +                             data_file, +                             single_cls, +                             project, +                             overwrite_config=False): with open(data_file) as f: data = yaml.load(f, Loader=yaml.SafeLoader)  # data dict -        nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names']) +        nc, names = (1, ['item']) if single_cls else (int(data['nc']), +                                                      data['names']) names = {k: v for k, v in enumerate(names)}  # to index dictionary -        self.train_artifact = self.create_dataset_table(LoadImagesAndLabels( -            data['train']), names, name='train') if data.get('train') else None -        self.val_artifact = self.create_dataset_table(LoadImagesAndLabels( -            data['val']), names, name='val') if data.get('val') else None +        self.train_artifact = self.create_dataset_table( +            LoadImagesAndLabels(data['train']), names, +            name='train') if data.get('train') else None +        self.val_artifact = self.create_dataset_table( +            LoadImagesAndLabels( +                data['val']), names, name='val') if data.get('val') else None if data.get('train'): -            data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train') +            data['train'] = WANDB_ARTIFACT_PREFIX + str( +                Path(project) / 'train') if data.get('val'): data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val') -        path = data_file if overwrite_config else '_wandb.'.join(data_file.rsplit('.', 1))  # updated data.yaml path +        path = data_file if overwrite_config else '_wandb.'.join( +            data_file.rsplit('.', 1))  # updated data.yaml path data.pop('download', None) with open(path, 'w') as f: yaml.dump(data, f) @@ -221,14 +260,16 @@ def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config= def map_val_table_path(self): self.val_table_map = {} -        print("Mapping dataset") +        print('Mapping dataset') for i, data in enumerate(tqdm(self.val_table.data)): self.val_table_map[data[3]] = data[0] def create_dataset_table(self, dataset, class_to_id, name='dataset'): -        # TODO: Explore multiprocessing to slpit this loop parallely| This is essential for speeding up the the logging -        artifact = wandb.Artifact(name=name, type="dataset") -        img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None +        # TODO: Explore multiprocessing to split this loop in parallel; +        # this is essential for speeding up the logging +        artifact = wandb.Artifact(name=name, type='dataset') +        img_files = tqdm([dataset.path]) if isinstance( +            dataset.path, str) and Path(dataset.path).is_dir() else None img_files = tqdm(dataset.img_files) if not img_files else img_files for img_file in img_files: if Path(img_file).is_dir(): @@ -236,51 +277,90 @@ def create_dataset_table(self, dataset,
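The path rewriting performed by log_dataset_artifact above can be traced by hand; the file and project names below are illustrative:

```python
# '_wandb.' is spliced in before the extension of the data config.
data_file = 'data/coco.yaml'
print('_wandb.'.join(data_file.rsplit('.', 1)))  # -> data/coco_wandb.yaml
# Inside the new file, train/val point at artifacts instead of folders,
# e.g. 'wandb-artifact://DynamicDet/train' and 'wandb-artifact://DynamicDet/val'.
```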
class_to_id, name='dataset'): labels_path = 'labels'.join(dataset.path.rsplit('images', 1)) artifact.add_dir(labels_path, name='data/labels') else: - artifact.add_file(img_file, name='data/images/' + Path(img_file).name) + artifact.add_file(img_file, + name='data/images/' + Path(img_file).name) label_file = Path(img2label_paths([img_file])[0]) - artifact.add_file(str(label_file), - name='data/labels/' + label_file.name) if label_file.exists() else None - table = wandb.Table(columns=["id", "train_image", "Classes", "name"]) - class_set = wandb.Classes([{'id': id, 'name': name} for id, name in class_to_id.items()]) + artifact.add_file( + str(label_file), name='data/labels/' + + label_file.name) if label_file.exists() else None + table = wandb.Table(columns=['id', 'train_image', 'Classes', 'name']) + class_set = wandb.Classes([{ + 'id': id, + 'name': name + } for id, name in class_to_id.items()]) for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)): height, width = shapes[0] - labels[:, 2:] = (xywh2xyxy(labels[:, 2:].view(-1, 4))) * torch.Tensor([width, height, width, height]) + labels[:, 2:] = (xywh2xyxy(labels[:, 2:].view( + -1, 4))) * torch.Tensor([width, height, width, height]) box_data, img_classes = [], {} for cls, *xyxy in labels[:, 1:].tolist(): cls = int(cls) - box_data.append({"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, - "class_id": cls, - "box_caption": "%s" % (class_to_id[cls]), - "scores": {"acc": 1}, - "domain": "pixel"}) + box_data.append({ + 'position': { + 'minX': xyxy[0], + 'minY': xyxy[1], + 'maxX': xyxy[2], + 'maxY': xyxy[3] + }, + 'class_id': cls, + 'box_caption': '%s' % (class_to_id[cls]), + 'scores': { + 'acc': 1 + }, + 'domain': 'pixel' + }) img_classes[cls] = class_to_id[cls] - boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}} # inference-space - table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), json.dumps(img_classes), + boxes = { + 'ground_truth': { + 'box_data': box_data, + 'class_labels': class_to_id + } + } # inference-space + table.add_data(si, + wandb.Image(paths, classes=class_set, boxes=boxes), + json.dumps(img_classes), Path(paths).name) artifact.add(table, name) return artifact def log_training_progress(self, predn, path, names): if self.val_table and self.result_table: - class_set = wandb.Classes([{'id': id, 'name': name} for id, name in names.items()]) + class_set = wandb.Classes([{ + 'id': id, + 'name': name + } for id, name in names.items()]) box_data = [] total_conf = 0 for *xyxy, conf, cls in predn.tolist(): if conf >= 0.25: - box_data.append( - {"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, - "class_id": int(cls), - "box_caption": "%s %.3f" % (names[cls], conf), - "scores": {"class_score": conf}, - "domain": "pixel"}) + box_data.append({ + 'position': { + 'minX': xyxy[0], + 'minY': xyxy[1], + 'maxX': xyxy[2], + 'maxY': xyxy[3] + }, + 'class_id': int(cls), + 'box_caption': f'{names[cls]} {conf:.3f}', + 'scores': { + 'class_score': conf + }, + 'domain': 'pixel' + }) total_conf = total_conf + conf - boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space + boxes = { + 'predictions': { + 'box_data': box_data, + 'class_labels': names + } + } # inference-space id = self.val_table_map[Path(path).name] - self.result_table.add_data(self.current_epoch, - id, - wandb.Image(self.val_table.data[id][1], boxes=boxes, classes=class_set), - total_conf / max(1, len(box_data)) - ) + 
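The nested dicts assembled above follow W&B's bounding-box payload schema; a stand-alone sketch with illustrative values (an active wandb run is required to actually log it):

```python
import numpy as np
import wandb

wandb.init(project='demo')
box = {
    'position': {'minX': 10.0, 'minY': 20.0, 'maxX': 110.0, 'maxY': 220.0},
    'class_id': 0,
    'box_caption': 'person 0.910',
    'scores': {'class_score': 0.91},
    'domain': 'pixel',  # absolute pixel coordinates, as used above
}
image = wandb.Image(np.zeros((256, 256, 3), dtype=np.uint8),
                    boxes={'predictions': {'box_data': [box],
                                           'class_labels': {0: 'person'}}})
wandb.log({'bounded_predictions': image})
```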
self.result_table.add_data( + self.current_epoch, id, + wandb.Image(self.val_table.data[id][1], + boxes=boxes, + classes=class_set), + total_conf / max(1, len(box_data))) def log(self, log_dict): if self.wandb_run: @@ -292,12 +372,19 @@ def end_epoch(self, best_result=False): wandb.log(self.log_dict) self.log_dict = {} if self.result_artifact: - train_results = wandb.JoinedTable(self.val_table, self.result_table, "id") + train_results = wandb.JoinedTable(self.val_table, + self.result_table, 'id') self.result_artifact.add(train_results, 'result') - wandb.log_artifact(self.result_artifact, aliases=['latest', 'epoch ' + str(self.current_epoch), - ('best' if best_result else '')]) - self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"]) - self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation") + wandb.log_artifact(self.result_artifact, + aliases=[ + 'latest', + 'epoch ' + str(self.current_epoch), + ('best' if best_result else '') + ]) + self.result_table = wandb.Table( + ['epoch', 'id', 'prediction', 'avg_confidence']) + self.result_artifact = wandb.Artifact( + 'run_' + wandb.run.id + '_progress', 'evaluation') def finish_run(self): if self.wandb_run: