diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..74a733e --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,23 @@ +name: lint + +on: [push, pull_request] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install pre-commit hook + run: | + pip install pre-commit + pre-commit install + - name: Linting + run: pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..d9a0479 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,51 @@ +exclude: ^(tests/data/|requirements.txt) +repos: + - repo: https://gitee.com/openmmlab/mirrors-flake8 + rev: 5.0.4 + hooks: + - id: flake8 + args: ["--max-line-length=119"] + - repo: https://gitee.com/openmmlab/mirrors-isort + rev: 5.11.5 + hooks: + - id: isort + - repo: https://gitee.com/openmmlab/mirrors-yapf + rev: v0.32.0 + hooks: + - id: yapf + - repo: https://gitee.com/openmmlab/mirrors-pre-commit-hooks + rev: v4.3.0 + hooks: + - id: trailing-whitespace + - id: check-yaml + - id: end-of-file-fixer + - id: requirements-txt-fixer + - id: double-quote-string-fixer + - id: check-merge-conflict + - id: fix-encoding-pragma + args: ["--remove"] + - id: mixed-line-ending + args: ["--fix=lf"] + - repo: https://gitee.com/openmmlab/mirrors-codespell + rev: v2.2.1 + hooks: + - id: codespell + - repo: https://gitee.com/openmmlab/mirrors-mdformat + rev: 0.7.9 + hooks: + - id: mdformat + args: ["--number"] + additional_dependencies: + - mdformat-openmmlab + - mdformat_frontmatter + - linkify-it-py + - repo: https://gitee.com/openmmlab/mirrors-docformatter + rev: v1.3.1 + hooks: + - id: docformatter + args: ["--in-place", "--wrap-descriptions", "119"] + - repo: https://gitee.com/openmmlab/mirrors-pyupgrade + rev: v3.0.0 + hooks: + - id: pyupgrade + args: ["--py36-plus"] diff --git a/README.md b/README.md index dc6af16..c2fb536 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# DynamicDet [[arXiv]](https://arxiv.org/abs/2304.05552) +# DynamicDet [\[arXiv\]](https://arxiv.org/abs/2304.05552) This repo contains the official implementation of [**"DynamicDet: A Unified Dynamic Architecture for Object Detection"**](https://arxiv.org/abs/2304.05552). 
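For reference, the CI job above only wraps pre-commit, so the same checks can be reproduced locally before pushing. A minimal sketch using the commands from `lint.yml` and the hooks declared in `.pre-commit-config.yaml`:

```bash
# One-time setup: install pre-commit and register it as a git pre-commit hook
pip install pre-commit
pre-commit install

# Run every configured hook (flake8, isort, yapf, codespell, mdformat,
# docformatter, pyupgrade, ...) against the whole repository,
# exactly what the CI "Linting" step does
pre-commit run --all-files
```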
@@ -12,23 +12,22 @@ This repo contains the official implementation of [**"DynamicDet: A Unified Dyna MS COCO -| Model | Easy / Hard | Size | FLOPs | FPS | APval | APtest | -| :----------------------------------------------------------- | :---------: | :--: | :----: | :--: | :--------------: | :---------------: | -| [**Dy-YOLOv7**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7.pt) | 90% / 10% | 640 | 112.4G | 110 | 51.4% | 52.1% | -| | 50% / 50% | 640 | 143.2G | 96 | 52.7% | 53.3% | -| | 10% / 90% | 640 | 174.0G | 85 | 53.3% | 53.8% | -| | 0% / 100% | 640 | 181.7G | 83 | 53.5% | 53.9% | -| | | | | | | | -| [**Dy-YOLOv7-X**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7x.pt) | 90% / 10% | 640 | 201.7G | 98 | 53.0% | 53.3% | -| | 50% / 50% | 640 | 248.9G | 78 | 54.2% | 54.4% | -| | 10% / 90% | 640 | 296.1G | 65 | 54.7% | 55.0% | -| | 0% / 100% | 640 | 307.9G | 64 | 54.8% | 55.0% | -| | | | | | | | -| [**Dy-YOLOv7-W6**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7-w6.pt) | 90% / 10% | 1280 | 384.2G | 74 | 54.9% | 55.2% | -| | 50% / 50% | 1280 | 480.8G | 58 | 55.9% | 56.1% | -| | 10% / 90% | 1280 | 577.4G | 48 | 56.4% | 56.7% | -| | 0% / 100% | 1280 | 601.6G | 46 | 56.5% | 56.8% | - +| Model | Easy / Hard | Size | FLOPs | FPS | APval | APtest | +| :----------------------------------------------------------------------------------------------- | :---------: | :--: | :----: | :-: | :--------------: | :---------------: | +| [**Dy-YOLOv7**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7.pt) | 90% / 10% | 640 | 112.4G | 110 | 51.4% | 52.1% | +| | 50% / 50% | 640 | 143.2G | 96 | 52.7% | 53.3% | +| | 10% / 90% | 640 | 174.0G | 85 | 53.3% | 53.8% | +| | 0% / 100% | 640 | 181.7G | 83 | 53.5% | 53.9% | +| | | | | | | | +| [**Dy-YOLOv7-X**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7x.pt) | 90% / 10% | 640 | 201.7G | 98 | 53.0% | 53.3% | +| | 50% / 50% | 640 | 248.9G | 78 | 54.2% | 54.4% | +| | 10% / 90% | 640 | 296.1G | 65 | 54.7% | 55.0% | +| | 0% / 100% | 640 | 307.9G | 64 | 54.8% | 55.0% | +| | | | | | | | +| [**Dy-YOLOv7-W6**](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/dy-yolov7-w6.pt) | 90% / 10% | 1280 | 384.2G | 74 | 54.9% | 55.2% | +| | 50% / 50% | 1280 | 480.8G | 58 | 55.9% | 56.1% | +| | 10% / 90% | 1280 | 577.4G | 48 | 56.4% | 56.7% | +| | 0% / 100% | 1280 | 601.6G | 46 | 56.5% | 56.8% |
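Each Easy / Hard operating point in the table maps to one dynamic threshold reported by `get_dynamic_thres.py`. As a usage sketch (the commands are the Quick Start ones below; `<DY_THRES>` is a placeholder for the threshold printed for the desired split):

```bash
# Print candidate thresholds, one per 10% step of the easy/hard ratio
python get_dynamic_thres.py --device 0 --batch-size 1 --img-size 640 --cfg cfg/dy-yolov7-step2.yaml --weight weights/dy-yolov7.pt --data data/coco.yaml --task val

# Evaluate at a chosen operating point, e.g. the "First: 90% / Second: 10%" row
python test.py --img-size 640 --batch-size 1 --conf 0.001 --iou 0.65 --device 0 --cfg cfg/dy-yolov7-step2.yaml --weight weights/dy-yolov7.pt --data data/coco.yaml --dy-thres <DY_THRES>
```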
Table Notes @@ -39,7 +38,6 @@ MS COCO
- ## Quick Start ### Installation @@ -50,7 +48,6 @@ conda install pytorch=1.11 cudatoolkit=11.3 torchvision -c pytorch pip install -r requirements.txt ``` - ### Data preparation Download MS COCO dataset images ([train](http://images.cocodataset.org/zips/train2017.zip), [val](http://images.cocodataset.org/zips/val2017.zip), [test](http://images.cocodataset.org/zips/test2017.zip)) and [labels](https://github.com/VDIGPKU/DynamicDet/releases/download/v0.1/coco2017labels-segments.zip). @@ -95,21 +92,21 @@ python train_step2.py --workers 4 --device 0 --batch-size 1 --epochs 2 --img 640 ### Getting the dynamic thresholds for variable-speed inference - ```bash +```bash python get_dynamic_thres.py --device 0 --batch-size 1 --img-size 640 --cfg cfg/dy-yolov7-step2.yaml --weight weights/dy-yolov7.pt --data data/coco.yaml --task val - ``` +``` ### Testing - ```bash +```bash python test.py --img-size 640 --batch-size 1 --conf 0.001 --iou 0.65 --device 0 --cfg cfg/dy-yolov7-step2.yaml --weight weights/dy-yolov7.pt --data data/coco.yaml --dy-thres <DY_THRES> - ``` +``` ### Inference - ```bash +```bash python detect.py --cfg cfg/dy-yolov7-step2.yaml --weight weights/dy-yolov7.pt --num-classes 80 --source <IMAGE/VIDEO> --device 0 --dy-thres <DY_THRES> - ``` +``` ## Citation diff --git a/detect.py b/detect.py index 49eba4b..49ec7e1 100644 --- a/detect.py +++ b/detect.py @@ -1,34 +1,40 @@ import argparse -import time import logging +import time from pathlib import Path import cv2 import torch -import torch.nn as nn import torch.backends.cudnn as cudnn +import torch.nn as nn from numpy import random from models.yolo import Model -from utils.datasets import LoadStreams, LoadImages -from utils.general import check_img_size, check_imshow, non_max_suppression, \ - scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path +from utils.datasets import LoadImages, LoadStreams +from utils.general import (check_img_size, check_imshow, increment_path, + non_max_suppression, scale_coords, set_logging, + xyxy2xywh) from utils.plots import plot_one_box -from utils.torch_utils import select_device, time_synchronized, intersect_dicts - +from utils.torch_utils import intersect_dicts, select_device, time_synchronized logger = logging.getLogger(__name__) + def detect(save_img=False): source, cfg, weight, view_img, save_txt, nc, imgsz = opt.source, opt.cfg, opt.weight, \ opt.view_img, opt.save_txt, opt.num_classes, opt.img_size - save_img = not opt.nosave and not source.endswith('.txt') # save inference images - webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) + save_img = not opt.nosave and not source.endswith( '.txt') # save inference images + webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) # Directories - save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run - (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + save_dir = Path( increment_path(Path(opt.project) / opt.name, + exist_ok=opt.exist_ok)) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir( + parents=True, exist_ok=True) # make dir # Initialize set_logging() @@ -41,7 +47,8 @@ def detect(save_img=False): state_dict = intersect_dicts(state_dict, model.state_dict()) # intersect model.load_state_dict(state_dict, strict=False) # load model.to(device) - logger.info('Transferred %g/%g items from %s' %
(len(state_dict), len(model.state_dict()), weight)) # report + logger.info('Transferred %g/%g items from %s' % + (len(state_dict), len(model.state_dict()), weight)) # report for p in model.parameters(): p.requires_grad = False model.float().fuse().eval() @@ -77,7 +84,9 @@ def detect(save_img=False): # Run inference if device.type != 'cpu': - model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once + model( + torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( + next(model.parameters()))) # run once old_img_w = old_img_h = imgsz old_img_b = 1 @@ -89,8 +98,10 @@ def detect(save_img=False): if img.ndimension() == 3: img = img.unsqueeze(0) - # Warmup - if device.type != 'cpu' and (old_img_b != img.shape[0] or old_img_h != img.shape[2] or old_img_w != img.shape[3]): + # warm up + if device.type != 'cpu' and (old_img_b != img.shape[0] + or old_img_h != img.shape[2] + or old_img_w != img.shape[3]): old_img_b = img.shape[0] old_img_h = img.shape[2] old_img_w = img.shape[3] @@ -99,28 +110,37 @@ def detect(save_img=False): # Inference t1 = time_synchronized() - with torch.no_grad(): # Calculating gradients would cause a GPU memory leak + with torch.no_grad( + ): # Calculating gradients would cause a GPU memory leak pred = model(img, augment=opt.augment)[0] t2 = time_synchronized() # Apply NMS - pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) + pred = non_max_suppression(pred, + opt.conf_thres, + opt.iou_thres, + classes=opt.classes, + agnostic=opt.agnostic_nms) t3 = time_synchronized() # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 - p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count + p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( + ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg - txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt - gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh + txt_path = str(save_dir / 'labels' / p.stem) + ( + '' if dataset.mode == 'image' else f'_{frame}') # img.txt + gn = torch.tensor(im0.shape)[[1, 0, 1, + 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size - det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() + det[:, :4] = scale_coords(img.shape[2:], det[:, :4], + im0.shape).round() # Print results for c in det[:, -1].unique(): @@ -130,17 +150,25 @@ def detect(save_img=False): # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / + gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, conf) if opt.save_conf else ( + cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or view_img: # Add bbox to image label = f'{names[int(cls)]} {conf:.2f}' - plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=1) + plot_one_box(xyxy, + im0, + label=label, + color=colors[int(cls)], + line_thickness=1) # Print time (inference + NMS) - print(f'{s}Done. 
({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS') + print( + f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS' + ) # Stream results if view_img: @@ -151,12 +179,14 @@ def detect(save_img=False): if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) - print(f" The image with the result is saved in: {save_path}") + print( + f' The image with the result is saved in: {save_path}') else: # 'video' or 'stream' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): - vid_writer.release() # release previous video writer + vid_writer.release( + ) # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) @@ -164,12 +194,14 @@ def detect(save_img=False): else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path += '.mp4' - vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, + (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' - #print(f"Results saved to {save_dir}{s}") + # print(f"Results saved to {save_dir}{s}") print(f'Done. ({time.time() - t0:.3f}s)') @@ -177,24 +209,68 @@ def detect(save_img=False): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--weight', type=str, default='', help='model.pt path(s)') - parser.add_argument('--num-classes', type=int, default=80, help='number of classes') - parser.add_argument('--source', type=str, default='inference/images', help='source') # file/folder, 0 for webcam - parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') - parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold') - parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS') - parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') - parser.add_argument('--view-img', action='store_true', help='display results') - parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') - parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') - parser.add_argument('--nosave', action='store_true', help='do not save images/videos') - parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') - parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') - parser.add_argument('--augment', action='store_true', help='augmented inference') - parser.add_argument('--project', default='runs/detect', help='save results to project/name') - parser.add_argument('--name', default='exp', help='save results to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') - parser.add_argument('--dy-thres', type=float, default=0.5, help='dynamic thres') + parser.add_argument('--weight', + type=str, + default='', + help='model.pt path(s)') + parser.add_argument('--num-classes', + type=int, + default=80, + help='number of classes') + parser.add_argument('--source', + type=str, + default='inference/images', + help='source') # file/folder, 0 for webcam + parser.add_argument('--img-size', + type=int, + default=640, + help='inference size (pixels)') + parser.add_argument('--conf-thres', + type=float, + default=0.25, + help='object confidence threshold') + parser.add_argument('--iou-thres', + type=float, + default=0.45, + help='IOU threshold for NMS') + parser.add_argument('--device', + default='', + help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--view-img', + action='store_true', + help='display results') + parser.add_argument('--save-txt', + action='store_true', + help='save results to *.txt') + parser.add_argument('--save-conf', + action='store_true', + help='save confidences in --save-txt labels') + parser.add_argument('--nosave', + action='store_true', + help='do not save images/videos') + parser.add_argument('--classes', + nargs='+', + type=int, + help='filter by class: --class 0, or --class 0 2 3') + parser.add_argument('--agnostic-nms', + action='store_true', + help='class-agnostic NMS') + parser.add_argument('--augment', + action='store_true', + help='augmented inference') + parser.add_argument('--project', + default='runs/detect', + help='save results to project/name') + parser.add_argument('--name', + default='exp', + help='save results to project/name') + parser.add_argument('--exist-ok', + action='store_true', + help='existing project/name ok, do not increment') + parser.add_argument('--dy-thres', + type=float, + default=0.5, + help='dynamic thres') opt = parser.parse_args() print(opt) diff --git a/get_dynamic_thres.py b/get_dynamic_thres.py index b8f445b..29e4a79 100644 --- a/get_dynamic_thres.py +++ b/get_dynamic_thres.py @@ -1,23 +1,21 @@ import argparse -import os import logging -from pathlib import Path -from threading import Thread -import yaml -from tqdm import tqdm import numpy as np import torch import torch.nn as nn +import yaml +from tqdm import tqdm from models.yolo import Model from utils.datasets import create_dataloader -from utils.general import check_dataset, check_file, check_img_size, set_logging, colorstr +from utils.general import (check_dataset, check_file, check_img_size, colorstr, + set_logging) from utils.torch_utils import select_device - logger = 
logging.getLogger(__name__) + def get_thres(data, cfg=None, weight=None, @@ -28,20 +26,22 @@ def get_thres(data, set_logging() device = select_device(opt.device, batch_size=batch_size) if isinstance(data, str): - is_coco = data.endswith('coco.yaml') + # is_coco = data.endswith('coco.yaml') with open(data) as f: data = yaml.load(f, Loader=yaml.SafeLoader) check_dataset(data) # check nc = int(data['nc']) # number of classes - iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 - niou = iouv.numel() + # iouv = torch.linspace(0.5, 0.95, + # 10).to(device) # iou vector for mAP@0.5:0.95 + # niou = iouv.numel() # Load model model = Model(cfg, ch=3, nc=nc) # create state_dict = torch.load(weight, map_location='cpu')['model'] model.load_state_dict(state_dict, strict=True) # load model.to(device) - logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weight)) # report + logger.info('Transferred %g/%g items from %s' % + (len(state_dict), len(model.state_dict()), weight)) # report for p in model.parameters(): p.requires_grad = False model.float().fuse().eval() @@ -63,10 +63,19 @@ def get_thres(data, # Dataloader if device.type != 'cpu': - model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once - task = opt.task if opt.task in ('train', 'val', 'test') else 'val' # path to train/val/test images - dataloader = create_dataloader(data[task], imgsz, batch_size, gs, opt, pad=0.5, rect=True, - prefix=colorstr(f'{task}: '))[0] + model( + torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( + next(model.parameters()))) # run once + task = opt.task if opt.task in ( + 'train', 'val', 'test') else 'val' # path to train/val/test images + dataloader = create_dataloader(data[task], + imgsz, + batch_size, + gs, + opt, + pad=0.5, + rect=True, + prefix=colorstr(f'{task}: '))[0] score_list = [] for batch_i, (img, _, _, _) in enumerate(tqdm(dataloader)): @@ -75,7 +84,8 @@ def get_thres(data, img /= 255.0 # 0 - 255 to 0.0 - 1.0 with torch.no_grad(): # Run model - cur_score = model(img, augment=augment) # inference and training outputs + cur_score = model( + img, augment=augment) # inference and training outputs score_list.append(cur_score.item()) thres = ['0'] @@ -88,28 +98,50 @@ def get_thres(data, if __name__ == '__main__': parser = argparse.ArgumentParser(prog='test.py') parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--weight', type=str, default='', help='model.pt path(s)') - parser.add_argument('--data', type=str, default='data/coco.yaml', help='*.data path') - parser.add_argument('--batch-size', type=int, default=1, help='size of each image batch') - parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') + parser.add_argument('--weight', + type=str, + default='', + help='model.pt path(s)') + parser.add_argument('--data', + type=str, + default='data/coco.yaml', + help='*.data path') + parser.add_argument('--batch-size', + type=int, + default=1, + help='size of each image batch') + parser.add_argument('--img-size', + type=int, + default=640, + help='inference size (pixels)') parser.add_argument('--task', default='val', help='train, val, test') - parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') - parser.add_argument('--augment', action='store_true', help='augmented inference') - parser.add_argument('--project', default='runs/test', help='save to project/name') + parser.add_argument('--device', + default='', + help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--augment', + action='store_true', + help='augmented inference') + parser.add_argument('--project', + default='runs/test', + help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--exist-ok', + action='store_true', + help='existing project/name ok, do not increment') opt = parser.parse_args() opt.single_cls = False opt.data = check_file(opt.data) # check file print(opt) if opt.task in ('train', 'val', 'test'): # run normally - thres = get_thres(opt.data, opt.cfg, opt.weight, opt.batch_size, opt.img_size, opt.augment) + thres = get_thres(opt.data, opt.cfg, opt.weight, opt.batch_size, + opt.img_size, opt.augment) print() print('***************************************************') print(' '.join(thres)) for idx, thr in enumerate(thres): - print('First: {}%\tSecond: {}%\tThreshold: {}'.format(100 - idx * 10, idx * 10, thr)) + print('First: {}%\tSecond: {}%\tThreshold: {}'.format( + 100 - idx * 10, idx * 10, thr)) print('***************************************************') else: raise NotImplementedError diff --git a/hyp/hyp.finetune.dynamic.adam.yaml b/hyp/hyp.finetune.dynamic.adam.yaml index 56c175a..d57c51d 100644 --- a/hyp/hyp.finetune.dynamic.adam.yaml +++ b/hyp/hyp.finetune.dynamic.adam.yaml @@ -2,9 +2,9 @@ lr0: 0.00001 # initial learning rate lrf: 1 # final OneCycleLR learning rate (lr0 * lrf) momentum: 0.937 # SGD momentum/Adam beta1 weight_decay: 0.005 # optimizer weight decay 5e-4 -warmup_epochs: 0.01 # warmup epochs (fractions ok) -warmup_momentum: 0.8 # warmup initial momentum -warmup_bias_lr: 0.01 # warmup initial bias lr +warmup_epochs: 0.01 # warm up epochs (fractions ok) +warmup_momentum: 0.8 # warm up initial momentum +warmup_bias_lr: 0.01 # warm up initial bias lr box: 0.05 # box loss gain cls: 0.3 # cls loss gain cls_pw: 1.0 # cls BCELoss positive_weight diff --git a/hyp/hyp.scratch.p5.yaml b/hyp/hyp.scratch.p5.yaml index d046ae4..de2246e 100644 --- a/hyp/hyp.scratch.p5.yaml +++ b/hyp/hyp.scratch.p5.yaml @@ -2,9 +2,9 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) lrf: 0.1 # final OneCycleLR learning rate (lr0 * lrf) momentum: 0.937 # SGD momentum/Adam beta1 weight_decay: 0.0005 # optimizer weight decay 5e-4 -warmup_epochs: 3.0 # warmup epochs (fractions ok) -warmup_momentum: 0.8 # warmup initial momentum -warmup_bias_lr: 0.1 # warmup initial bias lr +warmup_epochs: 3.0 # warm up epochs (fractions ok) +warmup_momentum: 0.8 # warm up initial momentum +warmup_bias_lr: 0.1 # warm up initial bias lr box: 0.05 # box loss gain cls: 0.3 # cls loss gain cls_pw: 1.0 # cls BCELoss positive_weight diff --git a/hyp/hyp.scratch.p6.yaml b/hyp/hyp.scratch.p6.yaml index bc2c5ca..ffd067a 100644 --- a/hyp/hyp.scratch.p6.yaml +++ b/hyp/hyp.scratch.p6.yaml @@ -2,9 +2,9 @@ lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) momentum: 0.937 # SGD momentum/Adam beta1 weight_decay: 0.0005 # optimizer weight decay 5e-4 -warmup_epochs: 3.0 # warmup epochs (fractions ok) -warmup_momentum: 0.8 # warmup initial momentum -warmup_bias_lr: 0.1 # warmup 
initial bias lr +warmup_epochs: 3.0 # warm up epochs (fractions ok) +warmup_momentum: 0.8 # warm up initial momentum +warmup_bias_lr: 0.1 # warm up initial bias lr box: 0.05 # box loss gain cls: 0.3 # cls loss gain cls_pw: 1.0 # cls BCELoss positive_weight diff --git a/models/__init__.py b/models/__init__.py index 84952a8..a6131c1 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -1 +1 @@ -# init \ No newline at end of file +# init diff --git a/models/common.py b/models/common.py index 7b370e4..d405617 100644 --- a/models/common.py +++ b/models/common.py @@ -6,8 +6,8 @@ from utils.general import non_max_suppression +# basic -##### basic #### def autopad(k, p=None): # kernel, padding # Pad to 'same' @@ -17,8 +17,9 @@ def autopad(k, p=None): # kernel, padding class MP(nn.Module): + def __init__(self, k=2): - super(MP, self).__init__() + super().__init__() self.m = nn.MaxPool2d(kernel_size=k, stride=k) def forward(self, x): @@ -26,16 +27,21 @@ def forward(self, x): class ReOrg(nn.Module): + def __init__(self): - super(ReOrg, self).__init__() + super().__init__() def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) - return torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1) + return torch.cat([ + x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], + x[..., 1::2, 1::2] + ], 1) class Concat(nn.Module): + def __init__(self, dimension=1): - super(Concat, self).__init__() + super().__init__() self.d = dimension def forward(self, x): @@ -43,21 +49,36 @@ def forward(self, x): class Shortcut(nn.Module): + def __init__(self, dimension=0): - super(Shortcut, self).__init__() + super().__init__() self.d = dimension def forward(self, x): - return x[0]+x[1] + return x[0] + x[1] class Conv(nn.Module): # Standard convolution - def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups - super(Conv, self).__init__() - self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) + def __init__(self, + c1, + c2, + k=1, + s=1, + p=None, + g=1, + act=True): # ch_in, ch_out, kernel, stride, padding, groups + super().__init__() + self.conv = nn.Conv2d(c1, + c2, + k, + s, + autopad(k, p), + groups=g, + bias=False) self.bn = nn.BatchNorm2d(c2) - self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + self.act = nn.SiLU() if act is True else ( + act if isinstance(act, nn.Module) else nn.Identity()) def forward(self, x): return self.act(self.bn(self.conv(x))) @@ -68,29 +89,36 @@ def fuseforward(self, x): class ConvCheckpoint(nn.Module): # Standard convolution - def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups - super(ConvCheckpoint, self).__init__() + def __init__(self, + c1, + c2, + k=1, + s=1, + p=None, + g=1, + act=True): # ch_in, ch_out, kernel, stride, padding, groups + super().__init__() self.conv = Conv(c1, c2, k, s, p, g, act) def forward(self, x): x = checkpoint.checkpoint(self.conv, x) return x -##### end of basic ##### +# cspnet -##### cspnet ##### class SPPCSPC(nn.Module): # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)): - super(SPPCSPC, self).__init__() + super().__init__() c_ = int(2 * c2 * e) # hidden channels self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = Conv(c1, c_, 1, 1) self.cv3 = Conv(c_, c_, 3, 1) self.cv4 = Conv(c_, c_, 1, 1) - self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x 
// 2) for x in k]) + self.m = nn.ModuleList( + [nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) self.cv5 = Conv(4 * c_, c_, 1, 1) self.cv6 = Conv(c_, c_, 3, 1) self.cv7 = Conv(2 * c_, c2, 1, 1) @@ -101,14 +129,14 @@ def forward(self, x): y2 = self.cv2(x) return self.cv7(torch.cat((y1, y2), dim=1)) -##### end of cspnet ##### +# yolor -##### yolor ##### class ImplicitA(nn.Module): + def __init__(self, channel, mean=0., std=.02): - super(ImplicitA, self).__init__() + super().__init__() self.channel = channel self.mean = mean self.std = std @@ -120,8 +148,9 @@ def forward(self, x): class ImplicitM(nn.Module): + def __init__(self, channel, mean=1., std=.02): - super(ImplicitM, self).__init__() + super().__init__() self.channel = channel self.mean = mean self.std = std @@ -131,17 +160,16 @@ def __init__(self, channel, mean=1., std=.02): def forward(self, x): return self.implicit * x -##### end of yolor ##### +# repvgg -##### repvgg ##### class RepConv(nn.Module): # Represented convolution # https://arxiv.org/abs/2101.03697 def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, deploy=False): - super(RepConv, self).__init__() + super().__init__() self.deploy = deploy self.groups = g @@ -153,13 +181,21 @@ def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, deploy=False): padding_11 = autopad(k, p) - k // 2 - self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) + self.act = nn.SiLU() if act is True else ( + act if isinstance(act, nn.Module) else nn.Identity()) if deploy: - self.rbr_reparam = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=True) + self.rbr_reparam = nn.Conv2d(c1, + c2, + k, + s, + autopad(k, p), + groups=g, + bias=True) else: - self.rbr_identity = (nn.BatchNorm2d(num_features=c1) if c2 == c1 and s == 1 else None) + self.rbr_identity = (nn.BatchNorm2d( + num_features=c1) if c2 == c1 and s == 1 else None) self.rbr_dense = nn.Sequential( nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False), @@ -167,12 +203,12 @@ def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, deploy=False): ) self.rbr_1x1 = nn.Sequential( - nn.Conv2d( c1, c2, 1, s, padding_11, groups=g, bias=False), + nn.Conv2d(c1, c2, 1, s, padding_11, groups=g, bias=False), nn.BatchNorm2d(num_features=c2), ) def forward(self, inputs): - if hasattr(self, "rbr_reparam"): + if hasattr(self, 'rbr_reparam'): return self.act(self.rbr_reparam(inputs)) if self.rbr_identity is None: @@ -209,14 +245,14 @@ def _fuse_bn_tensor(self, branch): eps = branch[1].eps else: assert isinstance(branch, nn.BatchNorm2d) - if not hasattr(self, "id_tensor"): + if not hasattr(self, 'id_tensor'): input_dim = self.in_channels // self.groups - kernel_value = np.zeros( - (self.in_channels, input_dim, 3, 3), dtype=np.float32 - ) + kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), + dtype=np.float32) for i in range(self.in_channels): kernel_value[i, i % input_dim, 1, 1] = 1 - self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) + self.id_tensor = torch.from_numpy(kernel_value).to( + branch.weight.device) kernel = self.id_tensor running_mean = branch.running_mean running_var = branch.running_var @@ -243,15 +279,15 @@ def fuse_conv_bn(self, conv, bn): weights = conv.weight * t bn = nn.Identity() - conv = nn.Conv2d(in_channels = conv.in_channels, - out_channels = conv.out_channels, - kernel_size = conv.kernel_size, - stride=conv.stride, - padding = conv.padding, - dilation = conv.dilation, - groups = conv.groups, - bias = True, - 
padding_mode = conv.padding_mode) + conv = nn.Conv2d(in_channels=conv.in_channels, + out_channels=conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + dilation=conv.dilation, + groups=conv.groups, + bias=True, + padding_mode=conv.padding_mode) conv.weight = torch.nn.Parameter(weights) conv.bias = torch.nn.Parameter(bias) @@ -260,48 +296,60 @@ def fuse_conv_bn(self, conv, bn): def fuse_repvgg_block(self): if self.deploy: return - print(f"RepConv.fuse_repvgg_block") + print('RepConv.fuse_repvgg_block') - self.rbr_dense = self.fuse_conv_bn(self.rbr_dense[0], self.rbr_dense[1]) + self.rbr_dense = self.fuse_conv_bn(self.rbr_dense[0], + self.rbr_dense[1]) self.rbr_1x1 = self.fuse_conv_bn(self.rbr_1x1[0], self.rbr_1x1[1]) rbr_1x1_bias = self.rbr_1x1.bias - weight_1x1_expanded = torch.nn.functional.pad(self.rbr_1x1.weight, [1, 1, 1, 1]) + weight_1x1_expanded = torch.nn.functional.pad(self.rbr_1x1.weight, + [1, 1, 1, 1]) # Fuse self.rbr_identity - if (isinstance(self.rbr_identity, nn.BatchNorm2d) or isinstance(self.rbr_identity, nn.modules.batchnorm.SyncBatchNorm)): + if (isinstance(self.rbr_identity, nn.BatchNorm2d) or isinstance( + self.rbr_identity, nn.modules.batchnorm.SyncBatchNorm)): # print(f"fuse: rbr_identity == BatchNorm2d or SyncBatchNorm") - identity_conv_1x1 = nn.Conv2d( - in_channels=self.in_channels, - out_channels=self.out_channels, - kernel_size=1, - stride=1, - padding=0, - groups=self.groups, - bias=False) - identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.to(self.rbr_1x1.weight.data.device) - identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.squeeze().squeeze() + identity_conv_1x1 = nn.Conv2d(in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=1, + stride=1, + padding=0, + groups=self.groups, + bias=False) + identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.to( + self.rbr_1x1.weight.data.device) + identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.squeeze( + ).squeeze() # print(f" identity_conv_1x1.weight = {identity_conv_1x1.weight.shape}") identity_conv_1x1.weight.data.fill_(0.0) identity_conv_1x1.weight.data.fill_diagonal_(1.0) - identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.unsqueeze(2).unsqueeze(3) + identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.unsqueeze( + 2).unsqueeze(3) # print(f" identity_conv_1x1.weight = {identity_conv_1x1.weight.shape}") - identity_conv_1x1 = self.fuse_conv_bn(identity_conv_1x1, self.rbr_identity) + identity_conv_1x1 = self.fuse_conv_bn(identity_conv_1x1, + self.rbr_identity) bias_identity_expanded = identity_conv_1x1.bias - weight_identity_expanded = torch.nn.functional.pad(identity_conv_1x1.weight, [1, 1, 1, 1]) + weight_identity_expanded = torch.nn.functional.pad( + identity_conv_1x1.weight, [1, 1, 1, 1]) else: # print(f"fuse: rbr_identity != BatchNorm2d, rbr_identity = {self.rbr_identity}") - bias_identity_expanded = torch.nn.Parameter( torch.zeros_like(rbr_1x1_bias) ) - weight_identity_expanded = torch.nn.Parameter( torch.zeros_like(weight_1x1_expanded) ) - - - #print(f"self.rbr_1x1.weight = {self.rbr_1x1.weight.shape}, ") - #print(f"weight_1x1_expanded = {weight_1x1_expanded.shape}, ") - #print(f"self.rbr_dense.weight = {self.rbr_dense.weight.shape}, ") - - self.rbr_dense.weight = torch.nn.Parameter(self.rbr_dense.weight + weight_1x1_expanded + weight_identity_expanded) - self.rbr_dense.bias = torch.nn.Parameter(self.rbr_dense.bias + rbr_1x1_bias + bias_identity_expanded) + 
bias_identity_expanded = torch.nn.Parameter( + torch.zeros_like(rbr_1x1_bias)) + weight_identity_expanded = torch.nn.Parameter( + torch.zeros_like(weight_1x1_expanded)) + + # print(f"self.rbr_1x1.weight = {self.rbr_1x1.weight.shape}, ") + # print(f"weight_1x1_expanded = {weight_1x1_expanded.shape}, ") + # print(f"self.rbr_dense.weight = {self.rbr_dense.weight.shape}, ") + + self.rbr_dense.weight = torch.nn.Parameter(self.rbr_dense.weight + + weight_1x1_expanded + + weight_identity_expanded) + self.rbr_dense.bias = torch.nn.Parameter(self.rbr_dense.bias + + rbr_1x1_bias + + bias_identity_expanded) self.rbr_reparam = self.rbr_dense self.deploy = True @@ -318,10 +366,9 @@ def fuse_repvgg_block(self): del self.rbr_dense self.rbr_dense = None -##### end of repvgg ##### +# yolov5 -##### yolov5 ##### class NMS(nn.Module): # Non-Maximum Suppression (NMS) module @@ -330,47 +377,67 @@ class NMS(nn.Module): classes = None # (optional list) filter by class def __init__(self): - super(NMS, self).__init__() + super().__init__() def forward(self, x): - return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) + return non_max_suppression(x[0], + conf_thres=self.conf, + iou_thres=self.iou, + classes=self.classes) -##### end of yolov5 ###### +# CBNet -##### CBNet ##### class CBLinear(nn.Module): - def __init__(self, c1, c2s, k=1, s=1, p=None, g=1): # ch_in, ch_outs, kernel, stride, padding, groups - super(CBLinear, self).__init__() + + def __init__(self, + c1, + c2s, + k=1, + s=1, + p=None, + g=1): # ch_in, ch_outs, kernel, stride, padding, groups + super().__init__() self.c2s = c2s - self.conv = nn.Conv2d(c1, sum(c2s), k, s, autopad(k, p), groups=g, bias=True) + self.conv = nn.Conv2d(c1, + sum(c2s), + k, + s, + autopad(k, p), + groups=g, + bias=True) def forward(self, x): outs = self.conv(x).split(self.c2s, dim=1) return outs + class CBFuse(nn.Module): + def __init__(self, idx): - super(CBFuse, self).__init__() + super().__init__() self.idx = idx def forward(self, xs): target_size = xs[-1].shape[2:] - res = [F.interpolate(x[self.idx[i]], size=target_size, mode='nearest') for i, x in enumerate(xs[:-1])] + res = [ + F.interpolate(x[self.idx[i]], size=target_size, mode='nearest') + for i, x in enumerate(xs[:-1]) + ] out = torch.sum(torch.stack(res + xs[-1:]), dim=0) return out -##### end of CBNet ##### +# DynamicDet -##### DynamicDet ##### def sigmoid(logits, hard=False, threshold=0.5): y_soft = logits.sigmoid() if hard: indices = (y_soft < threshold).nonzero(as_tuple=True) - y_hard = torch.zeros_like(logits, memory_format=torch.legacy_contiguous_format) + y_hard = torch.zeros_like(logits, + memory_format=torch.legacy_contiguous_format) y_hard[indices[0], indices[1]] = 1.0 ret = y_hard - y_soft.detach() + y_soft else: @@ -379,13 +446,20 @@ def sigmoid(logits, hard=False, threshold=0.5): class AdaptiveRouter(nn.Module): + def __init__(self, features_channels, out_channels, reduction=4): - super(AdaptiveRouter, self).__init__() + super().__init__() self.inp = sum(features_channels) self.oup = out_channels self.reduction = reduction - self.layer1 = nn.Conv2d(self.inp, self.inp//self.reduction, kernel_size=1, bias=True) - self.layer2 = nn.Conv2d(self.inp//self.reduction, self.oup, kernel_size=1, bias=True) + self.layer1 = nn.Conv2d(self.inp, + self.inp // self.reduction, + kernel_size=1, + bias=True) + self.layer2 = nn.Conv2d(self.inp // self.reduction, + self.oup, + kernel_size=1, + bias=True) def forward(self, xs, thres=0.5): xs = [x.mean(dim=(2, 3), keepdim=True) for x 
in xs] @@ -398,5 +472,3 @@ def forward(self, xs, thres=0.5): else: xs = xs.sigmoid() return xs - -##### end of DynamicDet ##### diff --git a/models/yolo.py b/models/yolo.py index 52f3b52..9f60643 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -1,26 +1,30 @@ -import math -import argparse import logging -from pathlib import Path -import yaml +import math import sys from copy import deepcopy +from pathlib import Path -sys.path.append('./') # to run '$ python *.py' files in subdirectories -logger = logging.getLogger(__name__) import torch +import torch.nn as nn +import yaml from torch.nn.modules.batchnorm import _BatchNorm -from models.common import * + +from models.common import (NMS, SPPCSPC, AdaptiveRouter, CBFuse, CBLinear, + Concat, Conv, ConvCheckpoint, ImplicitA, ImplicitM, + ReOrg, RepConv, Shortcut, autoShape) from utils.autoanchor import check_anchor_order -from utils.general import make_divisible, check_file, set_logging -from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ - select_device, copy_attr +from utils.general import make_divisible +from utils.torch_utils import (copy_attr, fuse_conv_and_bn, initialize_weights, + model_info, scale_img, time_synchronized) try: import thop # for FLOPS computation except ImportError: thop = None +sys.path.append('./') # to run '$ python *.py' files in subdirectories +logger = logging.getLogger(__name__) + class IDetect(nn.Module): stride = None # strides computed during build @@ -30,7 +34,7 @@ class IDetect(nn.Module): concat = False def __init__(self, nc=80, anchors=(), ch=()): # detection layer - super(IDetect, self).__init__() + super().__init__() self.nc = nc # number of classes self.no = nc + 5 # number of outputs per anchor self.nl = len(anchors) # number of detection layers @@ -38,8 +42,11 @@ def __init__(self, nc=80, anchors=(), ch=()): # detection layer self.grid = [torch.zeros(1)] * self.nl # init grid a = torch.tensor(anchors).float().view(self.nl, -1, 2) self.register_buffer('anchors', a) # shape(nl,na,2) - self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) - self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv + self.register_buffer('anchor_grid', + a.clone().view(self.nl, 1, -1, 1, 1, + 2)) # shape(nl,1,na,1,1,2) + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) + for x in ch) # output conv self.ia = nn.ModuleList(ImplicitA(x) for x in ch) self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch) @@ -52,15 +59,17 @@ def forward(self, x): x[i] = self.m[i](self.ia[i](x[i])) # conv x[i] = self.im[i](x[i]) bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) - x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() + x[i] = x[i].view(bs, self.na, self.no, ny, + nx).permute(0, 1, 3, 4, 2).contiguous() if not self.training: # inference if self.grid[i].shape[2:4] != x[i].shape[2:4]: self.grid[i] = self._make_grid(nx, ny).to(x[i].device) y = x[i].sigmoid() - y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy - y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + + self.grid[i]) * self.stride[i] # xy + y[..., 2:4] = (y[..., 2:4] * 2)**2 * self.anchor_grid[i] # wh z.append(y.view(bs, -1, self.no)) return x if self.training else (torch.cat(z, 1), x) @@ -72,7 +81,8 @@ def fuseforward(self, x): for i in range(self.nl): x[i] = self.m[i](x[i]) # conv bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) - x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() + x[i] = x[i].view(bs, self.na, self.no, ny, + nx).permute(0, 1, 3, 4, 2).contiguous() if not self.training: # inference if self.grid[i].shape[2:4] != x[i].shape[2:4]: @@ -80,12 +90,17 @@ def fuseforward(self, x): y = x[i].sigmoid() if not torch.onnx.is_in_onnx_export(): - y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy - y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh + y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + + self.grid[i]) * self.stride[i] # xy + y[..., + 2:4] = (y[..., 2:4] * 2)**2 * self.anchor_grid[i] # wh else: - xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 - xy = xy * (2. * self.stride[i]) + (self.stride[i] * (self.grid[i] - 0.5)) # new xy - wh = wh ** 2 * (4 * self.anchor_grid[i].data) # new wh + xy, wh, conf = y.split( + (2, 2, self.nc + 1), + 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 + xy = xy * (2. * self.stride[i]) + ( + self.stride[i] * (self.grid[i] - 0.5)) # new xy + wh = wh**2 * (4 * self.anchor_grid[i].data) # new wh y = torch.cat((xy, wh, conf), 4) z.append(y.view(bs, -1, self.no)) @@ -104,18 +119,20 @@ def fuseforward(self, x): return out def fuse(self): - print("IDetect.fuse") + print('IDetect.fuse') # fuse ImplicitA and Convolution for i in range(len(self.m)): - c1,c2,_,_ = self.m[i].weight.shape - c1_,c2_, _,_ = self.ia[i].implicit.shape - self.m[i].bias += torch.matmul(self.m[i].weight.reshape(c1,c2),self.ia[i].implicit.reshape(c2_,c1_)).squeeze(1) + c1, c2, _, _ = self.m[i].weight.shape + c1_, c2_, _, _ = self.ia[i].implicit.shape + self.m[i].bias += torch.matmul( + self.m[i].weight.reshape(c1, c2), + self.ia[i].implicit.reshape(c2_, c1_)).squeeze(1) # fuse ImplicitM and Convolution for i in range(len(self.m)): - c1,c2, _,_ = self.im[i].implicit.shape + c1, c2, _, _ = self.im[i].implicit.shape self.m[i].bias *= self.im[i].implicit.reshape(c2) - self.m[i].weight *= self.im[i].implicit.transpose(0,1) + self.m[i].weight *= self.im[i].implicit.transpose(0, 1) @staticmethod def _make_grid(nx=20, ny=20): @@ -128,16 +145,21 @@ def convert(self, z): conf = z[:, :, 4:5] score = z[:, :, 5:] score *= conf - convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], - dtype=torch.float32, - device=z.device) + convert_matrix = torch.tensor( + [[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], + dtype=torch.float32, + device=z.device) box @= convert_matrix return (box, score) class Model(nn.Module): - def __init__(self, cfg, ch=3, nc=None): # model, input channels, number of classes - super(Model, self).__init__() + + def __init__(self, + cfg, + ch=3, + nc=None): # model, input channels, number of classes + super().__init__() assert isinstance(cfg, str) self.yaml_file = Path(cfg).name with open(cfg) as f: @@ -146,7 +168,9 @@ def __init__(self, cfg, ch=3, nc=None): # model, input channels, number of clas if self.dynamic: router_channels = self.yaml['router_channels'] reduction = self.yaml.get('router_reduction', 4) - self.router = AdaptiveRouter(router_channels, 1, reduction=reduction) + 
self.router = AdaptiveRouter(router_channels, + 1, + reduction=reduction) self.router_ins = self.yaml['router_ins'] self.dy_thres = 0.5 self.get_score = False @@ -154,9 +178,13 @@ def __init__(self, cfg, ch=3, nc=None): # model, input channels, number of clas # Define model ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels if nc and nc != self.yaml['nc']: - logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") + logger.info( + f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") self.yaml['nc'] = nc # override yaml value - self.model_b, self.save_b, self.model_b2, self.save_b2, self.model_h, self.save_h, self.model_h2, self.save_h2 = parse_model(deepcopy(self.yaml), ch_b=[ch]) # model, savelist + (self.model_b, self.save_b, self.model_b2, self.save_b2, self.model_h, + self.save_h, self.model_h2, + self.save_h2) = parse_model(deepcopy(self.yaml), + ch_b=[ch]) # model, savelist self.keep_input = self.yaml.get('keep_input', False) self.names = [str(i) for i in range(self.yaml['nc'])] # default names # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) @@ -167,15 +195,27 @@ def __init__(self, cfg, ch=3, nc=None): # model, input channels, number of clas if isinstance(m, IDetect): s = 256 # 2x min stride if self.dynamic: - m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0][:m.anchors.shape[0]]]) # forward + m.stride = torch.tensor([ + s / x.shape[-2] for x in self.forward( + torch.zeros(1, ch, s, s))[0][:m.anchors.shape[0]] + ]) # forward else: - m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[:m.anchors.shape[0]]]) # forward + m.stride = torch.tensor([ + s / x.shape[-2] for x in self.forward( + torch.zeros(1, ch, s, s))[:m.anchors.shape[0]] + ]) # forward check_anchor_order(m) m.anchors /= m.stride.view(-1, 1, 1) if self.dynamic: - m2.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0][:m2.anchors.shape[0]]]) # forward + m2.stride = torch.tensor([ + s / x.shape[-2] for x in self.forward( + torch.zeros(1, ch, s, s))[0][:m2.anchors.shape[0]] + ]) # forward else: - m2.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[:m2.anchors.shape[0]]]) # forward + m2.stride = torch.tensor([ + s / x.shape[-2] for x in self.forward( + torch.zeros(1, ch, s, s))[:m2.anchors.shape[0]] + ]) # forward check_anchor_order(m2) m2.anchors /= m2.stride.view(-1, 1, 1) self.stride = m.stride @@ -194,7 +234,9 @@ def forward(self, x, augment=False, profile=False): f = [None, 3, None] # flips (2-ud, 3-lr) y = [] # outputs for si, fi in zip(s, f): - xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) + xi = scale_img(x.flip(fi) if fi else x, + si, + gs=int(self.stride.max())) yi = self.forward_once(xi)[0] # forward # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save yi[..., :4] /= si # de-scale @@ -205,7 +247,8 @@ def forward(self, x, augment=False, profile=False): y.append(yi) return torch.cat(y, 1), None # augmented inference, train else: - return self.forward_once(x, profile) # single-scale inference, train + return self.forward_once(x, + profile) # single-scale inference, train def forward_once(self, x, profile=False): if self.keep_input: @@ -215,11 +258,15 @@ def forward_once(self, x, profile=False): outs = [] for m in self.model_b: if m.f != -1: # if not from previous layer - x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in 
m.f] # from earlier layers + x = y[m.f] if isinstance( + m.f, int) else [x if j == -1 else y[j] + for j in m.f] # from earlier layers if profile: c = isinstance(m, IDetect) - o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS + o = thop.profile( + m, inputs=(x.copy() if c else x, ), + verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS for _ in range(10): m(x.copy() if c else x) t = time_synchronized() @@ -234,33 +281,42 @@ def forward_once(self, x, profile=False): assert len(y) == self.yaml['n_first_layers'] if self.dynamic: - score = self.router([y[i] for i in self.router_ins]) # 'score' denotes the (1 - difficulty score) + score = self.router([ + y[i] for i in self.router_ins + ]) # 'score' denotes the (1 - difficulty score) if not hasattr(self, 'get_score'): self.get_score = False if self.get_score: return score - need_second = self.training or (not self.dynamic) or score[:, 0] < self.dy_thres - need_first_head = self.training or (self.dynamic and score[:, 0] >= self.dy_thres) + need_second = self.training or ( + not self.dynamic) or score[:, 0] < self.dy_thres + need_first_head = self.training or (self.dynamic + and score[:, 0] >= self.dy_thres) if need_second: for m in self.model_b2: if m.f == 'input': x = input_x elif m.f != -1: # if not from previous layer - x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers + x = y[m.f] if isinstance( + m.f, int) else [x if j == -1 else y[j] + for j in m.f] # from earlier layers if profile: c = isinstance(m, IDetect) - o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS + o = thop.profile( + m, inputs=(x.copy() if c else x, ), + verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS for _ in range(10): m(x.copy() if c else x) t = time_synchronized() for _ in range(10): m(x.copy() if c else x) dt.append((time_synchronized() - t) * 100) - print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) + print('%10.1f%10.0f%10.1fms %-40s' % + (o, m.np, dt[-1], m.type)) x = m(x) # run @@ -269,18 +325,23 @@ def forward_once(self, x, profile=False): if need_first_head: for m in self.model_h: if m.f != -1: # if not from previous layer - x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers + x = y[m.f] if isinstance( + m.f, int) else [x if j == -1 else y[j] + for j in m.f] # from earlier layers if profile: c = isinstance(m, IDetect) - o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS + o = thop.profile( + m, inputs=(x.copy() if c else x, ), + verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS for _ in range(10): m(x.copy() if c else x) t = time_synchronized() for _ in range(10): m(x.copy() if c else x) dt.append((time_synchronized() - t) * 100) - print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) + print('%10.1f%10.0f%10.1fms %-40s' % + (o, m.np, dt[-1], m.type)) x = m(x) # run @@ -295,18 +356,23 @@ def forward_once(self, x, profile=False): else: cur_f = m.f if cur_f != -1: # if not from previous layer - x = y[cur_f] if isinstance(cur_f, int) else [x if j == -1 else y[j] for j in cur_f] # from earlier layers + x = y[cur_f] if isinstance(cur_f, int) else [ + x if j == -1 else y[j] for j in cur_f + ] # from earlier layers if profile: c = isinstance(m, IDetect) - o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS + o = thop.profile( + m, 
inputs=(x.copy() if c else x, ), + verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS for _ in range(10): m(x.copy() if c else x) t = time_synchronized() for _ in range(10): m(x.copy() if c else x) dt.append((time_synchronized() - t) * 100) - print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) + print('%10.1f%10.0f%10.1fms %-40s' % + (o, m.np, dt[-1], m.type)) x = m(x) # run @@ -326,31 +392,43 @@ def close_all_bn(self): if isinstance(m, _BatchNorm): m.eval() - def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency + def _initialize_biases( + self, + cf=None): # initialize biases into Detect(), cf is class frequency # https://arxiv.org/abs/1708.02002 section 3.3 # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. m = self.model_h[-1] # Detect() module for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) - b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + b.data[:, 4] += math.log( + 8 / (640 / s)**2) # obj (8 objects per 640 image) + b.data[:, + 5:] += math.log(0.6 / + (m.nc - 0.99)) if cf is None else torch.log( + cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) m = self.model_h2[-1] # Detect() module for mi, s in zip(m.m, m.stride): # from b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) - b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls + b.data[:, 4] += math.log( + 8 / (640 / s)**2) # obj (8 objects per 640 image) + b.data[:, + 5:] += math.log(0.6 / + (m.nc - 0.99)) if cf is None else torch.log( + cf / cf.sum()) # cls mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) def _print_biases(self): m = self.model_h[-1] # Detect() module for mi in m.m: # from b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) - print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) + print(('%6g Conv2d.bias:' + '%10.3g' * 6) % + (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) m = self.model_h2[-1] # Detect() module for mi in m.m: # from b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) - print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) + print(('%6g Conv2d.bias:' + '%10.3g' * 6) % + (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) # def _print_weights(self): # for m in self.model.modules(): @@ -359,10 +437,12 @@ def _print_biases(self): def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers print('Fusing layers... ') - for model in [self.model_b, self.model_b2, self.model_h, self.model_h2]: + for model in [ + self.model_b, self.model_b2, self.model_h, self.model_h2 + ]: for m in model.modules(): if isinstance(m, RepConv): - #print(f" fuse_repvgg_block") + # print(f" fuse_repvgg_block") m.fuse_repvgg_block() elif type(m) is Conv and hasattr(m, 'bn'): m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv @@ -402,7 +482,10 @@ def nms(self, mode=True): # add or remove NMS module def autoshape(self): # add autoShape module print('Adding autoShape... 
') m = autoShape(self) # wrap model - copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes + copy_attr(m, + self, + include=('yaml', 'nc', 'hyp', 'names', 'stride'), + exclude=()) # copy attributes return m def info(self, verbose=False, img_size=640): # print model information @@ -410,18 +493,22 @@ def info(self, verbose=False, img_size=640): # print model information def parse_model(d, ch_b): # model_dict, input_channels(3) - logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) - anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] - na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors + logger.info('\n%3s%18s%3s%10s %-40s%-30s' % + ('', 'from', 'n', 'params', 'module', 'arguments')) + anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d[ + 'width_multiple'] + na = (len(anchors[0]) // + 2) if isinstance(anchors, list) else anchors # number of anchors no = na * (nc + 5) # number of outputs = anchors * (classes + 5) layers_b, save_b, c2 = [], [], ch_b[-1] # layers, savelist, ch_b out - for i, (f, n, m, args) in enumerate(d['backbone']): # from, number, module, args + for i, (f, n, m, + args) in enumerate(d['backbone']): # from, number, module, args m = eval(m) if isinstance(m, str) else m # eval strings for j, a in enumerate(args): try: args[j] = eval(a) if isinstance(a, str) else a # eval strings - except: + except Exception: pass n = max(round(n * gd), 1) if n > 1 else n # depth gain @@ -449,12 +536,15 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) else: c2 = ch_b[f] - m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module + m_ = nn.Sequential( + *[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module t = str(m)[8:-2].replace('__main__.', '') # module type np = sum([x.numel() for x in m_.parameters()]) # number params m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params - logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print - save_b.extend(x % i for x in ([f] if isinstance(f, (int, str)) else f) if x != -1) # append to savelist + logger.info('%3s%18s%3s%10.0f %-40s%-30s' % + (i, f, n, np, t, args)) # print + save_b.extend(x % i for x in ([f] if isinstance(f, (int, str)) else f) + if x != -1) # append to savelist layers_b.append(m_) if i == 0: ch_b = [] @@ -463,12 +553,13 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) layers_b2, save_b2 = [], [] # layers, savelist ch_b2 = [] - for i, (f, n, m, args) in enumerate(d['dual_backbone']): # from, number, module, args + for i, (f, n, m, args) in enumerate( + d['dual_backbone']): # from, number, module, args m = eval(m) if isinstance(m, str) else m # eval strings for j, a in enumerate(args): try: args[j] = eval(a) if isinstance(a, str) else a # eval strings - except: + except Exception: pass chs = [] @@ -520,12 +611,15 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) assert len(chs) == 1 c2 = chs[0][f] - m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module + m_ = nn.Sequential( + *[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module t = str(m)[8:-2].replace('__main__.', '') # module type np = sum([x.numel() for x in m_.parameters()]) # number params m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params - logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) 
# print - for x in ([f] if isinstance(f, (int, str)) else f): # append to savelist + logger.info('%3s%18s%3s%10.0f %-40s%-30s' % + (i, f, n, np, t, args)) # print + for x in ([f] if isinstance(f, + (int, str)) else f): # append to savelist if isinstance(x, str): continue if x >= 0: @@ -536,12 +630,13 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) ch_b2.append(c2) layers_h, save_h, ch_h = [], [], [] - for i, (f, n, m, args) in enumerate(d['head']): # from, number, module, args + for i, (f, n, m, + args) in enumerate(d['head']): # from, number, module, args m = eval(m) if isinstance(m, str) else m # eval strings for j, a in enumerate(args): try: args[j] = eval(a) if isinstance(a, str) else a # eval strings - except: + except Exception: pass chs = [] for x in ([f] if isinstance(f, (int, str)) else f): @@ -582,12 +677,15 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) assert len(chs) == 1 c2 = chs[0][f] - m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module + m_ = nn.Sequential( + *[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module t = str(m)[8:-2].replace('__main__.', '') # module type np = sum([x.numel() for x in m_.parameters()]) # number params m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params - logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print - for x in ([f] if isinstance(f, (int, str)) else f): # append to savelist + logger.info('%3s%18s%3s%10.0f %-40s%-30s' % + (i, f, n, np, t, args)) # print + for x in ([f] if isinstance(f, + (int, str)) else f): # append to savelist if isinstance(x, str): continue if x >= 0: @@ -598,12 +696,13 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) ch_h.append(c2) layers_h2, save_h2, ch_h2 = [], [], [] - for i, (f, n, m, args) in enumerate(d['head2']): # from, number, module, args + for i, (f, n, m, + args) in enumerate(d['head2']): # from, number, module, args m = eval(m) if isinstance(m, str) else m # eval strings for j, a in enumerate(args): try: args[j] = eval(a) if isinstance(a, str) else a # eval strings - except: + except Exception: pass chs = [] for x in ([f] if isinstance(f, (int, str)) else f): @@ -644,12 +743,15 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) assert len(chs) == 1 c2 = chs[0][f] - m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module + m_ = nn.Sequential( + *[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module t = str(m)[8:-2].replace('__main__.', '') # module type np = sum([x.numel() for x in m_.parameters()]) # number params m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params - logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print - for x in ([f] if isinstance(f, (int, str)) else f): # append to savelist + logger.info('%3s%18s%3s%10.0f %-40s%-30s' % + (i, f, n, np, t, args)) # print + for x in ([f] if isinstance(f, + (int, str)) else f): # append to savelist if isinstance(x, str): continue if x >= 0: @@ -662,5 +764,7 @@ def parse_model(d, ch_b): # model_dict, input_channels(3) save_b.extend(d['b1_save']) save_b2.extend(d['b2_save']) - return nn.Sequential(*layers_b), sorted(save_b), nn.Sequential(*layers_b2), sorted(save_b2), \ - nn.Sequential(*layers_h), sorted(save_h), nn.Sequential(*layers_h2), sorted(save_h2) + return (nn.Sequential(*layers_b), + sorted(save_b), nn.Sequential(*layers_b2), sorted(save_b2), + nn.Sequential(*layers_h), sorted(save_h), + 
nn.Sequential(*layers_h2), sorted(save_h2)) diff --git a/test.py b/test.py index 8e3bcbc..76c60e9 100644 --- a/test.py +++ b/test.py @@ -3,49 +3,52 @@ import logging from pathlib import Path from threading import Thread -import yaml -from tqdm import tqdm import numpy as np import torch import torch.nn as nn +import yaml +from tqdm import tqdm from models.yolo import Model from utils.datasets import create_dataloader -from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, \ - box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, set_logging, increment_path, colorstr -from utils.metrics import ap_per_class, ConfusionMatrix -from utils.plots import plot_images, output_to_target -from utils.torch_utils import select_device, time_synchronized, intersect_dicts - +from utils.general import (box_iou, check_dataset, check_file, check_img_size, + coco80_to_coco91_class, colorstr, increment_path, + non_max_suppression, scale_coords, set_logging, + xywh2xyxy, xyxy2xywh) +from utils.metrics import ConfusionMatrix, ap_per_class +from utils.plots import output_to_target, plot_images +from utils.torch_utils import intersect_dicts, select_device, time_synchronized logger = logging.getLogger(__name__) -def test(data, - cfg=None, - weight=None, - batch_size=32, - imgsz=640, - conf_thres=0.001, - iou_thres=0.6, # for NMS - save_json=False, - single_cls=False, - augment=False, - verbose=False, - model=None, - dataloader=None, - save_dir=Path(''), # for saving images - save_txt=False, # for auto-labelling - save_hybrid=False, # for hybrid auto-labelling - save_conf=False, # save auto-label confidences - plots=True, - wandb_logger=None, - compute_loss=None, - half_precision=True, - is_coco=False, - v5_metric=False, - dy_thres=0.5, - save_results=False): + +def test( + data, + cfg=None, + weight=None, + batch_size=32, + imgsz=640, + conf_thres=0.001, + iou_thres=0.6, # for NMS + save_json=False, + single_cls=False, + augment=False, + verbose=False, + model=None, + dataloader=None, + save_dir=Path(''), # for saving images + save_txt=False, # for auto-labelling + save_hybrid=False, # for hybrid auto-labelling + save_conf=False, # save auto-label confidences + plots=True, + wandb_logger=None, + compute_loss=None, + half_precision=True, + is_coco=False, + v5_metric=False, + dy_thres=0.5, + save_results=False): # Initialize/load model and set device training = model is not None @@ -61,27 +64,36 @@ def test(data, data = yaml.load(f, Loader=yaml.SafeLoader) check_dataset(data) # check nc = 1 if single_cls else int(data['nc']) # number of classes - iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 + iouv = torch.linspace(0.5, 0.95, + 10).to(device) # iou vector for mAP@0.5:0.95 niou = iouv.numel() if not training: # Directories - save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run - (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir + save_dir = Path( + increment_path(Path(opt.project) / opt.name, + exist_ok=opt.exist_ok)) # increment run + (save_dir / 'labels' if save_txt else save_dir).mkdir( + parents=True, exist_ok=True) # make dir # Load model model = Model(cfg, ch=3, nc=nc) # create state_dict = torch.load(weight, map_location='cpu')['model'] - state_dict = intersect_dicts(state_dict, model.state_dict()) # intersect + state_dict = intersect_dicts(state_dict, + model.state_dict()) # intersect model.load_state_dict(state_dict, strict=False) # load 
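
An aside on the hunk above: checkpoint loading in `test.py` is deliberately permissive. `intersect_dicts` keeps only the keys that also exist in the freshly built `Model` with matching tensor shapes, and `strict=False` tolerates whatever remains. A minimal sketch of that pattern (illustrative only; the repo's real helper is `utils.torch_utils.intersect_dicts`, and the tensor names below are made up):

```python
# Illustrative re-implementation of the shape-aware intersection used above;
# the repo's actual helper is utils.torch_utils.intersect_dicts.
import torch

def intersect_state_dicts(da, db, exclude=()):
    # keep keys present in both dicts, not excluded, with identical shapes
    return {k: v for k, v in da.items()
            if k in db and not any(x in k for x in exclude)
            and v.shape == db[k].shape}

ckpt = {'conv.weight': torch.zeros(16, 3, 3, 3), 'head.weight': torch.zeros(85, 16)}
model_sd = {'conv.weight': torch.zeros(16, 3, 3, 3), 'head.weight': torch.zeros(25, 16)}
kept = intersect_state_dicts(ckpt, model_sd)
print(list(kept))  # ['conv.weight'] -- the shape-mismatched head is dropped
# model.load_state_dict(kept, strict=False) then tolerates the missing key
```

The payoff is that a checkpoint saved with a slightly different head (for example a different `nc`) still transfers every compatible weight instead of failing outright, which is why the log line right after reports how many items were transferred.
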
model.to(device) - logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weight)) # report + logger.info( + 'Transferred %g/%g items from %s' % + (len(state_dict), len(model.state_dict()), weight)) # report for p in model.parameters(): p.requires_grad = False model.float().fuse().eval() # Compatibility updates for m in model.modules(): - if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + if type(m) in [ + nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU + ]: m.inplace = True # pytorch 1.7.0 compatibility elif type(m) is nn.Upsample: m.recompute_scale_factor = None # torch 1.11.0 compatibility @@ -107,23 +119,38 @@ def test(data, # Dataloader if not training: if device.type != 'cpu': - model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once - task = opt.task if opt.task in ('train', 'val', 'test') else 'val' # path to train/val/test images - dataloader = create_dataloader(data[task], imgsz, batch_size, gs, opt, pad=0.5, rect=True, + model( + torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( + next(model.parameters()))) # run once + task = opt.task if opt.task in ( + 'train', 'val', 'test') else 'val' # path to train/val/test images + dataloader = create_dataloader(data[task], + imgsz, + batch_size, + gs, + opt, + pad=0.5, + rect=True, prefix=colorstr(f'{task}: '))[0] if v5_metric: - print("Testing with YOLOv5 AP metric...") + print('Testing with YOLOv5 AP metric...') seen = 0 confusion_matrix = ConfusionMatrix(nc=nc) - names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} + names = { + k: v + for k, v in enumerate( + model.names if hasattr(model, 'names') else model.module.names) + } coco91class = coco80_to_coco91_class() - s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') + s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'P', 'R', + 'mAP@.5', 'mAP@.5:.95') p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. 
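
Before the evaluation loop that follows, it helps to keep the `iouv` bookkeeping in mind: each matched prediction is scored against all ten IoU thresholds at once, producing one boolean row per detection that later averages into mAP@0.5:0.95. A toy illustration with an assumed best-match IoU, mirroring the `correct[pi[j]] = ious[j] > iouv` assignment further down:

```python
# Toy illustration (values assumed) of the per-threshold bookkeeping in the
# evaluation loop below.
import torch

iouv = torch.linspace(0.5, 0.95, 10)  # thresholds 0.50, 0.55, ..., 0.95
best_iou = torch.tensor(0.72)         # hypothetical best IoU for one detection
row = best_iou > iouv                 # True through 0.70, False from 0.75 up
print(row.tolist())
# [True, True, True, True, True, False, False, False, False, False]
```

Accumulating such rows per class over confidence-ranked detections is what `ap_per_class` later turns into AP@0.5 (column 0 of `ap`) and AP@0.5:0.95 (the row mean).
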
loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class, wandb_images = [], [], [], [], [] - for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): + for batch_i, (img, targets, paths, + shapes) in enumerate(tqdm(dataloader, desc=s)): img = img.to(device, non_blocking=True) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 @@ -133,18 +160,26 @@ def test(data, with torch.no_grad(): # Run model t = time_synchronized() - out, train_out = model(img, augment=augment) # inference and training outputs + out, train_out = model( + img, augment=augment) # inference and training outputs t0 += time_synchronized() - t # Compute loss if compute_loss: - loss += compute_loss([x.float() for x in train_out], targets)[1][:3] # box, obj, cls + loss += compute_loss([x.float() for x in train_out], + targets)[1][:3] # box, obj, cls # Run NMS - targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels - lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling + targets[:, 2:] *= torch.Tensor([width, height, width, + height]).to(device) # to pixels + lb = [targets[targets[:, 0] == i, 1:] for i in range(nb) + ] if save_hybrid else [] # for autolabelling t = time_synchronized() - out = non_max_suppression(out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb, multi_label=True) + out = non_max_suppression(out, + conf_thres=conf_thres, + iou_thres=iou_thres, + labels=lb, + multi_label=True) t1 += time_synchronized() - t # Statistics per image @@ -157,67 +192,109 @@ def test(data, if len(pred) == 0: if nl: - stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) + stats.append((torch.zeros(0, niou, dtype=torch.bool), + torch.Tensor(), torch.Tensor(), tcls)) continue # Predictions predn = pred.clone() - scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred + scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], + shapes[si][1]) # native-space pred # Append to text file if save_txt: - gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh + gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0 + ]] # normalization gain whwh for *xyxy, conf, cls in predn.tolist(): - xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh - line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format - with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f: + xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / + gn).view(-1).tolist() # normalized xywh + line = (cls, *xywh, + conf) if save_conf else (cls, + *xywh) # label format + with open(save_dir / 'labels' / (path.stem + '.txt'), + 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') # W&B logging - Media Panel Plots - if len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0: # Check for test operation + if len( + wandb_images + ) < log_imgs and wandb_logger.current_epoch > 0: # Check for test operation if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0: - box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, - "class_id": int(cls), - "box_caption": "%s %.3f" % (names[cls], conf), - "scores": {"class_score": conf}, - "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()] - boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space - 
wandb_images.append(wandb_logger.wandb.Image(img[si], boxes=boxes, caption=path.name)) - wandb_logger.log_training_progress(predn, path, names) if wandb_logger and wandb_logger.wandb_run else None + box_data = [{ + 'position': { + 'minX': xyxy[0], + 'minY': xyxy[1], + 'maxX': xyxy[2], + 'maxY': xyxy[3] + }, + 'class_id': int(cls), + 'box_caption': f'{names[cls]} {conf:.3f}', + 'scores': { + 'class_score': conf + }, + 'domain': 'pixel' + } for *xyxy, conf, cls in pred.tolist()] + boxes = { + 'predictions': { + 'box_data': box_data, + 'class_labels': names + } + } # inference-space + wandb_images.append( + wandb_logger.wandb.Image(img[si], + boxes=boxes, + caption=path.name)) + wandb_logger.log_training_progress( + predn, path, + names) if wandb_logger and wandb_logger.wandb_run else None # Append to pycocotools JSON dictionary if save_json: # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... - image_id = int(path.stem) if path.stem.isnumeric() else path.stem + image_id = int( + path.stem) if path.stem.isnumeric() else path.stem box = xyxy2xywh(predn[:, :4]) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for p, b in zip(pred.tolist(), box.tolist()): - jdict.append({'image_id': image_id, - 'category_id': coco91class[int(p[5])] if is_coco else int(p[5]), - 'bbox': [round(x, 3) for x in b], - 'score': round(p[4], 5)}) + jdict.append({ + 'image_id': + image_id, + 'category_id': + coco91class[int(p[5])] if is_coco else int(p[5]), + 'bbox': [round(x, 3) for x in b], + 'score': + round(p[4], 5) + }) # Assign all predictions as incorrect - correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) + correct = torch.zeros(pred.shape[0], + niou, + dtype=torch.bool, + device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) - scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels + scale_coords(img[si].shape[1:], tbox, shapes[si][0], + shapes[si][1]) # native-space labels if plots: - confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1)) + confusion_matrix.process_batch( + predn, torch.cat((labels[:, 0:1], tbox), 1)) # Per target class for cls in torch.unique(tcls_tensor): - ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices - pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices + ti = (cls == tcls_tensor).nonzero(as_tuple=False).view( + -1) # prediction indices + pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view( + -1) # target indices # Search for detections if pi.shape[0]: # Prediction to target ious - ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1) # best ious, indices + ious, i = box_iou(predn[pi, :4], tbox[ti]).max( + 1) # best ious, indices # Append detections detected_set = set() @@ -226,27 +303,40 @@ def test(data, if d.item() not in detected_set: detected_set.add(d.item()) detected.append(d) - correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn - if len(detected) == nl: # all targets already located in image + correct[ + pi[j]] = ious[j] > iouv # iou_thres is 1xn + if len( + detected + ) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) - stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) + stats.append( + (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if plots and batch_i < 3: f = save_dir / f'test_batch{batch_i}_labels.jpg' # 
labels - Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start() + Thread(target=plot_images, + args=(img, targets, paths, f, names), + daemon=True).start() f = save_dir / f'test_batch{batch_i}_pred.jpg' # predictions - Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start() + Thread(target=plot_images, + args=(img, output_to_target(out), paths, f, names), + daemon=True).start() # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): - p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, v5_metric=v5_metric, save_dir=save_dir, names=names) + p, r, ap, f1, ap_class = ap_per_class(*stats, + plot=plots, + v5_metric=v5_metric, + save_dir=save_dir, + names=names) ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95 mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() - nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class + nt = np.bincount(stats[3].astype(np.int64), + minlength=nc) # number of targets per class else: nt = torch.zeros(1) @@ -260,24 +350,30 @@ def test(data, print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds - t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple + t = tuple(x / seen * 1E3 + for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple if not training: - print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) + print( + 'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' + % t) # Plots if plots: confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) if wandb_logger and wandb_logger.wandb: - val_batches = [wandb_logger.wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))] - wandb_logger.log({"Validation": val_batches}) + val_batches = [ + wandb_logger.wandb.Image(str(f), caption=f.name) + for f in sorted(save_dir.glob('test*.jpg')) + ] + wandb_logger.log({'Validation': val_batches}) if wandb_images: - wandb_logger.log({"Bounding Box Debugger/Images": wandb_images}) + wandb_logger.log({'Bounding Box Debugger/Images': wandb_images}) # Save JSON if save_json and len(jdict): w = Path(weight).stem if weight is not None else '' # weight anno_json = './coco/annotations/instances_val2017.json' # annotations json - pred_json = str(save_dir / f"{w}_predictions.json") # predictions json + pred_json = str(save_dir / f'{w}_predictions.json') # predictions json print('\nEvaluating pycocotools mAP... saving %s...' 
% pred_json) with open(pred_json, 'w') as f: json.dump(jdict, f) @@ -290,17 +386,26 @@ def test(data, pred = anno.loadRes(pred_json) # init predictions api eval = COCOeval(anno, pred, 'bbox') if is_coco: - eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate + eval.params.imgIds = [ + int(Path(x).stem) for x in dataloader.dataset.img_files + ] # image IDs to evaluate eval.evaluate() eval.accumulate() eval.summarize() - map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) + map, map50 = eval.stats[: + 2] # update results (mAP@0.5:0.95, mAP@0.5) if save_results: - results_txt = str(save_dir / f"{w}_results.txt") + results_txt = str(save_dir / f'{w}_results.txt') with open(results_txt, 'a') as f: - f.write(f'map: {round(map, 3)}, map50: {round(map50, 3)}, dy_thres: {dy_thres}\n') - f.write('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g\n' % t) - f.write('***********************************************************************************\n') + f.write( + f'map: {round(map, 3)}, map50: {round(map50, 3)}, dy_thres: {dy_thres}\n' + ) + f.write( + 'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g\n' + % t) + f.write( + '***********************************************************************************\n' + ) except Exception as e: print(f'pycocotools unable to run: {e}') @@ -308,37 +413,85 @@ def test(data, model.float() # for training if not training: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' - print(f"Results saved to {save_dir}{s}") + print(f'Results saved to {save_dir}{s}') maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] - return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t + return (mp, mr, map50, map, + *(loss.cpu() / len(dataloader)).tolist()), maps, t if __name__ == '__main__': parser = argparse.ArgumentParser(prog='test.py') parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--weight', type=str, default='', help='model.pt path(s)') - parser.add_argument('--data', type=str, default='data/coco.yaml', help='*.data path') - parser.add_argument('--batch-size', type=int, default=1, help='size of each image batch') - parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') - parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') - parser.add_argument('--iou-thres', type=float, default=0.65, help='IOU threshold for NMS') - parser.add_argument('--task', default='val', help='train, val, test, speed or study') - parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') - parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') - parser.add_argument('--augment', action='store_true', help='augmented inference') - parser.add_argument('--verbose', action='store_true', help='report mAP by class') - parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') - parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') - parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') - parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') - parser.add_argument('--project', default='runs/test', help='save to project/name') + parser.add_argument('--weight', + type=str, + default='', + help='model.pt path(s)') + parser.add_argument('--data', + type=str, + default='data/coco.yaml', + help='*.data path') + parser.add_argument('--batch-size', + type=int, + default=1, + help='size of each image batch') + parser.add_argument('--img-size', + type=int, + default=640, + help='inference size (pixels)') + parser.add_argument('--conf-thres', + type=float, + default=0.001, + help='object confidence threshold') + parser.add_argument('--iou-thres', + type=float, + default=0.65, + help='IOU threshold for NMS') + parser.add_argument('--task', + default='val', + help='train, val, test, speed or study') + parser.add_argument('--device', + default='', + help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument('--single-cls', + action='store_true', + help='treat as single-class dataset') + parser.add_argument('--augment', + action='store_true', + help='augmented inference') + parser.add_argument('--verbose', + action='store_true', + help='report mAP by class') + parser.add_argument('--save-txt', + action='store_true', + help='save results to *.txt') + parser.add_argument('--save-hybrid', + action='store_true', + help='save label+prediction hybrid results to *.txt') + parser.add_argument('--save-conf', + action='store_true', + help='save confidences in --save-txt labels') + parser.add_argument('--save-json', + action='store_true', + help='save a cocoapi-compatible JSON results file') + parser.add_argument('--project', + default='runs/test', + help='save to project/name') parser.add_argument('--name', default='exp', help='save to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') - parser.add_argument('--v5-metric', action='store_true', help='assume maximum recall as 1.0 in AP calculation') - parser.add_argument('--dy-thres', type=float, default=0.5, help='dynamic threshold') - parser.add_argument('--save-results', action='store_true', help='save results') + parser.add_argument('--exist-ok', + action='store_true', + help='existing project/name ok, do not increment') + parser.add_argument('--v5-metric', + action='store_true', + help='assume maximum recall as 1.0 in AP calculation') + parser.add_argument('--dy-thres', + type=float, + default=0.5, + help='dynamic threshold') + parser.add_argument('--save-results', + action='store_true', + help='save results') opt = parser.parse_args() opt.save_json |= opt.data.endswith('coco.yaml') opt.data = check_file(opt.data) # check file @@ -361,8 +514,7 @@ def test(data, save_conf=opt.save_conf, v5_metric=opt.v5_metric, dy_thres=opt.dy_thres, - save_results=opt.save_results - ) + save_results=opt.save_results) else: - raise 
NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/train_step1.py b/train_step1.py index 486181b..4b1249d 100644 --- a/train_step1.py +++ b/train_step1.py @@ -3,12 +3,11 @@ import math import os import random +import test # import test.py to get mAP after each epoch import time from copy import deepcopy from pathlib import Path from threading import Thread -import yaml -from tqdm import tqdm import numpy as np import torch @@ -17,28 +16,33 @@ import torch.nn.functional as F import torch.optim as optim import torch.optim.lr_scheduler as lr_scheduler +import yaml from torch.cuda import amp from torch.nn.parallel import DistributedDataParallel as DDP from torch.utils.tensorboard import SummaryWriter +from tqdm import tqdm -import test # import test.py to get mAP after each epoch from models.yolo import Model from utils.autoanchor import check_anchors +from utils.checkpoint import get_state_dict from utils.datasets import create_dataloader -from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \ - fitness, strip_optimizer, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \ - set_logging, one_cycle, colorstr +from utils.general import (check_dataset, check_file, check_img_size, colorstr, + fitness, get_latest_run, increment_path, init_seeds, + labels_to_class_weights, labels_to_image_weights, + one_cycle, set_logging, strip_optimizer) from utils.loss import ComputeLoss, ComputeLossOTA, ComputeLossOTADual -from utils.plots import plot_images, plot_results, plot_lr_scheduler -from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, is_parallel +from utils.plots import plot_images, plot_lr_scheduler, plot_results +from utils.torch_utils import (ModelEMA, intersect_dicts, is_parallel, + select_device, torch_distributed_zero_first) from utils.wandb_logging.wandb_utils import WandbLogger, check_wandb_resume -from utils.checkpoint import get_state_dict - logger = logging.getLogger(__name__) + def train(hyp, opt, device, tb_writer=None): - logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) + logger.info( + colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' + for k, v in hyp.items())) save_dir, epochs, batch_size, total_batch_size, weight, rank, freeze = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weight, opt.global_rank, opt.freeze @@ -72,16 +76,23 @@ def train(hyp, opt, device, tb_writer=None): loggers = {'wandb': None} # loggers dict if rank in [-1, 0]: opt.hyp = hyp # add hyperparameters - run_id = torch.load(weight, map_location='cpu')['wandb_id'] if weight.endswith('.pt') and os.path.isfile(weight) else None - wandb_logger = WandbLogger(opt, Path(opt.save_dir).stem, run_id, data_dict) + run_id = torch.load(weight, + map_location='cpu')['wandb_id'] if weight.endswith( + '.pt') and os.path.isfile(weight) else None + wandb_logger = WandbLogger(opt, + Path(opt.save_dir).stem, run_id, data_dict) loggers['wandb'] = wandb_logger.wandb data_dict = wandb_logger.data_dict if wandb_logger.wandb: - weight, epochs, hyp = opt.weight, opt.epochs, opt.hyp # WandbLogger might update weights, epochs if resuming + # WandbLogger might update weights, epochs if resuming + weight, epochs, hyp = opt.weight, opt.epochs, opt.hyp nc = 1 if opt.single_cls else int(data_dict['nc']) # number of classes - names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names'] 
# class names - assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check + names = ['item'] if opt.single_cls and len( + data_dict['names']) != 1 else data_dict['names'] # class names + assert len( + names) == nc, '{:g} names found for nc={:g} dataset in {}'.format( + len(names), nc, opt.data) # check # Model pretrained = weight.endswith('.pt') @@ -89,11 +100,18 @@ def train(hyp, opt, device, tb_writer=None): ckpt = torch.load(weight, map_location='cpu') # load checkpoint state_dict = ckpt['model'] model = Model(opt.cfg, ch=3, nc=nc) # create - exclude = ['anchor'] if (opt.cfg or hyp.get('anchors')) and not opt.resume else [] # exclude keys - state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect + exclude = [ + 'anchor' + ] if (opt.cfg or hyp.get('anchors')) and not opt.resume else [ + ] # exclude keys + state_dict = intersect_dicts(state_dict, + model.state_dict(), + exclude=exclude) # intersect model.load_state_dict(state_dict, strict=False) # load model.to(device) - logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weight)) # report + logger.info( + 'Transferred %g/%g items from %s' % + (len(state_dict), len(model.state_dict()), weight)) # report else: model = Model(opt.cfg, ch=3, nc=nc).to(device) # create with torch_distributed_zero_first(rank): @@ -102,7 +120,10 @@ def train(hyp, opt, device, tb_writer=None): test_path = data_dict['val'] # Freeze - freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # parameter names to freeze (full or partial) + freeze = [ + f'model.{x}.' + for x in (freeze if len(freeze) > 1 else range(freeze[0])) + ] # parameter names to freeze (full or partial) for k, v in model.named_parameters(): v.requires_grad = True # train all layers if any(x in k for x in freeze): @@ -111,7 +132,8 @@ def train(hyp, opt, device, tb_writer=None): # Optimizer nbs = 64 # nominal batch size - accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing + accumulate = max(round(nbs / total_batch_size), + 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay logger.info(f"Scaled weight_decay = {hyp['weight_decay']}") @@ -181,19 +203,33 @@ def train(hyp, opt, device, tb_writer=None): pg0.append(v.rbr_dense.vector) if opt.adam: - optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + optimizer = optim.Adam(pg0, + lr=hyp['lr0'], + betas=(hyp['momentum'], + 0.999)) # adjust beta1 to momentum else: - optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) - - optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay + optimizer = optim.SGD(pg0, + lr=hyp['lr0'], + momentum=hyp['momentum'], + nesterov=True) + + optimizer.add_param_group({ + 'params': pg1, + 'weight_decay': hyp['weight_decay'] + }) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) - logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) + logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % + (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR if opt.linear_lr: - lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + 
hyp['lrf'] # linear + + def get_linear_lr(x): + return (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] + + lf = get_linear_lr else: lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf'] scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) @@ -217,23 +253,28 @@ def train(hyp, opt, device, tb_writer=None): # Results if ckpt.get('training_results') is not None: - results_file.write_text(ckpt['training_results']) # write results.txt + results_file.write_text( + ckpt['training_results']) # write results.txt # Epochs start_epoch = ckpt['epoch'] + 1 if opt.resume: - assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weight, epochs) + assert start_epoch > 0, '{} training to {:g} epochs is finished, nothing to resume.'.format( + weight, epochs) if epochs < start_epoch: - logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % - (weight, ckpt['epoch'], epochs)) + logger.info( + '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' + % (weight, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt # Image sizes gs = max(int(model.stride.max()), 32) # grid size (max stride) - nl = model.model_h2[-1].nl # number of detection layers (used for scaling hyp['obj']) - imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples + nl = model.model_h2[ + -1].nl # number of detection layers (used for scaling hyp['obj']) + imgsz, imgsz_test = (check_img_size(x, gs) for x in opt.img_size + ) # verify imgsz are gs-multiples # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: @@ -245,20 +286,42 @@ def train(hyp, opt, device, tb_writer=None): logger.info('Using SyncBatchNorm()') # Trainloader - dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, - hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, - world_size=opt.world_size, workers=opt.workers, - image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: ')) + dataloader, dataset = create_dataloader(train_path, + imgsz, + batch_size, + gs, + opt, + hyp=hyp, + augment=True, + cache=opt.cache_images, + rect=opt.rect, + rank=rank, + world_size=opt.world_size, + workers=opt.workers, + image_weights=opt.image_weights, + quad=opt.quad, + prefix=colorstr('train: ')) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches - assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1) + assert mlc < nc, 'Label class {:g} exceeds nc={:g} in {}. 
Possible class labels are 0-{:g}'.format( + mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: - testloader = create_dataloader(test_path, imgsz_test, batch_size * 2, gs, opt, # testloader - hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, - world_size=opt.world_size, workers=opt.workers, - pad=0.5, prefix=colorstr('val: '))[0] + testloader = create_dataloader( + test_path, + imgsz_test, + batch_size * 2, + gs, + opt, # testloader + hyp=hyp, + cache=opt.cache_images and not opt.notest, + rect=True, + rank=-1, + world_size=opt.world_size, + workers=opt.workers, + pad=0.5, + prefix=colorstr('val: '))[0] if not opt.resume: labels = np.concatenate(dataset.labels, 0) @@ -271,32 +334,44 @@ def train(hyp, opt, device, tb_writer=None): # Anchors if not opt.noautoanchor: - check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) + check_anchors(dataset, + model=model, + thr=hyp['anchor_t'], + imgsz=imgsz) model.half().float() # pre-reduce anchor precision # DDP mode if cuda and rank != -1: - model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank, - # nn.MultiheadAttention incompatibility with DDP https://github.com/pytorch/pytorch/issues/26698 - find_unused_parameters=any(isinstance(layer, nn.MultiheadAttention) for layer in model.modules())) + model = DDP( + model, + device_ids=[opt.local_rank], + output_device=opt.local_rank, + # nn.MultiheadAttention incompatibility with DDP https://github.com/pytorch/pytorch/issues/26698 + find_unused_parameters=any( + isinstance(layer, nn.MultiheadAttention) + for layer in model.modules())) # Model parameters hyp['box'] *= 3. / nl # scale to layers hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers - hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers + hyp['obj'] *= (imgsz / 640)**2 * 3. 
/ nl # scale to image size and layers hyp['label_smoothing'] = opt.label_smoothing model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) - model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights + model.class_weights = labels_to_class_weights( + dataset.labels, nc).to(device) * nc # attach class weights model.names = names # Start training t0 = time.time() - nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations) - # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training + nw = max( + round(hyp['warmup_epochs'] * nb), + 1000) # number of warm up iterations, max(3 epochs, 1k iterations) + # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warm up to < 1/2 of training maps = np.zeros(nc) # mAP per class - results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + results = (0, 0, 0, 0, 0, 0, 0 + ) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) if opt.single_backbone: @@ -308,19 +383,27 @@ def train(hyp, opt, device, tb_writer=None): f'Using {dataloader.num_workers} dataloader workers\n' f'Logging results to {save_dir}\n' f'Starting training for {epochs} epochs...') - for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ + for epoch in range( + start_epoch, epochs + ): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if opt.image_weights: # Generate indices if rank in [-1, 0]: - cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights - iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights - dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx + cw = model.class_weights.cpu().numpy() * ( + 1 - maps)**2 / nc # class weights + iw = labels_to_image_weights(dataset.labels, + nc=nc, + class_weights=cw) # image weights + dataset.indices = random.choices( + range(dataset.n), weights=iw, + k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: - indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() + indices = (torch.tensor(dataset.indices) + if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() @@ -333,37 +416,55 @@ def train(hyp, opt, device, tb_writer=None): if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) - logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'labels', 'img_size')) + logger.info( + ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', + 'total', 'labels', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() - for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- + for i, ( + imgs, targets, paths, _ + ) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) - imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 + imgs = imgs.to(device, non_blocking=True).float( + ) / 255.0 # 
uint8 to float32, 0-255 to 0.0-1.0 - # Warmup + # Warm up if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) - accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) + accumulate = max( + 1, + np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 - x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) + x['lr'] = np.interp(ni, xi, [ + hyp['warmup_bias_lr'] if j == 2 else 0.0, + x['initial_lr'] * lf(epoch) + ]) if 'momentum' in x: - x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) + x['momentum'] = np.interp( + ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: - sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size + sz = random.randrange(imgsz * 0.5, + imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: - ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) - imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) + ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:] + ] # new shape (stretched to gs-multiple) + imgs = F.interpolate(imgs, + size=ns, + mode='bilinear', + align_corners=False) # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward - loss, loss_items = compute_loss_ota(pred, targets.to(device), imgs) # loss scaled by batch_size + loss, loss_items = compute_loss_ota( + pred, targets.to(device), + imgs) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode if opt.quad: @@ -382,25 +483,34 @@ def train(hyp, opt, device, tb_writer=None): # Print if rank in [-1, 0]: - mloss = (mloss * i + loss_items) / (i + 1) # update mean losses - mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) - s = ('%10s' * 2 + '%10.4g' * 6) % ( - '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) + mloss = (mloss * i + loss_items) / (i + 1 + ) # update mean losses + mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 + if torch.cuda.is_available() else 0) # (GB) + s = ('%10s' * 2 + '%10.4g' * 6) % ('{:g}/{:g}'.format( + epoch, + epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if plots and ni < 10: f = save_dir / f'train_batch{ni}.jpg' # filename - Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() + Thread(target=plot_images, + args=(imgs, targets, paths, f), + daemon=True).start() # if tb_writer: # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(torch.jit.trace(model, imgs, strict=False), []) # add model graph elif plots and ni == 10 and wandb_logger.wandb: - wandb_logger.log({"Mosaics": [wandb_logger.wandb.Image(str(x), caption=x.name) for x in - save_dir.glob('train*.jpg') if x.exists()]}) + wandb_logger.log({ + 'Mosaics': [ + wandb_logger.wandb.Image(str(x), caption=x.name) + for x in save_dir.glob('train*.jpg') if x.exists() + ] + }) - # end batch ------------------------------------------------------------------------------------------------ - # end epoch ---------------------------------------------------------------------------------------------------- + # end batch 
---------------------------------------------------------------------------------------------- + # end epoch -------------------------------------------------------------------------------------------------- # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard @@ -409,7 +519,11 @@ def train(hyp, opt, device, tb_writer=None): # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP - ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights']) + ema.update_attr(model, + include=[ + 'yaml', 'nc', 'hyp', 'gr', 'names', 'stride', + 'class_weights' + ]) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP wandb_logger.current_epoch = epoch + 1 @@ -420,7 +534,8 @@ def train(hyp, opt, device, tb_writer=None): single_cls=opt.single_cls, dataloader=testloader, save_dir=save_dir, - verbose=nc < 50 and final_epoch, + verbose=nc < 50 + and final_epoch, plots=plots and final_epoch, wandb_logger=wandb_logger, compute_loss=compute_loss, @@ -429,15 +544,28 @@ def train(hyp, opt, device, tb_writer=None): # Write with open(results_file, 'a') as f: - f.write(s + '%10.4g' * 7 % results + '\n') # append metrics, val_loss + f.write(s + '%10.4g' * 7 % results + + '\n') # append metrics, val_loss if len(opt.name) and opt.bucket: - os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) + os.system('gsutil cp %s gs://%s/results/results%s.txt' % + (results_file, opt.bucket, opt.name)) # Log - tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss - 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', - 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss - 'x/lr0', 'x/lr1', 'x/lr2'] # params + tags = [ + 'train/box_loss', + 'train/obj_loss', + 'train/cls_loss', # train loss + 'metrics/precision', + 'metrics/recall', + 'metrics/mAP_0.5', + 'metrics/mAP_0.5:0.95', + 'val/box_loss', + 'val/obj_loss', + 'val/cls_loss', # val loss + 'x/lr0', + 'x/lr1', + 'x/lr2' + ] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): if tb_writer: tb_writer.add_scalar(tag, x, epoch) # tensorboard @@ -445,52 +573,77 @@ def train(hyp, opt, device, tb_writer=None): wandb_logger.log({tag: x}) # W&B # Update best mAP - fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + fi = fitness(np.array(results).reshape( + 1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] if fi > best_fitness: best_fitness = fi wandb_logger.end_epoch(best_result=best_fitness == fi) # Save model if (not opt.nosave) or final_epoch: # if save - ckpt = {'epoch': epoch, - 'best_fitness': best_fitness, - 'training_results': results_file.read_text(), - 'model': get_state_dict(deepcopy(model.module if is_parallel(model) else model).half()), - 'ema': get_state_dict(deepcopy(ema.ema).half()), - 'updates': ema.updates, - 'optimizer': optimizer.state_dict(), - 'wandb_id': wandb_logger.wandb_run.id if wandb_logger.wandb else None} + ckpt = { + 'epoch': + epoch, + 'best_fitness': + best_fitness, + 'training_results': + results_file.read_text(), + 'model': + get_state_dict( + deepcopy(model.module if is_parallel(model) else model + ).half()), + 'ema': + get_state_dict(deepcopy(ema.ema).half()), + 'updates': + ema.updates, + 'optimizer': + optimizer.state_dict(), + 'wandb_id': + wandb_logger.wandb_run.id if wandb_logger.wandb else None + } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: 
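
One note on the selection criterion in this save block: `best.pt` is refreshed whenever the scalar `fi = fitness(...)` improves. The helper itself is not shown in this diff; a hedged sketch that mirrors the YOLOv5/YOLOv7-family `utils.general.fitness`, whose default weights favor mAP@0.5:0.95 (0.9) over mAP@0.5 (0.1) and ignore P and R:

```python
# Hedged sketch of the fitness criterion used above to pick best.pt;
# weights are the YOLOv5/YOLOv7-lineage defaults, not confirmed by this diff.
import numpy as np

def fitness_sketch(x):
    # x has shape (1, 4): [P, R, mAP@0.5, mAP@0.5:0.95]
    w = np.array([0.0, 0.0, 0.1, 0.9])  # weights per metric column
    return (x[:, :4] * w).sum(1)

results = np.array([[0.70, 0.60, 0.55, 0.35]])  # hypothetical epoch metrics
print(fitness_sketch(results))  # [0.37] -> compared against best_fitness
```

Under those weights a run is "best" almost entirely on mAP@0.5:0.95; precision and recall affect the choice only indirectly, through the mAP columns.
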
torch.save(ckpt, best) if (best_fitness == fi) and (epoch >= 200): - torch.save(ckpt, wdir / 'best_{:03d}.pt'.format(epoch)) + torch.save(ckpt, wdir / f'best_{epoch:03d}.pt') if epoch == 0: - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) - elif ((epoch+1) % 25) == 0: - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) - elif epoch >= (epochs-5): - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') + elif ((epoch + 1) % 25) == 0: + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') + elif epoch >= (epochs - 5): + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') if wandb_logger.wandb: - if ((epoch + 1) % opt.save_period == 0 and not final_epoch) and opt.save_period != -1: - wandb_logger.log_model( - last.parent, opt, epoch, fi, best_model=best_fitness == fi) + if ((epoch + 1) % opt.save_period == 0 + and not final_epoch) and opt.save_period != -1: + wandb_logger.log_model(last.parent, + opt, + epoch, + fi, + best_model=best_fitness == fi) del ckpt - # end epoch ---------------------------------------------------------------------------------------------------- + # end epoch -------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Plots if plots: plot_results(save_dir=save_dir) # save as results.png if wandb_logger.wandb: - files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]] - wandb_logger.log({"Results": [wandb_logger.wandb.Image(str(save_dir / f), caption=f) for f in files - if (save_dir / f).exists()]}) + files = [ + 'results.png', 'confusion_matrix.png', + *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')] + ] + wandb_logger.log({ + 'Results': [ + wandb_logger.wandb.Image(str(save_dir / f), caption=f) + for f in files if (save_dir / f).exists() + ] + }) # Test best.pt - logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) + logger.info('%g epochs completed in %.3f hours.\n' % + (epoch - start_epoch + 1, (time.time() - t0) / 3600)) # Strip optimizers final = best if best.exists() else last # final model for f in last, best: @@ -499,9 +652,11 @@ def train(hyp, opt, device, tb_writer=None): if opt.bucket: os.system(f'gsutil cp {final} gs://{opt.bucket}/weights') # upload if wandb_logger.wandb: # Log the stripped model - wandb_logger.wandb.log_artifact(str(final), type='model', - name='run_' + wandb_logger.wandb_run.id + '_model', - aliases=['last', 'best', 'stripped']) + wandb_logger.wandb.log_artifact( + str(final), + type='model', + name='run_' + wandb_logger.wandb_run.id + '_model', + aliases=['last', 'best', 'stripped']) wandb_logger.finish_run() else: dist.destroy_process_group() @@ -512,66 +667,152 @@ def train(hyp, opt, device, tb_writer=None): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--weight', type=str, default='', help='initial weights path') - parser.add_argument('--data', type=str, default='data/coco.yaml', help='data.yaml path') - parser.add_argument('--hyp', type=str, default='hyp/hyp.scratch.p5.yaml', help='hyperparameters path') + parser.add_argument('--weight', + type=str, + default='', + help='initial weights path') + parser.add_argument('--data', + type=str, + default='data/coco.yaml', + help='data.yaml path') + parser.add_argument('--hyp', + type=str, + default='hyp/hyp.scratch.p5.yaml', + 
help='hyperparameters path') parser.add_argument('--epochs', type=int, default=300) - parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs') - parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes') - parser.add_argument('--rect', action='store_true', help='rectangular training') - parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') - parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') - parser.add_argument('--notest', action='store_true', help='only test final epoch') - parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check') + parser.add_argument('--batch-size', + type=int, + default=16, + help='total batch size for all GPUs') + parser.add_argument('--img-size', + nargs='+', + type=int, + default=[640, 640], + help='[train, test] image sizes') + parser.add_argument('--rect', + action='store_true', + help='rectangular training') + parser.add_argument('--resume', + nargs='?', + const=True, + default=False, + help='resume most recent training') + parser.add_argument('--nosave', + action='store_true', + help='only save final checkpoint') + parser.add_argument('--notest', + action='store_true', + help='only test final epoch') + parser.add_argument('--noautoanchor', + action='store_true', + help='disable autoanchor check') parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') - parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') - parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') - parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') - parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') - parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') - parser.add_argument('--single-backbone', action='store_true', help='train single backbone model') - parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer') - parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') - parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') - parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers') - parser.add_argument('--project', default='runs/train', help='save to project/name') + parser.add_argument('--cache-images', + action='store_true', + help='cache images for faster training') + parser.add_argument('--image-weights', + action='store_true', + help='use weighted image selection for training') + parser.add_argument('--device', + default='', + help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') + parser.add_argument('--multi-scale', + action='store_true', + help='vary img-size +/- 50%%') + parser.add_argument('--single-cls', + action='store_true', + help='train multi-class data as single-class') + parser.add_argument('--single-backbone', + action='store_true', + help='train single backbone model') + parser.add_argument('--adam', + action='store_true', + help='use torch.optim.Adam() optimizer') + parser.add_argument('--sync-bn', + action='store_true', + help='use SyncBatchNorm, only available in DDP mode') + parser.add_argument('--local_rank', + type=int, + default=-1, + help='DDP parameter, do not modify') + parser.add_argument('--workers', + type=int, + default=8, + help='maximum number of dataloader workers') + parser.add_argument('--project', + default='runs/train', + help='save to project/name') parser.add_argument('--entity', default=None, help='W&B entity') parser.add_argument('--name', default='exp', help='save to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--exist-ok', + action='store_true', + help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') parser.add_argument('--linear-lr', action='store_true', help='linear LR') - parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') - parser.add_argument('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table') - parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B') - parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch') - parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used') - parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone of yolov7=50, first3=0 1 2') - parser.add_argument('--v5-metric', action='store_true', help='assume maximum recall as 1.0 in AP calculation') + parser.add_argument('--label-smoothing', + type=float, + default=0.0, + help='Label smoothing epsilon') + parser.add_argument('--upload_dataset', + action='store_true', + help='Upload dataset as W&B artifact table') + parser.add_argument('--bbox_interval', + type=int, + default=-1, + help='Set bounding-box image logging interval for W&B') + parser.add_argument('--save_period', + type=int, + default=-1, + help='Log model after every "save_period" epoch') + parser.add_argument('--artifact_alias', + type=str, + default='latest', + help='version of dataset artifact to be used') + parser.add_argument( + '--freeze', + nargs='+', + type=int, + default=[0], + help='Freeze layers: backbone of yolov7=50, first3=0 1 2') + parser.add_argument('--v5-metric', + action='store_true', + help='assume maximum recall as 1.0 in AP calculation') opt = parser.parse_args() # Set DDP variables - opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 + opt.world_size = int( + os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1 set_logging(opt.global_rank) # Resume wandb_run = check_wandb_resume(opt) if opt.resume and not wandb_run: # resume an interrupted run - ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path - assert os.path.isfile(ckpt), 
'ERROR: --resume checkpoint does not exist' + ckpt = opt.resume if isinstance( + opt.resume, + str) else get_latest_run() # specified or most recent path + assert os.path.isfile( + ckpt), 'ERROR: --resume checkpoint does not exist' apriori = opt.global_rank, opt.local_rank with open(Path(ckpt).parent.parent / 'opt.yaml') as f: - opt = argparse.Namespace(**yaml.load(f, Loader=yaml.SafeLoader)) # replace - opt.cfg, opt.weight, opt.resume = os.path.relpath(Path(ckpt).parent.parent / 'cfg.yaml'), ckpt, True - opt.batch_size, opt.global_rank, opt.local_rank = opt.total_batch_size, *apriori # reinstate + opt = argparse.Namespace(**yaml.load( + f, Loader=yaml.SafeLoader)) # replace + opt.cfg, opt.weight, opt.resume = os.path.relpath( + Path(ckpt).parent.parent / 'cfg.yaml'), ckpt, True + opt.batch_size, opt.global_rank, opt.local_rank = \ + opt.total_batch_size, *apriori # reinstate opt.save_dir = os.path.relpath(Path(ckpt).parent.parent) logger.info('Resuming training from %s' % ckpt) else: # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml') - opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files + opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file( + opt.cfg), check_file(opt.hyp) # check files assert len(opt.cfg), 'cfg must be specified' - opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) - opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok) # increment run + opt.img_size.extend( + [opt.img_size[-1]] * + (2 - len(opt.img_size))) # extend to 2 sizes (train, test) + opt.save_dir = increment_path(Path(opt.project) / opt.name, + exist_ok=opt.exist_ok) # increment run # DDP mode opt.total_batch_size = opt.batch_size @@ -580,7 +821,8 @@ def train(hyp, opt, device, tb_writer=None): assert torch.cuda.device_count() > opt.local_rank torch.cuda.set_device(opt.local_rank) device = torch.device('cuda', opt.local_rank) - dist.init_process_group(backend='nccl', init_method='env://') # distributed backend + dist.init_process_group(backend='nccl', + init_method='env://') # distributed backend assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count' opt.batch_size = opt.total_batch_size // opt.world_size @@ -593,6 +835,8 @@ def train(hyp, opt, device, tb_writer=None): tb_writer = None # init loggers if opt.global_rank in [-1, 0]: prefix = colorstr('tensorboard: ') - logger.info(f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/") + logger.info( + f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/" + ) tb_writer = SummaryWriter(opt.save_dir) # Tensorboard train(hyp, opt, device, tb_writer) diff --git a/train_step2.py b/train_step2.py index 912e16f..0c82463 100644 --- a/train_step2.py +++ b/train_step2.py @@ -3,12 +3,11 @@ import math import os import random +import test # import test.py to get mAP after each epoch import time from copy import deepcopy from pathlib import Path from threading import Thread -import yaml -from tqdm import tqdm import numpy as np import torch @@ -17,28 +16,33 @@ import torch.nn.functional as F import torch.optim as optim import torch.optim.lr_scheduler as lr_scheduler +import yaml from torch.cuda import amp from torch.nn.parallel import DistributedDataParallel as DDP from torch.utils.tensorboard import SummaryWriter +from tqdm import tqdm -import test # import test.py to get mAP after each 
epoch from models.yolo import Model from utils.autoanchor import check_anchors +from utils.checkpoint import get_state_dict from utils.datasets import create_dataloader -from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \ - fitness, strip_optimizer, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \ - set_logging, colorstr +from utils.general import (check_dataset, check_file, check_img_size, colorstr, + fitness, get_latest_run, increment_path, init_seeds, + labels_to_class_weights, labels_to_image_weights, + set_logging, strip_optimizer) from utils.loss import ComputeLoss, ComputeLossOTADy -from utils.plots import plot_images, plot_results, plot_lr_scheduler -from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, is_parallel +from utils.plots import plot_images, plot_lr_scheduler, plot_results +from utils.torch_utils import (ModelEMA, intersect_dicts, is_parallel, + select_device, torch_distributed_zero_first) from utils.wandb_logging.wandb_utils import WandbLogger, check_wandb_resume -from utils.checkpoint import get_state_dict - logger = logging.getLogger(__name__) + def train(hyp, opt, device, tb_writer=None): - logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items())) + logger.info( + colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' + for k, v in hyp.items())) save_dir, epochs, batch_size, total_batch_size, weight, rank, freeze = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weight, opt.global_rank, opt.freeze @@ -72,16 +76,23 @@ def train(hyp, opt, device, tb_writer=None): loggers = {'wandb': None} # loggers dict if rank in [-1, 0]: opt.hyp = hyp # add hyperparameters - run_id = torch.load(weight, map_location='cpu')['wandb_id'] if weight.endswith('.pt') and os.path.isfile(weight) else None - wandb_logger = WandbLogger(opt, Path(opt.save_dir).stem, run_id, data_dict) + run_id = torch.load(weight, + map_location='cpu')['wandb_id'] if weight.endswith( + '.pt') and os.path.isfile(weight) else None + wandb_logger = WandbLogger(opt, + Path(opt.save_dir).stem, run_id, data_dict) loggers['wandb'] = wandb_logger.wandb data_dict = wandb_logger.data_dict if wandb_logger.wandb: - weight, epochs, hyp = opt.weight, opt.epochs, opt.hyp # WandbLogger might update weights, epochs if resuming + # WandbLogger might update weights, epochs if resuming + weight, epochs, hyp = opt.weight, opt.epochs, opt.hyp nc = 1 if opt.single_cls else int(data_dict['nc']) # number of classes - names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names - assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check + names = ['item'] if opt.single_cls and len( + data_dict['names']) != 1 else data_dict['names'] # class names + assert len( + names) == nc, '{:g} names found for nc={:g} dataset in {}'.format( + len(names), nc, opt.data) # check # Model pretrained = weight.endswith('.pt') @@ -89,11 +100,18 @@ def train(hyp, opt, device, tb_writer=None): ckpt = torch.load(weight, map_location='cpu') # load checkpoint state_dict = ckpt['model'] model = Model(opt.cfg, ch=3, nc=nc) # create - exclude = ['anchor'] if (opt.cfg or hyp.get('anchors')) and not opt.resume else [] # exclude keys - state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect + exclude = [ + 'anchor' + ] if (opt.cfg or hyp.get('anchors')) and not 
opt.resume else [ + ] # exclude keys + state_dict = intersect_dicts(state_dict, + model.state_dict(), + exclude=exclude) # intersect model.load_state_dict(state_dict, strict=False) # load model.to(device) - logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weight)) # report + logger.info( + 'Transferred %g/%g items from %s' % + (len(state_dict), len(model.state_dict()), weight)) # report else: model = Model(opt.cfg, ch=3, nc=nc).to(device) # create with torch_distributed_zero_first(rank): @@ -102,7 +120,10 @@ def train(hyp, opt, device, tb_writer=None): test_path = data_dict['val'] # Freeze - freeze = [f'model.{x}.' for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # parameter names to freeze (full or partial) + freeze = [ + f'model.{x}.' + for x in (freeze if len(freeze) > 1 else range(freeze[0])) + ] # parameter names to freeze (full or partial) for k, v in model.named_parameters(): v.requires_grad = True # train all layers if any(x in k for x in freeze): @@ -117,7 +138,8 @@ def train(hyp, opt, device, tb_writer=None): # Optimizer nbs = 64 # nominal batch size - accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing + accumulate = max(round(nbs / total_batch_size), + 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay logger.info(f"Scaled weight_decay = {hyp['weight_decay']}") @@ -132,19 +154,37 @@ def train(hyp, opt, device, tb_writer=None): pg1.append(v.weight) # apply decay if opt.adam: - optimizer = optim.AdamW(pg1, lr=hyp['lr0'], weight_decay=hyp['weight_decay'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum + optimizer = optim.AdamW(pg1, + lr=hyp['lr0'], + weight_decay=hyp['weight_decay'], + betas=(hyp['momentum'], + 0.999)) # adjust beta1 to momentum else: - optimizer = optim.SGD(pg1, lr=hyp['lr0'], weight_decay=hyp['weight_decay'], momentum=hyp['momentum'], nesterov=True) + optimizer = optim.SGD(pg1, + lr=hyp['lr0'], + weight_decay=hyp['weight_decay'], + momentum=hyp['momentum'], + nesterov=True) if len(pg0): - optimizer.add_param_group({'params': pg0, 'weight_decay': 0}) # add pg0 without weight_decay + optimizer.add_param_group({ + 'params': pg0, + 'weight_decay': 0 + }) # add pg0 without weight_decay if len(pg2): - optimizer.add_param_group({'params': pg2, 'weight_decay': 0}) # add pg2 (biases) - logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) + optimizer.add_param_group({ + 'params': pg2, + 'weight_decay': 0 + }) # add pg2 (biases) + logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % + (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR - lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear + def get_linear_lr(x): + return (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] + + lf = get_linear_lr scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) plot_lr_scheduler(optimizer, scheduler, epochs, save_dir) @@ -167,23 +207,28 @@ def train(hyp, opt, device, tb_writer=None): # Results if ckpt.get('training_results') is not None: - results_file.write_text(ckpt['training_results']) # write results.txt + results_file.write_text( + ckpt['training_results']) # write results.txt # Epochs start_epoch = ckpt['epoch'] + 1 if opt.resume: - assert start_epoch > 0, '%s training to %g epochs is 
finished, nothing to resume.' % (weight, epochs) + assert start_epoch > 0, '{} training to {:g} epochs is finished, nothing to resume.'.format( + weight, epochs) if epochs < start_epoch: - logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % - (weight, ckpt['epoch'], epochs)) + logger.info( + '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' + % (weight, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt # Image sizes gs = max(int(model.stride.max()), 32) # grid size (max stride) - nl = model.model_h2[-1].nl # number of detection layers (used for scaling hyp['obj']) - imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples + nl = model.model_h2[ + -1].nl # number of detection layers (used for scaling hyp['obj']) + imgsz, imgsz_test = (check_img_size(x, gs) for x in opt.img_size + ) # verify imgsz are gs-multiples # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: @@ -195,21 +240,43 @@ def train(hyp, opt, device, tb_writer=None): logger.info('Using SyncBatchNorm()') # Trainloader - dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, - hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, - world_size=opt.world_size, workers=opt.workers, - image_weights=opt.image_weights, quad=opt.quad, prefix=colorstr('train: ')) + dataloader, dataset = create_dataloader(train_path, + imgsz, + batch_size, + gs, + opt, + hyp=hyp, + augment=True, + cache=opt.cache_images, + rect=opt.rect, + rank=rank, + world_size=opt.world_size, + workers=opt.workers, + image_weights=opt.image_weights, + quad=opt.quad, + prefix=colorstr('train: ')) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches - assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1) + assert mlc < nc, 'Label class {:g} exceeds nc={:g} in {}. 
Possible class labels are 0-{:g}'.format( + mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: test_batch_size = 1 - testloader = create_dataloader(test_path, imgsz_test, test_batch_size, gs, opt, # testloader - hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, - world_size=opt.world_size, workers=opt.workers, - pad=0.5, prefix=colorstr('val: '))[0] + testloader = create_dataloader( + test_path, + imgsz_test, + test_batch_size, + gs, + opt, # testloader + hyp=hyp, + cache=opt.cache_images and not opt.notest, + rect=True, + rank=-1, + world_size=opt.world_size, + workers=opt.workers, + pad=0.5, + prefix=colorstr('val: '))[0] if not opt.resume: labels = np.concatenate(dataset.labels, 0) @@ -222,32 +289,44 @@ def train(hyp, opt, device, tb_writer=None): # Anchors if not opt.noautoanchor: - check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) + check_anchors(dataset, + model=model, + thr=hyp['anchor_t'], + imgsz=imgsz) model.half().float() # pre-reduce anchor precision # DDP mode if cuda and rank != -1: - model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank, - # nn.MultiheadAttention incompatibility with DDP https://github.com/pytorch/pytorch/issues/26698 - find_unused_parameters=any(isinstance(layer, nn.MultiheadAttention) for layer in model.modules())) + model = DDP( + model, + device_ids=[opt.local_rank], + output_device=opt.local_rank, + # nn.MultiheadAttention incompatibility with DDP https://github.com/pytorch/pytorch/issues/26698 + find_unused_parameters=any( + isinstance(layer, nn.MultiheadAttention) + for layer in model.modules())) # Model parameters hyp['box'] *= 3. / nl # scale to layers hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers - hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers + hyp['obj'] *= (imgsz / 640)**2 * 3. 
/ nl # scale to image size and layers hyp['label_smoothing'] = opt.label_smoothing model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) - model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights + model.class_weights = labels_to_class_weights( + dataset.labels, nc).to(device) * nc # attach class weights model.names = names # Start training t0 = time.time() - nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations) - # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training + nw = max( + round(hyp['warmup_epochs'] * nb), + 1000) # number of warm up iterations, max(3 epochs, 1k iterations) + # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warm up to < 1/2 of training maps = np.zeros(nc) # mAP per class - results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) + results = (0, 0, 0, 0, 0, 0, 0 + ) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) compute_loss_ota_dy = ComputeLossOTADy(model) @@ -256,7 +335,9 @@ def train(hyp, opt, device, tb_writer=None): f'Using {dataloader.num_workers} dataloader workers\n' f'Logging results to {save_dir}\n' f'Starting training for {epochs} epochs...') - for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ + for epoch in range( + start_epoch, epochs + ): # epoch ------------------------------------------------------------------ model.train() model.close_all_bn() @@ -264,12 +345,18 @@ def train(hyp, opt, device, tb_writer=None): if opt.image_weights: # Generate indices if rank in [-1, 0]: - cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights - iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights - dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx + cw = model.class_weights.cpu().numpy() * ( + 1 - maps)**2 / nc # class weights + iw = labels_to_image_weights(dataset.labels, + nc=nc, + class_weights=cw) # image weights + dataset.indices = random.choices( + range(dataset.n), weights=iw, + k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: - indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() + indices = (torch.tensor(dataset.indices) + if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() @@ -282,37 +369,55 @@ def train(hyp, opt, device, tb_writer=None): if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) - logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'diff', 'score.0', 'score.1', 'total', 'labels', 'img_size')) + logger.info( + ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'diff', 'score.0', + 'score.1', 'total', 'labels', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() - for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- + for i, ( + imgs, targets, paths, _ + ) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) - imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to 
float32, 0-255 to 0.0-1.0 + imgs = imgs.to(device, non_blocking=True).float( + ) / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 - # Warmup + # Warm up if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) - accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) + accumulate = max( + 1, + np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 - x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) + x['lr'] = np.interp(ni, xi, [ + hyp['warmup_bias_lr'] if j == 2 else 0.0, + x['initial_lr'] * lf(epoch) + ]) if 'momentum' in x: - x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) + x['momentum'] = np.interp( + ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: - sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size + sz = random.randrange(imgsz * 0.5, + imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: - ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple) - imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) + ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:] + ] # new shape (stretched to gs-multiple) + imgs = F.interpolate(imgs, + size=ns, + mode='bilinear', + align_corners=False) # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward - loss, loss_items = compute_loss_ota_dy(pred, targets.to(device), imgs) # loss scaled by batch_size + loss, loss_items = compute_loss_ota_dy( + pred, targets.to(device), + imgs) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode if opt.quad: @@ -331,25 +436,34 @@ def train(hyp, opt, device, tb_writer=None): # Print if rank in [-1, 0]: - mloss = (mloss * i + loss_items) / (i + 1) # update mean losses - mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) - s = ('%10s' * 2 + '%10.4g' * 6) % ( - '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) + mloss = (mloss * i + loss_items) / (i + 1 + ) # update mean losses + mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 + if torch.cuda.is_available() else 0) # (GB) + s = ('%10s' * 2 + '%10.4g' * 6) % ('{:g}/{:g}'.format( + epoch, + epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if plots and ni < 10: f = save_dir / f'train_batch{ni}.jpg' # filename - Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start() + Thread(target=plot_images, + args=(imgs, targets, paths, f), + daemon=True).start() # if tb_writer: # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(torch.jit.trace(model, imgs, strict=False), []) # add model graph elif plots and ni == 10 and wandb_logger.wandb: - wandb_logger.log({"Mosaics": [wandb_logger.wandb.Image(str(x), caption=x.name) for x in - save_dir.glob('train*.jpg') if x.exists()]}) + wandb_logger.log({ + 'Mosaics': [ + wandb_logger.wandb.Image(str(x), caption=x.name) + for x in save_dir.glob('train*.jpg') if x.exists() + ] + }) - # end batch ------------------------------------------------------------------------------------------------ - # end epoch 
---------------------------------------------------------------------------------------------------- + # end batch ---------------------------------------------------------------------------------------------- + # end epoch -------------------------------------------------------------------------------------------------- # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard @@ -358,7 +472,11 @@ def train(hyp, opt, device, tb_writer=None): # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP - ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights']) + ema.update_attr(model, + include=[ + 'yaml', 'nc', 'hyp', 'gr', 'names', 'stride', + 'class_weights' + ]) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP wandb_logger.current_epoch = epoch + 1 @@ -370,7 +488,8 @@ def train(hyp, opt, device, tb_writer=None): single_cls=opt.single_cls, dataloader=testloader, save_dir=save_dir, - verbose=nc < 50 and final_epoch, + verbose=nc < 50 + and final_epoch, plots=plots and final_epoch, wandb_logger=wandb_logger, compute_loss=compute_loss, @@ -379,15 +498,28 @@ def train(hyp, opt, device, tb_writer=None): # Write with open(results_file, 'a') as f: - f.write(s + '%10.4g' * 7 % results + '\n') # append metrics, val_loss + f.write(s + '%10.4g' * 7 % results + + '\n') # append metrics, val_loss if len(opt.name) and opt.bucket: - os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) + os.system('gsutil cp %s gs://%s/results/results%s.txt' % + (results_file, opt.bucket, opt.name)) # Log - tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss - 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', - 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss - 'x/lr0', 'x/lr1', 'x/lr2'] # params + tags = [ + 'train/box_loss', + 'train/obj_loss', + 'train/cls_loss', # train loss + 'metrics/precision', + 'metrics/recall', + 'metrics/mAP_0.5', + 'metrics/mAP_0.5:0.95', + 'val/box_loss', + 'val/obj_loss', + 'val/cls_loss', # val loss + 'x/lr0', + 'x/lr1', + 'x/lr2' + ] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): if tb_writer: tb_writer.add_scalar(tag, x, epoch) # tensorboard @@ -395,52 +527,77 @@ def train(hyp, opt, device, tb_writer=None): wandb_logger.log({tag: x}) # W&B # Update best mAP - fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] + fi = fitness(np.array(results).reshape( + 1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95] if fi > best_fitness: best_fitness = fi wandb_logger.end_epoch(best_result=best_fitness == fi) # Save model if (not opt.nosave) or final_epoch: # if save - ckpt = {'epoch': epoch, - 'best_fitness': best_fitness, - 'training_results': results_file.read_text(), - 'model': get_state_dict(deepcopy(model.module if is_parallel(model) else model).half()), - 'ema': get_state_dict(deepcopy(ema.ema).half()), - 'updates': ema.updates, - 'optimizer': optimizer.state_dict(), - 'wandb_id': wandb_logger.wandb_run.id if wandb_logger.wandb else None} + ckpt = { + 'epoch': + epoch, + 'best_fitness': + best_fitness, + 'training_results': + results_file.read_text(), + 'model': + get_state_dict( + deepcopy(model.module if is_parallel(model) else model + ).half()), + 'ema': + get_state_dict(deepcopy(ema.ema).half()), + 'updates': + ema.updates, + 'optimizer': + optimizer.state_dict(), + 'wandb_id': + wandb_logger.wandb_run.id if 
wandb_logger.wandb else None + } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) if (best_fitness == fi) and (epoch >= 200): - torch.save(ckpt, wdir / 'best_{:03d}.pt'.format(epoch)) + torch.save(ckpt, wdir / f'best_{epoch:03d}.pt') if epoch == 0: - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) - elif ((epoch+1) % 25) == 0: - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) - elif epoch >= (epochs-5): - torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch)) + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') + elif ((epoch + 1) % 25) == 0: + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') + elif epoch >= (epochs - 5): + torch.save(ckpt, wdir / f'epoch_{epoch:03d}.pt') if wandb_logger.wandb: - if ((epoch + 1) % opt.save_period == 0 and not final_epoch) and opt.save_period != -1: - wandb_logger.log_model( - last.parent, opt, epoch, fi, best_model=best_fitness == fi) + if ((epoch + 1) % opt.save_period == 0 + and not final_epoch) and opt.save_period != -1: + wandb_logger.log_model(last.parent, + opt, + epoch, + fi, + best_model=best_fitness == fi) del ckpt - # end epoch ---------------------------------------------------------------------------------------------------- + # end epoch -------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Plots if plots: plot_results(save_dir=save_dir) # save as results.png if wandb_logger.wandb: - files = ['results.png', 'confusion_matrix.png', *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')]] - wandb_logger.log({"Results": [wandb_logger.wandb.Image(str(save_dir / f), caption=f) for f in files - if (save_dir / f).exists()]}) + files = [ + 'results.png', 'confusion_matrix.png', + *[f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R')] + ] + wandb_logger.log({ + 'Results': [ + wandb_logger.wandb.Image(str(save_dir / f), caption=f) + for f in files if (save_dir / f).exists() + ] + }) # Test best.pt - logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) + logger.info('%g epochs completed in %.3f hours.\n' % + (epoch - start_epoch + 1, (time.time() - t0) / 3600)) # Strip optimizers final = best if best.exists() else last # final model for f in last, best: @@ -449,9 +606,11 @@ def train(hyp, opt, device, tb_writer=None): if opt.bucket: os.system(f'gsutil cp {final} gs://{opt.bucket}/weights') # upload if wandb_logger.wandb: # Log the stripped model - wandb_logger.wandb.log_artifact(str(final), type='model', - name='run_' + wandb_logger.wandb_run.id + '_model', - aliases=['last', 'best', 'stripped']) + wandb_logger.wandb.log_artifact( + str(final), + type='model', + name='run_' + wandb_logger.wandb_run.id + '_model', + aliases=['last', 'best', 'stripped']) wandb_logger.finish_run() else: dist.destroy_process_group() @@ -462,64 +621,147 @@ def train(hyp, opt, device, tb_writer=None): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--cfg', type=str, default='', help='model.yaml path') - parser.add_argument('--weight', type=str, default='', help='initial weights path') - parser.add_argument('--data', type=str, default='data/coco.yaml', help='data.yaml path') - parser.add_argument('--hyp', type=str, default='hyp/hyp.scratch.p5.yaml', help='hyperparameters path') + parser.add_argument('--weight', + type=str, + default='', + help='initial weights path') + parser.add_argument('--data', + type=str, + default='data/coco.yaml', + 
help='data.yaml path') + parser.add_argument('--hyp', + type=str, + default='hyp/hyp.scratch.p5.yaml', + help='hyperparameters path') parser.add_argument('--epochs', type=int, default=2) - parser.add_argument('--batch-size', type=int, default=1, help='total batch size for all GPUs') - parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes') - parser.add_argument('--rect', action='store_true', help='rectangular training') - parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training') - parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') - parser.add_argument('--notest', action='store_true', help='only test final epoch') - parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check') + parser.add_argument('--batch-size', + type=int, + default=1, + help='total batch size for all GPUs') + parser.add_argument('--img-size', + nargs='+', + type=int, + default=[640, 640], + help='[train, test] image sizes') + parser.add_argument('--rect', + action='store_true', + help='rectangular training') + parser.add_argument('--resume', + nargs='?', + const=True, + default=False, + help='resume most recent training') + parser.add_argument('--nosave', + action='store_true', + help='only save final checkpoint') + parser.add_argument('--notest', + action='store_true', + help='only test final epoch') + parser.add_argument('--noautoanchor', + action='store_true', + help='disable autoanchor check') parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') - parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') - parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training') - parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') - parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%') - parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class') - parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer') - parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode') - parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify') - parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers') - parser.add_argument('--project', default='runs/train', help='save to project/name') + parser.add_argument('--cache-images', + action='store_true', + help='cache images for faster training') + parser.add_argument('--image-weights', + action='store_true', + help='use weighted image selection for training') + parser.add_argument('--device', + default='', + help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') + parser.add_argument('--multi-scale', + action='store_true', + help='vary img-size +/- 50%%') + parser.add_argument('--single-cls', + action='store_true', + help='train multi-class data as single-class') + parser.add_argument('--adam', + action='store_true', + help='use torch.optim.Adam() optimizer') + parser.add_argument('--sync-bn', + action='store_true', + help='use SyncBatchNorm, only available in DDP mode') + parser.add_argument('--local_rank', + type=int, + default=-1, + help='DDP parameter, do not modify') + parser.add_argument('--workers', + type=int, + default=8, + help='maximum number of dataloader workers') + parser.add_argument('--project', + default='runs/train', + help='save to project/name') parser.add_argument('--entity', default=None, help='W&B entity') parser.add_argument('--name', default='exp', help='save to project/name') - parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') + parser.add_argument('--exist-ok', + action='store_true', + help='existing project/name ok, do not increment') parser.add_argument('--quad', action='store_true', help='quad dataloader') - parser.add_argument('--label-smoothing', type=float, default=0.0, help='Label smoothing epsilon') - parser.add_argument('--upload_dataset', action='store_true', help='Upload dataset as W&B artifact table') - parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B') - parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch') - parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used') - parser.add_argument('--freeze', nargs='+', type=int, default=[0], help='Freeze layers: backbone of yolov7=50, first3=0 1 2') - parser.add_argument('--v5-metric', action='store_true', help='assume maximum recall as 1.0 in AP calculation') + parser.add_argument('--label-smoothing', + type=float, + default=0.0, + help='Label smoothing epsilon') + parser.add_argument('--upload_dataset', + action='store_true', + help='Upload dataset as W&B artifact table') + parser.add_argument('--bbox_interval', + type=int, + default=-1, + help='Set bounding-box image logging interval for W&B') + parser.add_argument('--save_period', + type=int, + default=-1, + help='Log model after every "save_period" epoch') + parser.add_argument('--artifact_alias', + type=str, + default='latest', + help='version of dataset artifact to be used') + parser.add_argument( + '--freeze', + nargs='+', + type=int, + default=[0], + help='Freeze layers: backbone of yolov7=50, first3=0 1 2') + parser.add_argument('--v5-metric', + action='store_true', + help='assume maximum recall as 1.0 in AP calculation') opt = parser.parse_args() # Set DDP variables - opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 + opt.world_size = int( + os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1 opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1 set_logging(opt.global_rank) # Resume wandb_run = check_wandb_resume(opt) if opt.resume and not wandb_run: # resume an interrupted run - ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path - assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist' + ckpt = opt.resume if isinstance( + opt.resume, + str) else get_latest_run() # specified or most recent path + assert os.path.isfile( + 
ckpt), 'ERROR: --resume checkpoint does not exist' apriori = opt.global_rank, opt.local_rank with open(Path(ckpt).parent.parent / 'opt.yaml') as f: - opt = argparse.Namespace(**yaml.load(f, Loader=yaml.SafeLoader)) # replace - opt.cfg, opt.weight, opt.resume = os.path.relpath(Path(ckpt).parent.parent / 'cfg.yaml'), ckpt, True + opt = argparse.Namespace(**yaml.load( + f, Loader=yaml.SafeLoader)) # replace + opt.cfg, opt.weight, opt.resume = os.path.relpath( + Path(ckpt).parent.parent / 'cfg.yaml'), ckpt, True opt.batch_size, opt.global_rank, opt.local_rank = opt.total_batch_size, *apriori # reinstate opt.save_dir = os.path.relpath(Path(ckpt).parent.parent) logger.info('Resuming training from %s' % ckpt) else: # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml') - opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files + opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file( + opt.cfg), check_file(opt.hyp) # check files assert len(opt.cfg), 'cfg must be specified' - opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test) - opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok) # increment run + opt.img_size.extend( + [opt.img_size[-1]] * + (2 - len(opt.img_size))) # extend to 2 sizes (train, test) + opt.save_dir = increment_path(Path(opt.project) / opt.name, + exist_ok=opt.exist_ok) # increment run # DDP mode opt.total_batch_size = opt.batch_size @@ -530,7 +772,8 @@ def train(hyp, opt, device, tb_writer=None): assert torch.cuda.device_count() > opt.local_rank torch.cuda.set_device(opt.local_rank) device = torch.device('cuda', opt.local_rank) - dist.init_process_group(backend='nccl', init_method='env://') # distributed backend + dist.init_process_group(backend='nccl', + init_method='env://') # distributed backend assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count' opt.batch_size = opt.total_batch_size // opt.world_size @@ -543,6 +786,8 @@ def train(hyp, opt, device, tb_writer=None): tb_writer = None # init loggers if opt.global_rank in [-1, 0]: prefix = colorstr('tensorboard: ') - logger.info(f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/") + logger.info( + f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/" + ) tb_writer = SummaryWriter(opt.save_dir) # Tensorboard train(hyp, opt, device, tb_writer) diff --git a/utils/__init__.py b/utils/__init__.py index 84952a8..a6131c1 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -1 +1 @@ -# init \ No newline at end of file +# init diff --git a/utils/autoanchor.py b/utils/autoanchor.py index 18a6049..53decdd 100644 --- a/utils/autoanchor.py +++ b/utils/autoanchor.py @@ -24,10 +24,15 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640): # Check anchor fit to data, recompute if necessary prefix = colorstr('autoanchor: ') print(f'\n{prefix}Analyzing anchors... 
', end='') - m = model.module.model_h2[-1] if hasattr(model, 'module') else model.model_h2[-1] # Detect() + m = model.module.model_h2[-1] if hasattr( + model, 'module') else model.model_h2[-1] # Detect() shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) - scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale - wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh + scale = np.random.uniform(0.9, 1.1, + size=(shapes.shape[0], 1)) # augment scale + wh = torch.tensor( + np.concatenate([ + l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels) + ])).float() # wh def metric(k): # compute metric r = wh[:, None] / k[None] @@ -39,42 +44,61 @@ def metric(k): # compute metric anchors = m.anchor_grid.clone().cpu().view(-1, 2) # current anchors bpr, aat = metric(anchors) - print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') + print( + f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', + end='') if bpr < 0.98: # threshold to recompute print('. Attempting to improve anchors, please wait...') na = m.anchor_grid.numel() // 2 # number of anchors try: - anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) + anchors = kmean_anchors(dataset, + n=na, + img_size=imgsz, + thr=thr, + gen=1000, + verbose=False) except Exception as e: print(f'{prefix}ERROR: {e}') new_bpr = metric(anchors)[0] if new_bpr > bpr: # replace anchors - anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) - m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference + anchors = torch.tensor(anchors, + device=m.anchors.device).type_as(m.anchors) + m.anchor_grid[:] = anchors.clone().view_as( + m.anchor_grid) # for inference check_anchor_order(m) - m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss - print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') + m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to( + m.anchors.device).view(-1, 1, 1) # loss + print( + f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.' + ) else: - print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.') + print( + f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.' + ) print('') # newline -def kmean_anchors(path='./data/coco.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): - """ Creates kmeans-evolved anchors from training dataset +def kmean_anchors(path='./data/coco.yaml', + n=9, + img_size=640, + thr=4.0, + gen=1000, + verbose=True): + """Creates kmeans-evolved anchors from training dataset. 
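Aside: the `check_anchors` hunk above gates anchor re-estimation on best possible recall (BPR < 0.98). A minimal sketch of that metric, reusing the `wh` (per-label width/height tensor) and `thr` (`hyp['anchor_t']`) names from the surrounding code; this is an illustration only, not part of the diff:

```python
import torch

def bpr_metric(k, wh, thr=4.0):
    """Score an anchor set k (n, 2) against label sizes wh (m, 2). Sketch only."""
    r = wh[:, None] / k[None]                   # (m, n, 2) label-to-anchor ratios
    x = torch.min(r, 1. / r).min(2)[0]          # symmetric ratio, worst dimension
    best = x.max(1)[0]                          # best anchor per label
    aat = (x > 1. / thr).float().sum(1).mean()  # anchors above threshold per target
    bpr = (best > 1. / thr).float().mean()      # best possible recall
    return bpr, aat
```

With the 0.98 cutoff above, `kmean_anchors` only runs when more than about 2% of labels lack an anchor within the ratio threshold.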
- Arguments: - path: path to dataset *.yaml, or a loaded dataset - n: number of anchors - img_size: image size used for training - thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 - gen: generations to evolve anchors using genetic algorithm - verbose: print all results + Arguments: + path: path to dataset *.yaml, or a loaded dataset + n: number of anchors + img_size: image size used for training + thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 + gen: generations to evolve anchors using genetic algorithm + verbose: print all results - Return: - k: kmeans evolved anchors + Return: + k: kmeans evolved anchors - Usage: - from utils.autoanchor import *; _ = kmean_anchors() + Usage: + from utils.autoanchor import *; _ = kmean_anchors() """ thr = 1. / thr prefix = colorstr('autoanchor: ') @@ -92,30 +116,41 @@ def anchor_fitness(k): # mutation fitness def print_results(k): k = k[np.argsort(k.prod(1))] # sort small to large x, best = metric(k, wh0) - bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr - print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') - print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' - f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') + bpr, aat = (best > thr).float().mean(), ( + x > thr).float().mean() * n # best possible recall, anch > thr + print( + f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr' + ) + print( + f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' + f'past_thr={x[x > thr].mean():.3f}-mean: ', + end='') for i, x in enumerate(k): - print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg + print('%i,%i' % (round(x[0]), round(x[1])), + end=', ' if i < len(k) - 1 else '\n') # use in *.cfg return k if isinstance(path, str): # *.yaml file with open(path) as f: data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict from utils.datasets import LoadImagesAndLabels - dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) + dataset = LoadImagesAndLabels(data_dict['train'], + augment=True, + rect=True) else: dataset = path # dataset # Get label wh shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) - wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh + wh0 = np.concatenate( + [l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh # Filter i = (wh0 < 3.0).any(1).sum() if i: - print(f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.') + print( + f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.' 
+ ) wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 @@ -123,7 +158,9 @@ def print_results(k): print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') s = wh.std(0) # sigmas for whitening k, dist = kmeans(wh / s, n, iter=30) # points, mean distance - assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}') + assert len(k) == n, print( + f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}' + ) k *= s wh = torch.tensor(wh, dtype=torch.float32) # filtered wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered @@ -143,12 +180,17 @@ def print_results(k): # Evolve npr = np.random - f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma - pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar + f, sh, mp, s = anchor_fitness( + k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma + pbar = tqdm(range(gen), + desc=f'{prefix}Evolving anchors with Genetic Algorithm:' + ) # progress bar for _ in pbar: v = np.ones(sh) - while (v == 1).all(): # mutate until a change occurs (prevent duplicates) - v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) + while (v == 1 + ).all(): # mutate until a change occurs (prevent duplicates) + v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + + 1).clip(0.3, 3.0) kg = (k.copy() * v).clip(min=2.0) fg = anchor_fitness(kg) if fg > f: diff --git a/utils/checkpoint.py b/utils/checkpoint.py index 2b7fd82..b054659 100644 --- a/utils/checkpoint.py +++ b/utils/checkpoint.py @@ -1,7 +1,9 @@ from collections import OrderedDict + def weights_to_cpu(state_dict): """Copy a model state_dict to cpu. + Args: state_dict (OrderedDict): Model weights on GPU. 
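For context on the `utils/checkpoint.py` hunk: the diff only reflows the docstring, but per that docstring the helper copies a GPU state_dict to CPU so checkpoints can be loaded without a GPU present. A hedged sketch of a plausible body (the implementation itself is not shown in this hunk):

```python
from collections import OrderedDict

def weights_to_cpu(state_dict):
    """Copy a model state_dict to CPU (sketch only; real body not in this hunk)."""
    state_dict_cpu = OrderedDict()
    for key, val in state_dict.items():
        state_dict_cpu[key] = val.cpu()  # move each tensor off the GPU
    return state_dict_cpu
```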
Returns: diff --git a/utils/datasets.py b/utils/datasets.py index 940e668..798bfed 100644 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -16,24 +16,23 @@ import numpy as np import torch import torch.nn.functional as F -from PIL import Image, ExifTags +from PIL import ExifTags, Image from torch.utils.data import Dataset +# from pycocotools import mask as maskUtils from tqdm import tqdm -import pickle -from copy import deepcopy -#from pycocotools import mask as maskUtils -from torchvision.utils import save_image -from torchvision.ops import roi_pool, roi_align, ps_roi_pool, ps_roi_align - -from utils.general import check_requirements, xyxy2xywh, xywh2xyxy, xywhn2xyxy, xyn2xy, segment2box, segments2boxes, \ - resample_segments, clean_str +from utils.general import (check_requirements, clean_str, resample_segments, + segment2box, segments2boxes, xyn2xy, xywh2xyxy, + xywhn2xyxy, xyxy2xywh) from utils.torch_utils import torch_distributed_zero_first # Parameters help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data' -img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo'] # acceptable image suffixes -vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes +img_formats = [ + 'bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo' +] # acceptable image suffixes +vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', + 'mkv'] # acceptable video suffixes logger = logging.getLogger(__name__) # Get orientation exif tag @@ -56,30 +55,51 @@ def exif_size(img): s = (s[1], s[0]) elif rotation == 8: # rotation 90 s = (s[1], s[0]) - except: + except Exception: pass return s -def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False, - rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix=''): +def create_dataloader(path, + imgsz, + batch_size, + stride, + opt, + hyp=None, + augment=False, + cache=False, + pad=0.0, + rect=False, + rank=-1, + world_size=1, + workers=8, + image_weights=False, + quad=False, + prefix=''): # Make sure only the first process in DDP process the dataset first, and the following others can use the cache with torch_distributed_zero_first(rank): - dataset = LoadImagesAndLabels(path, imgsz, batch_size, - augment=augment, # augment images - hyp=hyp, # augmentation hyperparameters - rect=rect, # rectangular training - cache_images=cache, - single_cls=opt.single_cls, - stride=int(stride), - pad=pad, - image_weights=image_weights, - prefix=prefix) + dataset = LoadImagesAndLabels( + path, + imgsz, + batch_size, + augment=augment, # augment images + hyp=hyp, # augmentation hyperparameters + rect=rect, # rectangular training + cache_images=cache, + single_cls=opt.single_cls, + stride=int(stride), + pad=pad, + image_weights=image_weights, + prefix=prefix) batch_size = min(batch_size, len(dataset)) - nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers - sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None + nw = min([ + os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, + workers + ]) # number of workers + sampler = torch.utils.data.distributed.DistributedSampler( + dataset) if rank != -1 else None loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader # Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader() dataloader = 
loader(dataset, @@ -87,19 +107,21 @@ def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=Fa num_workers=nw, sampler=sampler, pin_memory=True, - collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn) + collate_fn=LoadImagesAndLabels.collate_fn4 + if quad else LoadImagesAndLabels.collate_fn) return dataloader, dataset class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader): - """ Dataloader that reuses workers + """Dataloader that reuses workers. Uses same syntax as vanilla DataLoader """ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler)) + object.__setattr__(self, 'batch_sampler', + _RepeatSampler(self.batch_sampler)) self.iterator = super().__iter__() def __len__(self): @@ -110,8 +132,8 @@ def __iter__(self): yield next(self.iterator) -class _RepeatSampler(object): - """ Sampler that repeats forever +class _RepeatSampler: + """Sampler that repeats forever. Args: sampler (Sampler) @@ -126,6 +148,7 @@ def __iter__(self): class LoadImages: # for inference + def __init__(self, path, img_size=640, stride=32): p = str(Path(path).absolute()) # os-agnostic absolute path if '*' in p: @@ -178,14 +201,16 @@ def __next__(self): ret_val, img0 = self.cap.read() self.frame += 1 - print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.nframes}) {path}: ', end='') + print( + f'video {self.count + 1}/{self.nf} ({self.frame}/{self.nframes}) {path}: ', + end='') else: # Read image self.count += 1 img0 = cv2.imread(path) # BGR assert img0 is not None, 'Image Not Found ' + path - #print(f'image {self.count}/{self.nf} {path}: ', end='') + # print(f'image {self.count}/{self.nf} {path}: ', end='') # Padded resize img = letterbox(img0, self.img_size, stride=self.stride)[0] @@ -206,6 +231,7 @@ def __len__(self): class LoadWebcam: # for inference + def __init__(self, pipe='0', img_size=640, stride=32): self.img_size = img_size self.stride = stride @@ -264,28 +290,34 @@ def __len__(self): class LoadStreams: # multiple IP or RTSP cameras + def __init__(self, sources='streams.txt', img_size=640, stride=32): self.mode = 'stream' self.img_size = img_size self.stride = stride if os.path.isfile(sources): - with open(sources, 'r') as f: - sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())] + with open(sources) as f: + sources = [ + x.strip() for x in f.read().strip().splitlines() + if len(x.strip()) + ] else: sources = [sources] n = len(sources) self.imgs = [None] * n - self.sources = [clean_str(x) for x in sources] # clean source names for later + self.sources = [clean_str(x) + for x in sources] # clean source names for later for i, s in enumerate(sources): # Start the thread to read frames from the video stream print(f'{i + 1}/{n}: {s}... 
', end='') url = eval(s) if s.isnumeric() else s - if 'youtube.com/' in str(url) or 'youtu.be/' in str(url): # if source is YouTube video + if 'youtube.com/' in str(url) or 'youtu.be/' in str( + url): # if source is YouTube video check_requirements(('pafy', 'youtube_dl')) import pafy - url = pafy.new(url).getbest(preftype="mp4").url + url = pafy.new(url).getbest(preftype='mp4').url cap = cv2.VideoCapture(url) assert cap.isOpened(), f'Failed to open {s}' w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) @@ -299,10 +331,16 @@ def __init__(self, sources='streams.txt', img_size=640, stride=32): print('') # newline # check for common shapes - s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0) # shapes - self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal + s = np.stack([ + letterbox(x, self.img_size, stride=self.stride)[0].shape + for x in self.imgs + ], 0) # shapes + self.rect = np.unique( + s, axis=0).shape[0] == 1 # rect inference if all shapes equal if not self.rect: - print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.') + print( + 'WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.' + ) def update(self, index, cap): # Read next stream frame in a daemon thread @@ -329,13 +367,17 @@ def __next__(self): raise StopIteration # Letterbox - img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0] + img = [ + letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] + for x in img0 + ] # Stack img = np.stack(img, 0) # Convert - img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416 + img = img[:, :, :, ::-1].transpose(0, 3, 1, + 2) # BGR to RGB, to bsx3x416x416 img = np.ascontiguousarray(img) return self.sources, img, img0, None @@ -347,12 +389,27 @@ def __len__(self): def img2label_paths(img_paths): # Define label paths as a function of image paths sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings - return ['txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths] + return [ + 'txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) + for x in img_paths + ] class LoadImagesAndLabels(Dataset): # for training/testing - def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, - cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''): + + def __init__(self, + path, + img_size=640, + batch_size=16, + augment=False, + hyp=None, + rect=False, + image_weights=False, + cache_images=False, + single_cls=False, + stride=32, + pad=0.0, + prefix=''): self.img_size = img_size self.augment = augment self.hyp = hyp @@ -362,7 +419,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r self.mosaic_border = [-img_size // 2, -img_size // 2] self.stride = stride self.path = path - #self.albumentations = Albumentations() if augment else None + # self.albumentations = Albumentations() if augment else None try: f = [] # image files @@ -372,35 +429,48 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r f += glob.glob(str(p / '**' / '*.*'), recursive=True) # f = list(p.rglob('**/*.*')) # pathlib elif p.is_file(): # file - with open(p, 'r') as t: + with open(p) as t: t = t.read().strip().splitlines() parent = str(p.parent) + os.sep - f += [x.replace('./', parent) if x.startswith('./') else x 
for x in t] # local to global path + f += [ + x.replace('./', parent) + if x.startswith('./') else x for x in t + ] # local to global path # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib) else: raise Exception(f'{prefix}{p} does not exist') - self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats]) + self.img_files = sorted([ + x.replace('/', os.sep) for x in f + if x.split('.')[-1].lower() in img_formats + ]) # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats]) # pathlib assert self.img_files, f'{prefix}No images found' except Exception as e: - raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}') + raise Exception( + f'{prefix}Error loading data from {path}: {e}\nSee {help_url}') # Check cache self.label_files = img2label_paths(self.img_files) # labels - cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels + cache_path = (p if p.is_file() else Path( + self.label_files[0]).parent).with_suffix('.cache') # cached labels if cache_path.is_file(): cache, exists = torch.load(cache_path), True # load - #if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed - # cache, exists = self.cache_labels(cache_path, prefix), False # re-cache + # if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed + # cache, exists = self.cache_labels(cache_path, prefix), False # re-cache else: - cache, exists = self.cache_labels(cache_path, prefix), False # cache + cache, exists = self.cache_labels(cache_path, + prefix), False # cache # Display cache - nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupted, total + nf, nm, ne, nc, n = cache.pop( + 'results') # found, missing, empty, corrupted, total if exists: d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted" - tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results - assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}' + tqdm(None, desc=prefix + d, total=n, + initial=n) # display cache results + assert nf > 0 or not augment, ( + f'{prefix}No labels in {cache_path}. Can not train without labels. 
See {help_url}' + ) # Read cache cache.pop('hash') # remove hash @@ -443,18 +513,25 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r elif mini > 1: shapes[i] = [1, 1 / mini] - self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(np.int) * stride + self.batch_shapes = np.ceil( + np.array(shapes) * img_size / stride + pad).astype( + np.int) * stride # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM) self.imgs = [None] * n if cache_images: if cache_images == 'disk': - self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy') - self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files] + self.im_cache_dir = Path( + Path(self.img_files[0]).parent.as_posix() + '_npy') + self.img_npy = [ + self.im_cache_dir / Path(f).with_suffix('.npy').name + for f in self.img_files + ] self.im_cache_dir.mkdir(parents=True, exist_ok=True) gb = 0 # Gigabytes of cached images self.img_hw0, self.img_hw = [None] * n, [None] * n - results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n))) + results = ThreadPool(8).imap(lambda x: load_image(*x), + zip(repeat(self), range(n))) pbar = tqdm(enumerate(results), total=n) for i, x in pbar: if cache_images == 'disk': @@ -471,7 +548,9 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''): # Cache dataset labels, check images and read shapes x = {} # dict nm, nf, ne, nc = 0, 0, 0, 0 # number missing, found, empty, duplicate - pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files)) + pbar = tqdm(zip(self.img_files, self.label_files), + desc='Scanning images', + total=len(self.img_files)) for i, (im_file, lb_file) in enumerate(pbar): try: # verify images @@ -479,41 +558,59 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''): im.verify() # PIL verify shape = exif_size(im) # image size segments = [] # instance segments - assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels' - assert im.format.lower() in img_formats, f'invalid image format {im.format}' + assert (shape[0] > 9) & (shape[1] > + 9), f'image size {shape} <10 pixels' + assert im.format.lower( + ) in img_formats, f'invalid image format {im.format}' # verify labels if os.path.isfile(lb_file): nf += 1 # label found - with open(lb_file, 'r') as f: - l = [x.split() for x in f.read().strip().splitlines()] - if any([len(x) > 8 for x in l]): # is segment - classes = np.array([x[0] for x in l], dtype=np.float32) - segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...) - l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh) - l = np.array(l, dtype=np.float32) - if len(l): - assert l.shape[1] == 5, 'labels require 5 columns each' - assert (l >= 0).all(), 'negative labels' - assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels' - assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels' + with open(lb_file) as f: + cur_l = [ + x.split() for x in f.read().strip().splitlines() + ] + if any([len(x) > 8 for x in cur_l]): # is segment + classes = np.array([x[0] for x in cur_l], + dtype=np.float32) + segments = [ + np.array(x[1:], + dtype=np.float32).reshape(-1, 2) + for x in cur_l + ] # (cls, xy1...) 
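+                        # segments2boxes() below collapses each polygon row
+                        # (cls x1 y1 x2 y2 ...) into a (cls, xywh) box, so
+                        # segment labels pass the same 5-column checks as
+                        # plain box labels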
+ cur_l = np.concatenate((classes.reshape( + -1, 1), segments2boxes(segments)), + 1) # (cls, xywh) + cur_l = np.array(cur_l, dtype=np.float32) + if len(cur_l): + assert cur_l.shape[ + 1] == 5, 'labels require 5 columns each' + assert (cur_l >= 0).all(), 'negative labels' + assert (cur_l[:, 1:] <= 1).all( + ), 'non-normalized or out of bounds coordinate labels' + assert np.unique( + cur_l, axis=0 + ).shape[0] == cur_l.shape[0], 'duplicate labels' else: ne += 1 # label empty - l = np.zeros((0, 5), dtype=np.float32) + cur_l = np.zeros((0, 5), dtype=np.float32) else: nm += 1 # label missing - l = np.zeros((0, 5), dtype=np.float32) - x[im_file] = [l, shape, segments] + cur_l = np.zeros((0, 5), dtype=np.float32) + x[im_file] = [cur_l, shape, segments] except Exception as e: nc += 1 - print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}') + print( + f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}' + ) pbar.desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels... " \ - f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted" + f'{nf} found, {nm} missing, {ne} empty, {nc} corrupted' pbar.close() if nf == 0: - print(f'{prefix}WARNING: No labels found in {path}. See {help_url}') + print( + f'{prefix}WARNING: No labels found in {path}. See {help_url}') x['hash'] = get_hash(self.label_files + self.img_files) x['results'] = nf, nm, ne, nc, i + 1 @@ -547,9 +644,13 @@ def __getitem__(self, index): # MixUp https://arxiv.org/pdf/1710.09412.pdf if random.random() < hyp['mixup']: if random.random() < 0.8: - img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1)) + img2, labels2 = load_mosaic( + self, random.randint(0, + len(self.labels) - 1)) else: - img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1)) + img2, labels2 = load_mosaic9( + self, random.randint(0, + len(self.labels) - 1)) r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0 img = (img * r + img2 * (1 - r)).astype(np.uint8) labels = np.concatenate((labels, labels2), 0) @@ -559,29 +660,42 @@ def __getitem__(self, index): img, (h0, w0), (h, w) = load_image(self, index) # Letterbox - shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape - img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment) - shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling + shape = self.batch_shapes[self.batch[ + index]] if self.rect else self.img_size # final letterboxed shape + img, ratio, pad = letterbox(img, + shape, + auto=False, + scaleup=self.augment) + shapes = (h0, w0), ( + (h / h0, w / w0), pad) # for COCO mAP rescaling labels = self.labels[index].copy() if labels.size: # normalized xywh to pixel xyxy format - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1]) + labels[:, 1:] = xywhn2xyxy(labels[:, 1:], + ratio[0] * w, + ratio[1] * h, + padw=pad[0], + padh=pad[1]) if self.augment: # Augment imagespace if not mosaic: - img, labels = random_perspective(img, labels, - degrees=hyp['degrees'], - translate=hyp['translate'], - scale=hyp['scale'], - shear=hyp['shear'], - perspective=hyp['perspective']) - + img, labels = random_perspective( + img, + labels, + degrees=hyp['degrees'], + translate=hyp['translate'], + scale=hyp['scale'], + shear=hyp['shear'], + perspective=hyp['perspective']) - #img, labels = self.albumentations(img, labels) + # img, labels = self.albumentations(img, labels) # Augment colorspace - augment_hsv(img, 
hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v']) + augment_hsv(img, + hgain=hyp['hsv_h'], + sgain=hyp['hsv_s'], + vgain=hyp['hsv_v']) # Apply cutouts # if random.random() < 0.9: @@ -590,14 +704,17 @@ def __getitem__(self, index): if random.random() < hyp['paste_in']: sample_labels, sample_images, sample_masks = [], [], [] while len(sample_labels) < 30: - sample_labels_, sample_images_, sample_masks_ = load_samples(self, random.randint(0, len(self.labels) - 1)) + sample_labels_, sample_images_, sample_masks_ = load_samples( + self, random.randint(0, + len(self.labels) - 1)) sample_labels += sample_labels_ sample_images += sample_images_ sample_masks += sample_masks_ - #print(len(sample_labels)) + # print(len(sample_labels)) if len(sample_labels) == 0: break - labels = pastein(img, labels, sample_labels, sample_images, sample_masks) + labels = pastein(img, labels, sample_labels, sample_images, + sample_masks) nL = len(labels) # number of labels if nL: @@ -631,8 +748,8 @@ def __getitem__(self, index): @staticmethod def collate_fn(batch): img, label, path, shapes = zip(*batch) # transposed - for i, l in enumerate(label): - l[:, 0] = i # add target image index for build_targets() + for i, cur_l in enumerate(label): + cur_l[:, 0] = i # add target image index for build_targets() return torch.stack(img, 0), torch.cat(label, 0), path, shapes @staticmethod @@ -647,22 +764,28 @@ def collate_fn4(batch): for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW i *= 4 if random.random() < 0.5: - im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[ - 0].type(img[i].type()) - l = label[i] + im = F.interpolate(img[i].unsqueeze(0).float(), + scale_factor=2., + mode='bilinear', + align_corners=False)[0].type(img[i].type()) + cur_l = label[i] else: - im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2) - l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s + im = torch.cat( + (torch.cat((img[i], img[i + 1]), + 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2) + cur_l = torch.cat( + (label[i], label[i + 1] + ho, label[i + 2] + wo, + label[i + 3] + ho + wo), 0) * s img4.append(im) - label4.append(l) + label4.append(cur_l) - for i, l in enumerate(label4): - l[:, 0] = i # add target image index for build_targets() + for i, cur_l in enumerate(label4): + cur_l[:, 0] = i # add target image index for build_targets() return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4 -# Ancillary functions -------------------------------------------------------------------------------------------------- +# Ancillary functions ------------------------------------------------------------------------------------------------ def load_image(self, index): # loads 1 image from dataset, returns img, original hw, resized hw img = self.imgs[index] @@ -674,10 +797,12 @@ def load_image(self, index): r = self.img_size / max(h0, w0) # resize image to img_size if r != 1: # always resize down, only resize up if training with augmentation interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR - img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp) + img = cv2.resize(img, (int(w0 * r), int(h0 * r)), + interpolation=interp) return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized else: - return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized + return self.imgs[index], self.img_hw0[index], self.img_hw[ + 
index] # img, hw_original, hw_resized def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): @@ -690,7 +815,8 @@ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) lut_val = np.clip(x * r[2], 0, 255).astype(dtype) - img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype) + img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), + cv2.LUT(val, lut_val))).astype(dtype) cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed @@ -701,8 +827,10 @@ def hist_equalize(img, clahe=True, bgr=False): c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) yuv[:, :, 0] = c.apply(yuv[:, :, 0]) else: - yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram - return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB + yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, + 0]) # equalize Y channel histogram + return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else + cv2.COLOR_YUV2RGB) # convert YUV image to RGB def load_mosaic(self, index): @@ -710,17 +838,22 @@ def load_mosaic(self, index): labels4, segments4 = [], [] s = self.img_size - yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y - indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + yc, xc = (int(random.uniform(-x, 2 * s + x)) + for x in self.mosaic_border) # mosaic center x, y + indices = [index] + random.choices(self.indices, + k=3) # 3 additional image indices for i, index in enumerate(indices): # Load image img, _, (h, w) = load_image(self, index) # place img in img4 if i == 0: # top left - img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, + dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max( + yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - ( + y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h @@ -731,14 +864,18 @@ def load_mosaic(self, index): x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) - img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, + x1b:x2b] # img4[ymin:ymax, xmin:xmax] padw = x1a - x1b padh = y1a - y1b # Labels - labels, segments = self.labels[index].copy(), self.segments[index].copy() + labels, segments = self.labels[index].copy( + ), self.segments[index].copy() if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], w, h, padw, + padh) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padw, padh) for x in segments] labels4.append(labels) segments4.extend(segments) @@ -750,16 +887,22 @@ def load_mosaic(self, index): # img4, labels4 = replicate(img4, labels4) # replicate # Augment - #img4, labels4, segments4 = remove_background(img4, labels4, segments4) - #sample_segments(img4, 
labels4, segments4, probability=self.hyp['copy_paste']) - img4, labels4, segments4 = copy_paste(img4, labels4, segments4, probability=self.hyp['copy_paste']) - img4, labels4 = random_perspective(img4, labels4, segments4, - degrees=self.hyp['degrees'], - translate=self.hyp['translate'], - scale=self.hyp['scale'], - shear=self.hyp['shear'], - perspective=self.hyp['perspective'], - border=self.mosaic_border) # border to remove + # img4, labels4, segments4 = remove_background(img4, labels4, segments4) + # sample_segments(img4, labels4, segments4, probability=self.hyp['copy_paste']) + img4, labels4, segments4 = copy_paste(img4, + labels4, + segments4, + probability=self.hyp['copy_paste']) + img4, labels4 = random_perspective( + img4, + labels4, + segments4, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove return img4, labels4 @@ -769,14 +912,17 @@ def load_mosaic9(self, index): labels9, segments9 = [], [] s = self.img_size - indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices + indices = [index] + random.choices(self.indices, + k=8) # 8 additional image indices + wp = hp = None for i, index in enumerate(indices): # Load image img, _, (h, w) = load_image(self, index) # place img in img9 if i == 0: # center - img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles + img9 = np.full((s * 3, s * 3, img.shape[2]), 114, + dtype=np.uint8) # base image with 4 tiles h0, w0 = h, w c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates elif i == 1: # top @@ -797,22 +943,27 @@ def load_mosaic9(self, index): c = s - w, s + h0 - hp - h, s, s + h0 - hp padx, pady = c[:2] - x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords + x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords # Labels - labels, segments = self.labels[index].copy(), self.segments[index].copy() + labels, segments = self.labels[index].copy( + ), self.segments[index].copy() if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], w, h, padx, + pady) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padx, pady) for x in segments] labels9.append(labels) segments9.extend(segments) # Image - img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax] + img9[y1:y2, x1:x2] = img[y1 - pady:, + x1 - padx:] # img9[ymin:ymax, xmin:xmax] hp, wp = h, w # height, width previous # Offset - yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y + yc, xc = (int(random.uniform(0, s)) + for _ in self.mosaic_border) # mosaic center x, y img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s] # Concat/clip labels @@ -827,15 +978,21 @@ def load_mosaic9(self, index): # img9, labels9 = replicate(img9, labels9) # replicate # Augment - #img9, labels9, segments9 = remove_background(img9, labels9, segments9) - img9, labels9, segments9 = copy_paste(img9, labels9, segments9, probability=self.hyp['copy_paste']) - img9, labels9 = random_perspective(img9, labels9, segments9, - degrees=self.hyp['degrees'], - translate=self.hyp['translate'], - scale=self.hyp['scale'], - shear=self.hyp['shear'], - perspective=self.hyp['perspective'], - border=self.mosaic_border) # border to remove + # img9, labels9, segments9 = remove_background(img9, labels9, segments9) + img9, labels9, segments9 
= copy_paste(img9, + labels9, + segments9, + probability=self.hyp['copy_paste']) + img9, labels9 = random_perspective( + img9, + labels9, + segments9, + degrees=self.hyp['degrees'], + translate=self.hyp['translate'], + scale=self.hyp['scale'], + shear=self.hyp['shear'], + perspective=self.hyp['perspective'], + border=self.mosaic_border) # border to remove return img9, labels9 @@ -845,17 +1002,22 @@ def load_samples(self, index): labels4, segments4 = [], [] s = self.img_size - yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y - indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices + yc, xc = (int(random.uniform(-x, 2 * s + x)) + for x in self.mosaic_border) # mosaic center x, y + indices = [index] + random.choices(self.indices, + k=3) # 3 additional image indices for i, index in enumerate(indices): # Load image img, _, (h, w) = load_image(self, index) # place img in img4 if i == 0: # top left - img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles - x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) + img4 = np.full((s * 2, s * 2, img.shape[2]), 114, + dtype=np.uint8) # base image with 4 tiles + x1a, y1a, x2a, y2a = max(xc - w, 0), max( + yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - ( + y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h @@ -866,14 +1028,18 @@ def load_samples(self, index): x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) - img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] + img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, + x1b:x2b] # img4[ymin:ymax, xmin:xmax] padw = x1a - x1b padh = y1a - y1b # Labels - labels, segments = self.labels[index].copy(), self.segments[index].copy() + labels, segments = self.labels[index].copy( + ), self.segments[index].copy() if labels.size: - labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format + labels[:, 1:] = xywhn2xyxy( + labels[:, 1:], w, h, padw, + padh) # normalized xywh to pixel xyxy format segments = [xyn2xy(x, w, h, padw, padh) for x in segments] labels4.append(labels) segments4.extend(segments) @@ -885,8 +1051,9 @@ def load_samples(self, index): # img4, labels4 = replicate(img4, labels4) # replicate # Augment - #img4, labels4, segments4 = remove_background(img4, labels4, segments4) - sample_labels, sample_images, sample_masks = sample_segments(img4, labels4, segments4, probability=0.5) + # img4, labels4, segments4 = remove_background(img4, labels4, segments4) + sample_labels, sample_images, sample_masks = sample_segments( + img4, labels4, segments4, probability=0.5) return sample_labels, sample_images, sample_masks @@ -898,13 +1065,14 @@ def copy_paste(img, labels, segments, probability=0.5): h, w, c = img.shape # height, width, channels im_new = np.zeros(img.shape, np.uint8) for j in random.sample(range(n), k=round(probability * n)): - l, s = labels[j], segments[j] - box = w - l[3], l[2], w - l[1], l[4] + cur_l, s = labels[j], segments[j] + box = w - cur_l[3], cur_l[2], w - cur_l[1], cur_l[4] ioa = bbox_ioa(box, labels[:, 
1:5]) # intersection over area if (ioa < 0.30).all(): # allow 30% obscuration of existing labels - labels = np.concatenate((labels, [[l[0], *box]]), 0) + labels = np.concatenate((labels, [[cur_l[0], *box]]), 0) segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1)) - cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED) + cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, + (255, 255, 255), cv2.FILLED) result = cv2.bitwise_and(src1=img, src2=im_new) result = cv2.flip(result, 1) # augment segments (flip left-right) @@ -922,7 +1090,8 @@ def remove_background(img, labels, segments): im_new = np.zeros(img.shape, np.uint8) img_new = np.ones(img.shape, np.uint8) * 114 for j in range(n): - cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED) + cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, + (255, 255, 255), cv2.FILLED) result = cv2.bitwise_and(src1=img, src2=im_new) @@ -941,25 +1110,29 @@ def sample_segments(img, labels, segments, probability=0.5): if probability and n: h, w, c = img.shape # height, width, channels for j in random.sample(range(n), k=round(probability * n)): - l, s = labels[j], segments[j] - box = l[1].astype(int).clip(0,w-1), l[2].astype(int).clip(0,h-1), l[3].astype(int).clip(0,w-1), l[4].astype(int).clip(0,h-1) + cur_l, _ = labels[j], segments[j] + box = cur_l[1].astype(int).clip( + 0, w - 1), cur_l[2].astype(int).clip( + 0, h - 1), cur_l[3].astype(int).clip( + 0, w - 1), cur_l[4].astype(int).clip(0, h - 1) - #print(box) + # print(box) if (box[2] <= box[0]) or (box[3] <= box[1]): continue - sample_labels.append(l[0]) + sample_labels.append(cur_l[0]) mask = np.zeros(img.shape, np.uint8) - cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED) - sample_masks.append(mask[box[1]:box[3],box[0]:box[2],:]) + cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, + (255, 255, 255), cv2.FILLED) + sample_masks.append(mask[box[1]:box[3], box[0]:box[2], :]) result = cv2.bitwise_and(src1=img, src2=mask) i = result > 0 # pixels to replace mask[i] = result[i] # cv2.imwrite('debug.jpg', img) # debug - #print(box) - sample_images.append(mask[box[1]:box[3],box[0]:box[2],:]) + # print(box) + sample_images.append(mask[box[1]:box[3], box[0]:box[2], :]) return sample_labels, sample_images, sample_masks @@ -973,15 +1146,24 @@ def replicate(img, labels): for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices x1b, y1b, x2b, y2b = boxes[i] bh, bw = y2b - y1b, x2b - x1b - yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y + yc, xc = int(random.uniform(0, h - bh)), int(random.uniform( + 0, w - bw)) # offset x, y x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh] - img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] - labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0) + img[y1a:y2a, x1a:x2a] = img[y1b:y2b, + x1b:x2b] # img4[ymin:ymax, xmin:xmax] + labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], + axis=0) return img, labels -def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32): +def letterbox(img, + new_shape=(640, 640), + color=(114, 114, 114), + auto=True, + scaleFill=False, + scaleup=True, + stride=32): # Resize and pad image while meeting stride-multiple constraints shape = img.shape[:2] # current shape [height, width] if isinstance(new_shape, int): @@ -995,13 +1177,15 @@ def letterbox(img, 
new_shape=(640, 640), color=(114, 114, 114), auto=True, scale # Compute padding ratio = r, r # width, height ratios new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) - dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding + dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[ + 1] # wh padding if auto: # minimum rectangle dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding elif scaleFill: # stretch dw, dh = 0.0, 0.0 new_unpad = (new_shape[1], new_shape[0]) - ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios + ratio = new_shape[1] / shape[1], new_shape[0] / shape[ + 0] # width, height ratios dw /= 2 # divide padding into 2 sides dh /= 2 @@ -1010,11 +1194,24 @@ def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scale img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) - img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border + img = cv2.copyMakeBorder(img, + top, + bottom, + left, + right, + cv2.BORDER_CONSTANT, + value=color) # add border return img, ratio, (dw, dh) -def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, +def random_perspective(img, + targets=(), + segments=(), + degrees=10, + translate=.1, + scale=.1, + shear=10, + perspective=0.0, border=(0, 0)): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] @@ -1029,8 +1226,10 @@ def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, s # Perspective P = np.eye(3) - P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y) - P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x) + P[2, 0] = random.uniform(-perspective, + perspective) # x perspective (about y) + P[2, 1] = random.uniform(-perspective, + perspective) # y perspective (about x) # Rotation and Scale R = np.eye(3) @@ -1042,21 +1241,32 @@ def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, s # Shear S = np.eye(3) - S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) - S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / + 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / + 180) # y shear (deg) # Translation T = np.eye(3) - T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels) - T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels) + T[0, 2] = random.uniform(0.5 - translate, + 0.5 + translate) * width # x translation (pixels) + T[1, 2] = random.uniform( + 0.5 - translate, 0.5 + translate) * height # y translation (pixels) # Combined rotation matrix M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT - if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if (border[0] != 0) or (border[1] != + 0) or (M != np.eye(3)).any(): # image changed if perspective: - img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(114, 114, 114)) + img = cv2.warpPerspective(img, + M, + dsize=(width, height), + borderValue=(114, 114, 114)) else: # affine - img = cv2.warpAffine(img, M[:2], 
dsize=(width, height), borderValue=(114, 114, 114)) + img = cv2.warpAffine(img, + M[:2], + dsize=(width, height), + borderValue=(114, 114, 114)) # Visualize # import matplotlib.pyplot as plt @@ -1075,40 +1285,56 @@ def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, s xy = np.ones((len(segment), 3)) xy[:, :2] = segment xy = xy @ M.T # transform - xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine + xy = xy[:, : + 2] / xy[:, 2: + 3] if perspective else xy[:, : + 2] # perspective rescale or affine # clip new[i] = segment2box(xy, width, height) else: # warp boxes xy = np.ones((n * 4, 3)) - xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape( + n * 4, 2) # x1y1, x2y2, x1y2, x2y1 xy = xy @ M.T # transform - xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine + xy = (xy[:, :2] / + xy[:, 2:3] if perspective else xy[:, :2]).reshape( + n, 8) # perspective rescale or affine # create new boxes x = xy[:, [0, 2, 4, 6]] y = xy[:, [1, 3, 5, 7]] - new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T + new = np.concatenate( + (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T # clip new[:, [0, 2]] = new[:, [0, 2]].clip(0, width) new[:, [1, 3]] = new[:, [1, 3]].clip(0, height) # filter candidates - i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10) + i = box_candidates(box1=targets[:, 1:5].T * s, + box2=new.T, + area_thr=0.01 if use_segments else 0.10) targets = targets[i] targets[:, 1:5] = new[i] return img, targets -def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n) +def box_candidates(box1, + box2, + wh_thr=2, + ar_thr=20, + area_thr=0.1, + eps=1e-16): # box1(4,n), box2(4,n) # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio w1, h1 = box1[2] - box1[0], box1[3] - box1[1] w2, h2 = box2[2] - box2[0], box2[3] - box2[1] ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio - return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates + return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / + (w1 * h1 + eps) > area_thr) & ( + ar < ar_thr) # candidates def bbox_ioa(box1, box2): @@ -1135,7 +1361,9 @@ def cutout(image, labels): h, w = image.shape[:2] # create random masks - scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction + scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [ + 0.03125 + ] * 16 # image size fraction for s in scales: mask_h = random.randint(1, int(h * s)) mask_w = random.randint(1, int(w * s)) @@ -1147,7 +1375,8 @@ def cutout(image, labels): ymax = min(h, ymin + mask_h) # apply random color mask - image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)] + image[ymin:ymax, + xmin:xmax] = [random.randint(64, 191) for _ in range(3)] # return unobscured labels if len(labels) and s > 0.03: @@ -1163,7 +1392,9 @@ def pastein(image, labels, sample_labels, sample_images, sample_masks): h, w = image.shape[:2] # create random masks - scales = [0.75] * 2 + [0.5] * 4 + [0.25] * 4 + [0.125] * 4 + [0.0625] * 6 # image size fraction + scales = [0.75] * 2 + [0.5] * 4 + [0.25] * 4 + [0.125] * 4 + [ + 0.0625 + ] * 6 # image size fraction for s in scales: if random.random() < 0.2: 
continue @@ -1182,39 +1413,44 @@ def pastein(image, labels, sample_labels, sample_images, sample_masks): else: ioa = np.zeros(1) - if (ioa < 0.30).all() and len(sample_labels) and (xmax > xmin+20) and (ymax > ymin+20): # allow 30% obscuration of existing labels - sel_ind = random.randint(0, len(sample_labels)-1) - #print(len(sample_labels)) - #print(sel_ind) - #print((xmax-xmin, ymax-ymin)) - #print(image[ymin:ymax, xmin:xmax].shape) - #print([[sample_labels[sel_ind], *box]]) - #print(labels.shape) + if (ioa < 0.30 + ).all() and len(sample_labels) and (xmax > xmin + 20) and ( + ymax > ymin + 20): # allow 30% obscuration of existing labels + sel_ind = random.randint(0, len(sample_labels) - 1) + # print(len(sample_labels)) + # print(sel_ind) + # print((xmax-xmin, ymax-ymin)) + # print(image[ymin:ymax, xmin:xmax].shape) + # print([[sample_labels[sel_ind], *box]]) + # print(labels.shape) hs, ws, cs = sample_images[sel_ind].shape - r_scale = min((ymax-ymin)/hs, (xmax-xmin)/ws) - r_w = int(ws*r_scale) - r_h = int(hs*r_scale) + r_scale = min((ymax - ymin) / hs, (xmax - xmin) / ws) + r_w = int(ws * r_scale) + r_h = int(hs * r_scale) if (r_w > 10) and (r_h > 10): r_mask = cv2.resize(sample_masks[sel_ind], (r_w, r_h)) r_image = cv2.resize(sample_images[sel_ind], (r_w, r_h)) - temp_crop = image[ymin:ymin+r_h, xmin:xmin+r_w] + temp_crop = image[ymin:ymin + r_h, xmin:xmin + r_w] m_ind = r_mask > 0 if m_ind.astype(np.int).sum() > 60: temp_crop[m_ind] = r_image[m_ind] - #print(sample_labels[sel_ind]) - #print(sample_images[sel_ind].shape) - #print(temp_crop.shape) - box = np.array([xmin, ymin, xmin+r_w, ymin+r_h], dtype=np.float32) + # print(sample_labels[sel_ind]) + # print(sample_images[sel_ind].shape) + # print(temp_crop.shape) + box = np.array([xmin, ymin, xmin + r_w, ymin + r_h], + dtype=np.float32) if len(labels): - labels = np.concatenate((labels, [[sample_labels[sel_ind], *box]]), 0) + labels = np.concatenate( + (labels, [[sample_labels[sel_ind], *box]]), 0) else: labels = np.array([[sample_labels[sel_ind], *box]]) - image[ymin:ymin+r_h, xmin:xmin+r_w] = temp_crop + image[ymin:ymin + r_h, xmin:xmin + r_w] = temp_crop return labels + class Albumentations: # YOLOv5 Albumentations class (optional, only used if package is installed) def __init__(self): @@ -1223,20 +1459,27 @@ def __init__(self): self.transform = A.Compose([ A.CLAHE(p=0.01), - A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.01), + A.RandomBrightnessContrast( + brightness_limit=0.2, contrast_limit=0.2, p=0.01), A.RandomGamma(gamma_limit=[80, 120], p=0.01), A.Blur(p=0.01), A.MedianBlur(p=0.01), A.ToGray(p=0.01), - A.ImageCompression(quality_lower=75, p=0.01),], - bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels'])) + A.ImageCompression(quality_lower=75, p=0.01), + ], + bbox_params=A.BboxParams( + format='pascal_voc', + label_fields=['class_labels'])) - #logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p)) + # logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p)) def __call__(self, im, labels, p=1.0): if self.transform and random.random() < p: - new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed - im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])]) + new = self.transform(image=im, + bboxes=labels[:, 1:], + class_labels=labels[:, 0]) # transformed + im, labels = new['image'], np.array( + [[c, *b] for 
c, b in zip(new['class_labels'], new['bboxes'])]) return im, labels @@ -1255,11 +1498,14 @@ def flatten_recursive(path='../coco'): shutil.copyfile(file, new_path / Path(file).name) -def extract_boxes(path='../coco/'): # from utils.datasets import *; extract_boxes('../coco128') +def extract_boxes( + path='../coco/' +): # from utils.datasets import *; extract_boxes('../coco128') # Convert detection dataset into classification dataset, with one directory per class path = Path(path) # images dir - shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing + shutil.rmtree(path / 'classifier') if ( + path / 'classifier').is_dir() else None # remove existing files = list(path.rglob('*.*')) n = len(files) # number of files for im_file in tqdm(files, total=n): @@ -1271,12 +1517,16 @@ def extract_boxes(path='../coco/'): # from utils.datasets import *; extract_box # labels lb_file = Path(img2label_paths([str(im_file)])[0]) if Path(lb_file).exists(): - with open(lb_file, 'r') as f: - lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels + with open(lb_file) as f: + lb = np.array( + [x.split() for x in f.read().strip().splitlines()], + dtype=np.float32) # labels for j, x in enumerate(lb): c = int(x[0]) # class - f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename + f = ( + path / 'classifier' + ) / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename if not f.parent.is_dir(): f.parent.mkdir(parents=True) @@ -1285,9 +1535,12 @@ def extract_boxes(path='../coco/'): # from utils.datasets import *; extract_box b[2:] = b[2:] * 1.2 + 3 # pad b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) - b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image + b[[0, 2]] = np.clip(b[[0, 2]], 0, + w) # clip boxes outside of image b[[1, 3]] = np.clip(b[[1, 3]], 0, h) - assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}' + assert cv2.imwrite(str(f), + im[b[1]:b[3], + b[0]:b[2]]), f'box failure in {f}' def autosplit(path='../coco', weights=(0.9, 0.1, 0.0), annotated_only=False): @@ -1299,22 +1552,28 @@ def autosplit(path='../coco', weights=(0.9, 0.1, 0.0), annotated_only=False): annotated_only: Only use images with an annotated txt file """ path = Path(path) # images dir - files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in img_formats], []) # image files only + files = sum([list(path.rglob(f'*.{img_ext}')) for img_ext in img_formats], + []) # image files only n = len(files) # number of files - indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split + indices = random.choices([0, 1, 2], weights=weights, + k=n) # assign each image to a split - txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files - [(path / x).unlink() for x in txt if (path / x).exists()] # remove existing + txt = ['autosplit_train.txt', 'autosplit_val.txt', + 'autosplit_test.txt'] # 3 txt files + [(path / x).unlink() for x in txt + if (path / x).exists()] # remove existing - print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only) + print(f'Autosplitting images from {path}' + + ', using *.txt labeled images only' * annotated_only) for i, img in tqdm(zip(indices, files), total=n): - if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label + # check label + if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): with open(path / txt[i], 'a') 
as f:
                 f.write(str(img) + '\n')  # add image to txt file


 def load_segmentations(self, index):
     key = '/work/handsomejw66/coco17/' + self.img_files[index]
-    #print(key)
+    # print(key)
     # /work/handsomejw66/coco17/
     return self.segs[key]
diff --git a/utils/general.py b/utils/general.py
index 1f13fe6..5af0043 100644
--- a/utils/general.py
+++ b/utils/general.py
@@ -14,22 +14,25 @@ import pandas as pd
 import torch
 import torchvision
-import yaml

-from utils.metrics import fitness
 from utils.torch_utils import init_torch_seeds

 # Settings
 torch.set_printoptions(linewidth=320, precision=5, profile='long')
-np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format})  # format short g, %precision=5
+np.set_printoptions(linewidth=320,
+                    formatter={'float_kind': '{:11.5g}'.format
+                               })  # format short g, %precision=5
 pd.options.display.max_columns = 10
-cv2.setNumThreads(0)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
+cv2.setNumThreads(
+    0
+)  # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
-os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8))  # NumExpr max threads
+os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(),
+                                            8))  # NumExpr max threads


 def set_logging(rank=-1):
     logging.basicConfig(
-        format="%(message)s",
+        format='%(message)s',
         level=logging.INFO if rank in [-1, 0] else logging.WARN)
@@ -53,14 +56,16 @@ def isdocker():

 def emojis(str=''):
     # Return platform-dependent emoji-safe version of string
-    return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str
+    return str.encode().decode(
+        'ascii', 'ignore') if platform.system() == 'Windows' else str


 def check_online():
     # Check internet connectivity
     import socket
     try:
-        socket.create_connection(("1.1.1.1", 443), 5)  # check host accesability
+        socket.create_connection(('1.1.1.1', 443),
+                                 5)  # check host accessibility
         return True
     except OSError:
         return False
@@ -75,9 +80,15 @@ def check_git_status():
         assert check_online(), 'skipping check (offline)'

         cmd = 'git fetch && git config --get remote.origin.url'
-        url = subprocess.check_output(cmd, shell=True).decode().strip().rstrip('.git')  # github repo url
-        branch = subprocess.check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip()  # checked out
-        n = int(subprocess.check_output(f'git rev-list {branch}..origin/master --count', shell=True))  # commits behind
+        url = subprocess.check_output(cmd, shell=True).decode().strip().rstrip(
+            '.git')  # github repo url
+        branch = subprocess.check_output(
+            'git rev-parse --abbrev-ref HEAD',
+            shell=True).decode().strip()  # checked out
+        n = int(
+            subprocess.check_output(
+                f'git rev-list {branch}..origin/master --count',
+                shell=True))  # commits behind
         if n > 0:
             s = f"⚠️ WARNING: code is out of date by {n} commit{'s' * (n > 1)}. " \
                 f"Use 'git pull' to update or 'git clone {url}' to download latest."
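
For readers untangling the reflowed check_git_status() hunk above: the logic amounts to three shell calls. A minimal standalone sketch, assuming a git checkout with an 'origin' remote (the helper name commits_behind is illustrative, not part of the repo):

    import subprocess

    def commits_behind(remote_branch='origin/master'):
        # branch currently checked out
        local = subprocess.check_output('git rev-parse --abbrev-ref HEAD',
                                        shell=True).decode().strip()
        # commits reachable from the remote branch but not from local HEAD
        count = subprocess.check_output(
            f'git rev-list {local}..{remote_branch} --count', shell=True)
        return int(count)

    print(commits_behind())  # 0 when the checkout is up to date
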
@@ -95,9 +106,13 @@ def check_requirements(requirements='requirements.txt', exclude=()): if isinstance(requirements, (str, Path)): # requirements.txt file file = Path(requirements) if not file.exists(): - print(f"{prefix} {file.resolve()} not found, check failed.") + print(f'{prefix} {file.resolve()} not found, check failed.') return - requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(file.open()) if x.name not in exclude] + requirements = [ + f'{x.name}{x.specifier}' + for x in pkg.parse_requirements(file.open()) + if x.name not in exclude + ] else: # list or tuple of packages requirements = [x for x in requirements if x not in exclude] @@ -107,8 +122,10 @@ def check_requirements(requirements='requirements.txt', exclude=()): pkg.require(r) except Exception as e: # DistributionNotFound or VersionConflict if requirements not met n += 1 - print(f"{prefix} {e.req} not found, attempting auto-update...") - print(subprocess.check_output(f"pip install '{e.req}'", shell=True).decode()) + print(f'{prefix} {e.req} not found, attempting auto-update...') + print( + subprocess.check_output(f"pip install '{e.req}'", + shell=True).decode()) if n: # if packages updated source = file.resolve() if 'file' in locals() else requirements @@ -121,21 +138,26 @@ def check_img_size(img_size, s=32): # Verify img_size is a multiple of stride s new_size = make_divisible(img_size, int(s)) # ceil gs-multiple if new_size != img_size: - print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size)) + print( + 'WARNING: --img-size %g must be multiple of max stride %g, updating to %g' + % (img_size, s, new_size)) return new_size def check_imshow(): # Check if environment supports image displays try: - assert not isdocker(), 'cv2.imshow() is disabled in Docker environments' + assert not isdocker( + ), 'cv2.imshow() is disabled in Docker environments' cv2.imshow('test', np.zeros((1, 1, 3))) cv2.waitKey(1) cv2.destroyAllWindows() cv2.waitKey(1) return True except Exception as e: - print(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}') + print( + f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}' + ) return False @@ -146,7 +168,9 @@ def check_file(file): else: files = glob.glob('./**/' + file, recursive=True) # find file assert len(files), f'File Not Found: {file}' # assert file was found - assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique + assert len( + files + ) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique return files[0] # return file @@ -154,18 +178,25 @@ def check_dataset(dict): # Download dataset if not found locally val, s = dict.get('val'), dict.get('download') if val and len(val): - val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path + val = [ + Path(x).resolve() + for x in (val if isinstance(val, list) else [val]) + ] # val path if not all(x.exists() for x in val): - print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()]) + print('\nWARNING: Dataset not found, nonexistent paths: %s' % + [str(x) for x in val if not x.exists()]) if s and len(s): # download script print('Downloading %s ...' 
% s) if s.startswith('http') and s.endswith('.zip'): # URL f = Path(s).name # filename torch.hub.download_url_to_file(s, f) - r = os.system('unzip -q %s -d ../ && rm %s' % (f, f)) # unzip + r = os.system('unzip -q %s -d ../ && rm %s' % + (f, f)) # unzip else: # bash script r = os.system(s) - print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure')) # analyze return value + print('Dataset autodownload %s\n' % + ('success' + if r == 0 else 'failure')) # analyze return value else: raise Exception('Dataset not found.') @@ -177,7 +208,7 @@ def make_divisible(x, divisor): def clean_str(s): # Cleans a string by replacing special characters with underscore _ - return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s) + return re.sub(pattern='[|@#!¡·$€%&()=?¿^*;:,¨´><+]', repl='_', string=s) def one_cycle(y1=0.0, y2=1.0, steps=100): @@ -187,26 +218,29 @@ def one_cycle(y1=0.0, y2=1.0, steps=100): def colorstr(*input): # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world') - *args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string - colors = {'black': '\033[30m', # basic colors - 'red': '\033[31m', - 'green': '\033[32m', - 'yellow': '\033[33m', - 'blue': '\033[34m', - 'magenta': '\033[35m', - 'cyan': '\033[36m', - 'white': '\033[37m', - 'bright_black': '\033[90m', # bright colors - 'bright_red': '\033[91m', - 'bright_green': '\033[92m', - 'bright_yellow': '\033[93m', - 'bright_blue': '\033[94m', - 'bright_magenta': '\033[95m', - 'bright_cyan': '\033[96m', - 'bright_white': '\033[97m', - 'end': '\033[0m', # misc - 'bold': '\033[1m', - 'underline': '\033[4m'} + *args, string = input if len(input) > 1 else ('blue', 'bold', input[0] + ) # color arguments, string + colors = { + 'black': '\033[30m', # basic colors + 'red': '\033[31m', + 'green': '\033[32m', + 'yellow': '\033[33m', + 'blue': '\033[34m', + 'magenta': '\033[35m', + 'cyan': '\033[36m', + 'white': '\033[37m', + 'bright_black': '\033[90m', # bright colors + 'bright_red': '\033[91m', + 'bright_green': '\033[92m', + 'bright_yellow': '\033[93m', + 'bright_blue': '\033[94m', + 'bright_magenta': '\033[95m', + 'bright_cyan': '\033[96m', + 'bright_white': '\033[97m', + 'end': '\033[0m', # misc + 'bold': '\033[1m', + 'underline': '\033[4m' + } return ''.join(colors[x] for x in args) + f'{string}' + colors['end'] @@ -231,21 +265,27 @@ def labels_to_class_weights(labels, nc=80): def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): # Produces image weights based on class_weights and image contents - class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels]) + class_counts = np.array( + [np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels]) image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1) # index = random.choices(range(n), weights=image_weights, k=1) # weight image sample return image_weights -def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) +def coco80_to_coco91_class( +): # converts 80-index (val2014) to 91-index (paper) # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet - x 
= [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] + x = [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, + 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, + 85, 86, 87, 88, 89, 90 + ] return x @@ -292,7 +332,8 @@ def segment2box(segment, width=640, height=640): x, y = segment.T # segment xy inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) x, y, = x[inside], y[inside] - return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy + return np.array([x.min(), y.min(), x.max(), + y.max()]) if any(x) else np.zeros((1, 4)) # xyxy def segments2boxes(segments): @@ -310,15 +351,19 @@ def resample_segments(segments, n=1000): s = np.concatenate((s, s[0:1, :]), axis=0) x = np.linspace(0, len(s) - 1, n) xp = np.arange(len(s)) - segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy + segments[i] = np.concatenate([ + np.interp(x, xp, s[:, i]) for i in range(2) + ]).reshape(2, -1).T # segment xy return segments def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): # Rescale coords (xyxy) from img1_shape to img0_shape if ratio_pad is None: # calculate from img0_shape - gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new - pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding + gain = min(img1_shape[0] / img0_shape[0], + img1_shape[1] / img0_shape[1]) # gain = old / new + pad = (img1_shape[1] - img0_shape[1] * gain) / 2, ( + img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding else: gain = ratio_pad[0][0] pad = ratio_pad[1] @@ -338,7 +383,13 @@ def clip_coords(boxes, img_shape): boxes[:, 3].clamp_(0, img_shape[0]) # y2 -def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): +def bbox_iou(box1, + box2, + x1y1x2y2=True, + GIoU=False, + DIoU=False, + CIoU=False, + eps=1e-7): # Returns the IoU of box1 to box2. 
box1 is 4, box2 is nx4 box2 = box2.T @@ -364,16 +415,20 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= iou = inter / union if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width + cw = torch.max(b1_x2, b2_x2) - torch.min( + b1_x1, b2_x1) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 - c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared - rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + - (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared + c2 = cw**2 + ch**2 + eps # convex diagonal squared + # center distance squared + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2)**2 + + (b2_y1 + b2_y2 - b1_y1 - b1_y2)**2) / 4 if DIoU: return iou - rho2 / c2 # DIoU elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2) + v = (4 / math.pi**2) * torch.pow( + torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / + (h1 + eps)), 2) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) # CIoU @@ -384,7 +439,14 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= return iou # IoU -def bbox_alpha_iou(box1, box2, x1y1x2y2=False, GIoU=False, DIoU=False, CIoU=False, alpha=2, eps=1e-9): +def bbox_alpha_iou(box1, + box2, + x1y1x2y2=False, + GIoU=False, + DIoU=False, + CIoU=False, + alpha=2, + eps=1e-9): # Returns tsqrt_he IoU of box1 to box2. box1 is 4, box2 is nx4 box2 = box2.T @@ -409,37 +471,41 @@ def bbox_alpha_iou(box1, box2, x1y1x2y2=False, GIoU=False, DIoU=False, CIoU=Fals # change iou into pow(iou+eps) # iou = inter / union - iou = torch.pow(inter/union + eps, alpha) + iou = torch.pow(inter / union + eps, alpha) # beta = 2 * alpha if GIoU or DIoU or CIoU: - cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width + cw = torch.max(b1_x2, b2_x2) - torch.min( + b1_x1, b2_x1) # convex (smallest enclosing box) width ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 - c2 = (cw ** 2 + ch ** 2) ** alpha + eps # convex diagonal + c2 = (cw**2 + ch**2)**alpha + eps # convex diagonal rho_x = torch.abs(b2_x1 + b2_x2 - b1_x1 - b1_x2) rho_y = torch.abs(b2_y1 + b2_y2 - b1_y1 - b1_y2) - rho2 = ((rho_x ** 2 + rho_y ** 2) / 4) ** alpha # center distance + rho2 = ((rho_x**2 + rho_y**2) / 4)**alpha # center distance if DIoU: return iou - rho2 / c2 # DIoU elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 - v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + v = (4 / math.pi**2) * torch.pow( + torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) with torch.no_grad(): alpha_ciou = v / ((1 + eps) - inter / union + v) # return iou - (rho2 / c2 + v * alpha_ciou) # CIoU - return iou - (rho2 / c2 + torch.pow(v * alpha_ciou + eps, alpha)) # CIoU + return iou - ( + rho2 / c2 + torch.pow(v * alpha_ciou + eps, alpha)) # CIoU else: # GIoU https://arxiv.org/pdf/1902.09630.pdf # c_area = cw * ch + eps # convex area # return iou - (c_area - union) / c_area # GIoU - c_area = torch.max(cw * ch + eps, union) # convex area - return iou - torch.pow((c_area - union) / c_area + eps, alpha) # GIoU 
+ c_area = torch.max(cw * ch + eps, union) # convex area + return iou - torch.pow( + (c_area - union) / c_area + eps, alpha) # GIoU else: - return iou # torch.log(iou+eps) or iou + return iou # torch.log(iou+eps) or iou def box_iou(box1, box2): # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py - """ - Return intersection-over-union (Jaccard index) of boxes. + """Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. Arguments: box1 (Tensor[N, 4]) @@ -457,8 +523,10 @@ def box_area(box): area2 = box_area(box2.T) # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) - inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) - return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) + inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - + torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) + return inter / (area1[:, None] + area2 - inter + ) # iou = inter / (area1 + area2 - inter) def wh_iou(wh1, wh2): @@ -466,12 +534,14 @@ def wh_iou(wh1, wh2): wh1 = wh1[:, None] # [N,1,2] wh2 = wh2[None] # [1,M,2] inter = torch.min(wh1, wh2).prod(2) # [N,M] - return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) + return inter / (wh1.prod(2) + wh2.prod(2) - inter + ) # iou = inter / (area1 + area2 - inter) def box_giou(box1, box2): - """ - Return generalized intersection-over-union (Jaccard index) between two sets of boxes. + """Return generalized intersection-over-union (Jaccard index) between two + sets of boxes. + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and ``0 <= y1 < y2``. Args: @@ -489,7 +559,8 @@ def box_area(box): area1 = box_area(box1.T) area2 = box_area(box2.T) - inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) + inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - + torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) union = (area1[:, None] + area2 - inter) iou = inter / union @@ -504,8 +575,9 @@ def box_area(box): def box_ciou(box1, box2, eps: float = 1e-7): - """ - Return complete intersection-over-union (Jaccard index) between two sets of boxes. + """Return complete intersection-over-union (Jaccard index) between two sets + of boxes. + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and ``0 <= y1 < y2``. Args: @@ -524,7 +596,8 @@ def box_area(box): area1 = box_area(box1.T) area2 = box_area(box2.T) - inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) + inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - + torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) union = (area1[:, None] + area2 - inter) iou = inter / union @@ -533,7 +606,7 @@ def box_area(box): rbi = torch.max(box1[:, None, 2:], box2[:, 2:]) whi = (rbi - lti).clamp(min=0) # [N,M,2] - diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps + diagonal_distance_squared = (whi[:, :, 0]**2) + (whi[:, :, 1]**2) + eps # centers of boxes x_p = (box1[:, None, 0] + box1[:, None, 2]) / 2 @@ -541,7 +614,7 @@ def box_area(box): x_g = (box2[:, 0] + box2[:, 2]) / 2 y_g = (box2[:, 1] + box2[:, 3]) / 2 # The distance between boxes' centers squared. 
- centers_distance_squared = (x_p - x_g) ** 2 + (y_p - y_g) ** 2 + centers_distance_squared = (x_p - x_g)**2 + (y_p - y_g)**2 w_pred = box1[:, None, 2] - box1[:, None, 0] h_pred = box1[:, None, 3] - box1[:, None, 1] @@ -549,15 +622,18 @@ def box_area(box): w_gt = box2[:, 2] - box2[:, 0] h_gt = box2[:, 3] - box2[:, 1] - v = (4 / (torch.pi ** 2)) * torch.pow((torch.atan(w_gt / h_gt) - torch.atan(w_pred / h_pred)), 2) + v = (4 / (torch.pi**2)) * torch.pow( + (torch.atan(w_gt / h_gt) - torch.atan(w_pred / h_pred)), 2) with torch.no_grad(): alpha = v / (1 - iou + v + eps) - return iou - (centers_distance_squared / diagonal_distance_squared) - alpha * v + return iou - (centers_distance_squared / + diagonal_distance_squared) - alpha * v def box_diou(box1, box2, eps: float = 1e-7): - """ - Return distance intersection-over-union (Jaccard index) between two sets of boxes. + """Return distance intersection-over-union (Jaccard index) between two sets + of boxes. + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and ``0 <= y1 < y2``. Args: @@ -576,7 +652,8 @@ def box_area(box): area1 = box_area(box1.T) area2 = box_area(box2.T) - inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) + inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - + torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) union = (area1[:, None] + area2 - inter) iou = inter / union @@ -585,7 +662,7 @@ def box_area(box): rbi = torch.max(box1[:, None, 2:], box2[:, 2:]) whi = (rbi - lti).clamp(min=0) # [N,M,2] - diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps + diagonal_distance_squared = (whi[:, :, 0]**2) + (whi[:, :, 1]**2) + eps # centers of boxes x_p = (box1[:, None, 0] + box1[:, None, 2]) / 2 @@ -593,16 +670,21 @@ def box_area(box): x_g = (box2[:, 0] + box2[:, 2]) / 2 y_g = (box2[:, 1] + box2[:, 3]) / 2 # The distance between boxes' centers squared. - centers_distance_squared = (x_p - x_g) ** 2 + (y_p - y_g) ** 2 + centers_distance_squared = (x_p - x_g)**2 + (y_p - y_g)**2 # The distance IoU is the IoU penalized by a normalized # distance between boxes' centers squared. return iou - (centers_distance_squared / diagonal_distance_squared) -def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, +def non_max_suppression(prediction, + conf_thres=0.25, + iou_thres=0.45, + classes=None, + agnostic=False, + multi_label=False, labels=()): - """Runs Non-Maximum Suppression (NMS) on inference results + """Runs Non-Maximum Suppression (NMS) on inference results. 
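Expects prediction shaped (batch, boxes, 5 + num_classes): xywh, objectness and per-class scores (inferred from the candidate mask and class slicing below).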
Returns: list of detections, one (n,6) tensor per image [xyxy, conf, cls] @@ -612,7 +694,9 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non xc = prediction[..., 4] > conf_thres # candidates # Settings - min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height + # (pixels) minimum and maximum box width and height + # min_wh = 2 + max_wh = 4096 max_det = 300 # maximum number of detections per image max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() time_limit = 10.0 # seconds to quit after @@ -621,7 +705,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non merge = False # use merge-NMS t = time.time() - output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] + output = [torch.zeros( + (0, 6), device=prediction.device)] * prediction.shape[0] for xi, x in enumerate(prediction): # image index, image inference # Apply constraints # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height @@ -629,11 +714,11 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non # Cat apriori labels if autolabelling if labels and len(labels[xi]): - l = labels[xi] - v = torch.zeros((len(l), nc + 5), device=x.device) - v[:, :4] = l[:, 1:5] # box + cur_l = labels[xi] + v = torch.zeros((len(cur_l), nc + 5), device=x.device) + v[:, :4] = cur_l[:, 1:5] # box v[:, 4] = 1.0 # conf - v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls + v[range(len(cur_l)), cur_l[:, 0].long() + 5] = 1.0 # cls x = torch.cat((x, v), 0) # If none remain process next image @@ -642,8 +727,10 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non # Compute conf if nc == 1: - x[:, 5:] = x[:, 4:5] # for models with one class, cls_loss is 0 and cls_conf is always 0.5, - # so there is no need to multiplicate. + x[:, + 5:] = x[:, 4: + 5] # for models with one class, cls_loss is 0 and cls_conf is always 0.5, + # so there is no need to multiply.
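# Illustrative: with a single class the class logit is never trained, so
# cls_conf sits at sigmoid(0) = 0.5; copying obj_conf instead of multiplying
# by that constant 0.5 leaves the ranking of candidates unchanged.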
else: x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf @@ -656,7 +743,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) else: # best class only conf, j = x[:, 5:].max(1, keepdim=True) - x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + x = torch.cat((box, conf, j.float()), + 1)[conf.view(-1) > conf_thres] # Filter by class if classes is not None: @@ -671,19 +759,23 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non if not n: # no boxes continue elif n > max_nms: # excess boxes - x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence + x = x[x[:, 4].argsort( + descending=True)[:max_nms]] # sort by confidence # Batched NMS c = x[:, 5:6] * (0 if agnostic else max_wh) # classes - boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores + boxes, scores = x[:, :4] + c, x[:, + 4] # boxes (offset by class), scores i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS if i.shape[0] > max_det: # limit detections i = i[:max_det] - if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) + if merge and (1 < n < + 3E3): # Merge NMS (boxes merged using weighted mean) # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix weights = iou * scores[None] # box weights - x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum( + 1, keepdim=True) # merged boxes if redundant: i = i[iou.sum(1) > 1] # require redundancy @@ -695,7 +787,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non return output -def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer() +def strip_optimizer(f='best.pt', + s=''): # from utils.general import *; strip_optimizer() # Strip optimizer from 'f' to finalize training, optionally save as 's' x = torch.load(f, map_location=torch.device('cpu')) if x.get('ema'): @@ -705,7 +798,9 @@ def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_op x['epoch'] = -1 torch.save(x, s or f) mb = os.path.getsize(s or f) / 1E6 # filesize - print(f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB") + print( + f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB" + ) def increment_path(path, exist_ok=True, sep=''): @@ -714,8 +809,8 @@ def increment_path(path, exist_ok=True, sep=''): if (path.exists() and exist_ok) or (not path.exists()): return str(path) else: - dirs = glob.glob(f"{path}{sep}*") # similar paths - matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs] + dirs = glob.glob(f'{path}{sep}*') # similar paths + matches = [re.search(rf'%s{sep}(\d+)' % path.stem, d) for d in dirs] i = [int(m.groups()[0]) for m in matches if m] # indices n = max(i) + 1 if i else 2 # increment number - return f"{path}{sep}{n}" # update path + return f'{path}{sep}{n}' # update path diff --git a/utils/loss.py b/utils/loss.py index 6283e77..d2fa65c 100644 --- a/utils/loss.py +++ b/utils/loss.py @@ -7,7 +7,9 @@ from utils.torch_utils import is_parallel -def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 +def smooth_BCE( + eps=0.1 +): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 # return positive, negative label smoothing BCE targets 
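# e.g. eps=0.1 yields targets (0.95, 0.05) in the return below, and eps=0.0
# recovers the hard (1.0, 0.0) targets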
return 1.0 - 0.5 * eps, 0.5 * eps @@ -15,7 +17,7 @@ def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#iss class FocalLoss(nn.Module): # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): - super(FocalLoss, self).__init__() + super().__init__() self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() self.gamma = gamma self.alpha = alpha @@ -31,7 +33,7 @@ def forward(self, pred, true): pred_prob = torch.sigmoid(pred) # prob from logits p_t = true * pred_prob + (1 - true) * (1 - pred_prob) alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) - modulating_factor = (1.0 - p_t) ** self.gamma + modulating_factor = (1.0 - p_t)**self.gamma loss *= alpha_factor * modulating_factor if self.reduction == 'mean': @@ -45,35 +47,44 @@ def forward(self, pred, true): class ComputeLoss: # Compute losses def __init__(self, model, autobalance=False): - super(ComputeLoss, self).__init__() + super().__init__() device = next(model.parameters()).device # get model device h = model.hyp # hyperparameters # Define criteria - BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) - BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) + BCEcls = nn.BCEWithLogitsLoss( + pos_weight=torch.tensor([h['cls_pw']], device=device)) + BCEobj = nn.BCEWithLogitsLoss( + pos_weight=torch.tensor([h['obj_pw']], device=device)) # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 - self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets + self.cp, self.cn = smooth_BCE(eps=h.get( + 'label_smoothing', 0.0)) # positive, negative BCE targets # Focal loss g = h['fl_gamma'] # focal loss gamma if g > 0: BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) - det = model.module.model_h2[-1] if is_parallel(model) else model.model_h2[-1] # Detect() module - self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7 - #self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.1, .05]) # P3-P7 - #self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.5, 0.4, .1]) # P3-P7 - self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index + det = model.module.model_h2[-1] if is_parallel( + model) else model.model_h2[-1] # Detect() module + self.balance = { + 3: [4.0, 1.0, 0.4] + }.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7 + # self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.1, .05]) # P3-P7 + # self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.5, 0.4, .1]) # P3-P7 + self.ssi = list( + det.stride).index(16) if autobalance else 0 # stride 16 index self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance for k in 'na', 'nc', 'nl', 'anchors': setattr(self, k, getattr(det, k)) def __call__(self, p, targets): # predictions, targets, model device = targets.device - lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) - tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets + lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros( + 1, device=device), torch.zeros(1, device=device) + tcls, tbox, indices, anchors = self.build_targets(p, + targets) # targets # Losses for i, pi in enumerate(p): # layer index, layer predictions @@ -82,23 +93,29 @@ def __call__(self, p, targets): 
# predictions, targets, model n = b.shape[0] # number of targets if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + ps = pi[b, a, gj, + gi] # prediction subset corresponding to targets # Regression pxy = ps[:, :2].sigmoid() * 2. - 0.5 - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + pwh = (ps[:, 2:4].sigmoid() * 2)**2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box - iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness - tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio + tobj[b, a, gj, + gi] = (1.0 - + self.gr) + self.gr * iou.detach().clamp(0).type( + tobj.dtype) # iou ratio # Classification if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets + t = torch.full_like(ps[:, 5:], self.cn, + device=device) # targets t[range(n), tcls[i]] = self.cp - #t[t==self.cp] = iou.detach().clamp(0).type(t.dtype) + # t[t==self.cp] = iou.detach().clamp(0).type(t.dtype) lcls += self.BCEcls(ps[:, 5:], t) # BCE # Append targets to text file @@ -108,7 +125,8 @@ def __call__(self, p, targets): # predictions, targets, model obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss if self.autobalance: - self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + self.balance[i] = self.balance[ + i] * 0.9999 + 0.0001 / obji.detach().item() if self.autobalance: self.balance = [x / self.balance[self.ssi] for x in self.balance] @@ -124,15 +142,25 @@ def build_targets(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch = [], [], [], [] - gain = torch.ones(7, device=targets.device).long() # normalized to gridspace gain - ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) - targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices + gain = torch.ones( + 7, device=targets.device).long() # normalized to gridspace gain + ai = torch.arange(na, + device=targets.device).float().view(na, 1).repeat( + 1, nt) # same as .repeat_interleave(nt) + targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), + 2) # append anchor indices g = 0.5 # bias - off = torch.tensor([[0, 0], - [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], device=targets.device).float() * g # offsets + off = torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=targets.device).float() * g # offsets for i in range(self.nl): anchors = self.anchors[i] @@ -143,7 +171,8 @@ def build_targets(self, p, targets): if nt: # Matches r = t[:, :, 4:6] / anchors[:, None] # wh ratio - j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t'] # compare + j = torch.max( + r, 1. 
/ r).max(2)[0] < self.hyp['anchor_t'] # compare # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) t = t[j] # filter @@ -168,7 +197,9 @@ def build_targets(self, p, targets): # Append a = t[:, 6].long() # anchor indices - indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices + indices.append( + (b, a, gj.clamp_(0, gain[3] - 1), + gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices tbox.append(torch.cat((gxy - gij, gwh), 1)) # box anch.append(anchors[a]) # anchors tcls.append(c) # class @@ -179,63 +210,82 @@ def build_targets(self, p, targets): class ComputeLossOTA: # Compute losses def __init__(self, model, autobalance=False): - super(ComputeLossOTA, self).__init__() + super().__init__() device = next(model.parameters()).device # get model device h = model.hyp # hyperparameters # Define criteria - BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) - BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) + BCEcls = nn.BCEWithLogitsLoss( + pos_weight=torch.tensor([h['cls_pw']], device=device)) + BCEobj = nn.BCEWithLogitsLoss( + pos_weight=torch.tensor([h['obj_pw']], device=device)) # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 - self.cp, self.cn = smooth_BCE(eps=h.get('label_smoothing', 0.0)) # positive, negative BCE targets + self.cp, self.cn = smooth_BCE(eps=h.get( + 'label_smoothing', 0.0)) # positive, negative BCE targets # Focal loss g = h['fl_gamma'] # focal loss gamma if g > 0: BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) - det = model.module.model_h2[-1] if is_parallel(model) else model.model_h2[-1] # Detect() module - self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7 - self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index + det = model.module.model_h2[-1] if is_parallel( + model) else model.model_h2[-1] # Detect() module + self.balance = { + 3: [4.0, 1.0, 0.4] + }.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7 + self.ssi = list( + det.stride).index(16) if autobalance else 0 # stride 16 index self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance for k in 'na', 'nc', 'nl', 'anchors', 'stride': setattr(self, k, getattr(det, k)) def __call__(self, p, targets, imgs): # predictions, targets, model device = targets.device - lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) - bs, as_, gjs, gis, targets, anchors = self.build_targets(p, targets, imgs) - pre_gen_gains = [torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p] - + lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros( + 1, device=device), torch.zeros(1, device=device) + bs, as_, gjs, gis, targets, anchors = self.build_targets( + p, targets, imgs) + pre_gen_gains = [ + torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p + ] # Losses for i, pi in enumerate(p): # layer index, layer predictions - b, a, gj, gi = bs[i], as_[i], gjs[i], gis[i] # image, anchor, gridy, gridx + b, a, gj, gi = bs[i], as_[i], gjs[i], gis[ + i] # image, anchor, gridy, gridx tobj = torch.zeros_like(pi[..., 0], device=device) # target obj n = b.shape[0] # number of targets if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + ps = pi[b, a, gj, + gi] # prediction subset corresponding to targets # Regression grid = torch.stack([gi, gj], 
dim=1) pxy = ps[:, :2].sigmoid() * 2. - 0.5 - #pxy = ps[:, :2].sigmoid() * 3. - 1. - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + # pxy = ps[:, :2].sigmoid() * 3. - 1. + pwh = (ps[:, 2:4].sigmoid() * 2)**2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i] selected_tbox[:, :2] -= grid - iou = bbox_iou(pbox.T, selected_tbox, x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou = bbox_iou(pbox.T, + selected_tbox, + x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness - tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio + tobj[b, a, gj, + gi] = (1.0 - + self.gr) + self.gr * iou.detach().clamp(0).type( + tobj.dtype) # iou ratio # Classification selected_tcls = targets[i][:, 1].long() if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets + t = torch.full_like(ps[:, 5:], self.cn, + device=device) # targets t[range(n), selected_tcls] = self.cp lcls += self.BCEcls(ps[:, 5:], t) # BCE @@ -246,7 +296,8 @@ def __call__(self, p, targets, imgs): # predictions, targets, model obji = self.BCEobj(pi[..., 4], tobj) lobj += obji * self.balance[i] # obj loss if self.autobalance: - self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + self.balance[i] = self.balance[ + i] * 0.9999 + 0.0001 / obji.detach().item() if self.autobalance: self.balance = [x / self.balance[self.ssi] for x in self.balance] @@ -260,11 +311,11 @@ def __call__(self, p, targets, imgs): # predictions, targets, model def build_targets(self, p, targets, imgs): - #indices, anch = self.find_positive(p, targets) + # indices, anch = self.find_positive(p, targets) indices, anch = self.find_3_positive(p, targets) - #indices, anch = self.find_4_positive(p, targets) - #indices, anch = self.find_5_positive(p, targets) - #indices, anch = self.find_9_positive(p, targets) + # indices, anch = self.find_4_positive(p, targets) + # indices, anch = self.find_5_positive(p, targets) + # indices, anch = self.find_9_positive(p, targets) matching_bs = [[] for pp in p] matching_as = [[] for pp in p] @@ -277,7 +328,7 @@ def build_targets(self, p, targets, imgs): for batch_idx in range(p[0].shape[0]): - b_idx = targets[:, 0]==batch_idx + b_idx = targets[:, 0] == batch_idx this_target = targets[b_idx] if this_target.shape[0] == 0: continue @@ -305,16 +356,18 @@ def build_targets(self, p, targets, imgs): all_gj.append(gj) all_gi.append(gi) all_anch.append(anch[i][idx]) - from_which_layer.append(torch.ones(size=(len(b),)) * i) + from_which_layer.append(torch.ones(size=(len(b), )) * i) fg_pred = pi[b, a, gj, gi] p_obj.append(fg_pred[:, 4:5]) p_cls.append(fg_pred[:, 5:]) grid = torch.stack([gi, gj], dim=1) - pxy = (fg_pred[:, :2].sigmoid() * 2. - 0.5 + grid) * self.stride[i] #/ 8. - #pxy = (fg_pred[:, :2].sigmoid() * 3. - 1. + grid) * self.stride[i] - pwh = (fg_pred[:, 2:4].sigmoid() * 2) ** 2 * anch[i][idx] * self.stride[i] #/ 8. + pxy = (fg_pred[:, :2].sigmoid() * 2. - 0.5 + + grid) * self.stride[i] # / 8. + # pxy = (fg_pred[:, :2].sigmoid() * 3. - 1. + grid) * self.stride[i] + pwh = (fg_pred[:, 2:4].sigmoid() * + 2)**2 * anch[i][idx] * self.stride[i] # / 8. 
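# The pxy/pwh lines above undo the YOLO head parameterization (sigmoid
# offsets re-centered on the matched grid cell, anchor-scaled wh) and scale
# by the layer stride, so the SimOTA cost below compares candidates and
# targets in common image coordinates.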
pxywh = torch.cat([pxy, pwh], dim=-1) pxyxy = xywh2xyxy(pxywh) pxyxys.append(pxyxy) @@ -335,45 +388,41 @@ def build_targets(self, p, targets, imgs): pair_wise_iou_loss = -torch.log(pair_wise_iou + 1e-8) - top_k, _ = torch.topk(pair_wise_iou, min(10, pair_wise_iou.shape[1]), dim=1) + top_k, _ = torch.topk(pair_wise_iou, + min(10, pair_wise_iou.shape[1]), + dim=1) dynamic_ks = torch.clamp(top_k.sum(1).int(), min=1) - gt_cls_per_image = ( - F.one_hot(this_target[:, 1].to(torch.int64), self.nc) - .float() - .unsqueeze(1) - .repeat(1, pxyxys.shape[0], 1) - ) + gt_cls_per_image = (F.one_hot(this_target[:, 1].to(torch.int64), + self.nc).float().unsqueeze(1).repeat( + 1, pxyxys.shape[0], 1)) num_gt = this_target.shape[0] cls_preds_ = ( - p_cls.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() - * p_obj.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() - ) + p_cls.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() * + p_obj.unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_()) y = cls_preds_.sqrt_() pair_wise_cls_loss = F.binary_cross_entropy_with_logits( - torch.log(y/(1-y)) , gt_cls_per_image, reduction="none" - ).sum(-1) + torch.log(y / (1 - y)), gt_cls_per_image, + reduction='none').sum(-1) del cls_preds_ - cost = ( - pair_wise_cls_loss - + 3.0 * pair_wise_iou_loss - ) + cost = (pair_wise_cls_loss + 3.0 * pair_wise_iou_loss) matching_matrix = torch.zeros_like(cost) for gt_idx in range(num_gt): - _, pos_idx = torch.topk( - cost[gt_idx], k=dynamic_ks[gt_idx].item(), largest=False - ) + _, pos_idx = torch.topk(cost[gt_idx], + k=dynamic_ks[gt_idx].item(), + largest=False) matching_matrix[gt_idx][pos_idx] = 1.0 del top_k, dynamic_ks anchor_matching_gt = matching_matrix.sum(0) if (anchor_matching_gt > 1).sum() > 0: - _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0) + _, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], + dim=0) matching_matrix[:, anchor_matching_gt > 1] *= 0.0 matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1.0 fg_mask_inboxes = matching_matrix.sum(0) > 0.0 @@ -406,12 +455,24 @@ def build_targets(self, p, targets, imgs): matching_targets[i] = torch.cat(matching_targets[i], dim=0) matching_anchs[i] = torch.cat(matching_anchs[i], dim=0) else: - matching_bs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) - matching_as[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) - matching_gjs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) - matching_gis[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) - matching_targets[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) - matching_anchs[i] = torch.tensor([], device='cuda:0', dtype=torch.int64) + matching_bs[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) + matching_as[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) + matching_gjs[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) + matching_gis[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) + matching_targets[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) + matching_anchs[i] = torch.tensor([], + device='cuda:0', + dtype=torch.int64) return matching_bs, matching_as, matching_gjs, matching_gis, matching_targets, matching_anchs @@ -419,15 +480,25 @@ def find_3_positive(self, p, targets): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) na, nt = self.na, targets.shape[0] # number of anchors, targets indices, anch = [], [] - gain = torch.ones(7, device=targets.device).long() # normalized to gridspace gain - ai = torch.arange(na, 
device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) - targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices + gain = torch.ones( + 7, device=targets.device).long() # normalized to gridspace gain + ai = torch.arange(na, + device=targets.device).float().view(na, 1).repeat( + 1, nt) # same as .repeat_interleave(nt) + targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), + 2) # append anchor indices g = 0.5 # bias - off = torch.tensor([[0, 0], - [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m - # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm - ], device=targets.device).float() * g # offsets + off = torch.tensor( + [ + [0, 0], + [1, 0], + [0, 1], + [-1, 0], + [0, -1], # j,k,l,m + # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm + ], + device=targets.device).float() * g # offsets for i in range(self.nl): anchors = self.anchors[i] @@ -438,7 +509,8 @@ def find_3_positive(self, p, targets): if nt: # Matches r = t[:, :, 4:6] / anchors[:, None] # wh ratio - j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t'] # compare + j = torch.max( + r, 1. / r).max(2)[0] < self.hyp['anchor_t'] # compare # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) t = t[j] # filter @@ -457,13 +529,15 @@ def find_3_positive(self, p, targets): # Define b, c = t[:, :2].long().T # image, class gxy = t[:, 2:4] # grid xy - gwh = t[:, 4:6] # grid wh + # gwh = t[:, 4:6] # grid wh gij = (gxy - offsets).long() gi, gj = gij.T # grid xy indices # Append a = t[:, 6].long() # anchor indices - indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices + indices.append( + (b, a, gj.clamp_(0, gain[3] - 1), + gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices anch.append(anchors[a]) # anchors return indices, anch @@ -472,48 +546,68 @@ def find_3_positive(self, p, targets): class ComputeLossOTADual(ComputeLossOTA): # Compute losses def __init__(self, model, autobalance=False): - super(ComputeLossOTADual, self).__init__(model, autobalance) + super().__init__(model, autobalance) def __call__(self, p, targets, imgs): # predictions, targets, model device = targets.device - lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) - - bs_2, as_2_, gjs_2, gis_2, targets_2, anchors_2 = self.build_targets(p[self.nl:], targets, imgs) - bs, as_, gjs, gis, targets, anchors = self.build_targets(p[:self.nl], targets, imgs) - pre_gen_gains = [torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p[:self.nl]] - pre_gen_gains_2 = [torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p[self.nl:]] + lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros( + 1, device=device), torch.zeros(1, device=device) + + bs_2, as_2_, gjs_2, gis_2, targets_2, anchors_2 = self.build_targets( + p[self.nl:], targets, imgs) + bs, as_, gjs, gis, targets, anchors = self.build_targets( + p[:self.nl], targets, imgs) + pre_gen_gains = [ + torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] + for pp in p[:self.nl] + ] + pre_gen_gains_2 = [ + torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] + for pp in p[self.nl:] + ] # Losses for i in range(self.nl): # layer index, layer predictions pi = p[i] - pi_2 = p[i+self.nl] - b, a, gj, gi = bs[i], as_[i], gjs[i], gis[i] # image, anchor, gridy, gridx - b_2, a_2, gj_2, gi_2 = bs_2[i], as_2_[i], gjs_2[i], gis_2[i] # image, anchor, gridy, gridx + pi_2 = p[i + self.nl] + b, a, gj, 
gi = bs[i], as_[i], gjs[i], gis[ + i] # image, anchor, gridy, gridx + b_2, a_2, gj_2, gi_2 = bs_2[i], as_2_[i], gjs_2[i], gis_2[ + i] # image, anchor, gridy, gridx tobj = torch.zeros_like(pi[..., 0], device=device) # target obj - tobj_2 = torch.zeros_like(pi_2[..., 0], device=device) # target obj + tobj_2 = torch.zeros_like(pi_2[..., 0], + device=device) # target obj n = b.shape[0] # number of targets if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + ps = pi[b, a, gj, + gi] # prediction subset corresponding to targets # Regression grid = torch.stack([gi, gj], dim=1) pxy = ps[:, :2].sigmoid() * 2. - 0.5 - #pxy = ps[:, :2].sigmoid() * 3. - 1. - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + # pxy = ps[:, :2].sigmoid() * 3. - 1. + pwh = (ps[:, 2:4].sigmoid() * 2)**2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i] selected_tbox[:, :2] -= grid - iou = bbox_iou(pbox.T, selected_tbox, x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou = bbox_iou(pbox.T, + selected_tbox, + x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness - tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio + tobj[b, a, gj, + gi] = (1.0 - + self.gr) + self.gr * iou.detach().clamp(0).type( + tobj.dtype) # iou ratio # Classification selected_tcls = targets[i][:, 1].long() if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets + t = torch.full_like(ps[:, 5:], self.cn, + device=device) # targets t[range(n), selected_tcls] = self.cp lcls += self.BCEcls(ps[:, 5:], t) # BCE @@ -523,26 +617,33 @@ def __call__(self, p, targets, imgs): # predictions, targets, model n_2 = b_2.shape[0] # number of targets if n_2: - ps_2 = pi_2[b_2, a_2, gj_2, gi_2] # prediction subset corresponding to targets + ps_2 = pi_2[b_2, a_2, gj_2, + gi_2] # prediction subset corresponding to targets # Regression grid_2 = torch.stack([gi_2, gj_2], dim=1) pxy_2 = ps_2[:, :2].sigmoid() * 2. - 0.5 - #pxy = ps[:, :2].sigmoid() * 3. - 1. - pwh_2 = (ps_2[:, 2:4].sigmoid() * 2) ** 2 * anchors_2[i] + # pxy = ps[:, :2].sigmoid() * 3. - 1. 
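# The second head's boxes are decoded exactly as for the first head above;
# note both heads accumulate into the same lbox/lobj/lcls totals, which is
# what makes this the joint Dual loss rather than the adaptive Dy variant
# defined further down.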
+ pwh_2 = (ps_2[:, 2:4].sigmoid() * 2)**2 * anchors_2[i] pbox_2 = torch.cat((pxy_2, pwh_2), 1) # predicted box selected_tbox_2 = targets_2[i][:, 2:6] * pre_gen_gains_2[i] selected_tbox_2[:, :2] -= grid_2 - iou_2 = bbox_iou(pbox_2.T, selected_tbox_2, x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou_2 = bbox_iou(pbox_2.T, + selected_tbox_2, + x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox += (1.0 - iou_2).mean() # iou loss # Objectness - tobj_2[b_2, a_2, gj_2, gi_2] = (1.0 - self.gr) + self.gr * iou_2.detach().clamp(0).type(tobj_2.dtype) # iou ratio + tobj_2[b_2, a_2, gj_2, gi_2] = ( + 1.0 - self.gr) + self.gr * iou_2.detach().clamp(0).type( + tobj_2.dtype) # iou ratio # Classification selected_tcls_2 = targets_2[i][:, 1].long() if self.nc > 1: # cls loss (only if multiple classes) - t_2 = torch.full_like(ps_2[:, 5:], self.cn, device=device) # targets + t_2 = torch.full_like(ps_2[:, 5:], self.cn, + device=device) # targets t_2[range(n_2), selected_tcls_2] = self.cp lcls += self.BCEcls(ps_2[:, 5:], t_2) # BCE @@ -550,13 +651,13 @@ def __call__(self, p, targets, imgs): # predictions, targets, model # with open('targets.txt', 'a') as file: # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] - obji = self.BCEobj(pi[..., 4], tobj) obji_2 = self.BCEobj(pi_2[..., 4], tobj_2) lobj += obji * self.balance[i] # obj loss lobj += obji_2 * self.balance[i] # obj loss if self.autobalance: - self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + self.balance[i] = self.balance[ + i] * 0.9999 + 0.0001 / obji.detach().item() if self.autobalance: self.balance = [x / self.balance[self.ssi] for x in self.balance] @@ -572,7 +673,7 @@ def __call__(self, p, targets, imgs): # predictions, targets, model class ComputeLossOTADy(ComputeLossOTA): # Compute losses def __init__(self, model, autobalance=False): - super(ComputeLossOTADy, self).__init__(model, autobalance) + super().__init__(model, autobalance) self.tracked_diff = 0 self.iter_count = 0 self.diff_list = [] @@ -580,45 +681,66 @@ def __init__(self, model, autobalance=False): def __call__(self, ps, targets, imgs): # predictions, targets, model p, score = ps device = targets.device - lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) - lcls_2, lbox_2, lobj_2 = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) - - bs_2, as_2_, gjs_2, gis_2, targets_2, anchors_2 = self.build_targets(p[self.nl:], targets, imgs) - bs, as_, gjs, gis, targets, anchors = self.build_targets(p[:self.nl], targets, imgs) - pre_gen_gains = [torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p[:self.nl]] - pre_gen_gains_2 = [torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] for pp in p[self.nl:]] + lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros( + 1, device=device), torch.zeros(1, device=device) + lcls_2, lbox_2, lobj_2 = torch.zeros(1, device=device), torch.zeros( + 1, device=device), torch.zeros(1, device=device) + + bs_2, as_2_, gjs_2, gis_2, targets_2, anchors_2 = self.build_targets( + p[self.nl:], targets, imgs) + bs, as_, gjs, gis, targets, anchors = self.build_targets( + p[:self.nl], targets, imgs) + pre_gen_gains = [ + torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] + for pp in p[:self.nl] + ] + pre_gen_gains_2 = [ + torch.tensor(pp.shape, device=device)[[3, 2, 3, 2]] + for pp in p[self.nl:] + ] # Losses for i in range(self.nl): # layer index, layer predictions pi = p[i] - 
pi_2 = p[i+self.nl] - b, a, gj, gi = bs[i], as_[i], gjs[i], gis[i] # image, anchor, gridy, gridx - b_2, a_2, gj_2, gi_2 = bs_2[i], as_2_[i], gjs_2[i], gis_2[i] # image, anchor, gridy, gridx + pi_2 = p[i + self.nl] + b, a, gj, gi = bs[i], as_[i], gjs[i], gis[ + i] # image, anchor, gridy, gridx + b_2, a_2, gj_2, gi_2 = bs_2[i], as_2_[i], gjs_2[i], gis_2[ + i] # image, anchor, gridy, gridx tobj = torch.zeros_like(pi[..., 0], device=device) # target obj - tobj_2 = torch.zeros_like(pi_2[..., 0], device=device) # target obj + tobj_2 = torch.zeros_like(pi_2[..., 0], + device=device) # target obj n = b.shape[0] # number of targets if n: - ps = pi[b, a, gj, gi] # prediction subset corresponding to targets + ps = pi[b, a, gj, + gi] # prediction subset corresponding to targets # Regression grid = torch.stack([gi, gj], dim=1) pxy = ps[:, :2].sigmoid() * 2. - 0.5 - #pxy = ps[:, :2].sigmoid() * 3. - 1. - pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + # pxy = ps[:, :2].sigmoid() * 3. - 1. + pwh = (ps[:, 2:4].sigmoid() * 2)**2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) # predicted box selected_tbox = targets[i][:, 2:6] * pre_gen_gains[i] selected_tbox[:, :2] -= grid - iou = bbox_iou(pbox.T, selected_tbox, x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou = bbox_iou(pbox.T, + selected_tbox, + x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox += (1.0 - iou).mean() # iou loss # Objectness - tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio + tobj[b, a, gj, + gi] = (1.0 - + self.gr) + self.gr * iou.detach().clamp(0).type( + tobj.dtype) # iou ratio # Classification selected_tcls = targets[i][:, 1].long() if self.nc > 1: # cls loss (only if multiple classes) - t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets + t = torch.full_like(ps[:, 5:], self.cn, + device=device) # targets t[range(n), selected_tcls] = self.cp lcls += self.BCEcls(ps[:, 5:], t) # BCE @@ -628,26 +750,33 @@ def __call__(self, ps, targets, imgs): # predictions, targets, model n_2 = b_2.shape[0] # number of targets if n_2: - ps_2 = pi_2[b_2, a_2, gj_2, gi_2] # prediction subset corresponding to targets + ps_2 = pi_2[b_2, a_2, gj_2, + gi_2] # prediction subset corresponding to targets # Regression grid_2 = torch.stack([gi_2, gj_2], dim=1) pxy_2 = ps_2[:, :2].sigmoid() * 2. - 0.5 - #pxy = ps[:, :2].sigmoid() * 3. - 1. - pwh_2 = (ps_2[:, 2:4].sigmoid() * 2) ** 2 * anchors_2[i] + # pxy = ps[:, :2].sigmoid() * 3. - 1. 
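# From here the Dy loss mirrors the Dual one, but the second head's terms
# are kept in separate lcls_2/lbox_2/lobj_2 accumulators so the router score
# can blend the two per-head losses at the end of __call__.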
+ pwh_2 = (ps_2[:, 2:4].sigmoid() * 2)**2 * anchors_2[i] pbox_2 = torch.cat((pxy_2, pwh_2), 1) # predicted box selected_tbox_2 = targets_2[i][:, 2:6] * pre_gen_gains_2[i] selected_tbox_2[:, :2] -= grid_2 - iou_2 = bbox_iou(pbox_2.T, selected_tbox_2, x1y1x2y2=False, CIoU=True) # iou(prediction, target) + iou_2 = bbox_iou(pbox_2.T, + selected_tbox_2, + x1y1x2y2=False, + CIoU=True) # iou(prediction, target) lbox_2 += (1.0 - iou_2).mean() # iou loss # Objectness - tobj_2[b_2, a_2, gj_2, gi_2] = (1.0 - self.gr) + self.gr * iou_2.detach().clamp(0).type(tobj_2.dtype) # iou ratio + tobj_2[b_2, a_2, gj_2, gi_2] = ( + 1.0 - self.gr) + self.gr * iou_2.detach().clamp(0).type( + tobj_2.dtype) # iou ratio # Classification selected_tcls_2 = targets_2[i][:, 1].long() if self.nc > 1: # cls loss (only if multiple classes) - t_2 = torch.full_like(ps_2[:, 5:], self.cn, device=device) # targets + t_2 = torch.full_like(ps_2[:, 5:], self.cn, + device=device) # targets t_2[range(n_2), selected_tcls_2] = self.cp lcls_2 += self.BCEcls(ps_2[:, 5:], t_2) # BCE @@ -655,13 +784,13 @@ def __call__(self, ps, targets, imgs): # predictions, targets, model # with open('targets.txt', 'a') as file: # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] - obji = self.BCEobj(pi[..., 4], tobj) obji_2 = self.BCEobj(pi_2[..., 4], tobj_2) lobj += obji * self.balance[i] # obj loss lobj_2 += obji_2 * self.balance[i] # obj loss if self.autobalance: - self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item() + self.balance[i] = self.balance[ + i] * 0.9999 + 0.0001 / obji.detach().item() if self.autobalance: self.balance = [x / self.balance[self.ssi] for x in self.balance] @@ -689,4 +818,6 @@ def __call__(self, ps, targets, imgs): # predictions, targets, model loss = loss - self.tracked_diff / 2 loss_2 = loss_2 + self.tracked_diff / 2 adaptive_loss = score[:, 0] * loss + (1 - score[:, 0]) * loss_2 - return adaptive_loss * bs, torch.cat((current_diff, score[:, 0], 1 - score[:, 0], adaptive_loss)).detach() + return adaptive_loss * bs, torch.cat( + (current_diff, score[:, + 0], 1 - score[:, 0], adaptive_loss)).detach() diff --git a/utils/metrics.py b/utils/metrics.py index 6d2f536..f16fe79 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -15,7 +15,14 @@ def fitness(x): return (x[:, :4] * w).sum(1) -def ap_per_class(tp, conf, pred_cls, target_cls, v5_metric=False, plot=False, save_dir='.', names=()): +def ap_per_class(tp, + conf, + pred_cls, + target_cls, + v5_metric=False, + plot=False, + save_dir='.', + names=()): """ Compute the average precision, given the recall and precision curves. Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 
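(Shapes, inferred from the loop below: tp is an (n_detections, n_iou_thresholds) array, and AP is computed once per IoU column.)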
# Arguments @@ -39,7 +46,8 @@ def ap_per_class(tp, conf, pred_cls, target_cls, v5_metric=False, plot=False, sa # Create Precision-Recall curve and compute AP for each class px, py = np.linspace(0, 1, 1000), [] # for plotting - ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000)) + ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros( + (nc, 1000)) for ci, c in enumerate(unique_classes): i = pred_cls == c n_l = (target_cls == c).sum() # number of labels @@ -54,32 +62,50 @@ def ap_per_class(tp, conf, pred_cls, target_cls, v5_metric=False, plot=False, sa # Recall recall = tpc / (n_l + 1e-16) # recall curve - r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases + r[ci] = np.interp(-px, -conf[i], recall[:, 0], + left=0) # negative x, xp because xp decreases # Precision precision = tpc / (tpc + fpc) # precision curve - p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score + p[ci] = np.interp(-px, -conf[i], precision[:, 0], + left=1) # p at pr_score # AP from recall-precision curve for j in range(tp.shape[1]): - ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j], v5_metric=v5_metric) + ap[ci, j], mpre, mrec = compute_ap(recall[:, j], + precision[:, j], + v5_metric=v5_metric) if plot and j == 0: - py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 + py.append(np.interp(px, mrec, + mpre)) # precision at mAP@0.5 # Compute F1 (harmonic mean of precision and recall) f1 = 2 * p * r / (p + r + 1e-16) if plot: plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names) - plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1') - plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision') - plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall') + plot_mc_curve(px, + f1, + Path(save_dir) / 'F1_curve.png', + names, + ylabel='F1') + plot_mc_curve(px, + p, + Path(save_dir) / 'P_curve.png', + names, + ylabel='Precision') + plot_mc_curve(px, + r, + Path(save_dir) / 'R_curve.png', + names, + ylabel='Recall') i = f1.mean(0).argmax() # max F1 index return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32') def compute_ap(recall, precision, v5_metric=False): - """ Compute the average precision, given the recall and precision curves + """Compute the average precision, given the recall and precision curves. + # Arguments recall: The recall curve (list) precision: The precision curve (list) @@ -104,7 +130,8 @@ def compute_ap(recall, precision, v5_metric=False): x = np.linspace(0, 1, 101) # 101-point interp (COCO) ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate else: # 'continuous' - i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes + i = np.where( + mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve return ap, mpre, mrec @@ -119,8 +146,8 @@ def __init__(self, nc, conf=0.25, iou_thres=0.45): self.iou_thres = iou_thres def process_batch(self, detections, labels): - """ - Return intersection-over-union (Jaccard index) of boxes. + """Return intersection-over-union (Jaccard index) of boxes. + Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
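(Despite the inherited summary line, the net effect here is to accumulate per-class matches into the running confusion matrix; the IoU is only an intermediate quantity.)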
Arguments: detections (Array[N, 6]), x1, y1, x2, y2, conf, class @@ -135,12 +162,15 @@ def process_batch(self, detections, labels): x = torch.where(iou > self.iou_thres) if x[0].shape[0]: - matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), + 1).cpu().numpy() if x[0].shape[0] > 1: matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + matches = matches[np.unique(matches[:, 1], + return_index=True)[1]] matches = matches[matches[:, 2].argsort()[::-1]] - matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + matches = matches[np.unique(matches[:, 0], + return_index=True)[1]] else: matches = np.zeros((0, 3)) @@ -165,19 +195,32 @@ def plot(self, save_dir='', names=()): try: import seaborn as sn - array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize - array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) + array = self.matrix / ( + self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize + array[array < + 0.005] = np.nan # don't annotate (would appear as 0.00) fig = plt.figure(figsize=(12, 9), tight_layout=True) sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size - labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels - sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, - xticklabels=names + ['background FP'] if labels else "auto", - yticklabels=names + ['background FN'] if labels else "auto").set_facecolor((1, 1, 1)) + labels = (0 < len(names) < 99 + ) and len(names) == self.nc # apply names to ticklabels + sn.heatmap(array, + annot=self.nc < 30, + annot_kws={ + 'size': 8 + }, + cmap='Blues', + fmt='.2f', + square=True, + xticklabels=names + + ['background FP'] if labels else 'auto', + yticklabels=names + + ['background FN'] if labels else 'auto').set_facecolor( + (1, 1, 1)) fig.axes[0].set_xlabel('True') fig.axes[0].set_ylabel('Predicted') fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) - except Exception as e: + except Exception: pass def print(self): @@ -185,7 +228,8 @@ def print(self): print(' '.join(map(str, self.matrix[i]))) -# Plots ---------------------------------------------------------------------------------------------------------------- +# Plots -------------------------------------------------------------------------------------------------------------- + def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()): # Precision-recall curve @@ -194,34 +238,51 @@ def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()): if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py.T): - ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) + ax.plot( + px, y, linewidth=1, + label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision) else: ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) - ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) + ax.plot(px, + py.mean(1), + linewidth=3, + color='blue', + label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) ax.set_xlabel('Recall') ax.set_ylabel('Precision') ax.set_xlim(0, 1) ax.set_ylim(0, 1) - plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + plt.legend(bbox_to_anchor=(1.04, 1), loc='upper left') fig.savefig(Path(save_dir), dpi=250) -def 
plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', ylabel='Metric'): +def plot_mc_curve(px, + py, + save_dir='mc_curve.png', + names=(), + xlabel='Confidence', + ylabel='Metric'): # Metric-confidence curve fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) if 0 < len(names) < 21: # display per-class legend if < 21 classes for i, y in enumerate(py): - ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric) + ax.plot(px, y, linewidth=1, + label=f'{names[i]}') # plot(confidence, metric) else: - ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric) + ax.plot(px, py.T, linewidth=1, + color='grey') # plot(confidence, metric) y = py.mean(0) - ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') + ax.plot(px, + y, + linewidth=3, + color='blue', + label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}') ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) ax.set_xlim(0, 1) ax.set_ylim(0, 1) - plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") + plt.legend(bbox_to_anchor=(1.04, 1), loc='upper left') fig.savefig(Path(save_dir), dpi=250) diff --git a/utils/plots.py b/utils/plots.py index e6c3aaf..c7f76ef 100644 --- a/utils/plots.py +++ b/utils/plots.py @@ -1,6 +1,5 @@ # Plotting utils -import glob import math import os import random @@ -11,15 +10,10 @@ import matplotlib import matplotlib.pyplot as plt import numpy as np -import pandas as pd -import seaborn as sns import torch -import yaml -from PIL import Image, ImageDraw, ImageFont -from scipy.signal import butter, filtfilt +from PIL import Image from utils.general import xywh2xyxy, xyxy2xywh -from utils.metrics import fitness # Settings matplotlib.rc('font', **{'size': 11}) @@ -31,12 +25,14 @@ def color_list(): def hex2rgb(h): return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4)) - return [hex2rgb(h) for h in matplotlib.colors.TABLEAU_COLORS.values()] # or BASE_ (8), CSS4_ (148), XKCD_ (949) + return [hex2rgb(h) for h in matplotlib.colors.TABLEAU_COLORS.values() + ] # or BASE_ (8), CSS4_ (148), XKCD_ (949) def plot_one_box(x, img, color=None, label=None, line_thickness=3): # Plots one bounding box on image img - tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness + tl = line_thickness or round( + 0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness color = color or [random.randint(0, 255) for _ in range(3)] c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) @@ -45,7 +41,12 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=3): t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled - cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) + cv2.putText(img, + label, (c1[0], c1[1] - 2), + 0, + tl / 3, [225, 255, 255], + thickness=tf, + lineType=cv2.LINE_AA) def output_to_target(output): @@ -53,11 +54,18 @@ def output_to_target(output): targets = [] for i, o in enumerate(output): for *box, conf, cls in o.cpu().numpy(): - targets.append([i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) + targets.append( + [i, cls, *list(*xyxy2xywh(np.array(box)[None])), conf]) return np.array(targets) -def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max_size=640, max_subplots=16): +def 
plot_images(images, + targets, + paths=None, + fname='images.jpg', + names=None, + max_size=640, + max_subplots=16): # Plot image grid with labels if isinstance(images, torch.Tensor): @@ -73,7 +81,7 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max tf = max(tl - 1, 1) # font thickness bs, _, h, w = images.shape # batch size, _, height, width bs = min(bs, max_subplots) # limit plot images - ns = np.ceil(bs ** 0.5) # number of subplots (square) + ns = np.ceil(bs**0.5) # number of subplots (square) # Check if we should resize scale_factor = max_size / max(h, w) @@ -82,7 +90,8 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max w = math.ceil(scale_factor * w) colors = color_list() # list of colors - mosaic = np.full((int(ns * h), int(ns * w), 3), 255, dtype=np.uint8) # init + mosaic = np.full((int(ns * h), int(ns * w), 3), 255, + dtype=np.uint8) # init for i, img in enumerate(images): if i == max_subplots: # if last batch has fewer images than we expect break @@ -100,7 +109,8 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max boxes = xywh2xyxy(image_targets[:, 2:6]).T classes = image_targets[:, 1].astype('int') labels = image_targets.shape[1] == 6 # labels if no conf column - conf = None if labels else image_targets[:, 6] # check for confidence presence (label vs pred) + conf = None if labels else image_targets[:, + 6] # check for confidence presence (label vs pred) if boxes.shape[1]: if boxes.max() <= 1.01: # if normalized with tolerance 0.01 @@ -115,22 +125,35 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max color = colors[cls % len(colors)] cls = names[cls] if names else cls if labels or conf[j] > 0.25: # 0.25 conf thresh - label = '%s' % cls if labels else '%s %.1f' % (cls, conf[j]) - plot_one_box(box, mosaic, label=label, color=color, line_thickness=tl) + label = '%s' % cls if labels else '{} {:.1f}'.format( + cls, conf[j]) + plot_one_box(box, + mosaic, + label=label, + color=color, + line_thickness=tl) # Draw image filename labels if paths: label = Path(paths[i]).name[:40] # trim to 40 char - t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] - cv2.putText(mosaic, label, (block_x + 5, block_y + t_size[1] + 5), 0, tl / 3, [220, 220, 220], thickness=tf, + t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, + thickness=tf)[0] + cv2.putText(mosaic, + label, (block_x + 5, block_y + t_size[1] + 5), + 0, + tl / 3, [220, 220, 220], + thickness=tf, lineType=cv2.LINE_AA) # Image border - cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), (255, 255, 255), thickness=3) + cv2.rectangle(mosaic, (block_x, block_y), (block_x + w, block_y + h), + (255, 255, 255), + thickness=3) if fname: r = min(1280. 
/ max(h, w) / ns, 1.0) # ratio to limit image size - mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), interpolation=cv2.INTER_AREA) + mosaic = cv2.resize(mosaic, (int(ns * w * r), int(ns * h * r)), + interpolation=cv2.INTER_AREA) # cv2.imwrite(fname, cv2.cvtColor(mosaic, cv2.COLOR_BGR2RGB)) # cv2 save Image.fromarray(mosaic).save(fname) # PIL save return mosaic @@ -138,7 +161,8 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''): # Plot LR simulating training for full epochs - optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals + optimizer, scheduler = copy(optimizer), copy( + scheduler) # do not modify originals y = [] for _ in range(epochs): scheduler.step() @@ -157,19 +181,28 @@ def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''): # Plot training 'results*.txt'. from utils.plots import *; plot_results(save_dir='runs/train/exp') fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True) ax = ax.ravel() - s = ['Box', 'Objectness', 'Classification', 'Precision', 'Recall', - 'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', 'mAP@0.5:0.95'] + s = [ + 'Box', 'Objectness', 'Classification', 'Precision', 'Recall', + 'val Box', 'val Objectness', 'val Classification', 'mAP@0.5', + 'mAP@0.5:0.95' + ] if bucket: # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id] files = ['results%g.txt' % x for x in id] - c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple('gs://%s/results%g.txt' % (bucket, x) for x in id) + c = ('gsutil cp ' + '%s ' * len(files) + '.') % tuple( + f'gs://{bucket}/results{x:g}.txt' for x in id) os.system(c) else: files = list(Path(save_dir).glob('results*.txt')) - assert len(files), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath(save_dir) + assert len( + files + ), 'No results.txt files found in %s, nothing to plot.' % os.path.abspath( + save_dir) for fi, f in enumerate(files): try: - results = np.loadtxt(f, usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], ndmin=2).T + results = np.loadtxt(f, + usecols=[2, 3, 4, 8, 9, 12, 13, 14, 10, 11], + ndmin=2).T n = results.shape[1] # number of rows x = range(start, min(stop, n) if stop else n) for i in range(10): @@ -178,12 +211,17 @@ def plot_results(start=0, stop=0, bucket='', id=(), labels=(), save_dir=''): y[y == 0] = np.nan # don't show zero loss values # y /= y[0] # normalize label = labels[fi] if len(labels) else f.stem - ax[i].plot(x, y, marker='.', label=label, linewidth=2, markersize=8) + ax[i].plot(x, + y, + marker='.', + label=label, + linewidth=2, + markersize=8) ax[i].set_title(s[i]) # if i in [5, 6, 7]: # share train and val loss y axes # ax[i].get_shared_y_axes().join(ax[i], ax[i - 5]) except Exception as e: - print('Warning: Plotting error for %s; %s' % (f, e)) + print(f'Warning: Plotting error for {f}; {e}') ax[1].legend() fig.savefig(Path(save_dir) / 'results.png', dpi=200) diff --git a/utils/torch_utils.py b/utils/torch_utils.py index 70b1bf5..8b207a7 100644 --- a/utils/torch_utils.py +++ b/utils/torch_utils.py @@ -13,7 +13,6 @@ import torch.backends.cudnn as cudnn import torch.nn as nn import torch.nn.functional as F -import torchvision try: import thop # for FLOPS computation @@ -24,9 +23,8 @@ @contextmanager def torch_distributed_zero_first(local_rank: int): - """ - Decorator to make all processes in distributed training wait for each local_master to do something. 
- """ + """Decorator to make all processes in distributed training wait for each + local_master to do something.""" if local_rank not in [-1, 0]: torch.distributed.barrier() yield @@ -53,8 +51,9 @@ def git_describe(path=Path(__file__).parent): # path must be a directory # return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe s = f'git -C {path} describe --tags --long --always' try: - return subprocess.check_output(s, shell=True, stderr=subprocess.STDOUT).decode()[:-1] - except subprocess.CalledProcessError as e: + return subprocess.check_output(s, shell=True, + stderr=subprocess.STDOUT).decode()[:-1] + except subprocess.CalledProcessError: return '' # not a git repository @@ -63,10 +62,12 @@ def select_device(device='', batch_size=None): s = f'DynamicDet 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string cpu = device.lower() == 'cpu' if cpu: - os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False + os.environ[ + 'CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False elif device: # non-cpu device requested os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable - assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability + assert torch.cuda.is_available( + ), f'CUDA unavailable, invalid device {device} requested' # check availability cuda = not cpu and torch.cuda.is_available() if cuda: @@ -80,7 +81,8 @@ def select_device(device='', batch_size=None): else: s += 'CPU\n' - logger.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe + logger.info(s.encode().decode('ascii', 'ignore') + if platform.system() == 'Windows' else s) # emoji-safe return torch.device('cuda:0' if cuda else 'cpu') @@ -98,18 +100,24 @@ def profile(x, ops, n=100, device=None): # m2 = nn.SiLU() # profile(x, [m1, m2], n=100) # profile speed over 100 iterations - device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + device = device or torch.device( + 'cuda:0' if torch.cuda.is_available() else 'cpu') x = x.to(device) x.requires_grad = True - print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') - print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") + print(torch.__version__, device.type, + torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') + print( + f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}" + ) for m in ops if isinstance(ops, list) else [ops]: m = m.to(device) if hasattr(m, 'to') else m # device - m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type + m = m.half() if hasattr(m, 'half') and isinstance( + x, torch.Tensor) and x.dtype is torch.float16 else m # type dtf, dtb, t = 0., 0., [0., 0., 0.] 
# dt forward, backward try: - flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS - except: + flops = thop.profile(m, inputs=(x, ), + verbose=False)[0] / 1E9 * 2 # GFLOPS + except Exception: flops = 0 for _ in range(n): @@ -119,24 +127,33 @@ def profile(x, ops, n=100, device=None): try: _ = y.sum().backward() t[2] = time_synchronized() - except: # no backward method + except Exception: # no backward method t[2] = float('nan') dtf += (t[1] - t[0]) * 1000 / n # ms per op forward dtb += (t[2] - t[1]) * 1000 / n # ms per op backward s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' - p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters - print(f'{p:12}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') + p = sum(list(x.numel() for x in m.parameters())) if isinstance( + m, nn.Module) else 0 # parameters + print( + f'{p:12}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}' + ) def is_parallel(model): - return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) + return type(model) in (nn.parallel.DataParallel, + nn.parallel.DistributedDataParallel) def intersect_dicts(da, db, exclude=()): # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values - return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} + return { + k: v + for k, v in da.items() + if k in db and not any(x in k + for x in exclude) and v.shape == db[k].shape + } def initialize_weights(model): @@ -153,7 +170,9 @@ def initialize_weights(model): def find_modules(model, mclass=nn.Conv2d): # Finds layer indices matching module class 'mclass' - return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] + return [ + i for i, m in enumerate(model.module_list) if isinstance(m, mclass) + ] def sparsity(model): @@ -184,7 +203,8 @@ def fuse_conv_and_bn(conv, bn): stride=conv.stride, padding=conv.padding, groups=conv.groups, - bias=True).requires_grad_(False).to(conv.weight.device) + bias=True).requires_grad_(False).to( + conv.weight.device) # prepare filters w_conv = conv.weight.clone().view(conv.out_channels, -1) @@ -192,9 +212,13 @@ def fuse_conv_and_bn(conv, bn): fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) # prepare spatial bias - b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias - b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) - fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + b_conv = torch.zeros( + conv.weight.size(0), + device=conv.weight.device) if conv.bias is None else conv.bias + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div( + torch.sqrt(bn.running_var + bn.eps)) + fusedconv.bias.copy_( + torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) return fusedconv @@ -202,25 +226,36 @@ def fuse_conv_and_bn(conv, bn): def model_info(model, verbose=False, img_size=640): # Model information. img_size may be int or list, i.e. 
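A quick way to sanity-check the fuse_conv_and_bn folding above is to compare fused and unfused outputs on random data; a minimal sketch, using an eval-mode BatchNorm so its running statistics are the ones folded in:

```python
import torch
import torch.nn as nn

from utils.torch_utils import fuse_conv_and_bn

conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(8).eval()  # fusion assumes inference-mode statistics
x = torch.randn(1, 3, 32, 32)
with torch.no_grad():
    y_ref = bn(conv(x))
    y_fused = fuse_conv_and_bn(conv, bn)(x)
print(torch.allclose(y_ref, y_fused, atol=1e-5))  # expect True
```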
img_size=640 or img_size=[640, 320] n_p = sum(x.numel() for x in model.parameters()) # number parameters - n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients + n_g = sum(x.numel() for x in model.parameters() + if x.requires_grad) # number gradients if verbose: - print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) + print('%5s %40s %9s %12s %20s %10s %10s' % + ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', + 'sigma')) for i, (name, p) in enumerate(model.named_parameters()): name = name.replace('module_list.', '') print('%5g %40s %9s %12g %20s %10.3g %10.3g' % - (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) + (i, name, p.requires_grad, p.numel(), list( + p.shape), p.mean(), p.std())) try: # FLOPS from thop import profile - stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32 - img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input - flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS - img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float - fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPS + stride = max(int(model.stride.max()), 32) if hasattr(model, + 'stride') else 32 + img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), + device=next(model.parameters()).device) # input + flops = profile(deepcopy(model), inputs=(img, ), + verbose=False)[0] / 1E9 * 2 # stride GFLOPS + img_size = img_size if isinstance( + img_size, list) else [img_size, img_size] # expand if int/float + fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / + stride) # 640x640 GFLOPS except (ImportError, Exception): fs = '' - logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") + logger.info( + f'Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}' + ) def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) @@ -230,24 +265,29 @@ def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) else: h, w = img.shape[2:] s = (int(h * ratio), int(w * ratio)) # new size - img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize + img = F.interpolate(img, size=s, mode='bilinear', + align_corners=False) # resize if not same_shape: # pad/crop img - h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] - return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean + h, w = (math.ceil(x * ratio / gs) * gs for x in (h, w)) + return F.pad(img, [0, w - s[1], 0, h - s[0]], + value=0.447) # value = imagenet mean def copy_attr(a, b, include=(), exclude=()): # Copy attributes from b to a, options to only include [...] and to exclude [...] for k, v in b.__dict__.items(): - if (len(include) and k not in include) or k.startswith('_') or k in exclude: + if (len(include) + and k not in include) or k.startswith('_') or k in exclude: continue else: setattr(a, k, v) class ModelEMA: - """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models - Keep a moving average of everything in the model state_dict (parameters and buffers). 
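The ModelEMA class reformatted here implements the usual shadow-weights pattern; a minimal sketch of how it slots into a training loop (the toy model, optimizer, and step count are illustrative, not from this repo's configs):

```python
import torch
import torch.nn as nn

from utils.torch_utils import ModelEMA

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
ema = ModelEMA(model, decay=0.9999)
opt = torch.optim.SGD(model.parameters(), lr=0.01)
for _ in range(3):
    loss = model(torch.randn(2, 3, 16, 16)).square().mean()
    loss.backward()
    opt.step()
    opt.zero_grad()
    ema.update(model)  # blend current weights into the shadow copy
# evaluate / checkpoint from ema.ema rather than the raw model
```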
+ """Model Exponential Moving Average from + https://github.com/rwightman/pytorch-image-models Keep a moving average of + everything in the model state_dict (parameters and buffers). + This is intended to allow functionality like https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage A smoothed version of the weights is necessary for some training schemes to perform well. @@ -257,11 +297,13 @@ class ModelEMA: def __init__(self, model, decay=0.9999, updates=0): # Create EMA - self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA + self.ema = deepcopy( + model.module if is_parallel(model) else model).eval() # FP32 EMA # if next(model.parameters()).device.type != 'cpu': # self.ema.half() # FP16 EMA self.updates = updates # number of EMA updates - self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) + self.decay = lambda x: decay * (1 - math.exp( + -x / 2000)) # decay exponential ramp (to help early epochs) for p in self.ema.parameters(): p.requires_grad_(False) @@ -271,18 +313,23 @@ def update(self, model): self.updates += 1 d = self.decay(self.updates) - msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict + msd = model.module.state_dict() if is_parallel( + model) else model.state_dict() # model state_dict for k, v in self.ema.state_dict().items(): if v.dtype.is_floating_point: v *= d v += (1. - d) * msd[k].detach() - def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): + def update_attr(self, + model, + include=(), + exclude=('process_group', 'reducer')): # Update EMA attributes copy_attr(self.ema, model, include, exclude) class BatchNormXd(torch.nn.modules.batchnorm._BatchNorm): + def _check_input_dim(self, input): # The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc # is this method that is overwritten by the sub-class @@ -294,16 +341,16 @@ def _check_input_dim(self, input): # we could return the one that was originally created) return + def revert_sync_batchnorm(module): # this is very similar to the function that it is trying to revert: # https://github.com/pytorch/pytorch/blob/c8b3686a3e4ba63dc59e5dcfe5db3430df256833/torch/nn/modules/batchnorm.py#L679 module_output = module if isinstance(module, torch.nn.modules.batchnorm.SyncBatchNorm): - new_cls = BatchNormXd - module_output = BatchNormXd(module.num_features, - module.eps, module.momentum, - module.affine, - module.track_running_stats) + # new_cls = BatchNormXd + module_output = BatchNormXd(module.num_features, module.eps, + module.momentum, module.affine, + module.track_running_stats) if module.affine: with torch.no_grad(): module_output.weight = module.weight @@ -311,7 +358,7 @@ def revert_sync_batchnorm(module): module_output.running_mean = module.running_mean module_output.running_var = module.running_var module_output.num_batches_tracked = module.num_batches_tracked - if hasattr(module, "qconfig"): + if hasattr(module, 'qconfig'): module_output.qconfig = module.qconfig for name, child in module.named_children(): module_output.add_module(name, revert_sync_batchnorm(child)) diff --git a/utils/wandb_logging/__init__.py b/utils/wandb_logging/__init__.py index 84952a8..a6131c1 100644 --- a/utils/wandb_logging/__init__.py +++ b/utils/wandb_logging/__init__.py @@ -1 +1 @@ -# init \ No newline at end of file +# init diff --git a/utils/wandb_logging/log_dataset.py b/utils/wandb_logging/log_dataset.py deleted file mode 100644 index 
438e64e..0000000 --- a/utils/wandb_logging/log_dataset.py +++ /dev/null @@ -1,13 +0,0 @@ -import argparse - -import yaml - -from wandb_utils import WandbLogger - -WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' - - -def create_dataset_artifact(opt): - with open(opt.data) as f: - data = yaml.load(f, Loader=yaml.SafeLoader) # data dict - logger = WandbLogger(opt, '', None, data, job_type='Dataset Creation') diff --git a/utils/wandb_logging/wandb_utils.py b/utils/wandb_logging/wandb_utils.py index 869b514..e67014c 100644 --- a/utils/wandb_logging/wandb_utils.py +++ b/utils/wandb_logging/wandb_utils.py @@ -6,17 +6,16 @@ import yaml from tqdm import tqdm -sys.path.append(str(Path(__file__).parent.parent.parent)) # add utils/ to path -from utils.datasets import LoadImagesAndLabels -from utils.datasets import img2label_paths -from utils.general import colorstr, xywh2xyxy, check_dataset +from utils.datasets import LoadImagesAndLabels, img2label_paths +from utils.general import check_dataset, colorstr, xywh2xyxy try: import wandb - from wandb import init, finish except ImportError: wandb = None +sys.path.append(str(Path(__file__).parent.parent.parent)) # add utils/ to path + WANDB_ARTIFACT_PREFIX = 'wandb-artifact://' @@ -25,7 +24,8 @@ def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX): def check_wandb_config_file(data_config_file): - wandb_config = '_wandb.'.join(data_config_file.rsplit('.', 1)) # updated data.yaml path + wandb_config = '_wandb.'.join(data_config_file.rsplit( + '.', 1)) # updated data.yaml path if Path(wandb_config).is_file(): return wandb_config return data_config_file @@ -40,15 +40,17 @@ def get_run_info(run_path): def check_wandb_resume(opt): - process_wandb_config_ddp_mode(opt) if opt.global_rank not in [-1, 0] else None + process_wandb_config_ddp_mode(opt) if opt.global_rank not in [-1, 0 + ] else None if isinstance(opt.resume, str): if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): if opt.global_rank not in [-1, 0]: # For resuming DDP runs run_id, project, model_artifact_name = get_run_info(opt.resume) api = wandb.Api() - artifact = api.artifact(project + '/' + model_artifact_name + ':latest') + artifact = api.artifact(project + '/' + model_artifact_name + + ':latest') modeldir = artifact.download() - opt.weights = str(Path(modeldir) / "last.pt") + opt.weights = str(Path(modeldir) / 'last.pt') return True return None @@ -57,16 +59,21 @@ def process_wandb_config_ddp_mode(opt): with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.SafeLoader) # data dict train_dir, val_dir = None, None - if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX): + if isinstance( + data_dict['train'], + str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX): api = wandb.Api() - train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias) + train_artifact = api.artifact( + remove_prefix(data_dict['train']) + ':' + opt.artifact_alias) train_dir = train_artifact.download() train_path = Path(train_dir) / 'data/images/' data_dict['train'] = str(train_path) - if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX): + if isinstance(data_dict['val'], + str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX): api = wandb.Api() - val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias) + val_artifact = api.artifact( + remove_prefix(data_dict['val']) + ':' + opt.artifact_alias) val_dir = val_artifact.download() val_path = Path(val_dir) / 
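All of the resume helpers above key off the wandb-artifact:// prefix; a small illustration of the convention (remove_prefix's body is not shown in this hunk, so the one-liner below is an assumption based on its call sites):

```python
WANDB_ARTIFACT_PREFIX = 'wandb-artifact://'


def remove_prefix(from_string, prefix=WANDB_ARTIFACT_PREFIX):
    return from_string[len(prefix):]  # assumed implementation


resume = 'wandb-artifact://my-project/run_1abc_model'
if resume.startswith(WANDB_ARTIFACT_PREFIX):
    print(remove_prefix(resume))  # -> my-project/run_1abc_model
```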
'data/images/' data_dict['val'] = str(val_path) @@ -78,30 +85,37 @@ def process_wandb_config_ddp_mode(opt): class WandbLogger(): + def __init__(self, opt, name, run_id, data_dict, job_type='Training'): # Pre-training routine -- self.job_type = job_type self.wandb, self.wandb_run, self.data_dict = wandb, None if not wandb else wandb.run, data_dict - # It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the WandbLogger's wandb.init call + # It's more elegant to stick to 1 wandb.init call, but useful config data is overwritten in the + # WandbLogger's wandb.init call if isinstance(opt.resume, str): # checks resume from artifact if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): run_id, project, model_artifact_name = get_run_info(opt.resume) model_artifact_name = WANDB_ARTIFACT_PREFIX + model_artifact_name assert wandb, 'install wandb to resume wandb runs' # Resume wandb-artifact:// runs here| workaround for not overwriting wandb.config - self.wandb_run = wandb.init(id=run_id, project=project, resume='allow') + self.wandb_run = wandb.init(id=run_id, + project=project, + resume='allow') opt.resume = model_artifact_name elif self.wandb: - self.wandb_run = wandb.init(config=opt, - resume="allow", - project='DynamicDet' if opt.project == 'runs/train' else Path(opt.project).stem, - name=name, - job_type=job_type, - id=run_id) if not wandb.run else wandb.run + self.wandb_run = wandb.init( + config=opt, + resume='allow', + project='DynamicDet' + if opt.project == 'runs/train' else Path(opt.project).stem, + name=name, + job_type=job_type, + id=run_id) if not wandb.run else wandb.run if self.wandb_run: if self.job_type == 'Training': if not opt.resume: - wandb_data_dict = self.check_and_upload_dataset(opt) if opt.upload_dataset else data_dict + wandb_data_dict = self.check_and_upload_dataset( + opt) if opt.upload_dataset else data_dict # Info useful for resuming from artifacts self.wandb_run.config.opt = vars(opt) self.wandb_run.config.data_dict = wandb_data_dict @@ -110,15 +124,17 @@ def __init__(self, opt, name, run_id, data_dict, job_type='Training'): self.data_dict = self.check_and_upload_dataset(opt) else: prefix = colorstr('wandb: ') - print(f"{prefix}Install Weights & Biases for DynamicDet logging with 'pip install wandb' (recommended)") + print( + f"{prefix}Install Weights & Biases for DynamicDet logging with 'pip install wandb' (recommended)" + ) def check_and_upload_dataset(self, opt): assert wandb, 'Install wandb to upload dataset' check_dataset(self.data_dict) - config_path = self.log_dataset_artifact(opt.data, - opt.single_cls, - 'DynamicDet' if opt.project == 'runs/train' else Path(opt.project).stem) - print("Created dataset config file ", config_path) + config_path = self.log_dataset_artifact( + opt.data, opt.single_cls, 'DynamicDet' + if opt.project == 'runs/train' else Path(opt.project).stem) + print('Created dataset config file ', config_path) with open(config_path) as f: wandb_data_dict = yaml.load(f, Loader=yaml.SafeLoader) return wandb_data_dict @@ -129,17 +145,21 @@ def setup_training(self, opt, data_dict): if isinstance(opt.resume, str): modeldir, _ = self.download_model_artifact(opt) if modeldir: - self.weights = Path(modeldir) / "last.pt" + self.weights = Path(modeldir) / 'last.pt' config = self.wandb_run.config - opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp = str( - self.weights), config.save_period, config.total_batch_size, config.bbox_interval, config.epochs, \ - config.opt['hyp'] - data_dict = 
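A hypothetical instantiation of WandbLogger during training setup, matching the __init__ signature above; opt and data_dict stand in for train.py's argparse namespace and parsed data yaml and are not defined here:

```python
from pathlib import Path

from utils.wandb_logging.wandb_utils import WandbLogger

# opt and data_dict are assumed training-setup locals.
wandb_logger = WandbLogger(opt, Path(opt.save_dir).stem, None, data_dict)
if wandb_logger.wandb_run:
    wandb_logger.log({'train/box_loss': 0.05})  # buffered until end_epoch()
    wandb_logger.end_epoch()
```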
dict(self.wandb_run.config.data_dict) # eliminates the need for config file to resume - if 'val_artifact' not in self.__dict__: # If --upload_dataset is set, use the existing artifact, don't download - self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(data_dict.get('train'), - opt.artifact_alias) - self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(data_dict.get('val'), - opt.artifact_alias) + (opt.weights, opt.save_period, opt.batch_size, + opt.bbox_interval, opt.epochs, + opt.hyp) = (str(self.weights), config.save_period, + config.total_batch_size, config.bbox_interval, + config.epochs, config.opt['hyp']) + data_dict = dict(self.wandb_run.config.data_dict + ) # eliminates the need for config file to resume + # If --upload_dataset is set, use the existing artifact, don't download + if 'val_artifact' not in self.__dict__: + self.train_artifact_path, self.train_artifact = self.download_dataset_artifact( + data_dict.get('train'), opt.artifact_alias) + self.val_artifact_path, self.val_artifact = self.download_dataset_artifact( + data_dict.get('val'), opt.artifact_alias) self.result_artifact, self.result_table, self.val_table, self.weights = None, None, None, None if self.train_artifact_path is not None: train_path = Path(self.train_artifact_path) / 'data/images/' @@ -147,18 +167,22 @@ def setup_training(self, opt, data_dict): if self.val_artifact_path is not None: val_path = Path(self.val_artifact_path) / 'data/images/' data_dict['val'] = str(val_path) - self.val_table = self.val_artifact.get("val") + self.val_table = self.val_artifact.get('val') self.map_val_table_path() if self.val_artifact is not None: - self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation") - self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"]) + self.result_artifact = wandb.Artifact( + 'run_' + wandb.run.id + '_progress', 'evaluation') + self.result_table = wandb.Table( + ['epoch', 'id', 'prediction', 'avg_confidence']) if opt.bbox_interval == -1: - self.bbox_interval = opt.bbox_interval = (opt.epochs // 10) if opt.epochs > 10 else 1 + self.bbox_interval = opt.bbox_interval = ( + opt.epochs // 10) if opt.epochs > 10 else 1 return data_dict def download_dataset_artifact(self, path, alias): if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX): - dataset_artifact = wandb.use_artifact(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias) + dataset_artifact = wandb.use_artifact( + remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ':' + alias) assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'" datadir = dataset_artifact.download() return datadir, dataset_artifact @@ -166,7 +190,8 @@ def download_dataset_artifact(self, path, alias): def download_model_artifact(self, opt): if opt.resume.startswith(WANDB_ARTIFACT_PREFIX): - model_artifact = wandb.use_artifact(remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ":latest") + model_artifact = wandb.use_artifact( + remove_prefix(opt.resume, WANDB_ARTIFACT_PREFIX) + ':latest') assert model_artifact is not None, 'Error: W&B model artifact doesn\'t exist' modeldir = model_artifact.download() epochs_trained = model_artifact.metadata.get('epochs_trained') @@ -177,33 +202,47 @@ def download_model_artifact(self, opt): return None, None def log_model(self, path, opt, epoch, fitness_score, best_model=False): - model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model', type='model', metadata={ - 'original_url': str(path), - 
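log_model, whose signature is being rewrapped here, expects a directory containing last.pt and tags the artifact with epoch and fitness metadata; a sketch of a plausible call site (wandb_logger, save_dir, opt, epoch, fi, and best_fitness are assumed training-loop locals):

```python
# Assumed call site inside the epoch loop; all names are loop locals.
if wandb_logger.wandb_run and (epoch + 1) % opt.save_period == 0:
    wandb_logger.log_model(save_dir, opt, epoch, fitness_score=fi,
                           best_model=(fi == best_fitness))
```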
'epochs_trained': epoch + 1, -            'save period': opt.save_period, -            'project': opt.project, -            'total_epochs': opt.epochs, -            'fitness_score': fitness_score -        }) +        model_artifact = wandb.Artifact('run_' + wandb.run.id + '_model', +                                        type='model', +                                        metadata={ +                                            'original_url': str(path), +                                            'epochs_trained': epoch + 1, +                                            'save period': opt.save_period, +                                            'project': opt.project, +                                            'total_epochs': opt.epochs, +                                            'fitness_score': fitness_score +                                        }) model_artifact.add_file(str(path / 'last.pt'), name='last.pt') wandb.log_artifact(model_artifact, -                           aliases=['latest', 'epoch ' + str(self.current_epoch), 'best' if best_model else '']) -        print("Saving model artifact on epoch ", epoch + 1) +                           aliases=[ +                               'latest', 'epoch ' + str(self.current_epoch), +                               'best' if best_model else '' +                           ]) +        print('Saving model artifact on epoch ', epoch + 1) -    def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config=False): +    def log_dataset_artifact(self, +                             data_file, +                             single_cls, +                             project, +                             overwrite_config=False): with open(data_file) as f: data = yaml.load(f, Loader=yaml.SafeLoader)  # data dict -        nc, names = (1, ['item']) if single_cls else (int(data['nc']), data['names']) +        nc, names = (1, ['item']) if single_cls else (int(data['nc']), +                                                      data['names']) names = {k: v for k, v in enumerate(names)}  # to index dictionary -        self.train_artifact = self.create_dataset_table(LoadImagesAndLabels( -            data['train']), names, name='train') if data.get('train') else None -        self.val_artifact = self.create_dataset_table(LoadImagesAndLabels( -            data['val']), names, name='val') if data.get('val') else None +        self.train_artifact = self.create_dataset_table( +            LoadImagesAndLabels(data['train']), names, +            name='train') if data.get('train') else None +        self.val_artifact = self.create_dataset_table( +            LoadImagesAndLabels( +                data['val']), names, name='val') if data.get('val') else None if data.get('train'): -            data['train'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'train') +            data['train'] = WANDB_ARTIFACT_PREFIX + str( +                Path(project) / 'train') if data.get('val'): data['val'] = WANDB_ARTIFACT_PREFIX + str(Path(project) / 'val') -        path = data_file if overwrite_config else '_wandb.'.join(data_file.rsplit('.', 1))  # updated data.yaml path +        path = data_file if overwrite_config else '_wandb.'.join( +            data_file.rsplit('.', 1))  # updated data.yaml path data.pop('download', None) with open(path, 'w') as f: yaml.dump(data, f) @@ -221,14 +260,16 @@ def log_dataset_artifact(self, data_file, single_cls, project, overwrite_config= def map_val_table_path(self): self.val_table_map = {} -        print("Mapping dataset") +        print('Mapping dataset') for i, data in enumerate(tqdm(self.val_table.data)): self.val_table_map[data[3]] = data[0] def create_dataset_table(self, dataset, class_to_id, name='dataset'): -        # TODO: Explore multiprocessing to slpit this loop parallely| This is essential for speeding up the the logging -        artifact = wandb.Artifact(name=name, type="dataset") -        img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None +        # TODO: Explore multiprocessing to split this loop in parallel; +        # this is essential for speeding up the logging +        artifact = wandb.Artifact(name=name, type='dataset') +        img_files = tqdm([dataset.path]) if isinstance( +            dataset.path, str) and Path(dataset.path).is_dir() else None img_files = tqdm(dataset.img_files) if not img_files else img_files for img_file in img_files: if Path(img_file).is_dir(): @@ -236,51 +277,90 @@ def create_dataset_table(self, dataset,
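The path rewriting performed by log_dataset_artifact above can be traced by hand; the file and project names below are illustrative:

```python
# '_wandb.' is spliced in before the extension of the data config.
data_file = 'data/coco.yaml'
print('_wandb.'.join(data_file.rsplit('.', 1)))  # -> data/coco_wandb.yaml
# Inside the new file, train/val point at artifacts instead of folders,
# e.g. 'wandb-artifact://DynamicDet/train' and 'wandb-artifact://DynamicDet/val'.
```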
class_to_id, name='dataset'): labels_path = 'labels'.join(dataset.path.rsplit('images', 1)) artifact.add_dir(labels_path, name='data/labels') else: - artifact.add_file(img_file, name='data/images/' + Path(img_file).name) + artifact.add_file(img_file, + name='data/images/' + Path(img_file).name) label_file = Path(img2label_paths([img_file])[0]) - artifact.add_file(str(label_file), - name='data/labels/' + label_file.name) if label_file.exists() else None - table = wandb.Table(columns=["id", "train_image", "Classes", "name"]) - class_set = wandb.Classes([{'id': id, 'name': name} for id, name in class_to_id.items()]) + artifact.add_file( + str(label_file), name='data/labels/' + + label_file.name) if label_file.exists() else None + table = wandb.Table(columns=['id', 'train_image', 'Classes', 'name']) + class_set = wandb.Classes([{ + 'id': id, + 'name': name + } for id, name in class_to_id.items()]) for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)): height, width = shapes[0] - labels[:, 2:] = (xywh2xyxy(labels[:, 2:].view(-1, 4))) * torch.Tensor([width, height, width, height]) + labels[:, 2:] = (xywh2xyxy(labels[:, 2:].view( + -1, 4))) * torch.Tensor([width, height, width, height]) box_data, img_classes = [], {} for cls, *xyxy in labels[:, 1:].tolist(): cls = int(cls) - box_data.append({"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, - "class_id": cls, - "box_caption": "%s" % (class_to_id[cls]), - "scores": {"acc": 1}, - "domain": "pixel"}) + box_data.append({ + 'position': { + 'minX': xyxy[0], + 'minY': xyxy[1], + 'maxX': xyxy[2], + 'maxY': xyxy[3] + }, + 'class_id': cls, + 'box_caption': '%s' % (class_to_id[cls]), + 'scores': { + 'acc': 1 + }, + 'domain': 'pixel' + }) img_classes[cls] = class_to_id[cls] - boxes = {"ground_truth": {"box_data": box_data, "class_labels": class_to_id}} # inference-space - table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes), json.dumps(img_classes), + boxes = { + 'ground_truth': { + 'box_data': box_data, + 'class_labels': class_to_id + } + } # inference-space + table.add_data(si, + wandb.Image(paths, classes=class_set, boxes=boxes), + json.dumps(img_classes), Path(paths).name) artifact.add(table, name) return artifact def log_training_progress(self, predn, path, names): if self.val_table and self.result_table: - class_set = wandb.Classes([{'id': id, 'name': name} for id, name in names.items()]) + class_set = wandb.Classes([{ + 'id': id, + 'name': name + } for id, name in names.items()]) box_data = [] total_conf = 0 for *xyxy, conf, cls in predn.tolist(): if conf >= 0.25: - box_data.append( - {"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, - "class_id": int(cls), - "box_caption": "%s %.3f" % (names[cls], conf), - "scores": {"class_score": conf}, - "domain": "pixel"}) + box_data.append({ + 'position': { + 'minX': xyxy[0], + 'minY': xyxy[1], + 'maxX': xyxy[2], + 'maxY': xyxy[3] + }, + 'class_id': int(cls), + 'box_caption': f'{names[cls]} {conf:.3f}', + 'scores': { + 'class_score': conf + }, + 'domain': 'pixel' + }) total_conf = total_conf + conf - boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space + boxes = { + 'predictions': { + 'box_data': box_data, + 'class_labels': names + } + } # inference-space id = self.val_table_map[Path(path).name] - self.result_table.add_data(self.current_epoch, - id, - wandb.Image(self.val_table.data[id][1], boxes=boxes, classes=class_set), - total_conf / max(1, len(box_data)) - ) + 
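The nested dicts assembled above follow W&B's bounding-box payload schema; a stand-alone sketch with illustrative values (an active wandb run is required to actually log it):

```python
import numpy as np
import wandb

wandb.init(project='demo')
box = {
    'position': {'minX': 10.0, 'minY': 20.0, 'maxX': 110.0, 'maxY': 220.0},
    'class_id': 0,
    'box_caption': 'person 0.910',
    'scores': {'class_score': 0.91},
    'domain': 'pixel',  # absolute pixel coordinates, as used above
}
image = wandb.Image(np.zeros((256, 256, 3), dtype=np.uint8),
                    boxes={'predictions': {'box_data': [box],
                                           'class_labels': {0: 'person'}}})
wandb.log({'bounded_predictions': image})
```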
self.result_table.add_data( + self.current_epoch, id, + wandb.Image(self.val_table.data[id][1], + boxes=boxes, + classes=class_set), + total_conf / max(1, len(box_data))) def log(self, log_dict): if self.wandb_run: @@ -292,12 +372,19 @@ def end_epoch(self, best_result=False): wandb.log(self.log_dict) self.log_dict = {} if self.result_artifact: - train_results = wandb.JoinedTable(self.val_table, self.result_table, "id") + train_results = wandb.JoinedTable(self.val_table, + self.result_table, 'id') self.result_artifact.add(train_results, 'result') - wandb.log_artifact(self.result_artifact, aliases=['latest', 'epoch ' + str(self.current_epoch), - ('best' if best_result else '')]) - self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"]) - self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation") + wandb.log_artifact(self.result_artifact, + aliases=[ + 'latest', + 'epoch ' + str(self.current_epoch), + ('best' if best_result else '') + ]) + self.result_table = wandb.Table( + ['epoch', 'id', 'prediction', 'avg_confidence']) + self.result_artifact = wandb.Artifact( + 'run_' + wandb.run.id + '_progress', 'evaluation') def finish_run(self): if self.wandb_run: