Merge pull request #105 from IGNF/upgrade-torch
Update PyTorch and PyTorch Geometric versions
CharlesGaydon authored Feb 6, 2024
2 parents d1a8424 + e5f8a64 commit adadaf7
Showing 26 changed files with 157 additions and 342 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/cicd.yaml
@@ -38,8 +38,8 @@ jobs:
- name: Example inference run via Docker with default config and checkpoint
run: >
docker run
-v /var/data/cicd/CICD_github_assets/myria3d_V3.6.0/inputs/:/inputs/
-v /var/data/cicd/CICD_github_assets/myria3d_V3.6.0/outputs/:/outputs/
-v /var/data/cicd/CICD_github_assets/myria3d_V3.7.0/inputs/:/inputs/
-v /var/data/cicd/CICD_github_assets/myria3d_V3.7.0/outputs/:/outputs/
--ipc=host
--shm-size=2gb
myria3d
@@ -53,14 +53,14 @@ jobs:
- name: Example inference run via Docker with inference-time subtiles overlap to smooth-out results.
run: >
docker run
-v /var/data/cicd/CICD_github_assets/myria3d_V3.6.0/inputs/:/inputs/
-v /var/data/cicd/CICD_github_assets/myria3d_V3.6.0/outputs/:/outputs/
-v /var/data/cicd/CICD_github_assets/myria3d_V3.7.0/inputs/:/inputs/
-v /var/data/cicd/CICD_github_assets/myria3d_V3.7.0/outputs/:/outputs/
--ipc=host
--shm-size=2gb
myria3d
python run.py
--config-path /inputs/
--config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.6.0
--config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.7.0
predict.ckpt_path=/inputs/proto151_V2.0_epoch_100_Myria3DV3.1.0.ckpt
datamodule.epsg=2154
predict.src_las=/inputs/792000_6272000_subset_buildings.las
6 changes: 5 additions & 1 deletion CHANGELOG.md
@@ -1,9 +1,13 @@
# CHANGELOG

## 3.7.0
- Update all versions of PyTorch, PyTorch Lightning, and PyTorch Geometric.
  These changes are backward-compatible with models trained under older versions, provided the configuration file is adjusted.
- Refactor per-class IoU logging to use a single torchmetrics instance instead of num_classes+1 separate instances (see the sketch below).
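
To illustrate the refactor, here is a minimal sketch (not the repository's exact code; the class names are made up) of logging per-class IoU from a single torchmetrics instance by requesting unaveraged scores:

```python
import torch
from torchmetrics.classification import MulticlassJaccardIndex

# Hypothetical class names, for illustration only.
CLASS_NAMES = ["ground", "vegetation", "building"]

# A single metric instance yields the IoU of every class at once.
iou = MulticlassJaccardIndex(num_classes=len(CLASS_NAMES), average=None)

preds = torch.tensor([0, 1, 2, 2, 1])
target = torch.tensor([0, 1, 2, 1, 1])
per_class_iou = iou(preds, target)  # tensor of shape [num_classes]

for name, value in zip(CLASS_NAMES, per_class_iou):
    print(f"iou/{name}: {value.item():.3f}")
```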

## 3.6.1
- Set urllib3<2 for comet logging to function and add back seaborn for plotting optimal LR graph.

## 3.6.0
- Remove the default "EPSG:2154" and use the lidar file's metadata instead, unless a parameter is given.

6 changes: 0 additions & 6 deletions configs/callbacks/default.yaml
@@ -12,12 +12,6 @@ lr_monitor:
logging_interval: "step"
log_momentum: true

# This logs IoU at validation and test time
# Predictions are aggregated and saved at test time in a way coherent with prediction logic.
log_iou_by_class:
_target_: myria3d.callbacks.logging_callbacks.LogIoUByClass
classification_dict: ${dataset_description.classification_dict}

model_checkpoint:
_target_: pytorch_lightning.callbacks.ModelCheckpoint
monitor: "val/loss_epoch" # name of the logged metric which determines when model is improving
1 change: 0 additions & 1 deletion configs/experiment/DebugFineTune.yaml
@@ -18,7 +18,6 @@ trainer:
limit_test_batches: 1
max_epochs: 1
num_sanity_val_steps: 0
# gpus: [1]

callbacks:
finetune:
3 changes: 1 addition & 2 deletions configs/experiment/RandLaNet_base_run_FR-MultiGPU.yaml
@@ -10,5 +10,4 @@ trainer:
strategy: ddp_find_unused_parameters_false
# Replace with cpu to simulate multi-CPU training.
accelerator: gpu
num_processes: 2
gpus: 2
devices: 2
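
For context, a rough sketch (not the repository's launch code) of what this multi-GPU experiment config resolves to as a Lightning 2.x Trainer; an explicit DDPStrategy is used here as one way to express find_unused_parameters=False:

```python
from pytorch_lightning import Trainer
from pytorch_lightning.strategies import DDPStrategy

# Two GPUs with DDP; skipping the search for unused parameters avoids
# unnecessary overhead when the whole model contributes to the loss.
trainer = Trainer(
    accelerator="gpu",
    devices=2,
    strategy=DDPStrategy(find_unused_parameters=False),
)
```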
9 changes: 1 addition & 8 deletions configs/model/default.yaml
@@ -3,6 +3,7 @@ _target_: myria3d.models.model.Model
## Inputs and outputs
d_in: ${dataset_description.d_in} # XYZ (3) + Other features (N)
num_classes: ${dataset_description.num_classes}
classification_dict: ${dataset_description.classification_dict}

# Architecture defined in sub-configs
ckpt_path: null # str, for resuming training and finetuning.
@@ -13,14 +14,6 @@ neural_net_hparams: ???
interpolation_k: ${predict.interpolator.interpolation_k} # interpolation at eval time
num_workers: 4 # for knn_interpolate

## Evaluation metric - partial for triple (train/val/test) init
iou:
_target_: functools.partial
_args_:
- "${get_method:torchmetrics.JaccardIndex}"
- ${model.num_classes}
absent_score: 1.0 # do not penalize if a class is absent from labels.

## Optimization
momentum: 0.9 # arbitrary
monitor: "val/loss_epoch"
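
For context on the `interpolation_k` and `num_workers` entries kept above, here is a minimal sketch (illustrative shapes and random tensors only) of PyTorch Geometric's `knn_interpolate`, which maps per-point predictions from a subsampled cloud back onto the full-resolution cloud:

```python
import torch
from torch_geometric.nn import knn_interpolate

# Illustrative sizes: 128 subsampled points with 6 class logits,
# interpolated onto 1000 full-resolution points via k-nearest neighbors in XYZ.
logits_sub = torch.randn(128, 6)  # predictions on the subsampled cloud
pos_sub = torch.rand(128, 3)      # XYZ of the subsampled points
pos_full = torch.rand(1000, 3)    # XYZ of the full-resolution cloud

logits_full = knn_interpolate(logits_sub, pos_sub, pos_full, k=3, num_workers=4)
print(logits_full.shape)  # torch.Size([1000, 6])
```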
2 changes: 1 addition & 1 deletion configs/predict/default.yaml
@@ -1,7 +1,7 @@
src_las: "/path/to/input.las" # Any glob pattern can be used to predict on multiple files.
output_dir: "/path/to/output_dir/" # Predictions are saved in a new file which shares src_las basename.
ckpt_path: "/path/to/lightning_model.ckpt" # Checkpoint of trained model.
gpus: 0 # 0 for none, 1 for one, [gpu_id] to specify which gpu to use e.g [1]
gpus: 0

# Probas interpolation parameters
# subtile_overlap=25 to use a sliding inference window whose predictions are then merged.
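
As a rough illustration (an assumption for exposition, not the interpolator's actual merging code), overlapping passes over the same points could be combined by averaging their class probabilities:

```python
import torch

def merge_overlapping_probas(probas_per_pass) -> torch.Tensor:
    """Average per-point class probabilities gathered from overlapping inference passes.

    Each tensor has shape [num_points, num_classes], with points in the same order.
    """
    return torch.stack(probas_per_pass).mean(dim=0)

merged = merge_overlapping_probas([torch.rand(10, 6), torch.rand(10, 6)])
print(merged.shape)  # torch.Size([10, 6])
```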
3 changes: 2 additions & 1 deletion configs/task/default.yaml
@@ -1,2 +1,3 @@
# Task at hand. Can be train or predict
task_name: fit # "fit" or "test" or "fit+test", or "predict", or "finetune"
task_name: fit # "fit" or "test" or "fit+test", or "predict", or "finetune"
auto_lr_find: false # override with true to run the LR-range test in train.py.
50 changes: 0 additions & 50 deletions configs/trainer/all_params.yaml

This file was deleted.

12 changes: 4 additions & 8 deletions configs/trainer/default.yaml
@@ -1,14 +1,10 @@
_target_: pytorch_lightning.Trainer

# set `1` to train on GPU, `0` to train on CPU only
gpus: 0

min_epochs: 1
max_epochs: 1300
log_every_n_steps: 1

weights_summary: null
progress_bar_refresh_rate: 1

auto_lr_find: false # override with true to run the LR-range test in train.py.

# set to gpu for gpu training (if devices > 1, set ddp_find_unused_parameters_false: true)
accelerator: cpu
devices: 1
num_nodes: 1
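
For reference, a minimal sketch (assuming the YAML above is instantiated via Hydra) of the equivalent PyTorch Lightning 2.x Trainer, where `gpus`, `weights_summary`, `progress_bar_refresh_rate` and `auto_lr_find` are no longer Trainer arguments:

```python
from pytorch_lightning import Trainer

# CPU, single device by default; switch accelerator to "gpu" and raise
# devices to train on GPUs (with a DDP strategy when devices > 1).
trainer = Trainer(
    min_epochs=1,
    max_epochs=1300,
    log_every_n_steps=1,
    accelerator="cpu",
    devices=1,
    num_nodes=1,
)
```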
7 changes: 3 additions & 4 deletions docs/source/apidoc/default_config.yml
@@ -5,13 +5,11 @@ print_config: true
ignore_warnings: true
trainer:
_target_: pytorch_lightning.Trainer
gpus: 0
accelerator: cpu
devices: 1
min_epochs: 1
max_epochs: 1
log_every_n_steps: 1
weights_summary: null
progress_bar_refresh_rate: 1
auto_lr_find: false
limit_train_batches: 1
limit_val_batches: 1
limit_test_batches: 1
@@ -253,6 +251,7 @@ logger:
disabled: true
task:
task_name: fit
auto_lr_find: false
predict:
src_las: /path/to/input.las
output_dir: /path/to/output_dir/
2 changes: 1 addition & 1 deletion docs/source/guides/train_new_model.md
@@ -36,7 +36,7 @@ After training, your model's best checkpoints and hydra config will be saved in a `
### Optimized learning rate

Pytorch Lightning support au [automated learning rate finder](https://pytorch-lightning.readthedocs.io/en/stable/common/trainer.html#auto-lr-find), by means of an Learning Rate-range test (see section 3.3 in [this paper](https://arxiv.org/pdf/1506.01186.pdf) for reference).
You can perfom this automatically before training by setting `trainer.auto_lr_find=true` when calling training on your dataset. The best learning rate will be logged and results saved as an image, so that you do not need to perform this test more than once.
You can perfom this automatically before training by setting `task.auto_lr_find=true` when calling training on your dataset. The best learning rate will be logged and results saved as an image, so that you do not need to perform this test more than once.
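
For context, a minimal sketch (not the repository's training loop) of how the LR-range test is exposed in Lightning 2.x, where the old `trainer.auto_lr_find` flag was replaced by the `Tuner` helper; a `task.auto_lr_find=true` switch would trigger something along these lines:

```python
from pytorch_lightning import Trainer
from pytorch_lightning.tuner import Tuner

def run_lr_range_test(model, datamodule):
    """model/datamodule: your existing LightningModule and LightningDataModule."""
    trainer = Trainer(accelerator="cpu", devices=1, max_epochs=1)
    lr_finder = Tuner(trainer).lr_find(model, datamodule=datamodule)
    fig = lr_finder.plot(suggest=True)  # LR-vs-loss curve, can be saved as an image
    fig.savefig("lr_range_test.png")
    return lr_finder.suggestion()       # suggested learning rate
```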

### Multi-GPUs

49 changes: 24 additions & 25 deletions environment.yml
@@ -2,35 +2,32 @@
# mamba env create -f environment.yml
name: myria3d
channels:
- conda-forge
- anaconda
- pytorch
- comet_ml
- nvidia
- pyg
- comet_ml
- conda-forge
dependencies:
- python==3.9.*
- python=3.9.*
- pip
# --------- data formats --------- #
- numpy
- h5py
# --------- Deep Learning --------- #
# cudatoolkit to specify the cuda driver in the conda env
- conda-forge::cudatoolkit=11.3.1 # single equal sign there, not a typo
- pytorch::pytorch==1.11.0
- pytorch::torchvision==0.12.0
- conda-forge::pytorch-lightning==1.5.9
- conda-forge::torchmetrics==0.7.*
- comet_ml::comet_ml==3.31.*
- conda-forge::urllib3<2 # To solve for https://github.com/GeneralMills/pytrends/issues/591
- pytorch::pytorch=2.1
- pytorch::pytorch-cuda=11.8
- pytorch::torchvision=0.16
- conda-forge::lightning=2.0
- conda-forge::torchmetrics=0.11
- pyg::pyg=2.4
- pyg::pytorch-cluster
- pyg::pytorch-scatter
- pyg::pytorch-sparse
- pyg::pyg==2.1.0
# Nota: if libcusparse.so.11. errors occur, run
# export LD_LIBRARY_PATH="/home/${USER}/miniconda/envs/lib:$LD_LIBRARY_PATH"
# or
# export LD_LIBRARY_PATH="/home/${USER}/anaconda3/envs/lib:$LD_LIBRARY_PATH"
# see https://github.com/pyg-team/pytorch_geometric/issues/2040#issuecomment-766610625
# Troubleshooting: if libcusparse.so.11. errors occur, run
# export LD_LIBRARY_PATH="/home/${USER}/miniconda/envs/lib:$LD_LIBRARY_PATH"
# or
# export LD_LIBRARY_PATH="/home/${USER}/anaconda3/envs/lib:$LD_LIBRARY_PATH"
# see https://github.com/pyg-team/pytorch_geometric/issues/2040#issuecomment-766610625
# --------- data formats --------- #
- numpy
- h5py
# --------- geo --------- #
- pdal
- python-pdal
@@ -39,6 +36,12 @@ dependencies:
- pandas
- matplotlib
# --------- loggers --------- #
- comet_ml::comet_ml=3.35
- conda-forge::urllib3<2 # To solve for https://github.com/GeneralMills/pytrends/issues/591
# --------- Visualization --------- #
- pandas
- matplotlib
- seaborn # used in some callbacks
# --------- linters --------- #
- pre-commit # hooks for applying linters on commit
- black # code formatting
@@ -52,9 +55,6 @@ dependencies:
- python-dotenv # loading env variables from .env file
- rich # beautiful text formatting in terminal
- sh # for running bash commands in some tests
# - scikit-learn # used in some callbacks
- seaborn # used in some callbacks
# - jupyterlab # better jupyter notebooks
- pudb # debugger
# # --------- Documentation --------- #
- sphinx==4.5.*
@@ -63,7 +63,6 @@ dependencies:
- docutils==0.17
- rstcheck==3.3.* # RST Linter
- pip:
# --------- hydra configs --------- #
- hydra-core==1.1.*
- hydra-colorlog==1.1.*
# --------- Documentation --------- #
6 changes: 3 additions & 3 deletions myria3d/callbacks/comet_callbacks.py
@@ -12,7 +12,7 @@
from typing import Optional

from pytorch_lightning import Callback, Trainer
from pytorch_lightning.loggers import CometLogger, LoggerCollection
from pytorch_lightning.loggers import CometLogger
from pytorch_lightning.utilities import rank_zero_only

from myria3d.utils import utils
@@ -27,7 +27,7 @@ def get_comet_logger(trainer: Trainer) -> Optional[CometLogger]:
if isinstance(trainer.logger, CometLogger):
return trainer.logger

if isinstance(trainer.logger, LoggerCollection):
if isinstance(trainer.logger, list):
for logger in trainer.logger:
if isinstance(logger, CometLogger):
return logger
@@ -65,7 +65,7 @@ class LogLogsPath(Callback):
"""Logs run working directory to comet.ml"""

@rank_zero_only
def on_init_end(self, trainer):
def setup(self, trainer, pl_module, stage):
logger = get_comet_logger(trainer=trainer)
if logger:
log_path = os.getcwd()
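
For context, a sketch of the Lightning 2.x callback pattern used here (not the repository's exact code): `LoggerCollection` is gone and `on_init_end` was removed, so loggers are looked up in the plain `trainer.loggers` list from the `setup` hook:

```python
from pytorch_lightning import Callback, Trainer
from pytorch_lightning.loggers import CometLogger
from pytorch_lightning.utilities import rank_zero_only

class ExampleCometCallback(Callback):
    """Illustrative callback that locates the CometLogger, if one is attached."""

    @rank_zero_only
    def setup(self, trainer: Trainer, pl_module, stage: str) -> None:
        # trainer.loggers is always a plain Python list in Lightning 2.x.
        comet = next((lg for lg in trainer.loggers if isinstance(lg, CometLogger)), None)
        if comet is not None:
            comet.experiment.log_other("working_directory", ".")
```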