From d28c28b6ee93d09c53d6f710cfd86dc57e5967a9 Mon Sep 17 00:00:00 2001 From: hyunwoongko Date: Mon, 14 Nov 2022 22:54:25 +0900 Subject: [PATCH] [#102] Add docs and modify csrc compilation --- README.md | 4 +- oslo/torch/_C/__init__.py | 62 +++++++++++++++++++++- oslo/torch/__init__.py | 3 ++ oslo/torch/distributed/parallel_context.py | 2 +- oslo/torch/nn/modules/conv.py | 6 +-- requirements.txt | 3 ++ 6 files changed, 73 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7bfcc236..5c065815 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # OSLO: Open Source for Large-scale Optimization -![](https://github.com/EleutherAI/oslo/blob/main/assets/logo.png?raw=true) +![](https://raw.githubusercontent.com/EleutherAI/oslo/main/assets/logo.png) ## What is OSLO about? ## Installation @@ -35,6 +35,6 @@ If you find our work useful, please consider citing: ### Licensing -The Code of the OSLO project is licensed under the terms of the Apache License 2.0. +The code of OSLO is licensed under the terms of the Apache License 2.0. Copyright 2022 EleutherAI. All Rights Reserved. 
diff --git a/oslo/torch/_C/__init__.py b/oslo/torch/_C/__init__.py index 1e118eb5..1d13993e 100644 --- a/oslo/torch/_C/__init__.py +++ b/oslo/torch/_C/__init__.py @@ -1,4 +1,5 @@ import os +import subprocess import sys from pathlib import Path @@ -22,6 +23,10 @@ _EXPERT_PARALLEL_KERNEL = None _NGRAM_REPEAT_BLOCK_KERNEL = None +YELLOW = "\033[93m" +END = "\033[0m" +WARNING = f"{YELLOW} [WARNING] {END}" + TORCH_MAJOR = int(torch.__version__.split(".")[0]) TORCH_MINOR = int(torch.__version__.split(".")[1]) @@ -398,7 +403,8 @@ def installed_rocm_version(): Binder._rocm_version = (int(ROCM_MAJOR), int(ROCM_MINOR)) return Binder._rocm_version - def strip_empty_entries(self, args): + @staticmethod + def strip_empty_entries(args): """ Drop any empty strings from the list of compile and link flags """ @@ -434,6 +440,55 @@ def cxx_args(self): ] return args + def warning(self, msg): + self.error_log = f"{msg}" + print(f"{WARNING} {msg}") + + def command_exists(self, cmd): + if "|" in cmd: + cmds = cmd.split("|") + else: + cmds = [cmd] + valid = False + for cmd in cmds: + result = subprocess.Popen(f"type {cmd}", stdout=subprocess.PIPE, shell=True) + valid = valid or result.wait() == 0 + + if not valid and len(cmds) > 1: + print( + f"{WARNING} {self.name} requires one of the following commands '{cmds}', but it does not exist!" + ) + elif not valid and len(cmds) == 1: + print( + f"{WARNING} {self.name} requires the '{cmd}' command, but it does not exist!" + ) + return valid + + def _backup_cpuinfo(self): + # Construct cpu_info dict from lscpu that is similar to what py-cpuinfo provides + if not self.command_exists("lscpu"): + self.warning( + f"{self.name} attempted to query 'lscpu' after failing to use py-cpuinfo " + "to detect the CPU architecture. 'lscpu' does not appear to exist on " + "your system, will fall back to use -march=native and non-vectorized execution." 
+ ) return None result = subprocess.check_output("lscpu", shell=True) result = result.decode("utf-8").strip().lower() cpu_info = {} cpu_info["arch"] = None cpu_info["flags"] = "" if "genuineintel" in result or "authenticamd" in result: cpu_info["arch"] = "X86_64" if "avx512" in result: cpu_info["flags"] += "avx512," if "avx2" in result: cpu_info["flags"] += "avx2" elif "ppc64le" in result: cpu_info["arch"] = "PPC_" + return cpu_info def cpu_arch(self): try: from cpuinfo import get_cpu_info @@ -484,6 +539,12 @@ def simd_width(self): return "-D__AVX256__" return "-D__SCALAR__" + def libraries_args(self): + if sys.platform == "win32": + return ["cublas", "curand"] + else: + return [] + class FusedLayerNormBinder(Binder): @property diff --git a/oslo/torch/__init__.py b/oslo/torch/__init__.py index e69de29b..03839858 100644 --- a/oslo/torch/__init__.py +++ b/oslo/torch/__init__.py @@ -0,0 +1,3 @@ +from oslo.torch.distributed import ParallelContext, ParallelMode + +__all__ = ["ParallelContext", "ParallelMode"] diff --git a/oslo/torch/distributed/parallel_context.py b/oslo/torch/distributed/parallel_context.py index 31738d80..e344a7df 100644 --- a/oslo/torch/distributed/parallel_context.py +++ b/oslo/torch/distributed/parallel_context.py @@ -168,7 +168,7 @@ def from_torch( tensor_parallel_depth: Optional[int] = None, tensor_parallel_mode: Optional[ParallelMode] = ParallelMode.TENSOR_1D, backend: str = "nccl", - seed: bool = 42, + seed: int = 42, ): """ Initialize parallel context from `torch.distributed.launch`. diff --git a/oslo/torch/nn/modules/conv.py b/oslo/torch/nn/modules/conv.py index a36929c5..61a79282 100644 --- a/oslo/torch/nn/modules/conv.py +++ b/oslo/torch/nn/modules/conv.py @@ -10,9 +10,9 @@ class Conv1D(nn.Module): Basically works like a linear layer but the weights are transposed. Args: - nf (`int`): The number of output features. - nx (`int`): The number of input features. 
- skip_bias_add (`bool`): This was added to enable performance optimization where bias + nf (int): The number of output features. + nx (int): The number of input features. + skip_bias_add (bool): This was added to enable performance optimization where bias can be fused with other elementwise operations. We skip adding bias but instead return it. References: diff --git a/requirements.txt b/requirements.txt index bb85311b..bd1120ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,8 @@ anytree datasets +ninja packaging parameterized >= 0.8.1 +py-cpuinfo +pybind11 torch >= 1.11.0