Skip to content

Commit

Permalink
[#102] Add docs and modify csrc compilation
Browse files Browse the repository at this point in the history
  • Loading branch information
hyunwoongko committed Nov 14, 2022
1 parent cbf0d38 commit d28c28b
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 7 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# OSLO: Open Source for Large-scale Optimization

![](https://github.com/EleutherAI/oslo/blob/main/assets/logo.png?raw=true)
![](https://raw.githubusercontent.com/EleutherAI/oslo/main/assets/logo.png)

## What is OSLO about?

Expand Down Expand Up @@ -35,6 +35,6 @@ If you find our work useful, please consider citing:

### Licensing

The Code of the OSLO project is licensed under the terms of the Apache License 2.0.
The code of the OSLO project is licensed under the terms of the Apache License 2.0.

Copyright 2022 EleutherAI. All Rights Reserved.
62 changes: 61 additions & 1 deletion oslo/torch/_C/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import subprocess
import sys
from pathlib import Path

Expand All @@ -22,6 +23,10 @@
_EXPERT_PARALLEL_KERNEL = None
_NGRAM_REPEAT_BLOCK_KERNEL = None

# ANSI escape sequences used to colorize console output.
YELLOW = "\033[93m"  # switch terminal text color to yellow
END = "\033[0m"  # reset terminal text attributes back to default
# Prefix prepended to warning messages printed by the binder helpers below.
WARNING = f"{YELLOW} [WARNING] {END}"

TORCH_MAJOR = int(torch.__version__.split(".")[0])
TORCH_MINOR = int(torch.__version__.split(".")[1])

Expand Down Expand Up @@ -398,7 +403,8 @@ def installed_rocm_version():
Binder._rocm_version = (int(ROCM_MAJOR), int(ROCM_MINOR))
return Binder._rocm_version

def strip_empty_entries(self, args):
@staticmethod
def strip_empty_entries(args):
"""
Drop any empty strings from the list of compile and link flags
"""
Expand Down Expand Up @@ -434,6 +440,54 @@ def cxx_args(self):
]
return args

def warning(self, msg):
    """Record *msg* as the most recent warning and echo it to stdout.

    The message is printed with the yellow ``[WARNING]`` prefix and stored
    on ``self.error_log`` so callers can inspect it after a failed probe.

    Args:
        msg: Message to report; coerced to ``str`` before storing.
    """
    # str(msg) is equivalent to the original f"{msg}" wrapper, but states
    # the intent (coercion to str) directly.
    self.error_log = str(msg)
    print(f"{WARNING} {msg}")

def command_exists(self, cmd):
    """Return whether *cmd* is available in the current shell.

    ``cmd`` may contain ``|``-separated alternatives (e.g. ``"llvm-ar|ar"``);
    the check succeeds if any one of them exists. The shell builtin ``type``
    is used so that shell builtins and aliases are also detected, which is
    why ``shell=True`` is required here.

    Args:
        cmd: A command name, or several alternatives joined with ``|``.

    Returns:
        bool: True if at least one alternative exists on this system.
    """
    # Split "a|b" into alternatives; a plain name becomes a one-element list.
    cmds = cmd.split("|") if "|" in cmd else [cmd]

    valid = False
    # BUG FIX: the original reused `cmd` as the loop variable, shadowing the
    # parameter. Using a distinct name keeps the single-command error message
    # below correct by construction. subprocess.run also consumes the child's
    # stdout, unlike the original Popen(...).wait(), which could deadlock if
    # `type` ever produced more output than the pipe buffer holds.
    for candidate in cmds:
        result = subprocess.run(
            f"type {candidate}", stdout=subprocess.PIPE, shell=True
        )
        valid = valid or result.returncode == 0

    if not valid and len(cmds) > 1:
        print(
            f"{WARNING} {self.name} requires one of the following commands '{cmds}', but it does not exist!"
        )
    elif not valid and len(cmds) == 1:
        print(
            f"{WARNING} {self.name} requires the '{cmd}' command, but it does not exist!"
        )
    return valid

def _backup_cpuinfo(self):
    """Fallback CPU probe used when py-cpuinfo is unavailable.

    Parses ``lscpu`` output into a dict shaped like the one py-cpuinfo's
    ``get_cpu_info()`` returns (only the keys this module consumes).

    Returns:
        dict: ``{"arch": ..., "flags": ...}`` where ``"arch"`` is
        ``"X86_64"``, ``"PPC_"`` or ``None`` and ``"flags"`` is a
        comma-joined SIMD flag string such as ``"avx512,avx2"``; or
        ``None`` when ``lscpu`` is not installed on this system.
    """
    if not self.command_exists("lscpu"):
        self.warning(
            f"{self.name} attempted to query 'lscpu' after failing to use py-cpuinfo "
            "to detect the CPU architecture. 'lscpu' does not appear to exist on "
            "your system, will fall back to use -march=native and non-vectorized execution."
        )
        return None
    result = subprocess.check_output("lscpu", shell=True)
    # Lowercase so the vendor/flag substring checks below are case-insensitive.
    result = result.decode("utf-8").strip().lower()

    cpu_info = {}
    cpu_info["arch"] = None
    cpu_info["flags"] = ""
    if "genuineintel" in result or "authenticamd" in result:
        cpu_info["arch"] = "X86_64"
        if "avx512" in result:
            cpu_info["flags"] += "avx512,"
        if "avx2" in result:
            cpu_info["flags"] += "avx2"
    elif "ppc64le" in result:
        cpu_info["arch"] = "PPC_"
    # BUG FIX: the original fell off the end and implicitly returned None,
    # so callers on the success path never received the parsed CPU info.
    return cpu_info

def cpu_arch(self):
try:
from cpuinfo import get_cpu_info
Expand Down Expand Up @@ -484,6 +538,12 @@ def simd_width(self):
return "-D__AVX256__"
return "-D__SCALAR__"

def libraries_args(self):
    """Extra libraries to pass to the extension linker.

    Windows builds must link cuBLAS and cuRAND explicitly; on every other
    platform no additional libraries are required.

    Returns:
        list[str]: Library names for the linker (possibly empty).
    """
    on_windows = sys.platform == "win32"
    return ["cublas", "curand"] if on_windows else []


class FusedLayerNormBinder(Binder):
@property
Expand Down
3 changes: 3 additions & 0 deletions oslo/torch/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from oslo.torch.distributed import ParallelContext, ParallelMode

# BUG FIX: `__ALL__` (wrong capitalization) has no meaning to Python and the
# star-import machinery; the export list must be named `__all__` and contain
# name *strings*, not the objects themselves (PEP 8).
__all__ = ["ParallelContext", "ParallelMode"]
2 changes: 1 addition & 1 deletion oslo/torch/distributed/parallel_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def from_torch(
tensor_parallel_depth: Optional[int] = None,
tensor_parallel_mode: Optional[ParallelMode] = ParallelMode.TENSOR_1D,
backend: str = "nccl",
seed: bool = 42,
seed: int = 42,
):
"""
Initialize parallel context from `torch.distributed.launch`.
Expand Down
6 changes: 3 additions & 3 deletions oslo/torch/nn/modules/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ class Conv1D(nn.Module):
Basically works like a linear layer but the weights are transposed.
Args:
nf (`int`): The number of output features.
nx (`int`): The number of input features.
skip_bias_add (`bool`): This was added to enable performance optimization where bias
nf (int): The number of output features.
nx (int): The number of input features.
skip_bias_add (bool): This was added to enable performance optimization where bias
can be fused with other elementwise operations. We skip
adding bias but instead return it.
References:
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
anytree
datasets
ninja
packaging
parameterized >= 0.8.1
py-cpuinfo
pybind11
torch >= 1.11.0

0 comments on commit d28c28b

Please sign in to comment.