From d28c28b6ee93d09c53d6f710cfd86dc57e5967a9 Mon Sep 17 00:00:00 2001 From: hyunwoongko Date: Mon, 14 Nov 2022 22:54:25 +0900 Subject: [PATCH] [#102] Add docs and modify csrc compilation --- README.md | 4 +- oslo/torch/_C/__init__.py | 62 +++++++++++++++++++++- oslo/torch/__init__.py | 3 ++ oslo/torch/distributed/parallel_context.py | 2 +- oslo/torch/nn/modules/conv.py | 6 +-- requirements.txt | 3 ++ 6 files changed, 73 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 7bfcc236..5c065815 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # OSLO: Open Source for Large-scale Optimization -![](https://github.com/EleutherAI/oslo/blob/main/assets/logo.png?raw=true) +![](https://raw.githubusercontent.com/EleutherAI/oslo/main/assets/logo.png) ## What is OSLO about? ## Installation @@ -35,6 +35,6 @@ If you find our work useful, please consider citing: ### Licensing -The Code of the OSLO project is licensed under the terms of the Apache License 2.0. +The code of OSLO is licensed under the terms of the Apache License 2.0. Copyright 2022 EleutherAI. All Rights Reserved. 
diff --git a/oslo/torch/_C/__init__.py b/oslo/torch/_C/__init__.py index 1e118eb5..1d13993e 100644 --- a/oslo/torch/_C/__init__.py +++ b/oslo/torch/_C/__init__.py @@ -1,4 +1,5 @@ import os +import subprocess import sys from pathlib import Path @@ -22,6 +23,10 @@ _EXPERT_PARALLEL_KERNEL = None _NGRAM_REPEAT_BLOCK_KERNEL = None +YELLOW = "\033[93m" +END = "\033[0m" +WARNING = f"{YELLOW} [WARNING] {END}" + TORCH_MAJOR = int(torch.__version__.split(".")[0]) TORCH_MINOR = int(torch.__version__.split(".")[1]) @@ -398,7 +403,8 @@ def installed_rocm_version(): Binder._rocm_version = (int(ROCM_MAJOR), int(ROCM_MINOR)) return Binder._rocm_version - def strip_empty_entries(self, args): + @staticmethod + def strip_empty_entries(args): """ Drop any empty strings from the list of compile and link flags """ @@ -434,6 +440,55 @@ def cxx_args(self): ] return args + def warning(self, msg): + self.error_log = f"{msg}" + print(f"{WARNING} {msg}") + + def command_exists(self, cmd): + if "|" in cmd: + cmds = cmd.split("|") + else: + cmds = [cmd] + valid = False + for cmd in cmds: + result = subprocess.Popen(f"type {cmd}", stdout=subprocess.PIPE, shell=True) + valid = valid or result.wait() == 0 + + if not valid and len(cmds) > 1: + print( + f"{WARNING} {self.name} requires one of the following commands '{cmds}', but it does not exist!" + ) + elif not valid and len(cmds) == 1: + print( + f"{WARNING} {self.name} requires the '{cmd}' command, but it does not exist!" + ) + return valid + + def _backup_cpuinfo(self): + # Construct cpu_info dict from lscpu that is similar to what py-cpuinfo provides + if not self.command_exists("lscpu"): + self.warning( + f"{self.name} attempted to query 'lscpu' after failing to use py-cpuinfo " + "to detect the CPU architecture. 'lscpu' does not appear to exist on " + "your system, will fall back to use -march=native and non-vectorized execution." 
+ ) return None result = subprocess.check_output("lscpu", shell=True) result = result.decode("utf-8").strip().lower() cpu_info = {} cpu_info["arch"] = None cpu_info["flags"] = "" if "genuineintel" in result or "authenticamd" in result: cpu_info["arch"] = "X86_64" if "avx512" in result: cpu_info["flags"] += "avx512," if "avx2" in result: cpu_info["flags"] += "avx2" elif "ppc64le" in result: cpu_info["arch"] = "PPC_" + return cpu_info def cpu_arch(self): try: from cpuinfo import get_cpu_info @@ -484,6 +539,12 @@ def simd_width(self): return "-D__AVX256__" return "-D__SCALAR__" + def libraries_args(self): + if sys.platform == "win32": + return ["cublas", "curand"] + else: + return [] + class FusedLayerNormBinder(Binder): @property diff --git a/oslo/torch/__init__.py b/oslo/torch/__init__.py index e69de29b..03839858 100644 --- a/oslo/torch/__init__.py +++ b/oslo/torch/__init__.py @@ -0,0 +1,3 @@ +from oslo.torch.distributed import ParallelContext, ParallelMode + +__all__ = ["ParallelContext", "ParallelMode"] diff --git a/oslo/torch/distributed/parallel_context.py b/oslo/torch/distributed/parallel_context.py index 31738d80..e344a7df 100644 --- a/oslo/torch/distributed/parallel_context.py +++ b/oslo/torch/distributed/parallel_context.py @@ -168,7 +168,7 @@ def from_torch( tensor_parallel_depth: Optional[int] = None, tensor_parallel_mode: Optional[ParallelMode] = ParallelMode.TENSOR_1D, backend: str = "nccl", - seed: bool = 42, + seed: int = 42, ): """ Initialize parallel context from `torch.distributed.launch`. diff --git a/oslo/torch/nn/modules/conv.py b/oslo/torch/nn/modules/conv.py index a36929c5..61a79282 100644 --- a/oslo/torch/nn/modules/conv.py +++ b/oslo/torch/nn/modules/conv.py @@ -10,9 +10,9 @@ class Conv1D(nn.Module): Basically works like a linear layer but the weights are transposed. Args: - nf (`int`): The number of output features. - nx (`int`): The number of input features. 
- skip_bias_add (`bool`): This was added to enable performance optimization where bias + nf (int): The number of output features. + nx (int): The number of input features. + skip_bias_add (bool): This was added to enable performance optimization where bias can be fused with other elementwise operations. We skip adding bias but instead return it. References: diff --git a/requirements.txt b/requirements.txt index bb85311b..bd1120ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,8 @@ anytree datasets +ninja packaging parameterized >= 0.8.1 +py-cpuinfo +pybind11 torch >= 1.11.0