#!/usr/bin/env python
# SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <[email protected]>
# SPDX-License-Identifier: BSD-3-Clause
# SPDX-FileType: SOURCE
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the license found in the LICENSE.txt file in the root
# directory of this source tree.
# =======
# Imports
# =======
from __future__ import print_function
import os
from os.path import join
import sys
import json
import platform
from glob import glob
import subprocess
import codecs
import tempfile
import shutil
import textwrap
import multiprocessing
import re
import errno
# ===============
# install package
# ===============
def install_package(package):
"""
Installs packages using pip.
Example:
.. code-block:: python
>>> install_package('numpy>1.11')
:param package: Name of package with or without its version pin.
:type package: string
"""
subprocess.check_call([sys.executable, "-m", "pip", "install",
"--prefer-binary", package])
# =====================
# Import Setup Packages
# =====================
# Install setuptools package
try:
import setuptools # noqa F401
except ImportError:
# Install setuptools
install_package('setuptools')
import setuptools # noqa F401
from setuptools import Command
from setuptools.extension import Extension
from setuptools.errors import CompileError, LinkError, ExecError
from setuptools.command.build_ext import build_ext
# Check scipy is installed (needed for build, but not required to be imported)
try:
import scipy # noqa F401
except ImportError:
# Install scipy
install_package('scipy')
# Import Cython (to convert pyx to C code)
try:
from Cython.Build import cythonize
except ImportError:
# Install Cython
install_package('cython>=0.29,<3.0')
from Cython.Build import cythonize
# =========================
# get environment variables
# =========================
"""
* To build cython files in source, set ``CYTHON_BUILD_IN_SOURCE`` to ``1``.
* To build for documentation, set ``CYTHON_BUILD_FOR_DOC`` to ``1``.
* To compile with cuda, set the ``USE_CUDA`` environment variable to ``1``.
* To load cuda library at runtime dynamically, set ``CUDA_DYNAMIC_LOADING`` to
``1``. When this package wheel is created in *manylinux* environment, the
cuda's dynamic library files (``libcudart.so``, ``libcublas.so``,
``libcusparse.so``) will not be bundled with the package, hence the size of
the wheel remains low. Without the cuda dynamic loading option, the size of
the wheel package increases to 450MB and cannot be uploaded to PyPI. When
cuda dynamic loading is enabled, the end user should install the cuda
toolkit, since the cuda library files are no longer included in the package;
rather, they are loaded dynamically at runtime. The downside of dynamic
loading is that the user must install the same CUDA major version that the
package was compiled with.
* To compile for debugging, set the ``DEBUG_MODE`` environment variable to
``1``. This will increase the executable size.
::
# In Unix
export CYTHON_BUILD_IN_SOURCE=1
export CYTHON_BUILD_FOR_DOC=1
export USE_CBLAS=0
export USE_CUDA=1
export DEBUG_MODE=1
export CUDA_DYNAMIC_LOADING=1
# In Windows
$env:CYTHON_BUILD_IN_SOURCE = "1"
$env:CYTHON_BUILD_FOR_DOC = "1"
$env:USE_CBLAS = "0"
$env:USE_CUDA = "1"
$env:DEBUG_MODE = "1"
$env:CUDA_DYNAMIC_LOADING = "1"
python setup.py install
If you are using ``sudo``, to pass the environment variable, use ``-E`` option:
::
sudo -E python setup.py install
"""
# If USE_CUDA is set to "1", the package is compiled with cuda lib using nvcc.
use_cuda = False
if 'USE_CUDA' in os.environ and os.environ.get('USE_CUDA') == '1':
use_cuda = True
# If CUDA_DYNAMIC_LOADING is set to "1", the cuda library is loaded dynamically
cuda_dynamic_loading = False
if 'CUDA_DYNAMIC_LOADING' in os.environ and \
os.environ.get('CUDA_DYNAMIC_LOADING') == '1':
cuda_dynamic_loading = True
# If DEBUG_MODE is set to "1", the package is compiled with debug mode.
debug_mode = False
if 'DEBUG_MODE' in os.environ and os.environ.get('DEBUG_MODE') == '1':
debug_mode = True
# If environment var "CYTHON_BUILD_IN_SOURCE" exists, cython builds *.c files
# in the source code, otherwise in "/build" directory
cython_build_in_source = False
if 'CYTHON_BUILD_IN_SOURCE' in os.environ and \
os.environ.get('CYTHON_BUILD_IN_SOURCE') == '1':
cython_build_in_source = True
# If this package is built for the documentation, define the environment
# variable "CYTHON_BUILD_FOR_DOC". By doing so, two things happen:
# 1. The cython source will be generated in source (not in build directory)
# 2. The "linetrace" directive is added to the cython compiler directives.
cython_build_for_doc = False
if 'CYTHON_BUILD_FOR_DOC' in os.environ and \
os.environ.get('CYTHON_BUILD_FOR_DOC') == '1':
cython_build_for_doc = True
# If USE_CBLAS is defined and set to 1, the OpenBLAS library is used for dense
# vector and matrix operations. In this case, openblas-dev should be installed.
use_cblas = False
if 'USE_CBLAS' in os.environ and os.environ.get('USE_CBLAS') == '1':
use_cblas = True
# If USE_LONG_INT is set to 1, 64-bit integers are used for LongIndexType.
# Otherwise, 32-bit integers are used.
use_long_int = False
if 'USE_LONG_INT' in os.environ and os.environ.get('USE_LONG_INT') == '1':
use_long_int = True
# If USE_UNSIGNED_LONG_INT is set to 1, unsigned integers are used for the type
# LongIndexType, which doubles the maximum limit of integers. Otherwise, signed
# integers are used.
use_unsigned_long_int = False
if 'USE_UNSIGNED_LONG_INT' in os.environ and \
os.environ.get('USE_UNSIGNED_LONG_INT') == '1':
use_unsigned_long_int = True
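# Note (illustrative sketch, not part of the original logic): each boolean
# flag above follows the same "read an environment variable and compare
# against '1'" pattern. The repeated blocks could be expressed with a small
# hypothetical helper such as:
#
#     def _env_flag(name):
#         """Return True if the environment variable `name` is set to '1'."""
#         return os.environ.get(name, '0') == '1'
#
#     use_cuda = _env_flag('USE_CUDA')
#     debug_mode = _env_flag('DEBUG_MODE')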
# ================
# clean extensions
# ================
def clean_extensions(extensions):
"""
If the package is built for documentation (cython_build_for_doc=True), then
the extensions are built using the --inplace option, which means all cython
generated files (*.c, *.cpp, *.so) will be generated inside the source
code.
To get rid of these files, just run
python setup.py clean
This command (which is implemented by this function) searches for all
*.pyx files in the extensions, then checks if a filename similar to the
*.pyx file but with a *.c, *.cpp, or *.so file extension exists. If yes,
it then checks whether the first line of the *.c or *.cpp file is
"/* Generated by Cython ...". If so, it deletes that file.
"""
# ======================
# check cython generated
# ======================
def check_cython_generated(filename):
"""
Reads the first line of file and checks if it starts with the line
"/* Generated by Cython". If yes, it returns `True`. Otherwise, it
returns `False`.
"""
with open(filename) as file:
lines = file.read()
first = lines.split('\n', 1)[0]
if (first.startswith('/* Generated by Cython')) or \
('failed Cython compilation' in first):
return True
else:
return False
# ---------
# Iterate over all extensions of the package
for extension in extensions:
# Each extension has multiple source files (*.pyx, *.py, *.cpp, etc)
for source in extension.sources:
# Some of the source files are not specific filenames; rather, they are
# wildcards that may match a series of files. Here we glob them to find
# all matches.
files = glob(source)
# Iterate through each matched file in a wildcard source
for file in files:
# Get path, base, and extension of the file
base_ext = os.path.basename(file)
base, _ = os.path.splitext(base_ext)
path_base, ext = os.path.splitext(file)
path = os.path.dirname(path_base)
# Status of finding files to be deleted
found_h = False
found_c = False
found_cpp = False
found_lib = False
if ext == '.pyx':
# Search for generated C header (*.h) file corresponding to *.pyx file
h_file = path_base + '.h'
if os.path.exists(h_file):
if check_cython_generated(h_file):
os.remove(h_file)
found_h = True
# Search for generated C file corresponding to *.pyx file
c_file = path_base + '.c'
if os.path.exists(c_file):
if check_cython_generated(c_file):
os.remove(c_file)
found_c = True
# Search for generated Cpp file corresponding to *.pyx file
cpp_file = path_base + '.cpp'
if os.path.exists(cpp_file):
if check_cython_generated(cpp_file):
os.remove(cpp_file)
found_cpp = True
# Search for generated *.so file corresponding to *.pyx
lib_files = \
glob(os.path.join(path, '*.so')) + \
glob(os.path.join(path, '*.dll')) + \
glob(os.path.join(path, '*.dylib'))
for lib_file in lib_files:
lib_file_base = os.path.basename(lib_file)
if lib_file_base.startswith(base):
os.remove(lib_file)
found_lib = True
# Print removed files
if found_h or found_c or found_cpp or found_lib:
print('Detects: %s' % file)
if found_h:
print('Removes: %s' % h_file)
if found_c:
print('Removes: %s' % c_file)
if found_cpp:
print('Removes: %s' % cpp_file)
if found_lib:
print('Removes: %s' % lib_file)
print('')
# ============
# find in path
# ============
def _find_in_path(executable_name, path):
"""
Searches for the executable ``executable_name`` in each of the directories
in the given path, and returns the full path of the executable once its
first occurrence is found. If no executable is found, ``None`` is returned.
This is used to find CUDA's directories.
"""
for dir in path.split(os.pathsep):
executable_path = join(dir, executable_name)
if os.path.exists(executable_path):
return os.path.abspath(executable_path)
return None
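# Example usage (hypothetical paths, for illustration only):
#
#     >>> _find_in_path('nvcc', os.environ['PATH'])
#     '/usr/local/cuda/bin/nvcc'        # when nvcc is on the PATH
#     >>> _find_in_path('nvcc', '/usr/bin:/usr/local/bin')
#     None                              # returned when nvcc is not found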
# ===========
# locate cuda
# ===========
def locate_cuda():
"""
Finds the executable ``nvcc`` (or ``nvcc.exe`` on Windows). If found, it
creates a dictionary of cuda's executable path, include and lib directories,
and home directory. This is used for GPU support.
"""
if not use_cuda:
raise EnvironmentError('This function should not be called when '
'"USE_CUDA" is not set to "1".')
# List of environment variables to search for cuda
environs = ['CUDA_HOME', 'CUDA_ROOT', 'CUDA_PATH']
cuda_found = False
# nvcc binary
nvcc_binary_name = 'nvcc'
if sys.platform == 'win32':
nvcc_binary_name = nvcc_binary_name + '.exe'
# Search in each of the possible environment variables, if they exist
for env in environs:
if env in os.environ:
# Home
home = os.environ[env]
if not os.path.exists(home):
continue
# nvcc binary
nvcc = join(home, 'bin', nvcc_binary_name)
if not os.path.exists(nvcc):
continue
else:
cuda_found = True
break
# Brute-force search in all PATH directories to find the nvcc binary
if not cuda_found:
nvcc = _find_in_path(nvcc_binary_name, os.environ['PATH'])
if nvcc is None:
raise EnvironmentError('The "nvcc" binary could not be located '
'in $PATH. Either add it to the PATH or '
'set either of $CUDA_HOME, ' +
'$CUDA_ROOT, or $CUDA_PATH.')
home = os.path.dirname(os.path.dirname(nvcc))
# Include directory
include = join(home, 'include')
if not os.path.exists(include):
raise EnvironmentError("The CUDA's include directory could not be " +
"located in %s." % include)
# Library directory
lib = join(home, 'lib')
if not os.path.exists(lib):
lib64 = join(home, 'lib64')
if not os.path.exists(lib64):
raise EnvironmentError("The CUDA's lib directory could not be " +
"located in %s or %s." % (lib, lib64))
lib = lib64
# For windows, add "x64" or "x86" to the end of lib path
if sys.platform == "win32":
# Detect architecture is 64bit or 32bit
if platform.machine().endswith('64'):
lib = join(lib, 'x64')
else:
lib = join(lib, 'x86')
if not os.path.exists(lib):
raise EnvironmentError("The CUDA's lib sub-directory could not " +
"be located in %s." % lib)
# Get a dictionary of cuda version with keys 'major', 'minor', and 'patch'.
version = get_cuda_version(home)
# Output dictionary of set of paths
cuda = {
'home': home,
'nvcc': nvcc,
'include': include,
'lib': lib,
'version': version
}
return cuda
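# For illustration (hypothetical paths): on a typical Linux machine with CUDA
# 11.3 installed under /usr/local/cuda, the returned dictionary would look
# like:
#
#     {
#         'home': '/usr/local/cuda',
#         'nvcc': '/usr/local/cuda/bin/nvcc',
#         'include': '/usr/local/cuda/include',
#         'lib': '/usr/local/cuda/lib64',
#         'version': {'major': 11, 'minor': 3, 'patch': 1}
#     }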
# ================
# get cuda version
# ================
def get_cuda_version(cuda_home):
"""
Gets the version of CUDA library.
:param cuda_home: The CUDA home path.
:type cuda_home: str
:return: A dictionary with version info containing the keys 'major',
'minor', and 'patch'.
:rtype: dict
"""
version_txt_file = join(cuda_home, 'version.txt')
version_json_file = join(cuda_home, 'version.json')
if os.path.isfile(version_txt_file):
# txt version file is used in CUDA 10 and earlier.
with open(version_txt_file, 'r') as file:
# Version_string is like "11.3.1"
version_string = file.read()
elif os.path.isfile(version_json_file):
# json version file is used in CUDA 11 and newer
with open(version_json_file, 'r') as file:
info = json.load(file)
# Version_string is like "11.3.1"
version_string = info['cuda']['version']
else:
# Find cuda version directly by grep-ing include/cuda.h file
cuda_filename = join(cuda_home, 'include', 'cuda.h')
# Regex pattern finds a match like "#define CUDA_VERSION 11030"
regex_pattern = r'^#define CUDA_VERSION.\d+$'
match = ''
with open(cuda_filename, 'r') as file:
for line in file:
if re.match(regex_pattern, line):
match = line
break
if match != '':
# version_string is like "11030"
version_string = match.split()[-1]
# Place a dot to separate major and minor version to parse them
# later. Here, version_string becomes something like "11.03.0"
version_string = version_string[:-3] + "." + version_string[-3:]
version_string = version_string[:-1] + "." + version_string[-1:]
else:
error_message_1 = 'Cannot find CUDA "version.txt" or ' + \
'"version.json" file in %s. ' % cuda_home
error_message_2 = 'Cannot find "CUDA_VERSION" in header file %s.' \
% cuda_filename
raise FileNotFoundError(error_message_1 + error_message_2)
# Convert string to a list of int
version_string_list = version_string.split(' ')[-1].split('.')
version_int = [int(v) for v in version_string_list]
# Output dictionary
version = {
'major': None,
'minor': None,
'patch': None
}
# Fill output dictionary
if len(version_int) == 0:
raise ValueError('Cannot detect CUDA major version.')
else:
version['major'] = version_int[0]
if len(version_int) > 1:
version['minor'] = version_int[1]
if len(version_int) > 2:
version['patch'] = version_int[2]
return version
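# Worked example (illustration): if neither version file exists and cuda.h
# contains "#define CUDA_VERSION 11030", the dots are inserted as
# "11030" -> "11.030" -> "11.03.0", which is then parsed into
#
#     {'major': 11, 'minor': 3, 'patch': 0}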
# ================================
# customize unix compiler for nvcc
# ================================
def customize_unix_compiler_for_nvcc(self, cuda):
"""
Sets compiler to treat 'cpp' and 'cu' file extensions differently. Namely:
1. A 'cpp' file is treated as usual with the default compiler and the same
compiler and linker flags as before.
2. For a 'cu' file, the compiler is switched to 'nvcc' with compiler
flags that suit the GPU machine.
This function should only be called for the 'unix' compiler (``gcc``,
``clang``, or similar). For the Windows ``msvc`` compiler, this function
does not apply.
.. note::
This function should be called when ``USE_CUDA`` is enabled.
"""
self.src_extensions.append('.cu')
# Backup default compiler to call them later
default_compiler_so = self.compiler_so
super = self._compile
# =======
# compile
# =======
def _compile(obj, src, ext, cc_args, extra_compile_args, pp_opts):
"""
Defines the ``_compile`` method to be called in place of the original
``self._compile`` method. This function modifies the dispatch of the
compiler depending on the source file extension ('cu', or non-'cu' file),
then calls the original (backed-up) compile function.
Note: ``extra_compile_args`` is a dictionary with two keys, ``"nvcc"``
and ``"not_nvcc"``. Respectively, the values of each are lists of extra
compile args for nvcc (to compile .cu files) and compile args for all
other files. This dictionary was created in the extra_compile_args when
each extension was created (see later in this script).
"""
if os.path.splitext(src)[1] == '.cu':
# Use nvcc for *.cu files.
self.set_executable('compiler_so', cuda['nvcc'])
# Use only the part of the extra_compile_args dictionary with key "nvcc"
_extra_compile_args = extra_compile_args['nvcc']
else:
# For any other file extension, use the default compiler. Also, for
# the extra compile args, use the args in the "not_nvcc" key
_extra_compile_args = extra_compile_args['not_nvcc']
# Pass back to the default compiler
super(obj, src, ext, cc_args, _extra_compile_args, pp_opts)
# Return back the previous default compiler to self.compiler_so
self.compiler_so = default_compiler_so
self._compile = _compile
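# Illustration (an assumption about how the extensions are declared later in
# this script): the patched ``_compile`` above expects ``extra_compile_args``
# to be a dictionary rather than a plain list, e.g.:
#
#     extra_compile_args = {
#         'not_nvcc': ['-O3', '-fopenmp'],               # host compiler flags
#         'nvcc': ['-O3', '--ptxas-options=-v', '-c'],   # nvcc flags for .cu
#     }
#
# so that .cu sources are compiled by nvcc with the 'nvcc' flags, and every
# other source keeps the default compiler with the 'not_nvcc' flags.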
# ===================================
# customize windows compiler for nvcc
# ===================================
def customize_windows_compiler_for_nvcc(self, cuda):
"""
Sets compiler to treat 'cpp' and 'cu' file extensions differently. Namely:
1. A 'cpp' file is treated as usual with the default compiler and the same
compiler and linker flags as before.
2. For a 'cu' file, the compiler is switched to 'nvcc' with compiler
flags that suit the GPU machine.
This function should only be called for the 'msvc' compiler.
.. note::
This function should be called when ``USE_CUDA`` is enabled.
"""
self.src_extensions.append('.cu')
# =======
# compile
# =======
def compile(sources, output_dir=None, macros=None, include_dirs=None,
debug=0, extra_preargs=None, extra_postargs=None,
depends=None):
"""
This method is copied from ``cpython/Lib/distutils/msvccompiler.py``.
See: github.com/python/cpython/blob/main/Lib/distutils/msvccompiler.py
This compile method is modified below to allow the ``.cu`` files to be
compiled with the cuda's nvcc compiler.
"""
# We altered extra_compile_args (or here, extra_postargs) to be a dict
# of two keys: 'nvcc' and 'not_nvcc'. Here we extract them.
extra_postargs_nvcc = extra_postargs['nvcc']
extra_postargs = extra_postargs['not_nvcc'] # keeping the same name
if not self.initialized:
self.initialize()
compile_info = self._setup_compile(output_dir, macros, include_dirs,
sources, depends, extra_postargs)
macros, objects, extra_postargs, pp_opts, build = compile_info
compile_opts = extra_preargs or []
compile_opts.append('/c')
if debug:
compile_opts.extend(self.compile_options_debug)
else:
compile_opts.extend(self.compile_options)
for obj in objects:
try:
src, ext = build[obj]
except KeyError:
continue
if debug:
# pass the full pathname to MSVC in debug mode,
# this allows the debugger to find the source file
# without asking the user to browse for it
src = os.path.abspath(src)
if ext in self._c_extensions:
input_opt = "/Tc" + src
elif ext in self._cpp_extensions:
input_opt = "/Tp" + src
elif ext in self._rc_extensions:
# compile .RC to .RES file
input_opt = src
output_opt = "/fo" + obj
try:
self.spawn([self.rc] + pp_opts +
[output_opt] + [input_opt])
except ExecError as msg:
raise CompileError(msg)
continue
elif ext in self._mc_extensions:
# Compile .MC to .RC file to .RES file.
# * '-h dir' specifies the directory for the
# generated include file
# * '-r dir' specifies the target directory of the
# generated RC file and the binary message resource
# it includes
#
# For now (since there are no options to change this),
# we use the source-directory for the include file and
# the build directory for the RC file and message
# resources. This works at least for win32all.
h_dir = os.path.dirname(src)
rc_dir = os.path.dirname(obj)
try:
# first compile .MC to .RC and .H file
self.spawn([self.mc] +
['-h', h_dir, '-r', rc_dir] + [src])
base, _ = os.path.splitext(os.path.basename(src))
rc_file = os.path.join(rc_dir, base + '.rc')
# then compile .RC to .RES file
self.spawn([self.rc] +
["/fo" + obj] + [rc_file])
except ExecError as msg:
raise CompileError(msg)
continue
elif ext in ['.cu']:
# Adding this elif condition to avoid the else statement below
pass
else:
# how to handle this file?
raise CompileError("Don't know how to compile %s to %s"
% (src, obj))
try:
if ext == '.cu':
# Compile with nvcc
input_opt = ['-c', src]
output_opt = ['-o', obj]
# Note: compile_opts is not passed to nvcc below. All necessary
# options for the nvcc compiler are in extra_postargs_nvcc
self.spawn([cuda['nvcc']] + pp_opts +
input_opt + output_opt +
extra_postargs_nvcc)
else:
# Compile with msvc
output_opt = "/Fo" + obj
self.spawn([self.cc] + compile_opts + pp_opts +
[input_opt, output_opt] +
extra_postargs)
except ExecError as msg:
raise CompileError(msg)
return objects
# Replace the previous compile function of distutils.ccompiler with the
# above modified function. Here, the object ``self`` is ``MSVCCompiler``,
# which is a class derived from ``CCompiler`` in the ``distutils`` package
# of cpython.
self.compile = compile
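# Sketch (assumption; the actual wiring appears further down in this script):
# the two customize_*_compiler_for_nvcc functions are meant to patch the
# compiler object inside the build_ext command, along the lines of:
#
#     if self.compiler.compiler_type == 'msvc':
#         customize_windows_compiler_for_nvcc(self.compiler, cuda)
#     else:
#         customize_unix_compiler_for_nvcc(self.compiler, cuda)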
# =======================
# check compiler has flag
# =======================
def check_compiler_has_flag(compiler, compile_flags, link_flags):
"""
Checks if the C compiler has a given flag. The motivation for this function
is that:
* In Linux, the gcc compiler has ``-fopenmp`` flag, which enables compiling
with OpenMP.
* In macOS, the clang compiler does not recognize ``-fopenmp`` flag,
rather, this flag should be passed through the preprocessor using
``-Xpreprocessor -fopenmp``.
Thus, we should know in advance which compiler is employed to provide the
correct flags. The problem is that in the setup.py script, we cannot
determine if the compiler is gcc or clang. The closest we can get is to call
.. code-block:: python
>>> import distutils.ccompiler
>>> print(distutils.ccompiler.get_default_compiler())
In both Linux and macOS, the above line returns ``unix``, and in Windows it
returns ``msvc`` for Microsoft Visual C++. In the case of Linux and macOS,
we cannot figure out which compiler is being used, as both outputs are the
same. The safest solution so far is this function, which compiles a small C
code with the given flags and checks whether it compiles successfully. In
the case of ``unix``, if it compiles with ``-fopenmp``, it is gcc on Linux;
otherwise, it is likely the ``clang`` compiler on macOS.
:param compiler: The compiler object from build_ext.compiler
:type compiler: build_ext.compiler
:param compile_flags: A list of compile flags, such as
``['-Xpreprocessor','-fopenmp']``
:type compile_flags: list(string)
:param link_flags: A list of linker flags, such as
``['-Xpreprocessor','-fopenmp']``
:type link_flags: list(string)
"""
if "PYODIDE_PACKAGE_ABI" in os.environ:
# pyodide doesn't support OpenMP
return False
compile_success = True
current_working_dir = os.getcwd()
temp_dir = tempfile.mkdtemp()
filename = 'test.c'
code = "#include <omp.h>\nint main(int argc, char** argv) { return(0); }"
# Considerations for Microsoft visual C++ compiler
if compiler.compiler_type == "msvc":
link_flags = link_flags + ['/DLL']
# Write a code in temp directory
os.chdir(temp_dir)
with open(filename, 'wt') as file_obj:
file_obj.write(code)
try:
# Try to compile
objects = compiler.compile([filename], extra_postargs=compile_flags)
try:
# Try to link
compiler.link_shared_lib(
objects,
"testlib",
extra_postargs=link_flags)
except (LinkError, TypeError):
# Linker was not successful
compile_success = False
except CompileError:
# Compile was not successful
compile_success = False
os.chdir(current_working_dir)
shutil.rmtree(temp_dir)
return compile_success
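# Example usage (illustration): probing for OpenMP support before adding the
# flags to the extensions, as done in CustomBuildExtension below:
#
#     >>> check_compiler_has_flag(self.compiler, ['-fopenmp'], ['-fopenmp'])
#     True      # typically gcc on Linux
#     >>> check_compiler_has_flag(self.compiler,
#     ...                         ['-Xpreprocessor', '-fopenmp'],
#     ...                         ['-Xpreprocessor', '-fopenmp', '-lomp'])
#     True      # typically clang on macOS with libomp installed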
# ======================
# Custom Build Extension
# ======================
class CustomBuildExtension(build_ext):
"""
Customized ``build_ext`` that provides correct compile and linker flags to
the extensions depending on the compiler and the operating system platform.
Default compiler names depending on platform:
* linux: gcc
* mac: clang (llvm)
* windows: msvc (Microsoft Visual C++)
Compiler flags:
* gcc : -O3 -fno-stack-protector -Wall -fopenmp
* clang : -O3 -fno-stack-protector -Wall -Xpreprocessor -fopenmp
* msvc : /O2 /Wall /openmp
Linker flags:
* gcc : -fopenmp
* clang : -Xpreprocessor -fopenmp -lomp
* msvc : (none)
Usage:
This class (CustomBuildExtension) is a child of the ``build_ext`` class. To
use this class, add it to ``cmdclass``:
.. code-block: python
>>> setup(
... ...
... # cmdclass = {'build_ext' : } # default
... cmdclass = {'build_ext' : CustomBuildExtension} # this class
... ...
... )
"""
# ---------------
# Build Extension
# ---------------
def build_extensions(self):
"""
Specifies compiler and linker flags depending on the compiler.
.. warning::
DO NOT USE the '-march=native' flag. With this flag, the compiler
optimizes the instructions for the machine used at build time,
and the executable will not be backward compatible with older CPUs.
As a result, the package will not be distributable on other
machines, as installation from the binary wheel crashes there
with the error:
'illegal instruction (core dumped).'
An alternative optimization flag is '-mtune=native', which is
backward compatible and the package can be installed using wheel
binary file.
"""
# Get compiler type. This is "unix" (linux, mac) or "msvc" (windows)
compiler_type = self.compiler.compiler_type
# Initialize flags
extra_compile_args = []
extra_link_args = []
if compiler_type == 'msvc':
# This is Microsoft Windows Visual C++ compiler
msvc_compile_args = ['/O2', '/Wall', '/openmp']
msvc_link_args = []
msvc_has_openmp_flag = check_compiler_has_flag(
self.compiler,
msvc_compile_args,
msvc_link_args)
if msvc_has_openmp_flag:
# Add flags
extra_compile_args += msvc_compile_args
extra_link_args += msvc_link_args
else:
# It does not seem that msvc accepts the /openmp flag.
raise RuntimeError(textwrap.dedent(
"""
OpenMP does not seem to be available on %s compiler.
""" % compiler_type))
else:
# The compiler_type is 'unix'. This is either Linux or macOS.
# We add common flags that work both for gcc and clang
extra_compile_args += ['-O3', '-fno-stack-protector', '-Wall',
'-Wextra', '-Wundef', '-Wcast-align',
'-Wunreachable-code', '-Wswitch-enum',
'-Wpointer-arith', '-Wcast-align',
'-Wwrite-strings', '-Wsign-compare',
'-Wundef', '-pedantic', '-fno-common',
'-fno-wrapv']
# The option '-Wl,..' sends arguments to the linker. Here,
# '--strip-all' will remove all symbols from the shared library.
if not debug_mode:
extra_compile_args += ['-g0', '-Wl,--strip-all']
# Assume compiler is gcc (we do not know yet). Check if the
# compiler accepts '-fopenmp' flag. Note: clang in mac does not
# accept this flag alone, but gcc does.
gcc_compile_args = ['-fopenmp']
gcc_link_args = ['-fopenmp']
gcc_has_openmp_flag = check_compiler_has_flag(
self.compiler,
gcc_compile_args,
gcc_link_args)
if gcc_has_openmp_flag:
# Assuming this is gcc. Add '-fopenmp' safely.
extra_compile_args += gcc_compile_args
extra_link_args += gcc_link_args
else:
# Assume compiler is clang (we do not know yet). Check if
# -fopenmp can be passed through preprocessor. This is how
# clang compiler accepts -fopenmp.
clang_compile_args = ['-Xpreprocessor', '-fopenmp']
clang_link_args = ['-Xpreprocessor', '-fopenmp', '-lomp',
'-headerpad_max_install_names']
clang_has_openmp_flag = check_compiler_has_flag(
self.compiler,
clang_compile_args,
clang_link_args)
if clang_has_openmp_flag:
# Assuming this is mac's clang. Add '-fopenmp' through
# preprocessor
extra_compile_args += clang_compile_args
extra_link_args += clang_link_args
else:
# It doesn't seem that either gcc or clang accepts the -fopenmp flag.
raise RuntimeError(textwrap.dedent(
"""
OpenMP does not seem to be available on %s compiler.
""" % compiler_type))
# Modify compiler flags for cuda
if use_cuda:
cuda = locate_cuda()
# Code generations for various device architectures
gencodes = []
if cuda['version']['major'] < 12:
gencodes += ['-gencode', 'arch=compute_35,code=sm_35']
gencodes += ['-gencode', 'arch=compute_50,code=sm_50',
'-gencode', 'arch=compute_52,code=sm_52',
'-gencode', 'arch=compute_60,code=sm_60',
'-gencode', 'arch=compute_61,code=sm_61',
'-gencode', 'arch=compute_70,code=sm_70',
'-gencode', 'arch=compute_75,code=sm_75']
if cuda['version']['major'] < 11:
gencodes += \
['-gencode', 'arch=compute_75,code=compute_75']
else:
gencodes += \
['-gencode', 'arch=compute_80,code=sm_80',
'-gencode', 'arch=compute_86,code=sm_86',
'-gencode', 'arch=compute_86,code=compute_86']
extra_compile_args_nvcc = gencodes + ['--ptxas-options=-v', '-c',
'--verbose', '--shared',
'-O3']
# Adding compiler options
if sys.platform == 'win32':