From d81f5f2cfc462a47612d8e330a6f794a8e0c8fc5 Mon Sep 17 00:00:00 2001 From: Jeffrey Newman Date: Fri, 26 Jul 2024 14:45:16 -0500 Subject: [PATCH] Performance docs (#878) * perf docs part 1 * performance docs * make persistent sharrow cache the default * performance tuning checklist * skim-data-format * recommend explicit chunking * multithread defaults * note on string columns in preprocessors * update dev install docs * address review comments * adding memory profile plotting * favicon for docs * change atol for sharrow tests to 1e-6 * troubleshooting docs * blacken * add link to notebook --------- Co-authored-by: David Hensle <51132108+dhensle@users.noreply.github.com> --- activitysim/core/configuration/filesystem.py | 7 +- activitysim/core/interaction_sample.py | 4 +- activitysim/core/interaction_simulate.py | 4 +- activitysim/core/simulate.py | 6 +- docs/_static/favicon.ico | Bin 0 -> 15406 bytes docs/conf.py | 2 +- docs/dev-guide/install.md | 9 - docs/dev-guide/using-sharrow.md | 153 +- docs/users-guide/example_models.rst | 2 +- docs/users-guide/index.rst | 5 +- docs/users-guide/model_anatomy.rst | 3 + docs/users-guide/performance/chunking.md | 71 + docs/users-guide/performance/index.md | 100 + .../performance/multiprocessing.md | 62 + .../users-guide/performance/multithreading.md | 112 + docs/users-guide/performance/sharrow.md | 41 + .../performance/skim-data-format.md | 37 + .../scripts/plot_memory_profile.ipynb | 189860 +++++++++++++++ 18 files changed, 190454 insertions(+), 24 deletions(-) create mode 100644 docs/_static/favicon.ico create mode 100644 docs/users-guide/performance/chunking.md create mode 100644 docs/users-guide/performance/index.md create mode 100644 docs/users-guide/performance/multiprocessing.md create mode 100644 docs/users-guide/performance/multithreading.md create mode 100644 docs/users-guide/performance/sharrow.md create mode 100644 docs/users-guide/performance/skim-data-format.md create mode 100644 
other_resources/scripts/plot_memory_profile.ipynb diff --git a/activitysim/core/configuration/filesystem.py b/activitysim/core/configuration/filesystem.py index 27496b1c7..b55f0c496 100644 --- a/activitysim/core/configuration/filesystem.py +++ b/activitysim/core/configuration/filesystem.py @@ -105,8 +105,8 @@ def data_model_dirs_must_exist(cls, data_model_dir, values): """ Name of the output directory for sharrow cache files. - If not given, a directory named "__sharrowcache__" will be created inside - the general cache directory. + If not given, the sharrow cache is stored in a run-independent persistent + location, according to `platformdirs.user_cache_dir`. See `persist_sharrow_cache`. """ settings_file_name: str = "settings.yaml" @@ -395,7 +395,8 @@ def get_sharrow_cache_dir(self) -> Path: Path """ if self.sharrow_cache_dir is None: - out = self.get_cache_dir("__sharrowcache__") + self.persist_sharrow_cache() + out = self.sharrow_cache_dir else: out = self.get_working_subdir(self.sharrow_cache_dir) if not out.exists(): diff --git a/activitysim/core/interaction_sample.py b/activitysim/core/interaction_sample.py index 0afa6c3c4..cabcbeb64 100644 --- a/activitysim/core/interaction_sample.py +++ b/activitysim/core/interaction_sample.py @@ -359,7 +359,7 @@ def _interaction_sample( ), interaction_utilities.values, rtol=1e-2, - atol=0, + atol=1e-6, err_msg="utility not aligned", verbose=True, ) @@ -370,7 +370,7 @@ def _interaction_sample( interaction_utilities_sh.values, interaction_utilities.values, rtol=1e-2, - atol=0, + atol=1e-6, ) ) _sh_util_miss1 = interaction_utilities_sh.values[ diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index f6f97dd20..38a90655e 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -504,14 +504,14 @@ def to_series(x): sh_util.reshape(utilities.values.shape), utilities.values, rtol=1e-2, - atol=0, + atol=1e-6, err_msg="utility not aligned", 
verbose=True, ) except AssertionError as err: print(err) misses = np.where( - ~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=0) + ~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=1e-6) ) _sh_util_miss1 = sh_util[tuple(m[0] for m in misses)] _u_miss1 = utilities.values[tuple(m[0] for m in misses)] diff --git a/activitysim/core/simulate.py b/activitysim/core/simulate.py index 0dcb5a379..3ad4c640a 100644 --- a/activitysim/core/simulate.py +++ b/activitysim/core/simulate.py @@ -787,13 +787,15 @@ def eval_utilities( sh_util, utilities.values, rtol=1e-2, - atol=0, + atol=1e-6, err_msg="utility not aligned", verbose=True, ) except AssertionError as err: print(err) - misses = np.where(~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=0)) + misses = np.where( + ~np.isclose(sh_util, utilities.values, rtol=1e-2, atol=1e-6) + ) _sh_util_miss1 = sh_util[tuple(m[0] for m in misses)] _u_miss1 = utilities.values[tuple(m[0] for m in misses)] _sh_util_miss1 - _u_miss1 diff --git a/docs/_static/favicon.ico b/docs/_static/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..99a65353bc2225ddf046a6446a4c6ecbed58b4b4 GIT binary patch literal 15406 zcmeHONmm?463(jf5AYo3uG2gpee&7&@pzTxRUXaQvNiV12!U9H5Su{Ih@EC_76DBI z!XPBHA|b$%nV*&OeO1b?>{`02=z@!z_v&R=R%S$GL}YB42w@0Iy#2O7dq!M(ONjS` z5NFOf@9+FchWoI=_EkGbA6*;qx2juMZot`=X0m!KnL4 z$b%)|ST*EOjUn#>$78^HJY~oY{GMt8-a$j|ps&`megz{3x`}7D#Sfo-zhiyVfTPBH z+lEhte6?c8gJnZr{n+JkJ%0a|2mSa~|B2i0ozM3>?G6o-^)8<2MyGF~&Fu@C%f9Aw z!sZ#}#C7s{dB@=kWgj+A?=SQ?_=YbVa`cMxJKq(JTfv~bsS9_(?_J1}zi&_<66kuN z*O2!iE6RhiOfr{aTYzsDI!S%#{Co2GHa_SsbvP;RB;x~(LV`}(nKO9a$3y!R_LqdF z>e!Q&x#I72*wDfMn(62mZ18xUDG5`-4{O`b8*(jU!cOopba%^9og`mgXieg84#l@Ple28aDuKZafJv25H5-Vtnu&DW4MHGE22 z?9%WGHe$LHzN^QS-9N;X2iDy8FBA67g`@Bn`mq+sqa59eUuq8h+A{3i9{lj*NmEWX zI6Sm{)f+oh_mN}ErkhOpbjFXDur@|b$TOVxil%Gl;LlcF7>DprrkE{1yDT7k;_-N8m-S%LM;~%z|$U zepJi2hrUU{-n(PUfm-;TiPC=cU2myBYZ*(<0qL06GbotU@nWdrkW{_2L$El=05ZA1H6eOF|wZw&1uT>=51ee($`1 
z^=mwMG!b9_??KlXlaYsf%I$eC-r_rWa363K54d-^zT)?gb@W|w<;J<2gVA%sU}j+d z6n?f_!_ho=FjhLJ`=7_h&luy$lnal%?D#HRkGlR*y7q_T(e#F`UG)#b{^~qfhYeTX zyzM6TsXq4h&*5XebHmcIf%YzX$PZhZ5U2(e-{2zU)S+*oMMYq{|3$PW(@RkaL3%O7C75O7%j`OS4_MWy%A@1ltn=kBgf$EOnmT+SMB9n^_0Q_amPM z9ct~2S>wdG3eK@x*yJh5cx}kVO&S>69vfzeFAIa$1U-0?GbLj)=Rr(Z#xI1ysi+@( zuybJ5CR{^BxU0BQ)3va=z#pZbbrO_l#RWXoI!XE5w7O@j{0Y?o_yIfKraw3(`%HYb ztfy3nld*FDFP7XGuw=)DQ}W}WnfcMykR|(nwi3tdTzeyOfZM1w?TlOY83b!GONh_v zwcOxDWbuJhZ+e!qB=d5fe~<>!!n}lMJ5hKW5w~yjgKoraDu!eG9J$M2y(ycS;T87%jiGK+Je&hL?frnrXM4-J{E zvv6+X$7|zWH{~dD)wnp@&zm@V^TXOhu9?NT+!EK(XxX)^O7cmbEApJD(DRm0-im+B zUss?Hb*S4$@xJ&Sy!F<2Z9YH_dDPElQGeSUN8Ju}5az30sKwZ4oRmqFT=1PWy`-)ydi9oi!}@9$^gj}=u1mndGbq+jb$I;!95QCzwJ?BO5BU!3X*`R4IAO`z z>z2F)-C|B$qwF7GQ74!|+( z^HJ~g!eySyy7k;GA@8AXx{rL>JBGLqmo52XfjVs=kN3mz&Qb4_#R2%XjN0-f&PE4t z9?AM>Q8#oxm-`LA4gMSTO4LaeY}M{eJGJlCAz|m+%)8aMVv99!9bvjr9H;3H$FnxQ z&|}zj@kQ8?4%F@v>5jH0>H%f##q|n%n{{{M;hCwPFMG$veSe91!&0Ba7s6d1ft*qM zVvQ;>@6{S0gOM8e67a=K$Gt3U8qVLj51!%NHfW4@j{eT^%#nBDtJY8prmp0=U>^pk zmyTRBm#pK{&NYC3+KMMr!7}K=UZs4;8|m{*`Fzf?yI!5)JL#oO=9sM^Q_i=Uhz~Fy zwrd8mAu#IPdo$l+8M?a^YCAGtgDTW z1DE@90Xjr}>-xs`3B*~%83#_>F>T+PlILY!8L%GD`ejlZ*YYLr5c%P;Prqvvm%hkx z*a!J8C$81kK;C6*6Z&_^YWkYrCJ^p?D{KyMMbeW{-h{_k>o6du*qyVSm?0)i?kDZS}L8 zFxa%8^Z{y~YI(Mk^v+Zmj&l81VK)lMCB6Rk72?oQo^6%UUD}lKtB%c4bh9Nao)J_O zE5y`Yu8sFF==*gV_?u_K^kZS`r%m4;@$-Hk_MiDc*u1p+d5-lU>(^n%b&M&(WYGH~ zdRBT? z1=?cafDCeoy{Py5%R)Y!5%#_Phtq!EQ$77jYvwt{ z2l-;hcc&kxC0-kBksV7?$JKAN_Sp&F%h-VGw<>?CI`GHR0bCEN{#7|}Y!3V%O6_md literal 0 HcmV?d00001 diff --git a/docs/conf.py b/docs/conf.py index f6f56f081..ef358b4cf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -175,7 +175,7 @@ # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -# html_favicon = None +html_favicon = "favicon.ico" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, diff --git a/docs/dev-guide/install.md b/docs/dev-guide/install.md index b7b6030a0..8f4b07bd8 100644 --- a/docs/dev-guide/install.md +++ b/docs/dev-guide/install.md @@ -49,18 +49,9 @@ conda activate ./ASIM-ENV git clone https://github.com/ActivitySim/sharrow.git python -m pip install -e ./sharrow git clone https://github.com/ActivitySim/activitysim.git -cd activitysim -git switch develop -cd .. python -m pip install -e ./activitysim ``` -```{note} -If the environment create step above fails due to a 404 missing error, -the main repository may not be up to date with these docs, try this instead: -https://raw.githubusercontent.com/camsys/activitysim/sharrow-black/conda-environments/activitysim-dev-base.yml -``` - Note the above commands will create an environment with all the necessary dependencies, clone both ActivitySim and sharrow from GitHub, and `pip install` each of these libraries in editable mode, which diff --git a/docs/dev-guide/using-sharrow.md b/docs/dev-guide/using-sharrow.md index 04894d769..d3924ed2a 100644 --- a/docs/dev-guide/using-sharrow.md +++ b/docs/dev-guide/using-sharrow.md @@ -17,6 +17,55 @@ multiprocessing mode after all the compilation for all model components is complete. ``` +### Top-Level Activation Options + +Activating sharrow is done at the top level of the model settings file, typically +`settings.yaml`, by setting the `sharrow` configuration setting to `True`: + +```yaml +sharrow: True +``` + +The default operation for sharrow is to attempt to use the sharrow compiler for +all model specifications, and to revert to the legacy pandas-based evaluation +if the sharrow compiler encounters a problem. Alternatively, the `sharrow` +setting can also be set to `require` or `test`. The `require` setting +will cause the model to simply fail if sharrow encounters a problem, which is +useful if the user is interested in ensuring maximum performance. 
+The `test` setting will run the model in a mode where both sharrow and the +legacy pandas-based evaluation are run on each model specification, and the +results are compared to ensure they are substantially identical. This is +useful for debugging and testing, but is not recommended for production runs +as it is much slower than running only one evaluation path or the other. + +Testing is strongly recommended during model development, as it is possible +to write expressions that are valid in one evaluation mode but not the other. +This can happen if model data includes `NaN` values +(see [Performance Considerations](#performance-considerations)), or when +using arithmetic on logical values +(see [Arithmetic on Logical Values](#arithmetic-on-logical-values)). + +### Caching of Precompiled Functions + +The first time you run a model with sharrow enabled, the compiler will run +and create a cache of compiled functions. This can take a long time, especially +for models with many components or complex utility specifications. However, +once the cache is created, subsequent runs of the model will be much faster. +By default, the cached functions are stored in a subdirectory of the +`platformdirs.user_cache_dir` directory, which is located in a platform-specific +location: + +- Windows: `%USERPROFILE%\AppData\Local\ActivitySim\ActivitySim\Cache\...` +- MacOS: `~/Library/Caches/ActivitySim/...` +- Linux: `~/.cache/ActivitySim/...` or `$XDG_CACHE_HOME/ActivitySim/...` + +The cache directory can be changed from this default location by setting the +[`sharrow_cache_dir`](activitysim.core.configuration.FileSystem.sharrow_cache_dir) +setting in the `settings.yaml` file. Note if you change this setting and provide +a relative path, it will be interpreted as relative to the model working directory, +and cached functions may not carry over to other model runs unless copied there +by the user. 
+ ## Model Design Requirements Activating the `sharrow` optimizations also requires using the new @@ -231,6 +280,35 @@ such string operations won't appear in utility specifications at all, or if they do appear, they are executed only once and stored in a temporary value for re-use as needed. +A good approach to reduce string operations in model spec files is to convert +string columns to integer or categorical columns in preprocessors. This can +be done using the `map` method, which can be used to convert strings to integers, +for example: + + `df['fuel_type'].map({'Gas': 1, 'Diesel': 2, 'Hybrid': 3}).fillna(-1).astype(int)` + +Alternatively, data columns can be converted to categorical columns with well-defined +structures. Recent versions of sharrow have made significant improvements in +handling of unordered categorical values, allowing for the use of possibly +more intuitive categorical columns. For example, the fuel type column above +could instead be redefined as a categorical column with the following code: + + `df['fuel_type'].astype(pd.CategoricalDtype(categories=['Gas', 'Diesel', 'Hybrid'], ordered=False))` + +It is important that the categories are defined with the same set of values +in the same order, as any deviation from this will void the compiler cache +and cause the model specification to be recompiled. This means that using +`x.astype('category')` is not recommended, as the categories will be inferred +from the data and may not be consistent across multiple calls to the model +specification evaluator. + +```{note} +Beginning with ActivitySim version 1.3, string-valued +columns created in preprocessors are converted to categorical columns automatically, +which means that ignoring encoding for string-valued outputs is equivalent to +using the `astype('category')` method, and is not recommended. +``` + For models with utility expressions that include a lot of string comparisons, (e.g. 
because they are built for the legacy `pandas.eval` interpreter and have not been updated) sharrow can be disabled by setting @@ -410,7 +488,7 @@ taz_skims: ``` If groups of similarly named variables should have the same encoding applied, -they can be identifed by regular expressions ("regex") instead of explicitly +they can be identified by regular expressions ("regex") instead of explicitly giving each name. For example: ```yaml @@ -485,3 +563,76 @@ taz_skims: For more details on all the settings available for digital encoding, see [DigitalEncoding](activitysim.core.configuration.network.DigitalEncoding). + +## Troubleshooting + +If you encounter errors when running the model with sharrow enabled, it is +important to address them before using the model for analysis. This is +especially important when errors are found running in "test" mode (activated +by `sharrow: test` in the top level settings.yaml). Errors may +indicate that either sharrow or the legacy evaluator is not correctly processing +the mathematical expressions in the utility specifications. + +### "utility not aligned" Error + +One common error that can occur when running the model with sharrow in "test" +mode is the "utility not aligned" error. This error occurs when a sharrow +compiled utility calculation does not sufficiently match the legacy utility +calculation. We say "sufficiently" here because the two calculations may have +slight differences due to numerical precision optimizations applied by sharrow. +These optimizations can result in minor differences in the final utility values, +which are typically inconsequential for model results. However, if the differences +are too large, the "utility not aligned" error will be raised. This error does +not indicate whether the incorrect result is from the sharrow or legacy calculation +(or both), and it is up to the user to determine how to align the calculations +so they are reflective of the model developer's intent. 
+ +To troubleshoot the "utility not aligned" error, the user can use a Python debugger +to compare the utility values calculated by sharrow and the legacy evaluator. +ActivitySim also includes error handler code that will attempt to find the +problematic utility expression and print it to the console or log file, under the +heading "possible problematic expressions". This can be helpful in quickly narrowing +down which lines of a specification file are causing the error. + +Common causes of the "utility not aligned" error include: + +- model data includes `NaN` values but the component settings do not + disable `fastmath` (see [Performance Considerations](#performance-considerations)) +- incorrect use of arithmetic on logical values (see + [Arithmetic on Logical Values](#arithmetic-on-logical-values)) + +### Insufficient system resources + +For large models run on large servers, it is possible to overwhelm the system +with too many processes and threads, which can result in the following error: + +``` +OSError: Insufficient system resources exist to complete the requested service +``` + +This error can be resolved by reducing the number of processes and/or threads per +process. See [Multiprocessing](../users-guide/performance/multiprocessing.md) and +[Multithreading](../users-guide/performance/multithreading.md) in the User's Guide +for more information on how to adjust these settings. + +### Permission Error + +If running a model using multiprocessing with sharrow enabled, it is necessary +to have pre-compiled all the utility specifications to prevent the multiple +processes from competing to write to the same cache location on disk. Failure +to do this can result in a permission error, as some processes may be unable to +write to the cache location. 
+ +``` +PermissionError: The process cannot access the file because it is being used by another process +``` + +To resolve this error, run the model with sharrow enabled in single-process mode +to pre-compile all the utility specifications. If that does not resolve the error, +it is possible that some compiling is being triggered in multiprocess steps that +is not being handled in the single process mode. This is likely due to the presence +of string or categorical columns created in a preprocessor that are not being +stored in a stable data format. To resolve this error, ensure that all expressions +in pre-processors are written in a manner that results in stable data types (e.g. +integers, floats, or categorical columns with a fixed set of categories). See +[Performance Considerations](#performance-considerations) for examples. diff --git a/docs/users-guide/example_models.rst b/docs/users-guide/example_models.rst index be8f580e1..74ad80981 100644 --- a/docs/users-guide/example_models.rst +++ b/docs/users-guide/example_models.rst @@ -2756,7 +2756,7 @@ Skims are named ___