From 92b69bc504c84208cea5626a1e24fb7ed0eb9830 Mon Sep 17 00:00:00 2001 From: Junghwan Park Date: Sat, 6 May 2023 11:05:46 +0900 Subject: [PATCH] =?UTF-8?q?PyTorch=20v2.0=20=EB=B0=98=EC=98=81,=20pytorch/?= =?UTF-8?q?tutorials@9efe789b=20(#626)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .build/get_files_to_run.py | 107 +++ .build/get_sphinx_filenames.py | 13 + .build/validate_tutorials_built.py | 89 ++- .github/ISSUE_TEMPLATE/1_TRANSLATE_REQUEST.md | 2 +- LICENSE | 2 +- README.md | 8 +- _static/img/invpendulum.gif | Bin 0 -> 30332 bytes .../img/reinforcement_learning_diagram.drawio | 1 + .../img/reinforcement_learning_diagram.jpg | Bin 23737 -> 34793 bytes _templates/layout.html | 4 +- advanced_source/cpp_frontend.rst | 38 +- advanced_source/ddp_pipeline.py | 49 +- .../dynamic_quantization_tutorial.py | 6 +- advanced_source/generic_join.rst | 10 +- advanced_source/neural_style_tutorial.py | 27 +- advanced_source/numpy_extensions_tutorial.py | 9 +- .../static_quantization_tutorial.rst | 14 +- .../super_resolution_with_onnxruntime.py | 8 +- .../Intro_to_TorchScript_tutorial.py | 6 +- .../audio_data_augmentation_tutorial.py | 434 ----------- .../audio_data_augmentation_tutorial.rst | 10 + beginner_source/audio_datasets_tutorial.py | 87 --- beginner_source/audio_datasets_tutorial.rst | 10 + .../audio_feature_augmentation_tutorial.py | 168 ----- .../audio_feature_augmentation_tutorial.rst | 10 + .../audio_feature_extractions_tutorial.py | 457 ----------- .../audio_feature_extractions_tutorial.rst | 10 + beginner_source/audio_io_tutorial.py | 385 ---------- beginner_source/audio_io_tutorial.rst | 10 + beginner_source/audio_resampling_tutorial.py | 476 ------------ beginner_source/audio_resampling_tutorial.rst | 9 + beginner_source/basics/autogradqs_tutorial.py | 4 +- beginner_source/basics/buildmodel_tutorial.py | 18 +- beginner_source/basics/intro.py | 2 +- .../basics/optimization_tutorial.py | 4 +- beginner_source/basics/quickstart_tutorial.py | 19 +- .../basics/saveloadrun_tutorial.py | 4 +- beginner_source/basics/tensorqs_tutorial.py | 1 + .../bettertransformer_tutorial.rst | 4 +- beginner_source/blitz/autograd_tutorial.py | 4 - .../blitz/neural_networks_tutorial.py | 36 +- beginner_source/chatbot_tutorial.py | 139 ++-- beginner_source/colab.rst | 26 +- beginner_source/data_loading_tutorial.py | 41 +- beginner_source/dcgan_faces_tutorial.py | 115 +-- .../ddp_series_fault_tolerance.rst | 8 +- beginner_source/ddp_series_multigpu.rst | 35 +- beginner_source/ddp_series_theory.rst | 8 +- ...deploy_seq2seq_hybrid_frontend_tutorial.py | 50 +- beginner_source/dist_overview.rst | 4 +- beginner_source/fgsm_tutorial.py | 20 +- beginner_source/flava_finetuning_tutorial.py | 28 +- .../former_torchies/parallelism_tutorial.py | 2 +- .../hyperparameter_tuning_tutorial.py | 84 ++- .../introyt/autogradyt_tutorial.py | 8 +- beginner_source/introyt/captumyt.py | 10 +- beginner_source/introyt/introyt1_tutorial.py | 8 +- beginner_source/introyt/modelsyt_tutorial.py | 92 +-- .../introyt/tensorboardyt_tutorial.py | 64 +- .../introyt/tensors_deeper_tutorial.py | 4 +- beginner_source/introyt/trainingyt.py | 112 +-- beginner_source/nn_tutorial.py | 110 ++- beginner_source/saving_loading_models.py | 6 +- beginner_source/t5_tutorial.py | 456 +++++++++++ beginner_source/template_tutorial.py | 92 +++ .../text_sentiment_ngrams_tutorial.py | 10 +- beginner_source/transfer_learning_tutorial.py | 13 +- beginner_source/transformer_tutorial.py | 85 ++- 
beginner_source/translation_transformer.py | 22 +- beginner_source/vt_tutorial.py | 24 +- conf.py | 17 +- distributed/home.rst | 22 +- .../maskedtensor_sparsity.ipynb | 6 +- .../maskedtensor_adagrad.py | 6 +- .../tuning_guide.ipynb | 4 +- .../maskedtensor_advanced_semantics.ipynb | 4 +- .../maskedtensor_overview.py | 2 +- .../maskedtensor_overview.ipynb | 2 +- .../modelsyt_tutorial.py | 92 +-- .../fx_profiling_tutorial.py | 2 +- .../tuning_guide.py | 6 +- .../maskedtensor_sparsity.py | 6 +- .../fx_profiling_tutorial.ipynb | 2 +- .../maskedtensor_adagrad.ipynb | 4 +- .../tensorboardyt_tutorial.py | 54 +- .../maskedtensor_advanced_semantics.py | 10 +- .../tensorboardyt_tutorial.ipynb | 2 +- .../modelsyt_tutorial.ipynb | 2 +- docs/advanced/generic_join.html | 158 ++-- docs/beginner/ddp_series_multigpu.html | 146 ++-- docs/beginner/ddp_series_theory.html | 144 ++-- docs/beginner/introyt/modelsyt_tutorial.html | 156 ++-- .../introyt/tensorboardyt_tutorial.html | 154 ++-- docs/distributed/home.html | 172 ++--- docs/intermediate/FSDP_adavnced_tutorial.html | 156 ++-- .../dist_pipeline_parallel_tutorial.html | 156 ++-- docs/intermediate/fx_profiling_tutorial.html | 154 ++-- .../process_group_cpp_extension_tutorial.html | 154 ++-- docs/intermediate/rpc_async_execution.html | 156 ++-- .../rpc_param_server_tutorial.html | 154 ++-- docs/intermediate/torchrec_tutorial.html | 154 ++-- docs/intermediate/torchserve_with_ipex.html | 156 ++-- docs/intermediate/torchserve_with_ipex_2.html | 156 ++-- docs/prototype/fx_graph_mode_ptq_static.html | 150 ++-- docs/prototype/maskedtensor_adagrad.html | 148 ++-- .../maskedtensor_advanced_semantics.html | 148 ++-- docs/prototype/maskedtensor_overview.html | 144 ++-- docs/prototype/maskedtensor_sparsity.html | 150 ++-- docs/recipes/intel_extension_for_pytorch.html | 144 ++-- docs/recipes/quantization.html | 144 ++-- docs/recipes/recipes/tuning_guide.html | 160 ++-- index.rst | 89 ++- .../FSDP_adavnced_tutorial.rst | 48 +- intermediate_source/FSDP_tutorial.rst | 4 +- .../autograd_saved_tensors_hooks_tutorial.py | 31 +- .../ax_multiobjective_nas_tutorial.py | 48 +- .../char_rnn_classification_tutorial.py | 9 +- .../char_rnn_generation_tutorial.py | 9 +- .../custom_function_conv_bn_tutorial.py | 58 +- intermediate_source/ddp_series_minGPT.rst | 8 +- intermediate_source/ddp_series_multinode.rst | 4 +- intermediate_source/ddp_tutorial.rst | 3 +- .../dist_pipeline_parallel_tutorial.rst | 6 +- intermediate_source/dist_tuto.rst | 4 +- intermediate_source/ensembling.py | 175 +++++ .../flask_rest_api_tutorial.py | 12 +- ...rced_alignment_with_torchaudio_tutorial.py | 528 ------------- ...ced_alignment_with_torchaudio_tutorial.rst | 11 + intermediate_source/forward_ad_usage.py | 37 +- intermediate_source/fx_conv_bn_fuser.py | 24 +- intermediate_source/fx_profiling_tutorial.py | 12 +- intermediate_source/jacobians_hessians.py | 349 +++++++++ intermediate_source/mario_rl_tutorial.py | 36 +- intermediate_source/memory_format_tutorial.py | 9 +- intermediate_source/mnist_train_nas.py | 8 +- .../model_parallel_tutorial.py | 7 +- intermediate_source/neural_tangent_kernels.py | 248 ++++++ intermediate_source/nvfuser_intro_tutorial.py | 687 ----------------- .../nvfuser_intro_tutorial.rst | 8 + intermediate_source/parametrizations.py | 10 +- intermediate_source/per_sample_grads.py | 226 ++++++ intermediate_source/pipeline_tutorial.py | 35 +- .../process_group_cpp_extension_tutorial.rst | 6 +- intermediate_source/pruning_tutorial.py | 13 +- intermediate_source/reinforcement_ppo.py | 702 
+++++++++++++++++ .../reinforcement_q_learning.py | 262 +++---- intermediate_source/rpc_async_execution.rst | 6 +- .../rpc_param_server_tutorial.rst | 4 +- intermediate_source/rpc_tutorial.rst | 2 +- .../scaled_dot_product_attention_tutorial.py | 343 +++++++++ .../seq2seq_translation_tutorial.py | 8 +- ...classification_with_torchaudio_tutorial.py | 545 -------------- .../speech_recognition_pipeline_tutorial.py | 288 ------- .../speech_recognition_pipeline_tutorial.rst | 10 + .../tensorboard_profiler_tutorial.py | 18 +- .../text_to_speech_with_torchaudio.py | 306 -------- .../text_to_speech_with_torchaudio.rst | 10 + intermediate_source/torch_compile_tutorial.py | 509 +++++++++++++ intermediate_source/torchrec_tutorial.rst | 20 +- intermediate_source/torchserve_with_ipex.rst | 176 ++--- .../torchserve_with_ipex_2.rst | 156 ++-- prototype_source/README.md | 6 +- prototype_source/README.txt | 8 +- prototype_source/backend_config_tutorial.rst | 326 ++++++++ prototype_source/fx_graph_mode_ptq_dynamic.py | 68 +- prototype_source/fx_graph_mode_ptq_static.rst | 655 ++++++++-------- .../fx_graph_mode_quant_guide.rst | 59 +- prototype_source/fx_numeric_suite_tutorial.py | 231 ------ prototype_source/maskedtensor_adagrad.py | 6 +- .../maskedtensor_advanced_semantics.py | 10 +- prototype_source/maskedtensor_overview.py | 2 +- prototype_source/maskedtensor_sparsity.py | 6 +- prototype_source/nestedtensor.py | 383 +++++----- prototype_source/prototype_index.rst | 4 +- prototype_source/skip_param_init.rst | 18 +- prototype_source/vulkan_workflow.rst | 12 +- .../android_native_app_with_custom_op.rst | 18 +- recipes_source/bundled_inputs.rst | 8 +- recipes_source/deployment_with_flask.rst | 4 +- .../intel_extension_for_pytorch.rst | 708 ++++++++++++++---- recipes_source/mobile_perf.rst | 86 ++- recipes_source/quantization.rst | 26 +- recipes_source/recipes/amp_recipe.py | 10 +- .../recipes/changing_default_device.py | 50 ++ .../recipes/reasoning_about_shapes.py | 88 +++ ...saving_and_loading_a_general_checkpoint.py | 2 +- recipes_source/recipes/tuning_guide.py | 28 +- recipes_source/recipes_index.rst | 11 +- recipes_source/script_optimized.rst | 17 +- requirements.txt | 8 +- 190 files changed, 8384 insertions(+), 8589 deletions(-) create mode 100644 .build/get_files_to_run.py create mode 100644 .build/get_sphinx_filenames.py create mode 100644 _static/img/invpendulum.gif create mode 100644 _static/img/reinforcement_learning_diagram.drawio delete mode 100644 beginner_source/audio_data_augmentation_tutorial.py create mode 100644 beginner_source/audio_data_augmentation_tutorial.rst delete mode 100644 beginner_source/audio_datasets_tutorial.py create mode 100644 beginner_source/audio_datasets_tutorial.rst delete mode 100644 beginner_source/audio_feature_augmentation_tutorial.py create mode 100644 beginner_source/audio_feature_augmentation_tutorial.rst delete mode 100644 beginner_source/audio_feature_extractions_tutorial.py create mode 100644 beginner_source/audio_feature_extractions_tutorial.rst delete mode 100644 beginner_source/audio_io_tutorial.py create mode 100644 beginner_source/audio_io_tutorial.rst delete mode 100644 beginner_source/audio_resampling_tutorial.py create mode 100644 beginner_source/audio_resampling_tutorial.rst create mode 100644 beginner_source/t5_tutorial.py create mode 100644 beginner_source/template_tutorial.py create mode 100644 intermediate_source/ensembling.py delete mode 100644 intermediate_source/forced_alignment_with_torchaudio_tutorial.py create mode 100644 
intermediate_source/forced_alignment_with_torchaudio_tutorial.rst create mode 100644 intermediate_source/jacobians_hessians.py create mode 100644 intermediate_source/neural_tangent_kernels.py delete mode 100644 intermediate_source/nvfuser_intro_tutorial.py create mode 100644 intermediate_source/nvfuser_intro_tutorial.rst create mode 100644 intermediate_source/per_sample_grads.py create mode 100644 intermediate_source/reinforcement_ppo.py create mode 100644 intermediate_source/scaled_dot_product_attention_tutorial.py delete mode 100644 intermediate_source/speech_command_classification_with_torchaudio_tutorial.py delete mode 100644 intermediate_source/speech_recognition_pipeline_tutorial.py create mode 100644 intermediate_source/speech_recognition_pipeline_tutorial.rst delete mode 100644 intermediate_source/text_to_speech_with_torchaudio.py create mode 100644 intermediate_source/text_to_speech_with_torchaudio.rst create mode 100644 intermediate_source/torch_compile_tutorial.py create mode 100644 prototype_source/backend_config_tutorial.rst delete mode 100644 prototype_source/fx_numeric_suite_tutorial.py create mode 100644 recipes_source/recipes/changing_default_device.py create mode 100644 recipes_source/recipes/reasoning_about_shapes.py diff --git a/.build/get_files_to_run.py b/.build/get_files_to_run.py new file mode 100644 index 000000000..80f958f50 --- /dev/null +++ b/.build/get_files_to_run.py @@ -0,0 +1,107 @@ +from typing import Any, Dict, List, Optional, Tuple +import json +import os +from pathlib import Path +# from remove_runnable_code import remove_runnable_code + + +# Calculate repo base dir +REPO_BASE_DIR = Path(__file__).absolute().parent.parent + + +def get_all_files() -> List[str]: + sources = [x.relative_to(REPO_BASE_DIR) for x in REPO_BASE_DIR.glob("*_source/**/*.py") if 'data' not in x.parts] + return [str(x) for x in sources] + + +def read_metadata() -> Dict[str, Any]: + with (REPO_BASE_DIR / ".jenkins" / "metadata.json").open() as fp: + return json.load(fp) + + +def calculate_shards(all_files: List[str], num_shards: int = 20) -> List[List[str]]: + sharded_files: List[Tuple[float, List[str]]] = [(0.0, []) for _ in range(num_shards)] + metadata = read_metadata() + + def get_duration(file: str) -> int: + # tutorials not listed in the metadata.json file usually take + # <3min to run, so we'll default to 1min if it's not listed + return metadata.get(file, {}).get("duration", 60) + + def get_needs_machine(file: str) -> Optional[str]: + return metadata.get(file, {}).get("needs", None) + + def add_to_shard(i, filename): + shard_time, shard_jobs = sharded_files[i] + shard_jobs.append(filename) + sharded_files[i] = ( + shard_time + get_duration(filename), + shard_jobs, + ) + + all_other_files = all_files.copy() + needs_gpu_nvidia_small_multi = list( + filter(lambda x: get_needs_machine(x) == "gpu.nvidia.small.multi", all_files,) + ) + needs_gpu_nvidia_medium = list( + filter(lambda x: get_needs_machine(x) == "gpu.nvidia.large", all_files,) + ) + for filename in needs_gpu_nvidia_small_multi: + # currently, the only job that uses gpu.nvidia.small.multi is the 0th worker, + # so we'll add all the jobs that need this machine to the 0th worker + add_to_shard(0, filename) + all_other_files.remove(filename) + for filename in needs_gpu_nvidia_medium: + # currently, the only job that uses gpu.nvidia.large is the 1st worker, + # so we'll add all the jobs that need this machine to the 1st worker + add_to_shard(1, filename) + all_other_files.remove(filename) + + sorted_files = 
sorted(all_other_files, key=get_duration, reverse=True,) + + for filename in sorted_files: + min_shard_index = sorted(range(num_shards), key=lambda i: sharded_files[i][0])[ + 0 + ] + add_to_shard(min_shard_index, filename) + return [x[1] for x in sharded_files] + + +def compute_files_to_keep(files_to_run: List[str]) -> List[str]: + metadata = read_metadata() + files_to_keep = list(files_to_run) + for file in files_to_run: + extra_files = metadata.get(file, {}).get("extra_files", []) + files_to_keep.extend(extra_files) + return files_to_keep + + +def remove_other_files(all_files, files_to_keep) -> None: + + for file in all_files: + if file not in files_to_keep: + remove_runnable_code(file, file) + + +def parse_args() -> Any: + from argparse import ArgumentParser + parser = ArgumentParser("Select files to run") + parser.add_argument("--dry-run", action="store_true") + parser.add_argument("--num-shards", type=int, default=int(os.environ.get("NUM_WORKERS", 20))) + parser.add_argument("--shard-num", type=int, default=int(os.environ.get("WORKER_ID", 0))) + return parser.parse_args() + + +def main() -> None: + args = parse_args() + + all_files = get_all_files() + files_to_run = calculate_shards(all_files, num_shards=args.num_shards)[args.shard_num] + if not args.dry_run: + remove_other_files(all_files, compute_files_to_keep(files_to_run)) + stripped_file_names = [Path(x).stem for x in files_to_run] + print(" ".join(stripped_file_names)) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/.build/get_sphinx_filenames.py b/.build/get_sphinx_filenames.py new file mode 100644 index 000000000..b84267b48 --- /dev/null +++ b/.build/get_sphinx_filenames.py @@ -0,0 +1,13 @@ +from pathlib import Path +from typing import List + +from get_files_to_run import get_all_files +from validate_tutorials_built import NOT_RUN + + +def get_files_for_sphinx() -> List[str]: + all_py_files = get_all_files() + return [x for x in all_py_files if all(y not in x for y in NOT_RUN)] + + +SPHINX_SHOULD_RUN = "|".join(get_files_for_sphinx()) diff --git a/.build/validate_tutorials_built.py b/.build/validate_tutorials_built.py index 992469ebf..46452cc18 100644 --- a/.build/validate_tutorials_built.py +++ b/.build/validate_tutorials_built.py @@ -9,51 +9,49 @@ # the file name to explain why, like intro.html), or fix the tutorial and remove it from this list). 
 NOT_RUN = [
-    "basics/intro", # no code
-    "translation_transformer",
-    "profiler",
-    "saving_loading_models",
-    "introyt/captumyt",
-    "introyt/trainingyt",
-    "examples_nn/polynomial_module",
-    "examples_nn/dynamic_net",
-    "examples_nn/polynomial_optim",
-    "former_torchies/autograd_tutorial_old",
-    "former_torchies/tensor_tutorial_old",
-    "examples_autograd/polynomial_autograd",
-    "examples_autograd/polynomial_custom_function",
-    "parametrizations",
-    "mnist_train_nas", # used by ax_multiobjective_nas_tutorial.py
-    "fx_conv_bn_fuser",
-    "super_resolution_with_onnxruntime",
-    "ddp_pipeline", # requires 4 gpus
-    "fx_graph_mode_ptq_dynamic",
-    "vmap_recipe",
-    "torchscript_freezing",
-    "nestedtensor",
-    "recipes/saving_and_loading_models_for_inference",
-    "recipes/saving_multiple_models_in_one_file",
-    "recipes/loading_data_recipe",
-    "recipes/tensorboard_with_pytorch",
-    "recipes/what_is_state_dict",
-    "recipes/profiler_recipe",
-    "recipes/save_load_across_devices",
-    "recipes/warmstarting_model_using_parameters_from_a_different_model",
-    "recipes/dynamic_quantization",
-    "recipes/saving_and_loading_a_general_checkpoint",
-    "recipes/benchmark",
-    "recipes/tuning_guide",
-    "recipes/zeroing_out_gradients",
-    "recipes/defining_a_neural_network",
-    "recipes/timer_quick_start",
-    "recipes/amp_recipe",
-    "recipes/Captum_Recipe",
-    "hyperparameter_tuning_tutorial",
-    "flask_rest_api_tutorial",
-    "text_to_speech_with_torchaudio",
+    "beginner_source/basics/intro", # no code
+    "beginner_source/translation_transformer",
+    "beginner_source/profiler",
+    "beginner_source/saving_loading_models",
+    "beginner_source/introyt/captumyt",
+    "beginner_source/examples_nn/polynomial_module",
+    "beginner_source/examples_nn/dynamic_net",
+    "beginner_source/examples_nn/polynomial_optim",
+    "beginner_source/former_torchies/autograd_tutorial_old",
+    "beginner_source/former_torchies/tensor_tutorial_old",
+    "beginner_source/examples_autograd/polynomial_autograd",
+    "beginner_source/examples_autograd/polynomial_custom_function",
+    "intermediate_source/parametrizations",
+    "intermediate_source/mnist_train_nas", # used by ax_multiobjective_nas_tutorial.py
+    "intermediate_source/fx_conv_bn_fuser",
+    "advanced_source/super_resolution_with_onnxruntime",
+    "advanced_source/ddp_pipeline", # requires 4 gpus
+    "prototype_source/fx_graph_mode_ptq_dynamic",
+    "prototype_source/vmap_recipe",
+    "prototype_source/torchscript_freezing",
+    "prototype_source/nestedtensor",
+    "recipes_source/recipes/saving_and_loading_models_for_inference",
+    "recipes_source/recipes/saving_multiple_models_in_one_file",
+    "recipes_source/recipes/loading_data_recipe",
+    "recipes_source/recipes/tensorboard_with_pytorch",
+    "recipes_source/recipes/what_is_state_dict",
+    "recipes_source/recipes/profiler_recipe",
+    "recipes_source/recipes/save_load_across_devices",
+    "recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model",
+    "recipes_source/recipes/dynamic_quantization",
+    "recipes_source/recipes/saving_and_loading_a_general_checkpoint",
+    "recipes_source/recipes/benchmark",
+    "recipes_source/recipes/tuning_guide",
+    "recipes_source/recipes/zeroing_out_gradients",
+    "recipes_source/recipes/defining_a_neural_network",
+    "recipes_source/recipes/timer_quick_start",
+    "recipes_source/recipes/amp_recipe",
+    "recipes_source/recipes/Captum_Recipe",
+    "intermediate_source/flask_rest_api_tutorial",
+    "intermediate_source/text_to_speech_with_torchaudio",
+    "intermediate_source/tensorboard_profiler_tutorial" # reenable after 2.0 release.
] - def tutorial_source_dirs() -> List[Path]: return [ p.relative_to(REPO_ROOT).with_name(p.stem[:-7]) @@ -68,6 +66,7 @@ def main() -> None: glob_path = f"{tutorial_source_dir}/**/*.html" html_file_paths += docs_dir.glob(glob_path) + should_not_run = [f'{x.replace("_source", "")}.html' for x in NOT_RUN] did_not_run = [] for html_file_path in html_file_paths: with open(html_file_path, "r", encoding="utf-8") as html_file: @@ -78,9 +77,7 @@ def main() -> None: if ( "Total running time of the script: ( 0 minutes 0.000 seconds)" in elem.text - and not any( - html_file_path.match(file) for file in NOT_RUN - ) + and not any(html_file_path.match(file) for file in should_not_run) ): did_not_run.append(html_file_path.as_posix()) diff --git a/.github/ISSUE_TEMPLATE/1_TRANSLATE_REQUEST.md b/.github/ISSUE_TEMPLATE/1_TRANSLATE_REQUEST.md index f17e1c17c..bac5e213c 100644 --- a/.github/ISSUE_TEMPLATE/1_TRANSLATE_REQUEST.md +++ b/.github/ISSUE_TEMPLATE/1_TRANSLATE_REQUEST.md @@ -15,4 +15,4 @@ _(반드시 지키셔야 하는 일정이 아닙니다 - 일정이 너무 늦어 ## 관련 이슈 _현재 번역 요청 / 진행 내역을 보기 위해 각 버전의 메인 이슈를 참조합니다._
_(특별한 일이 없다면 변경하지 않으셔도 됩니다.)_ -* 관련 이슈: #615 (v1.13) +* 관련 이슈: #660 (v2.0) diff --git a/LICENSE b/LICENSE index 1ba2484d4..f0d2e189a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ BSD 3-Clause License -Copyright (c) 2017, Pytorch contributors +Copyright (c) 2017, PyTorch contributors All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/README.md b/README.md index dc71ab7e2..482b29eb2 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# PyTorch 한국어 튜토리얼 +# 파이토치 한국어 튜토리얼 (PyTorch tutorials in Korean) ## 소개 PyTorch에서 제공하는 튜토리얼의 한국어 번역을 위한 저장소입니다.\ 번역의 결과물은 [https://tutorials.pytorch.kr](https://tutorials.pytorch.kr)에서 확인하실 수 있습니다. (번역을 진행하며 **비정기적으로** 반영합니다.)\ -현재 버전의 번역 / 변경 관련 이슈는 [#615 이슈](https://github.com/PyTorchKorea/tutorials-kr/issues/615)를 참고해주세요. +현재 버전의 번역 / 변경 관련 이슈는 [#660 이슈](https://github.com/PyTorchKorea/tutorials-kr/issues/660)를 참고해주세요. ## 기여하기 @@ -22,7 +22,7 @@ PyTorch에서 제공하는 튜토리얼의 한국어 번역을 위한 저장소 ## 원문 -현재 PyTorch v1.13 튜토리얼([pytorch/tutorials@db34a77](https://github.com/pytorch/tutorials/commit/db34a779242f1a71346db4a9e5d6ac962a8d9b77) 기준) 번역이 진행 중입니다. +현재 PyTorch v2.0 튜토리얼([pytorch/tutorials@9efe789b](https://github.com/pytorch/tutorials/commit/9efe789bfc3763ec359b60f12b5e6dda4e6d5db0) 기준) 번역이 진행 중입니다. 최신 버전의 튜토리얼(공식, 영어)은 [PyTorch tutorials 사이트](https://pytorch.org/tutorials) 및 [PyTorch tutorials 저장소](https://github.com/pytorch/tutorials)를 참고해주세요. @@ -46,5 +46,5 @@ v1.0 이후 번역은 별도 저장소로 관리하지 않습니다. [이 저장 빌드 방법은 [기여하기 문서의 `2-5. (내 컴퓨터에서) 결과 확인하기`](https://github.com/PyTorchKorea/tutorials-kr/blob/master/CONTRIBUTING.md#2-5-내-컴퓨터에서-결과-확인하기) 부분을 참고해주세요. --- -This is a project to translate [pytorch/tutorials@db34a77](https://github.com/pytorch/tutorials/commit/db34a779242f1a71346db4a9e5d6ac962a8d9b77) into Korean. +This is a project to translate [pytorch/tutorials@9efe789b](https://github.com/pytorch/tutorials/commit/9efe789bfc3763ec359b60f12b5e6dda4e6d5db0) into Korean. For the latest version, please visit to the [official PyTorch tutorials repo](https://github.com/pytorch/tutorials). 
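For reference, the new .build/get_files_to_run.py added earlier in this patch shards tutorials across CI workers greedily: jobs that need gpu.nvidia.small.multi or gpu.nvidia.large are pinned to workers 0 and 1, then the remaining files are sorted by their metadata.json duration (defaulting to 60 seconds when a file is not listed) and each one is assigned to the currently lightest shard. Below is a minimal standalone sketch of that balancing step only; the file names and durations are invented for illustration and the metadata lookup and GPU pinning are omitted.

# Illustration only, not part of the patch. Durations are made up.
durations = {"a.py": 300, "b.py": 120, "c.py": 90, "d.py": 60, "e.py": 60}

def greedy_shards(files, num_shards=2):
    loads = [0.0] * num_shards            # accumulated seconds per shard
    shards = [[] for _ in range(num_shards)]
    # Longest job first; each job goes to the shard with the least work so far.
    for f in sorted(files, key=durations.get, reverse=True):
        i = min(range(num_shards), key=lambda k: loads[k])
        shards[i].append(f)
        loads[i] += durations[f]
    return list(zip(loads, shards))

print(greedy_shards(list(durations)))
# [(300.0, ['a.py']), (330.0, ['b.py', 'c.py', 'd.py', 'e.py'])]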
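Likewise, the validate_tutorials_built.py hunk above switches NOT_RUN from bare tutorial stems to repo-relative source paths and then derives the expected built HTML names by dropping the "_source" suffix. A quick illustration of that mapping, using two entries taken from the new list:

# Illustration only; should_not_run is the expression added by the patch.
NOT_RUN = [
    "beginner_source/basics/intro",
    "recipes_source/recipes/amp_recipe",
]
should_not_run = [f'{x.replace("_source", "")}.html' for x in NOT_RUN]
print(should_not_run)
# ['beginner/basics/intro.html', 'recipes/recipes/amp_recipe.html']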
diff --git a/_static/img/invpendulum.gif b/_static/img/invpendulum.gif
new file mode 100644
index 0000000000000000000000000000000000000000..3102c5b55cfbdd9ed11bf4da7422980287334753
GIT binary patch
literal 30332
[binary GIF data omitted]

diff --git a/_static/img/reinforcement_learning_diagram.drawio b/_static/img/reinforcement_learning_diagram.drawio
new file mode 100644
index 000000000..2ff4e6f02
--- /dev/null
+++ b/_static/img/reinforcement_learning_diagram.drawio
@@ -0,0 +1 @@
[single-line compressed draw.io XML omitted]

diff --git a/_static/img/reinforcement_learning_diagram.jpg b/_static/img/reinforcement_learning_diagram.jpg
index bdcbc3225026bb186debc90fdf241dd6439720c2..7e04efc253461493d0475101eeef876e00d5b5dd 100644
GIT binary patch
literal 34793
[binary JPEG data omitted]
z4ccGA>q?2{m2p8)1p{QNcYg*^5AW@QO#Nu_hpNfZrYN>Mr5=Q`BDmpGh_gU1(yBwHz5jl@H)!pB6u6T)~7& zZpGtX>>~WJ>iQYk=?92-tUQ*YrZt(2zAZ@*&6b*fqn$0G!0W-)j|GeVYUVXs-R60D zPpYnpnFFc4yT{?QWae}_keC0h7IV@>m z(qck4-kO=WxPFuwZC|}@R8_Azxbf0n{m20PxPj1*PC2|npker$&Zj!(JT2oHT?^#d z)Oq`g%&@DW8!_vLNyVryjZE==Se2X*;@9ImA7V31$6wk++k|d%h`kUOvwoMY&)j== zjYQFFZ=kta-!~sgxn9+$pZnUx7jYH|L_lGr0dp|m^~aL>Ydom(y+L?qn3~V@+6W!B zy<)o2K-EdW+@Tarkax}o64BKi-NE&={VcOy4vrD7JrZ`Zp)+aGaRXx4k)1b$ub$i5-sPLI&K?`31yppHE1NCB&cT*AP+YUkuTQk{vzYTtHd1 zemuOjy&sFoxEO97BzY0`L9=tGbVvc+c9Ec72P6saOH}yCu(u7&8EN*6Zjll!Xsxp> zC~R2bgB6I0K&FF;yEBg;d2BpNN0LiMnCm?WVBN1Q%Ck7fI^x(Cr{vpw*H3}-d|HrX z5W~pD1-DKI#=Qm}>tE z0RPs#6d|-1a(2X#kwL}vaW{mTD=G9oVEmknCYV?EhjPMrMx0@+^CY6<&3c39>dWqU zi3;(^^q-*}E=|2#{+#8|?X9$I>mzTO;|(a{Nb6z=`++?SDd{Z(c-Q$N`M*FK0Vg~4 z^8j4b{!4ks2Gn$7fW_VSdf=ZpL?@Z&2l#{>*%sCTF#qZuslM=tcf*R!qMfg-r`A=5 z;kF}XhpT(3e*z`HV=LfTuPEHr6_Zi`$>SezV?Onvs& zz{JzTjf?v!lz{+9pF%J{vnh@?nahku1!$q$gKA~xG}ukUS`KjJXr8Cad5}31eEG9- zn$#0*s~pOu9U9gne$clk$v6oN^ITNZiD=^t16}s!^S&z>I?GwmjPR z%@tSQHB<>vAxXeDozI@_xyo~-6rJhT<;U}N(o-#!~B|&3vPx)3YeD!e$$oeGNT>&NpP}nx}NH%Tt zZH}Qb-m*dS*R!kpaGjFu706KZJK4M3#h^_HA20)k;+4{|5uu=u=O%0zs8;3fNr!-s ziv^y3pYIZKhzT@Md2s+Jsdf4jU==#5?@tLSz>u9)Q{Is;RoXSJi!N~okYc) z$h+z1tII&w;>{ux?yn$3))#C-W5QQZb>Grq+LBf2@#xBARtJ!Cd{>qLj9Rzu+r-lT z;QOo+y8A8vWKBoveFl~#_@0H59|3a`VD*CUY3i>}k%I6*fs}!{w~+!h}33>N&oT6o-!eIE?)^-26;HqhS0yFaOh%e!6qH)^R~WvC-@v{n?3L z-)sjH{AP_oVf22))!nwOax}iuhmJ+hyXUwfWx3t5!y5<)nia{mL_G4G*q7v5t0nK z^2X~nVDSwsUV>F+w`@i9b^2~&o7YMKR=GoQ&KL`rjlT7mi^9^F0~RqfzQ0|BGPfUK zu>pD{&>Yo`3mpwoL|tzTb4V`z+PtfnM( zY=kM?Vu<$S%)qGlJH6jHmfu4dZ^@GDE9iL^#v5n?0Ki$RBUjYY4^F**O3!~a{^KnH zmV%Aw?TA_x^j*whoTrJ2B^|2kluA2oM(MQ0X4m77np^Yke0)H+O>{)>GfKS_G6X%Q zqInu0_02_x&Vk)}pw!${)VUCr72g?1Y+A0Czv(A;o^RZbT&s1O?IT1z%}Yyf2vT6) za?0(<1NmZexu7~v`yr7)jnNRc#TIhcPx?H8pe9-^u+X7;Iy7U(`=Fk+I=q9Mzu@P{iNK91+h7@a)Jm&)TbxilrhZ`SIQ z*(!=&eSw#u#6lScmBmVP)Yi{vY{)WP8Z{X~ zQz^uqEN*QBSDvoS<_VGofwQ?PQz#`cY9BEwH_+^jjtQolwc`7}>dn2}jgu=6@9J`% zoh0>6sjAR(gS{;1tD!Z|AR%{mY6z@Jo0PY1F5Qxt4JDzT{?OdV2HuGvZY- zwBFt4JIHKo`I4$qD0NdPWr5XSxC$``v!X4O#2Cwd3rD8*VCT(y!rf24EZdpi@p*-( zB~8!mC(%cPH=A5)65D#&ygVTI7gEUT`L@>zq+>Vjn*P zJCMI~tA9J3@c7V%kRU#}f!(3Z9!G~|N5jKr4L{aG>J7_`? 
z^V(N$$GEx`C%jO--yM^E1u(_Wz=~lW^EIRPtte`hZ5B@9SnR5W4h}3CD`lL>BWunN z3ymx3mDDHEQlE1Pav$UL)MEhoo;&8p03w>^{Uq_Kpe|pT=A)oIf>kpIjNC%gz8Fn; z#NC*|t5r#+a}i_Y6C)fSxg)bxwr^4@47!sOFEk*4Rgu{35NEUERfk|EPB~UyB0zS9 z4jRYcloiLh1`&f-x;c9~(6|jmw|2f?KiC@eo;{>*!73oFz0VWJ_a5U52yB`(4v5IQ zSt`tIKN+ugAHw^m4Ok*&R3B7?_k#!A0b=u|!&9!4OswR{Tw^7Qw^9U6H4dv&(K?1M zuSb8LoO6x5exjrR{g0tq3~N6xixr$Z^mt_fmH!mjT2ShuDI3U`RT)WT+Ek@SmT=MF zW3^Z*9cOOlRK=H1GD~A^btY;Q_EyfVV?YY|qmbv(ACx?9C;t@<(*M(%p#KA!t^Zq} z7jZ>P5$(9H%}Lhm38%bc-mw;+msF=sH01CjRU^}|%FPZ6849qOez*NjXWRW%8PvWB z?&kH*DL&dfE%??{lv)wT?oWG+e&#Cn57{_TieMlHjfu!mIN4Obi*%STv@A7y3KyMb zrOVx6@{q96Wt48o@J$X4nd|35C53M`QW*ty<5nT1KHHS*aa1$*)o)76AI6o4tStMh z@^jyl&l99lPfa0pZHrbbzMOguWOS_`uV0TK`y22g@M-mt=NqY|)`%Bkn%TPhd6^MP z5;{xiJ`=;`YGKK$G$7TC)&4aTrWRwpg1t#`^F9|E?-QQ)<^B5w^BM1rOXL+CJT1iluoR967>B6YF9}Fl6^D z@Iw%esrJP1Obi-*q{JCIts?;!EN{!0ub_{noe?Lrhxg=vSrzkB0ZsU( z!$J>}{-gDAMcixQ71vTe8~{xSovls#rd6t4s()=*e{Zm?!A4?yWqR3iWzR-Bx zky0HcdpK`Dcg%wN(3QB$4|U!vyZA-Pja)0$q*J@n&Pfw@K3$0fb_UJV*D7Pe#x&x- z>P|_w8XYRdO0jw%m+2xRyx2f*oDW}I5l@b8am$b909*eFhha0%_6kBoh(4H8B$&2n zp52}{TTkLEjZKvaAhPV%PH-`e5)@vvy{F>tQkDE*0`$l@Icv;bnr_@O-p1M1#`d)N zgt>WJaVwcd>a{a8pj@DzN(=$}Ka#%^}RrqTg0RP1KobWKCPcHcBj*Y>_kpVUXb%bxy;LwUX~mJ8}P_C`yiPDK4P z)19t4@r2Aj|iU_&?YG{q$S^=}rpgwi!y8AR2#UM3ic*s(LU( zhUtEqqoDcp>R@z~^tAiA`CAv|uQF;Hlcnc?76V7y0ENXX>za3A^YGx)GDj9Qjait- zpiaD5PI-G0P8bRD1Rq*PJ?xvN!koL&(KKEAqm z9t+UvM_Av1TCET$6~@XOjg84;*e)G|_N7Hu3TO9~gKtD$Cbk6yyz{>Rl4%xYfTmo3GJ$r1aB*a|yO&=uUr|vJaVv=7 z8K>G)vE1B?K;#f)fV6&(PnQ{{$eG{@oz-l6+pi!2y1kVp#ckJJK(_V)f;g@@gxl@( zeWx@>hjxS69B2qNDp9#<;7J`z`$uCFBDq|)NX8DK*bU0PC;*S066 zOL&zDV+D*1bJfW0j`k<8O;iuUVpcSgSs=1@N8^|v*^R3S^NbKhd-8mVnaMS3o>CK{ zG=GwDgTUxeBja%{p{sI=PPFj*kyG0lts4_%H8);0BbaI>=4Y7MNxWbdmY0rb%WH_U zSWDYR>@s66k8$(AJF|3fZ}I(tn;4-$+k?d@$4Q=3YLLFV$n|e%Wf_t``EOr;i;gSu z+5r7S3m-uF_Hji}0A0oisDZ4W3%sh+I4bI8*BEVOzw%?|{VlKm>oCS|0|bp!FTo|W z^ed5wc^$fD<9ErSPR<1}UREcIE`~TxKgbHMCmAekC>prSs@piHc_*sLY!4TB`gzph zZ5zpJXBuWx*@i$1C>7I+3y3@Les-GDz2|RF>YKjUa-i;Sd{!v#1Y8mqur{co{ony9 zW(CJ4WGP=1%;4?BWmVjho2bpW4Eg)qn$B+S!+{^flC+cb6n*+`IC0Qo&!u7|&|7aI z!iCs-m;%0#1js5@wg8i~t~5+*axN;`cHh?P2)83VAl69MrOd$=oeJ+uCW0vBv=@GY zNezg2geaX^tA4>gPQM<0o8E+1vxahEO+tw+@b{0H23U{s+(`;H^=KUS3^7gQL(i<@SlazXX2Yn3Nj*blO%;s5x zcNLB&I!s41HoGMZSj=3zxc_m+2Q10HWPRK<+#zK$kVu4@Po}Lzd)WZ{{E~Gm>n_;d zFN|-cZ$6-uprF@JY3+ozW_`r~VDUjdybjAhA{D>W@DkF`83uH@mcO=NT<9v{8W;6 z1`G<+P%V}2ec-ht+kJ)jKp3g(T#e5sp=~F!ic8x%ag&{OoLnx09UkT0Mq*cbCPrD% z%|iutQg_ns=jB%1pAubj5_p&8&F53I3s??9k*+f4?sVTLJEcv~$vo72teGX8 zKRV;Qb0H9rDmc62){{)Ku=toTYiHuD9IwbetC7-eDhGNOkM+@mjed@pud zGN>3q$(#I0c=82D%@e-5v1?n$w$UGwLQs-cQn+lM3GI>9uk0j%xtaU@U%*RDWPye*!Z8P?7(Z z&i|O>t-X*9^`w}`09$zrv(S`yweA*!#ybb;P`S2+v`m!e#WhBkh8+?~ycYJy)j|G` z;Y_ew=)S=DOQu5iMQ@L6MCraLeEVEGM7&8wQpufxMUNn{d;iTxCfyQD@LbkY&gCxi z2$5se7i1{5%ZZ!U7b+vvk2(FAU>3)kW-0W^gfLq7+1GCr+e_r4+ep5Yjz#K1Joh9) z$*3eOoR9Pvy1Fn`YYSpeC-!5hKYx<3HQb79ECPK(??`P&-46wG<+f&2-1`cmcW(1L zyOCVRmYI6B4zOK&^1QMzHrtV#Qu$Tb)kizNl__}jv{=It{3sguKs{HWYbXb5IwEtm zShI6&Q00MK)B%q{yBUam-xi~p^9CpRanG4q93wp zzd(%UTBZOhO>y_Vz+!vbm>F^o5nWGPVF~0bXh%_R-}~R#v!4=S|I(dbipI${F zUz|A+d|Fw;Fab8fitfmXxz%%idms}g8ZGiSModEV-@MxY1gy)z904wVQ Vz>?zc%Txc*@cKXb(%`S3{ufzeZlwSK literal 23737 zcmeFZbyQrmR_w!NJDC$0H!X$HT`bc=Y%Y!9${l`1piCLZZh{o&cW^ zJR&9`enRr+`pG|yVEj`P6ASN8#U~H(AO3mzKMD6=06;vz5ym|x1_jxbFeb z{qw4r{{;d66>za}@USo*VB-HN)_4N=)A54`IM{f99%5r*0v=#sVgaymaDjNlB$N#L z>64@&K9L4|GIAL`pNwe=Mt;pOB$I%!xtn)tn&1mr3uY~C_aqjfSD!uvhyH1h`cHQc z{`B`3U}8PM#`))6c!7V0>;Wb=E++PW`@x@EEFc-Nrq-u%Y*Ge30k>Kb!CxR5bN3|K z`vt(GKMg$qVgdnDfIUen5vw)iJ)qk;)y$<;(!Q#1%dK-?Yg;uV>Wl9sZxcqc+bk_h 
zYV1Iok`m+(=#~d)H|ifw7Ml{Hd**Yli`Vb!g+Q0p#7bRj zmr9jeX`GK8DwY3F(f=8Z|MPA9`+o-yy&;M2nGbIkDSyywhuH;w+_)$+6TEuSHyhg( z#UAhfSz1!T$Vt`r{eYhJ0kMv0ui;_EmRZ`0m0|-H9}- zIS{{5$h~A{m;T?rNT%((&1WmI@`qfHsvn~)GmJH!sgOpVUw1BUw2it%2%6yL#L=cq zXM+l=&e?b!3Z{iV(Q@4bMjJl<#7NkxWH$LuTFmx8BH2&}hg4%;$vpu3cMr#p|B0#7 z;nelKz?gzF)i3v!%}Gk78R>VP;6q=vp{a7ZU1D$E%8f(U7+%Q;kTR*A^J`Q&zWI^c z)$g6l@eA5Am|rnbZ1NYU{{Kf&)7^N{l|k{1_L^CtgkUcpZ_epGfctbM^YeJVpPqtS zf3sx9Cybq~-NzZQfTm#=hgs>@?8<-eH$+BGO9!msP=!0nxkg4NEGH*-HPLQIHdG^p zggV~?M8Dh_ARtRGV$@P}=*E4H%f>yVgO^4tRg;8_(`-9@RNNuj_y{6a;A1FZv5 zuOpwLg-}5`t3h1zY(gAbHBIrB32#JBb4uCMPaU1K?NNl8)kJlvPluKae;rZSb#`24)Lg0MJnm~Hu-5f7^!8UM#?xWqS-1D@UzC( z?E3x3DqQc9=|QKn_Buu20m%>QsIQ{EcPPI6qnr5g;4gv83NJR$+e|JR4^UZxZx2w;{1G+IPXD zi4whhHS#6mS;hV<^hgT|NGKmGSv(BDkV3*ufqlyJye-q9JzM-W{3Hxp~%_*XSUSK+5d=|`W-j2^e4>m!;J z2RLB~#o|3c1wNOf=jN!^b@C!Mc$~#`*jXIwd;LN!dzd{$MpMamvO&UhY^CI{+UPU! zhiVFwPSqN^TPJ>2=LX0imG6FL3a8zJvQN#SCy~A}x44q6i*G$I8!(y~x{$+Y*QtnU_C%`@s&3G*aG1J5tRmE^W&X zECMyLJkd*d@IZ$#vM0*5eS z;-UotFo`MixStpY8x$o8m-5Pfleh0%U{pv`s?|o?X4_Zz&`Dnf6fb;C*x*&Ft`R1{k`a#)+;` zwAxEFWX~yj(>#TRzIBqO(2#_;4i3iZyDSts<_4#$F58NYjR~o$CV44L7`#_LEg)Yf z-8{`pC}1TCOeof@QsG_mfBkG1dzRGyg(VMaO<(S~_k#h00Ws->yIkAAXQn3BXI-!I zyG|cSKYDN)Y7^b!9#*O5h3G98u8bF=Y}bf0$$u5+iD!U?5xw8y8763Bq{tHZiNuBj zCx6dMTx5=;r_)!A^0|yCLTy)&2TK7<2J3$ z#(=c_Tck3!@2XEtbC^xECFW-+e#y(GC&TRjf;NkbY zsYUiG1BBm91SsXNw4F5Hnh6SISJPQIVC73`?oNRwut61 z>!>9Ofx*&KIQM`adaI7#L}ccZd%gw7!-GZd_T0|BgJeQiYozoXMwoBcq{n07zuH)Q zstuN=@yQ72iU(#y3Y(!nxP-#Bf^Lh*mtatc@@wV(aBfO3-)i!QZMP6IGWSR6J3cF( z^FF6dM0&&TIDU`=SGjMh5iklL4InpdvB~;Z&Oo)wTjx6thX?`K=t=+EQ=y zU>c+t=HJnUTPCgdA(Y&&JqKLk6M0pg8B=o2rVB=s`3 z1>pLECEORH&kb<%pNz(9G4i__#V*k7Cnpn>i79t^Iui{Qe(I^|L=cuCjF`7|F{Jq3 zPv{?ra8HcWts?8$~g(H7%k=t(wrL(E-`ho+&O_Zm3#@Yn`wR z{C*75!qzGpLDZJ3ZzAsb2M~W?4wLUjybVITv26WlngDsvja6VZTJBz9G4DThd>6p3c zlv%mAnFBLc3dgsxwy2d@kPr9J zrbyNbN34Wh#923Ze56XIi4A?%G`0TOM>}KTnQ>X_OP36UP-}^(Mzezt$vG6fOeMZ`G#4vFo|vpSs1!R#;b1Y1Htl! zuUBtGXUS@WGS*}nh;eLD2iJ--QWa(q8cnyNyd`u}rZsc`o8BZicoIsuz7c)?lK_0|lFcn<1>%jQ@J z)Dyf)Fzj0pZo8{hr0{)RPWb7mrS8=WXs=mUf7RuyZdQI6{DXa1R(ChE$sx-hCm6Vg zVHh>Yy46o%^vuaPbx7e=GW-1D-MSy_hGOGM zWkqFLpiYpRgxcN1qevYxQ)F?RnqV8CH-hlvzb_*3&l0ynvN$GhjbQrA&d{vSpgh+-ToM7W%-| zInn)LThScy<;Hk#V;!A!TaGI@PDh4JN!kZu5=Pu`Q>yRY$9pMJ2<)W!XxA&6Z}L;! 
zI1FXJNQ5`+lFWd+H&4EtF!59wEBeQ?5Eya%8%n2BX%0h(XTPI*-k?|SMgr;{CLg=K)-T}LmEY{G9fu0Bx>OLz4+JH#SwJ!-?nbVDZ)F)upHOz8)C4>=)&54byB1d#IsEl|o zh^kr@3FEIn$3}s&++&=P3oM%)^EEF084M4;)5jv9has;`wPfP29f}qN_XfwRCRz5d z4-WYIJ>FSrFnw*cLKJq!S!IcsSMoN3ncy$;72_fph?DU-f}KIiWe>Mvy0+tF8k+3G z>^h#m98c#;KdvkrL56yI>N}98rcBD(;k;Undk&3Y9K+>h=g>WK>8y2xqz@-_Hk`V; zUC?+hFSM_vI0F(tB_*+osEjEWIVdYD%QeGKfg$VdcAJ+x;uIPn<95dc5AO-&VptQA znm5+>UwIa?g-Z0C;rofNNdqQi7gIpUS=R8oQ99DH>li7p;3+jjCo@oP#E8mbl%DCD;oitDL%- zYM4WuPfL`mN(C%JIhr2%nATq>=VBtk#3niII})xaQ}4RCE* zgm8~cA$>eA9tUnxL_ldL(9I}+3JHeg!K$89X&+~33+0)p_tw?4Xp0@6hOIz1#|Vve zx+!AqJe)Q_CP@FFr?Y?2TJJ7iahI|!w>>%L%zHHdh!-}RV%MvoU@14kpLxFf`?l#O ziJYX?T0Vn8*|;WFj;=2pEA%;zsT{~QG44@X3+-p231m+6lB3<5wiM38(WE^Q1%^KC zywt=D)Ek^$5>N0GQne%wx42)qMQ`sIX(-t)v7)$M00Q4$X?oM7umVe|ZO5%Y7bgKf zQEcWTh6@%RMMOYjQG|v&y(%*iMOGO`v}Ngv`_8 ziG$#K9qfnV*u#;JzCMg&@{&^29Tnt8i6(}l1Dx0BH(10q7cFRrIMt|IuOwFsNGxmn zyhw6mg7z?)rnok$%4#pEnL?PEWprk( zevOAe$}uEE%^>Cw&xcvD*LE{v(z>?0*`|agB#lE{v5RE(ncQ@YoUn~Zb)KE>l{Aiy zw@5U5P`0%n@ax!n+>*s|!i`07kIvlSrkr&NR14ls@8E`Q|t%H4d67u);5{FcYVemT?q1 zi#zm?_)~CKt17Cp)+(PV@++5~(Qc+kfn#o|==-$VMLmZvQr+Y+!`_Z5M=h~m8jY5a z9rU^~Zh=yawaEh2m@HSw#T0W7T30eC{msF;O~w6M7D7^-|)GsJ8)x!`#e&Stxp9jRiGrVIe-pACM8Cc#w(~V&HU)6aZ3h#yBiyGCT(fw#kidW#B9l+f|8+_vzfF= z8|`f?lKKmgFk*T&M|)NUw)I!!2N+7&EWNKFnp-dAm#h`zLY{4F;p%%WEi@3SbTMt~ z$}iP?b(l>x$T9eR`>j(6qzcplyOCH6(UR%rV(Dl(jRL_c+rEc>GOew7$4FlJBoun&P~V+PjudGRq&BUmvI!^hR~%Uu$b`*Z~`f2po$ zKv_XSbTo_udZ2u*^M%OluId$0ll|;M zrxg1RI^>ddKmC|CQ)6!;&`Qi%CBW2|CjD9sNb9}$oHSfv4D8I|>&0s7{*Ez{>g`_B zq_uqxHGP_ftX6vc29~J!SfK}m-URhhyI2nf#)l7#O{(~I9Eb(LGszF)? zD2B4Lbh%;7K~d0Zt`Sdpp_(Jb1_yQx?n&{~G5qR|E2T7)gNBZxoL8Dd;_D|3&oH%3 zBf>Wp_NTfU%`7o91~_dRM-za15`9WS^s!yt>ITmf6{h_&62IiSCMLafgkOEMXxQj1 z6WhUk8kS;Iv$yq?F$Ma83dXji-|VOAO*V7I;FyPrH$R|&M>wI)Ncce$}cQ<}ht!O!}D4%2h*jz{UjNVO%dvaz4I!=ZeyR~lL1}~>ERvMf5IT}*X_C!u@mgOrg!qAH_(HvDA|B(y1he*w8LoxV?sgiO z%3#$ws`_}nr`4LPiJI%}lSSM#R?HM|q3}ed69AU4=AlTJn55u;I$Tx4^j$Zqj?x)2 zXiOPP#VYV)?u42k!2hwsJ;2K!58Mk5BcrI*)ULG?<)5rH?0<`THA13kc}18f`GUcs z6D9A?eU^2xTcfl?FVxgz963pZuke~E#S)t(4Qtnrv5fjd);1MAZz@&(?lE2X{#75& z#FnQh&IT__S5C+8w_O3RCDaZHBxlZ5)c!3>^0r#9QeD(GH*p32NE&IxD9&$%8pSBv z7vfMCv1*G1u~k%_fnNLDn;L~p%-bV7-Zn2<%6=d#2)NXuBcHb~QkxnX)Wp&hhd{iQ z&;HGe|N37vW!>U`;XD84ss86&ey9B>;ASg4Z=mk@=_)AEwB*=Nt#3EBD}((``U#Pb zu~Tf5G?up4J`k+lqnxi544D=Wi$4&9tfGBV)XNvSeC#KxanP;V6%+s47tLmGxuXnL zrMy}e8D+vq$!0ePJq_*iHSB@O1q7@=A^AP#hW_t&tIE;jIjg_#IH7?uD&BO-b0g%m z??S=3Eb~CJ7f&#>bXMMwUL_+5(s+SyF+NV?=}eC=DEku_>dkK9#6bbrT4`LyKh53<@jEYI zKr>hPPXsJR=2MtS>a1ll=s6uK(eo3mb=P~VZzFrT@kh(`+x1GJp!}TH>;>*G_ImdK zt#=-NjP~;f!bwu@-yocWJ`KHX3nlXq9?^H<6w~!K!R^MOm!o-@Hh@i(3`8_Gst3o&3@vFrFH@@XDwx?d!ha1G{|MKh zb`;#^pySIkBFyEUXV4@LjDC1D-Mi!+D8|khAX;g`w2JQp%k};t(WCA_u&?-gCJQ~P zoyb~`Lf7It-*in@fn#^-<~D2h@9suXgkNg8Uq)Yh>lX+(J3mBm<`lfj^-K&IiXZe1 zu+?zxS*g)BCk28@=f5)t<+H%M!g3uBORVB6Dn9Zk9;6lD0~%=Ri!`@17LA9>ua!+I z3f3GWX1@ixPoO2{!i4mAmY6%c4pwGFv8+s2s&p$-^V6Ra+z3)48S@PTC+c6mSKX*J zXbB`0^z!2!`q zB}PmHDD}@4O&+}KK!T!%F*)D&9#eA!I4FHnHMwis;t1SSU%J!NI(v%m3Y#_&^tyKR zuh`U&%I#x&;C(U>s>nZQ(q0m-SitgQ-JheVA`(`yA0?*)hCt?tLpt9KIEk8Mv&hAo z<}X!gqZKSh!=9O$|2p@;2rf%@j?Rra)0Hh1rN`i;Y#yFcV@K7IpI*msoY-1pl+9B+ zb2TQDg!MlDWLI;s+^Uw_O-YVaCvC9_cmOgw{&$vqG2gKgURo>6&_bpK$ zoMdHX%c7%Gsj~l9gsoOQC{tM1g&T$TxSNkSQvO7SFC!Na#6lF?yjz8?v6XzI#W`<;x*D=kn=SX{t$1i@4Eu(EzYhd2qIBxtN-Ha}p zYv+ojPRuXR73|P+gBq(Q-6%EnKBjHu&M6+SEOc=56CfrATSJ!m@R)EJVIXSO=O+)M zD0Y2k=*0Su&s~k$rBzLGzmID&D2j|Uu5;8+PRKQwXBdMa(9%b0U$3yrM92CIeV1zY z-?#^mw_5Ybij=$@M$J9qdCcxYTtw4napj4Z^fYoVmMOU^ls`UHt%4>`h5UmpDjcK_ z&o+=8$e%{idwF>8KGGJ>rl|DzX`H10Zt=O|cqxUz_sPZNuq48iROrLr^r#NI2*0`& 
z9+ce)T*%)apQv9rx3gJ%ygL;HCCK>`gE)P^4OObc(sC2!WOQBEP;yeB=6MyAMC6pY zuXO6ojo5a7xV`7rs}>&ndRfRACnV7JkuL^b8c^1x#7`#4vU$UyHNqo9g3Q7!n#ONskKAqR}HDyFrpeAMVVM|L+wsGlFRS>1hXk+N<`$6sfVND$r2 zrdJw|2A>AXg4JKt;8$N}Lm%^=#u46(V=GNuVYH0FeJm_KLemg`i}|SU!2y)Ci)qle*SufMAovRb zQtkMPPpq~_?fp{SynlQuE#F7idq9O~?v7?wN&y{X7F{7Ll6J~(F3GuKEo3IdZI(F) zH5EYn+Rxm+aZKd>q}}o%Hlvzg^=od!a|1OrL9?BzT>i zM|sPr7)zpt!0BuxBL5zLdi!73kTCy?^70=UP5-y`^xrR9Q7XmAL1^0BR*eK4EOK>l z7C){0f_1)}mH=oTOA`oz_ydVcJ_=hIu3Rk!bAP3qoIG&;c4AAsl^8Kq2Tr1mdD7M% z-C8XMdw5I9nY8$L!KTtwKq+BPP-vR`+(|@pYm3C^GJJWpnmsb{Nt48yj*f0&HHhtr z*flnScNbHp)*u7#lBj6xU% z!WTre-kI@V9;yj`dlvGBy(Ti+8}tw8?fs*vRfmC@G@0b&_H_&|i*Zk8qgGOabYBc> z-(|d62nn&j4M($1IGv*<;9yJWnT}TGF|v%AX&N`P7qhQU%XtJ)fRsdZ|!#G_#}eG zKUUVrTgEz;4Q^u2@Tq=Ju_9`hFUsV##y#NK%aeejDmJWS9*n~5mMXuFr)HnYX|uOt zha>1mF`1yKY^3F(k!8~^pP|pRopEZN=q3RJkUzM%&LXOKH_*O3BUZjUcFt>@&r#6H zpQYW9M-OlOkQgpBpqJt>cO=e7W*qa~G|1^T&Z!`uIw_?9#f0b&8gTeh^lVCP3q0 zFMp-BdRqpM$z@dhPQl<8DG47apR1?6EuK&JN|(Iz&sbSW^`+Mc8+CaKBTKgE)2aem ziSXTmUAMje~kbbnsA&WDmbqI3`d>^r`R6Ke12xdtUc1!l#{z-Q9L}L1Z z82|WF?d=!OAa+@@V4`MY5Lk~`ZC4nabhVwOJL-#iGS9(P(aK-tajq>OM`Jv01B|O^NvyBdb?#$PX1$E zG33ooO|MzDCO#oCZ~0ZIESO0eZ4EYq;xtqtHjNL(v?E5Leb6RTi(x&R*DZvN$jc0z zIgs1|aS~xD(>diQfELYO(gVuZ+&S0U1Sr0KUU=8#{?%VS%z9J(tvr9Zd zTJEzfy6V-WBPfhW#baVz?MoO<79v2T;spv$o3C)2NXHROCP_*qQ3~fuhJ@q3S|Q-I z=AV)WMdT%i{2$Z3qfjajwLW^8xT`$jA!b1AW>=Iap=j}yAUbIfJ+xU~cxKUoY7B6P z4%zpWwFo4LwiY=uNoHJVDNuDc3^488udY==+1rR(j+=1CoWx7_wlW%R4Du%?&XH~z z#eKOv{L3AiUH|PyzS_eg(m|W-F7j@M}WgP<9Ff%>qzXKz%UPfpr2ojQF&9JInm>;l zBk(OY{T`5JYJ(QA1J)XcA3+DWLMfo~mfsS@tMrJ_a1iDE3JqxdJZ~jf>5<_ayGJ#S8T^EgoEm zZ7^@4hJoo>et|RcQeE=nS@l6B40I2eDC}^leNpCjR;h=l>rMGhe`Fak7OpE!udYAW zI=Fh@bUv2 ze)($5^~)FhILtbPThFQ9(0%`XmK_j(Kq;ad43J0z&<8@#v|CEu1?1_pz_$Pip@q@P+<#Ty4aXBHsKaPPI7;LOkY*uHb zhP+ogHfMzz{S0BwHOBn@Jt-}*QgR`9^9o_Lx}&D=K1{N7Phsn(Dt+0O%dgaGiV)er zxhCq(S**VYXsjA5(8c(vST&)9!6s>ro(n0v(1$eq6P4d3A#w9 z^KwxQMyooSrk5KLY>Y7YQzjy?c_chr1goV|8UEh0iZgR={p#wpA?uWAZW?iI-g_w0 zqIC$bxkkW0tGz?#NO$LALPJ@7SUM-v6Ff<@w;m#4+vR2YR*xE-CaM&;&Ftrrfxa-e zjJOm{X7CPFso|MKN)D-t)rteYWW$g6yj|QUVnLZdoQ+)iovK!w-g#xc7<;X!yb*sK z3~f&08EOG>obw)jh9@0Gb#J$BRh#=sWep7;0xfgOZK;Pv%f?}JDLzlV5~0EnFjy7> zG5u5Is(Us=V~G}Bz^v(#(;r~BT!c1=%)@RYwD(0dBQM0iPT$AZ`E9TY2+#!uLud2Mmf zrEfOh14Ix^fW6aT$erZlgc&kH+-y@s&#hp{!WiV*XrbX%0#-i$tH;R_P$Q>=S=<1`0?XlzN=zJLtsc#7Fto#i_~GVs%$hczVLV#|s+IDjOS&1N)#{KzPA^}_WKJpo9-3!rZX^Glo6tZ(7 z!vJ&`*#hmDmN`+@d$iSTP$Jsb;V6fGF&S(2EN2Gd62Ay5&U=ZHDeBwbjQi9`P{syU zL0&s6cNk9R@KC$yN#>|4x-7?#LPu;49rPZa`F-Q9Igt7_!poCuTK{lgT;oRI;c!)6 zghxE7m$HcU5-FVG$63Ca`9jcIrm2Zon;pfR_s8Eh6d7tnJG9irwETzB|4}O?e;Ogw zil_3C;|#m{?-kXn4YB`Z4BSzLZB4#v>c=Tpc+^tRn&ID8(dhsSXkT1MS0U#PRkHYv zx`u(7PC7YW-OMropd5r*2MmT7uJim=zjEe7K!p80Al%&7^V^|YXN6`rPhOoNPjk6< zME17spzglS;&0V{zWL878tuP0Nc6-*z>(7H91Q)DcG3M6Cj-g8G-1mBOGr$6`=;5x zB*I*5$+s~#fv~%FantH$6+e)n_d)-DMQc?s$b%+g4wTbp6B${g7^BhMSUmo@P}Rw> z)@%69diuLGdiR;JSKOKIMfHyxzflU7RG6?Tj6D!f{*+*In^xN54Ap~>T&wo~o*UO) z;?q5IPUE}u>;2S20 z-2=)dz=^ZF*1gs}+UiqGs9<3LJ_X7(o^$_putA7PB>i`l%m=c}@k}gei2)YCNxK$H zP8>*b!HfkGO8&7dmn~kN2v3(_&Sg`w>a0cTDLD5I*U*it8Ak6CJS#05e94yci5=I| zq*x%=zFWp%tiH^>@t^`^@+*t*@D;ix9=$C_RCf=k%_!z|@V@r0&$9W`1*m_xK(R(7>>8Ov=yI^Hxu|lNrZey8+2BBg9cbM|{&Km~&3pq7GK}*@5SR?}Fgdy`})cXezvm;1V9sQSI?-cF<^}o<$8Fel4?m_$#lb z1<&OH|LA@shY7E9@VPU>m^AK5$J-Ds$zla;hf-xiAA-pn1r6v=qgoZ|-qX?mkW;CN zDDjRIa_BURr(B$28VuH>1#u^l1SAn7IkY|)R~{E$NBwa5Wfm(7ulPI6`5N+8{X&1d zqvghbZ3LoQZ2WhaQ-I(1fwH?7vi35!%$6!NHzfwahx=Lh(-T0YIf zbfj@ETx=HQ^K_t0fYK(?{v2z=o$s8tn2lMO+j`MmC9O-P0!@UNRF`1BAe;d_^+^iR zvQup8c|A_hevwN&85<((_}Y0Us(Oh=h+atR;5qm4Q)?mXi`AHNvJ046BAr_S-=>R& 
zy7X380OR_{eHY~p;zOw0*G^TI!mdFJ~5@6v0UiRzw~giRg#N*5wOCFlAPM2kh^QlW-j zX0l%8{&gXoUo&1<7Cz+LFD7U`i}ebY3h8D7gS3oG7z@RMLO*b4FS+@t#7mw`!hNDL zsFDe1V0P2<{3dR*LAt}zFE?8Y9)t`<=|3%cr+pi@tG8~_f6VbW@vj3wt_=mQEM2gN zbj=8%O>2F0%$8yHaZNA%L$U#;0^NG)oC6Jx<*50pRM2jPl zBm4qStsog2w6@tEg?zh4EXEA4CCyh-k=sC~>ASyXwEQ$7I;ggd<3^?NPEIDZ=af;c zrfJZ#UZ!;*C*{F9;;@OZYx`&j58k5?xz)a&((EAK=fs{aJA<2H%cUpPGvvEoG zFCP~Fc`ii{pLUD@9{V|GtRgE^sr$vQdVA*pI}ulcBBK&dO5aBG^?>t<*pE+lHJ5F( zg!~ArbaXCCG~g!Gd1laCiRJi8EUjHJ`epwQQQ(s%pzq|YMp<%0^?x|Rew)8`i#(WD zHb?q*Bxh8ptqi{9w2iSnC=&W?>^65Up4d+3@~Kp;u~dIW3TRF$h%pH`zE+ zv};DH`Qdw`}Bhr{by@A5a(Tkigg}w}@n-8e>iZo6CIj-LFRN zahSj3_g{$KRD`ASW#`E9`Q{PZf!MG#f3#sMH=`CkQKTA;rfZU%1=R^;kNeyQ*ACVJ%-2ja4m-v!HmgdTa-IhMY~?qrgT>w+rD=s3RXt@UT2 zdJ_@5kwhfXj>lF{B)M@&oWxGmxEHD}wgv=}goCZXVC9O62C4s$UQEV|(#fwaH^b{9 zTM!4SxDCJYpqUu@LI)wvFw67BAd0qmYKOs8l@=-H%i{wV{3mVB(cJF|8^Pr+_*(l-$$UW+%l8;W>9qcDF}G4=3!i2B zA_;wrz57DCW$WX+bPa39g+oR)^69k;kxBwXLhA*1rWmbKzFf(5&!qvIlUhI<5!9*sVh@+azr zW(9)LHR=1)b|%~!t;s%<+KO>GGp&rjTpM$8W=Oj$iH%Ri}66wxG9EC zo>TEfzd?+IBv7gw$ZwO=3AoA7^U1Eb%uE+7MEBW4-8V{7zH-n^N3YSmp4cGlJMh*Y zFL>0V@h2m#_<`s*WqW=5Li4VaN#F^)djwdjpjnjds5`Bg(!Vz%GW;c6e}j18AM1{1 z?J86bsI!T-CjGMr-*IS%w5lS%X96|(1Cr$Ccn_Fm&`s%X4>J|a-U%^A_gK!GW|WrW ziujf;2(y^MM3g%vWv!|+3BHoLk%nRW`ZoCzH}eGRrsuI9Dpn1S3C{G*mkeq04c09^ zy)D}83kmx9TIX&H9@KZ>0~0FSBSSF^7Fki?)ZsbY_WmB}7f!83i-J6zBH!m1E(w2toR17M z_ctK!Tx8Tqm)?zO?d|G|8R^UHb#RpnTDy(OWjF#Z4igm-hH_lW!`Abi@EBu9>W$yt~nFSU`-m%`B&iU&6 z%1_fXI~}!n^{VXJdY#ppuPHw_wmc@Fv411GWaZRSP_#o*dJky&5FQ#SU9PhSMK0N* z{&;gE)2;<4osf?Zb4avBFn}FFz-j>k z`4hMQl~nqljL2iNEC0VEry-s4@b!B&_^W_q?L$T8Ygshh<@-U#%p@K<=v?KsHD7e`XKqG z%wYU{r|-r9z*AffA{r(d_BzE)2XdgiNfCrRaJmPGNdH-IIWt*%w=GeuP#=JQ5pI}T z>=lP$YVW=G?!ux&49k~D#(x3?tHMtqI)362T;W$dWFuZOTubav<~XVRNL?*z{j#`h z-ymGnsSO?5+ZE*+O-!Xlwy2UguI+i1-u@xn{Gg6V{B3cLA&P(M?VChHnb=8Z%(xKy z;U9R7lQwKH&=Z1EyMhv%wCxeqos`9$!Ov2qZ+m=EIT4hI1t%(rAEJn}J36@7sFsde zZ0rX=wKQyRiE$lmPs6h{k+{kSJqK?C(u~>C8`lRK50@B=4}>3EfwlRyKssQxL4V@9 z)M`+)g-0YydFGIQKVJ@X@13~H+-?0TI@kWUTQsA?ln*tJOJnv|Dqx#@o2hJ#Nlxj110 zRYR?1-~KnK?)SwdED_DlaKmk8&=vp;T#SNvYdBQQwx?EG@c)X)vu0N&EDf#x-DY|H z#qrX4Q19x!YuY`4;&aej!XWN>j(dQyB<~?lko~3S?;!aEyWi}}5YJwAeWwm?#GASO z){oBu`qp~CcVd@J_kZ;5CZ9J?@1JqoF?kL}_L9bqfHfhSYFaEFF;HBr=2wU{DNZ4#*^JcW^ek2tnU#l=N8Dv|{bPs5;Ued|o{T|X)=-b1ziKQG3w-PSDh#=DrvLErC zFmt%|(kaHO713SGrDoBKxRqeyCjoJ}Gf@9o1I^2i8g2HV__MQt;cS;{Oa4f7T&0HP zJU)bs4xyUj=m`-bkEaU0=UZ>yV2#V3-+>h{$l$j`>yx*|MZ)l=NYt7;If)aZ#oX?49(=q0Dp7FJ!K@2p1dvwc?5+6ejp{vIVgfW5wXtVHQ!x@d% z-d;cZUxp7Nzo})A5Nh|`cwU1xnZo^=XI!DZOh8o4gLq&d5WL!DgUKCK#R{+2w!O|I{48us(x(94D7>b%90XZI5d=4+zp1j=vxPu;0H~Lewy? 
zeFhE7GR56B^Ty;R6<%r-D6*-O5ge<|wvC9)xgv_6KzW;*-b3a|Uhp?Hg%IxP>in*M z2T^T{Oq=iwJS)%l#-Z4j7U!L&G@ZWNVD#&@lGjOlusFzMpq^8~=^U5Da(vd4_N@tx z3}vS_4yx|anD!{8Pbz~Xs*agxDM(ha$5K0*_(RY8iwfT`Euk2pX0unjm4{_5_MbNR zhOiFKJW7!OJKR|~n>5TSI(O4r)uD@Sl>I!9$^i0qKbTTT4N&E|aDjE*UKEuZAqWAbasaDf0upa+EOam3p}X zL8%Sh91_p9TO1`A`oIK8qp^o*0ZkGjslNulS-ydEHk!>Suq?HI*T*OE=5r&Q6E&GX zYd}qA!B#Iz=(of-2>Py7tQ13`S3;OMp$b3t)&Mr*P%Fy!^f$Bbi^bZcHFPY$l zu@OJ9)_*Lk9Ibz0EL`>LD_c0fK)80Cyyk|OlCVGw)k|?RvUw<8t(Ljc$nW&Ff}X%p zFFa33dYH4HGU7B`1yycZ6?3bJb9Wi0LQkYIx%z&Ml{#Hgr?9#mswpMeToN!pHi-ZX zhAP2xrUcTURkEeCiB~96wWxGgRCRx?w3&Pl@zVY(_Br9Ym8V+&8Dn3L8-1nI1kLru zTE7Gb`(#l5rXJFBQ4sq5l-$BK?aTIpF|U5nOD;jgH^kl`KxeJw7!~HmWYcU5?@V%j z(Fu13uKS*x?@i7TF?$M-SBR|Y5KNcf2clQF)dNVX8!}_uv7feOwaFP;?T)tHbS0;Q z1wnhRK+VY{G&dqrK$6^csk|4Bjf!ZqTDb04XS-&`XJL;7ln>yk-oT&4FLr@fA=Fxn z@{}BW=U0)at$4FvH4Oc{yi80hpI?l!U8g(bF%sM*2g>B;L2rAc&B?i7pDs&w7wu?3 zYdUy|b)WsNyQZCY>nr0<9W!xGfY{rmLCMf))vZFeD4BHE`5{3k6;-;TjZg`dDhAQM zWfst(a!4rj&v-D)$?-X}l%@E)CC~3ZlMCYQaGR?Yd@>VN??gZaFilM#$c|HYPWVNn zH`D$JPedt4_`wC2fXn|#nKi)BLXXUom zVHCXOp`O}zyvImc+>4Kv@-gM1{(j4p5K7OyF<%E6nUGxYd+`WM-|5NYs z{^A_}|AlWjevPU?nrq|7vbUXe>ivC<16ug z$i)Op540BAO@qLT&08$P9DWvV**j)}cCC^&1>+YkeuRfpT7ZW0ksR@19&Iyqz)%Q; zlTk>8+Q+YB#)GYsy}X{D)I>bSG#zP(Hkd8uYa7f zX7U&Ml&M#T_#nknRi@FDrPA8fS7fd;{VY95bD&5NAJTYD?RsQiwqG*!vV1Z`xdch{a`WaP(Xj`I=$ano0yMte^;H8x6BQ zo4eF%v)q=obaXYu8yTP(T^c9WcFZKt^7}&;M(BvnoZOx54Yj+g%a}S9Y*%q?f&D-5 zCHrm~>AsVbRoh1l61yL;9e@WoW$fhnf)UE*^1}Y3`f(f(A9VqY#J<8ehmm>5Bg>t! z!0uPqK#5w)N;t(%oO~oYA=NW2M~q6Y3kUuV6LM>a`6Q)ozdW6<Pu=BC%F(Mjg^ z>o*36W$RhWwgtOBYjCg~S?ySKIjkCu@0r|FhEPj?+GaIoOm#12^()~S&qtfdI? zkN|u5{Vm((X!h%!Sb5Ifyusb(dIM5FMlm$m8X?f?6xwMv?S-dM;iK+i>Kiu?fv^fK z^sqK5k1(hoTm44$z9hg-`1+`w&Zf-h0p0p)qXtdE{4{teDrW$hJ6<49Qnnx8k*ww9 zAt@-l5;DzGkmOIkz?ufy@ zZRV-Sg=ax@*~lx9(+zap?zNqT4Q{;75@|fWtI6Q-LCRf0fnfbZsL{6h#ZFpiSeDQL z?&a`&Z({u+$n9o@)xe$!IeCVcmKx=nr67#Fres&la_FTpT=x7(3VXekatJc{ax&+h z{6qC!WwRJAbxYNA7Ppz2WY-4>*CHZB_QCp=@GNO&VbC{tfHpN0^@5fTXpx>s_Op! z%Fli_6TW`hv$N#$fvko>=1|>lL8~?gK3eNsgU%mNaUX$plt$yHRH>3?$sv8$%=_n| ztJ{c|EdYQffcLcl6>XCg*i|*8H=M2^aH(S>M{2HRsi7`*@%%Bt>~1}e>c}o_ z2WLmHkp{9<1$;_FlDhYC^xjp?(DlieECEJoQGzvJCcH@`^{BdSFv7 z*H0K_&vwu3)%ll9i;(Q=8dI4CY#d@aMX5lHI$FB+5H`fg4N4&_%t8<9_bkfaNe9@M z(cx`2pB<{Ua&k}li2_w(1nm4amz=mklgS7H)0ryK&7|-IemK9w$~8pRh=f<>5kl zd;1XikwC7Z=Pv!yh`a?17>VM!-~*exz#=(YVcjmpCP1J_w$=m1DMt9#Dumd zN1rw?Ql>Ya`S|UF`pP@@R6_5z8<6#xkM@A}t z6cv5+pCjC#?Ere@kD1^U@{xDM0Ulc2TCLxwblv&a72Yx$ou4n$bu2OO@bnU*iLw@G zLGdZ7qdN0T85Z`TwZGoYfO2GU0sy8kCo=L;eQbQSv1umOucm*=7px@1+DU`woC zX4`MQ`W<&o4zWh85MJ9IpU0CtE+Ew1LrahNpqM{9)G(qr=^bQvpxkGfKJAXTk1lGq z1>Wa15`v2<%Zu=b{`@#C5PZ1gVEm8YXjtF1X2?Wo{x{z+tG&)k2(lmfW#n^|iz_Z0PzbfE#d`tTL3IFFTPb$1x zYxO>O#*w%>!scHDIcvq{A5ekPXhfEGwIMgyGNc)^nfnrX+f4!m?p%w^EWMTaSHk&U qjUOQy|0Cx4-') + $(".pytorch-call-to-action-links").children().first().before("') } @@ -91,7 +91,7 @@ {% endblock %} diff --git a/advanced_source/cpp_frontend.rst b/advanced_source/cpp_frontend.rst index 7952b4593..878b2859c 100644 --- a/advanced_source/cpp_frontend.rst +++ b/advanced_source/cpp_frontend.rst @@ -1,5 +1,5 @@ PyTorch C++ 프론트엔드 사용하기 -============================= +================================= **번역**: `유용환 `_ @@ -87,7 +87,7 @@ C++ 프론트엔드에 대해 자세히 살펴보세요. 것에 유의하세요.) 기본 애플리케이션 작성하기 ------------------------- +--------------------------- 먼저 최소한의 C++ 애플리케이션을 작성해 우리의 설정과 빌드 환경이 동일한지 확인하겠습니다. 먼저, C++ @@ -228,7 +228,7 @@ CMake가 해당 파일의 *위치* 를 찾을 수 있도록 하려면 ``cmake`` 제가 보기엔 항등 행렬인 것 같군요! 신경망 모델 정의하기 -------------------- +---------------------- 이제 기본적인 환경을 설정했으니, 이번 튜토리얼에서 훨씬 더 흥미로운 부분을 살펴봅시다. 
먼저 C++ 프론트엔드에서 모듈을 @@ -237,7 +237,7 @@ CMake가 해당 파일의 *위치* 를 찾을 수 있도록 하려면 ``cmake`` 내장 모듈 라이브러리를 사용하여 완성도 있는 GAN을 구현하겠습니다. 모듈 API 기초 -^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^ 파이썬 인터페이스와 마찬가지로, C++ 프론트엔드에 기반을 둔 신경망도 *모듈* 이라 불리는 재사용 가능한 빌딩 블록으로 구성되어 있습니다. 파이썬에 @@ -262,7 +262,7 @@ CMake가 해당 파일의 *위치* 를 찾을 수 있도록 하려면 ``cmake`` 이동시킵니다. 모듈 정의 및 매개변수 등록 -************************* +*************************** 이 내용을 코드로 구현하기 위해, 파이썬 인터페이스로 작성된 간단한 모듈 하나를 생각해 봅시다. @@ -310,7 +310,7 @@ C++에서는 ``register_parameter`` 메서드를 통해 텐서를 전달해야 전통적인 (그리하여 덜 마법적인) 방식이 제공됩니다. 서브모듈 등록 및 모듈 계층 구조 탐색 -********************************** +************************************* 매개변수 등록과 마찬가지 방법으로 서브모듈을 등록할 수 있습니다. 파이썬에서 서브모듈은 어떤 모듈의 속성으로 지정될 때 자동으로 @@ -457,7 +457,7 @@ C++에서 ``torch::nn::Linear`` 등의 모듈을 서브모듈로 등록하려면 있습니다. 순전파(forward) 모드로 네트워크 실행 -********************************** +************************************* 네트워크를 C++로 실행하기 위해서는, 우리가 정의한 ``forward()`` 메서드를 호출하기만 하면 됩니다. @@ -479,7 +479,7 @@ C++에서 ``torch::nn::Linear`` 등의 모듈을 서브모듈로 등록하려면 [ Variable[CPUFloatType]{2,5} ] 모듈 오너십 (Ownership) -********************** +************************ 이제 우리는 C++에서 모듈을 정의하고, 매개변수를 등록하고, 하위 모듈을 등록하고, ``parameters()`` 등의 메서드를 통해 모듈 계층을 탐색하고, @@ -659,7 +659,7 @@ API(``torch::save`` 및 ``torch::load``)는 모듈 holder(혹은 일반 이 API를 사용하겠습니다. DCGAN 모듈 정의하기 -^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^ 이제 이 글에서 해결하려는 머신러닝 태스크를 위한 모듈을 정의하는데 필요한 배경과 도입부 설명이 끝났습니다. 다시 상기하자면, 우리의 태스크는 @@ -697,7 +697,7 @@ GAN은 *생성기(generator)* 와 *판별기(discriminator)* 라는 기계입니다. 생성기 (Generator) 모듈 -******************** +************************* 먼저 일련의 전치된 (transposed) 2D 합성곱, 배치 정규화 및 ReLU 활성화 유닛으로 구성된 생성기 모듈을 정의하겠습니다. @@ -778,7 +778,7 @@ ReLU 활성화 유닛으로 구성된 생성기 모듈을 정의하겠습니다. 판별기(Discriminator) 모듈 -************************ +***************************** 판별기는 마찬가지로 합성곱, 배치 정규화 및 활성화의 연속입니다. 하지만 이번에 합성곱은 전치되지 않은 기본 @@ -823,7 +823,7 @@ API입니다. `Sequential` 을 사용하면 판별기는 대략 다음과 같습 데이터 불러오기 ------------- +----------------- 이제 생성기와 판별기 모델을 정의했으므로 이러한 모델을 학습시킬 데이터가 필요합니다. 파이썬과 마찬가지로 C++ 프론트엔드는 @@ -936,7 +936,7 @@ MNIST 데이터셋은 학습 바이너리 실행 위치를 기준으로 ``./mnis 즉, MNIST 데이터셋에서 데이터를 성공적으로 로드할 수 있습니다. 학습 루프 작성하기 ------------------ +-------------------- 이제 예제의 알고리즘 부분을 마무리하고 생성기와 판별기 사이에서 일어나는 섬세한 작용을 구현해 보겠습니다. 먼저 생성기와 판별기 각각을 위해 @@ -946,9 +946,9 @@ optimizer는 `Adam `_ 알고리즘을 구 .. code-block:: cpp torch::optim::Adam generator_optimizer( - generator->parameters(), torch::optim::AdamOptions(2e-4).beta1(0.5)); + generator->parameters(), torch::optim::AdamOptions(2e-4).betas(std::make_tuple(0.5, 0.5))); torch::optim::Adam discriminator_optimizer( - discriminator->parameters(), torch::optim::AdamOptions(5e-4).beta1(0.5)); + discriminator->parameters(), torch::optim::AdamOptions(5e-4).betas(std::make_tuple(0.5, 0.5))); .. note:: @@ -1058,7 +1058,7 @@ optimizer는 `Adam `_ 알고리즘을 구 ... GPU로 이동하기 --------------- +---------------- 이 스크립트는 CPU에서 잘 동작하지만, 합성곱 연산이 GPU에서 훨씬 빠르다는 것은 잘 알려진 사실입니다. 어떻게 학습을 GPU로 옮길 수 있을 지에 대해 빠르게 논의해 @@ -1141,7 +1141,7 @@ MNIST 데이터셋의 텐서처럼 우리가 직접 생성하지 않는 텐서 torch::Device device(torch::cuda::is_available() ? torch::kCUDA : torch::kCPU); 학습 상태 저장 및 복원하기 ------------------------- +----------------------------- 마지막으로 학습 스크립트에 추가해야 할 내용은 모델 매개변수 및 옵티마이저의 상태, 그리고 생성된 몇 개의 이미지 샘플을 @@ -1200,7 +1200,7 @@ C++ 프론트엔드는 개별 텐서뿐만 아니라 모델 및 옵티마이저 생성한 이미지 검사하기 --------------------- +-------------------------- 학습 스크립트가 완성되어 CPU에서든 GPU에서든 GAN을 훈련시킬 준비가 됐습니다. 학습 과정의 중간 출력을 검사하기 위해 @@ -1280,7 +1280,7 @@ C++ 프론트엔드는 개별 텐서뿐만 아니라 모델 및 옵티마이저 모델을 개선할 수 있나요? 
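One API detail from the training-loop hunk above is worth restating for readers who know the Python API better: the patch replaces the removed ``beta1(0.5)`` setter with ``betas(std::make_tuple(0.5, 0.5))``, i.e. both Adam coefficients are now passed together. A rough Python-side analogue is sketched below; the module definitions are hypothetical stand-ins, and the ``(0.5, 0.5)`` values simply mirror the C++ hunk rather than being a tuning recommendation.

.. code-block:: python

   import torch
   import torch.nn as nn

   # Hypothetical stand-ins for the tutorial's DCGAN generator/discriminator.
   generator = nn.Sequential(nn.Linear(100, 784), nn.Tanh())
   discriminator = nn.Sequential(nn.Linear(784, 1), nn.Sigmoid())

   # torch.optim.Adam takes both beta coefficients as a single ``betas`` tuple,
   # which is what the C++ ``AdamOptions(...).betas(...)`` call corresponds to.
   generator_optimizer = torch.optim.Adam(
       generator.parameters(), lr=2e-4, betas=(0.5, 0.5))
   discriminator_optimizer = torch.optim.Adam(
       discriminator.parameters(), lr=5e-4, betas=(0.5, 0.5))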
결론 ------ +------ 이 튜토리얼을 통해 PyTorch C++ 프론트엔드에 대한 어느 정도 이해도가 생기셨기 바랍니다. 필연적으로 PyTorch 같은 머신러닝 라이브러리는 매우 다양하고 diff --git a/advanced_source/ddp_pipeline.py b/advanced_source/ddp_pipeline.py index 36f1d2641..93858443a 100644 --- a/advanced_source/ddp_pipeline.py +++ b/advanced_source/ddp_pipeline.py @@ -22,7 +22,7 @@ ###################################################################### # 모델 정의하기 -# ------------- +# --------------- # ###################################################################### @@ -77,7 +77,7 @@ def forward(self, x): # ``nn.TransformerEncoderLayer`` 의 절반은 한 GPU에 두고 # 나머지 절반은 다른 GPU에 있도록 모델을 분할합니다. 이를 위해서 ``Encoder`` 와 # ``Decoder`` 섹션을 분리된 모듈로 빼낸 다음, 원본 트랜스포머 모듈을 -# 나타내는 nn.Sequential을 빌드 합니다. +# 나타내는 ``nn.Sequential`` 을 빌드 합니다. if sys.platform == 'win32': @@ -122,7 +122,7 @@ def forward(self, inp): ###################################################################### # 학습을 위한 다중 프로세스 시작 -# ------------------------------ +# -------------------------------- # @@ -135,7 +135,7 @@ def run_worker(rank, world_size): ###################################################################### # 데이터 로드하고 배치 만들기 -# --------------------------- +# ----------------------------- # ###################################################################### @@ -149,16 +149,17 @@ def run_worker(rank, world_size): # 알파벳을 길이가 6인 4개의 시퀀스로 나눌 수 있습니다: # # .. math:: -# \begin{bmatrix} -# \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z} -# \end{bmatrix} -# \Rightarrow -# \begin{bmatrix} -# \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} & -# \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} & -# \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} & -# \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix} -# \end{bmatrix} +# +# \begin{bmatrix} +# \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z} +# \end{bmatrix} +# \Rightarrow +# \begin{bmatrix} +# \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} & +# \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} & +# \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} & +# \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix} +# \end{bmatrix} # # 이 열들은 모델에 의해서 독립적으로 취급되며, 이는 # ``G`` 와 ``F`` 의 의존성이 학습될 수 없다는 것을 의미하지만, 더 효율적인 @@ -190,11 +191,11 @@ def data_process(raw_text_iter): device = torch.device(2 * rank) def batchify(data, bsz, rank, world_size, is_train=False): - # Divide the dataset into bsz parts. + # Divide the dataset into ``bsz`` parts. nbatch = data.size(0) // bsz # Trim off any extra elements that wouldn't cleanly fit (remainders). data = data.narrow(0, 0, nbatch * bsz) - # Evenly divide the data across the bsz batches. + # Evenly divide the data across the ``bsz`` batches. data = data.view(bsz, -1).t().contiguous() # Divide the data across the ranks only for training data. 
if is_train: @@ -211,7 +212,7 @@ def batchify(data, bsz, rank, world_size, is_train=False): ###################################################################### # 입력과 타겟 시퀀스를 생성하기 위한 함수들 -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # @@ -240,7 +241,7 @@ def get_batch(source, i): ###################################################################### # 모델 규모와 파이프 초기화 -# ------------------------- +# --------------------------- # @@ -264,9 +265,9 @@ def get_batch(source, i): # In 'run_worker' ntokens = len(vocab) # the size of vocabulary emsize = 4096 # embedding dimension - nhid = 4096 # the dimension of the feedforward network model in nn.TransformerEncoder - nlayers = 8 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder - nhead = 16 # the number of heads in the multiheadattention models + nhid = 4096 # the dimension of the feedforward network model in ``nn.TransformerEncoder`` + nlayers = 8 # the number of ``nn.TransformerEncoderLayer`` in ``nn.TransformerEncoder`` + nhead = 16 # the number of heads in the Multihead Attention models dropout = 0.2 # the dropout value from torch.distributed import rpc @@ -332,7 +333,7 @@ def get_total_params(module: torch.nn.Module): ###################################################################### # 모델 실행하기 -# ------------- +# --------------- # @@ -435,7 +436,7 @@ def evaluate(eval_model, data_source): ###################################################################### # 평가 데이터셋으로 모델 평가하기 -# ------------------------------- +# --------------------------------- # # 평가 데이터셋에서의 결과를 확인하기 위해 최고의 모델을 적용합니다. @@ -454,7 +455,7 @@ def evaluate(eval_model, data_source): mp.spawn(run_worker, args=(world_size, ), nprocs=world_size, join=True) ###################################################################### # Output -# ------ +# -------- # diff --git a/advanced_source/dynamic_quantization_tutorial.py b/advanced_source/dynamic_quantization_tutorial.py index be0182a5d..3c9f2af99 100644 --- a/advanced_source/dynamic_quantization_tutorial.py +++ b/advanced_source/dynamic_quantization_tutorial.py @@ -72,7 +72,7 @@ def init_hidden(self, bsz): ###################################################################### # 2. 텍스트 데이터 불러오기 -# ------------------------ +# --------------------------- # # 다음으로, 단어 단위 언어 모델 예제의 `전처리 `_ # 과정을 따라 `Wikitext-2 데이터셋 `_ 을 `Corpus` 인스턴스에 불러옵니다. @@ -195,11 +195,11 @@ def tokenize(self, path): # 테스트 데이터셋 만들기 def batchify(data, bsz): - # 데이터셋을 bsz 부분으로 얼마나 깔끔하게 나눌 수 있는지 계산합니다. + # 데이터셋을 ``bsz`` 부분으로 얼마나 깔끔하게 나눌 수 있는지 계산합니다. nbatch = data.size(0) // bsz # 깔끔하게 맞지 않는 추가적인 부분(나머지들)을 잘라냅니다. data = data.narrow(0, 0, nbatch * bsz) - # 데이터에 대하여 bsz 배치들로 동등하게 나눕니다. + # 데이터에 대하여 ``bsz`` 묶음(batch)들로 동등하게 나눕니다. return data.view(bsz, -1).t().contiguous() test_data = batchify(corpus.test, eval_batch_size) diff --git a/advanced_source/generic_join.rst b/advanced_source/generic_join.rst index ec9ef5610..c083d0cfa 100644 --- a/advanced_source/generic_join.rst +++ b/advanced_source/generic_join.rst @@ -4,7 +4,7 @@ Distributed Training with Uneven Inputs Using the Join Context Manager **Author**\ : `Andrew Gu `_ .. note:: - |edit| View and edit this tutorial in `github `__. + |edit| View and edit this tutorial in `github `__. .. note:: ``Join`` is introduced in PyTorch 1.10 as a prototype feature. This API is subject to change. @@ -369,7 +369,7 @@ of inputs across all ranks. def join_hook(self, **kwargs) -> JoinHook: r""" Return a join hook that shadows the all-reduce in :meth:`__call__`. 
- + This join hook supports the following keyword arguments: sync_max_count (bool, optional): whether to synchronize the maximum count across all ranks once all ranks join; default is ``False``. @@ -442,9 +442,9 @@ Some key points to highlight: .. _Join: https://pytorch.org/docs/master/distributed.algorithms.join.html -.. _Getting Started with Distributed Data Parallel: https://pytorch.org/tutorials/intermediate/ddp_tutorial.html -.. _Getting Started with Distributed Data Parallel - Basic Use Case: https://pytorch.org/tutorials/intermediate/ddp_tutorial.html#basic-use-case -.. _Shard Optimizer States with ZeroRedundancyOptimizer: https://pytorch.org/tutorials/recipes/zero_redundancy_optimizer.html +.. _Getting Started with Distributed Data Parallel: https://tutorials.pytorch.kr/intermediate/ddp_tutorial.html +.. _Getting Started with Distributed Data Parallel - Basic Use Case: https://tutorials.pytorch.kr/intermediate/ddp_tutorial.html#basic-use-case +.. _Shard Optimizer States with ZeroRedundancyOptimizer: https://tutorials.pytorch.kr/recipes/zero_redundancy_optimizer.html .. _DistributedDataParallel: https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html .. _join(): https://pytorch.org/docs/stable/_modules/torch/nn/parallel/distributed.html#DistributedDataParallel.join .. _ZeroRedundancyOptimizer: https://pytorch.org/docs/stable/distributed.optim.html diff --git a/advanced_source/neural_style_tutorial.py b/advanced_source/neural_style_tutorial.py index 1403cebd9..8a24e8943 100644 --- a/advanced_source/neural_style_tutorial.py +++ b/advanced_source/neural_style_tutorial.py @@ -1,6 +1,6 @@ """ -PyTorch를 이용하여 뉴럴 변환(Neural Transfer) -============================= +PyTorch를 이용한 뉴럴 변환(Neural Transfer) +========================================================== **Author**: `Alexis Jacq `_ @@ -140,7 +140,7 @@ def imshow(tensor, title=None): # 손실 함수 # -------------- # 콘텐츠 손실(Content Loss) -# ~~~~~~~~~~~~ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # Content 손실은 각 계층에 대한 Content 거리의 가중치 버전을 나타냅니다. # 이 함수는 입력 :math:`X` 를 처리하는 레이어 :math:`L` 의 특징 맵 :math:`F_{XL}` 을 가져와서 @@ -179,7 +179,7 @@ def forward(self, input): ###################################################################### # 스타일 손실(Style Loss) -# ~~~~~~~~~~ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # Style 손실 모듈은 Content 손실 모듈과 유사하게 구현됩니다. # 네트워크에서 해당 계층의 Style 손실을 계산하는 역할을 합니다. @@ -256,7 +256,7 @@ def forward(self, input): cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device) cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device) -# 입력 이미지를 정규화하는 모듈을 생성하여 쉽게 nn.Sequential에 넣을 수 있습니다. +# 입력 이미지를 정규화하는 모듈을 생성하여 쉽게 ``nn.Sequential`` 에 넣을 수 있습니다. class Normalization(nn.Module): def __init__(self, mean, std): super(Normalization, self).__init__() @@ -267,14 +267,14 @@ def __init__(self, mean, std): self.std = torch.tensor(std).view(-1, 1, 1) def forward(self, img): - # img 정규화 + # ``img`` 정규화 return (img - self.mean) / self.std ###################################################################### # ``Sequential`` 모듈은 순서가 있는 하위 모듈의 리스트가 포함됩니다. # 예를 들어, ``vgg19.features`` 은 올바른 순서로 정렬 된 -# 시퀀스(Conv2d, ReLU, MaxPool2d, Conv2d, ReLU…)가 포함되어 있습니다. +# 시퀀스( ``Conv2d``, ``ReLU``, ``MaxPool2d``, ``Conv2d``, ``ReLU``…)가 포함되어 있습니다. # Content 손실과 Style 손실 계층을 감지하는 합성곱 계층 바로 뒤에 추가해야합니다. # 이렇게 하기 위해서는 Content 손실과 Style 손실 모듈이 # 올바르게 삽입된 새로운 ``Sequential`` 모듈을 만들어야 합니다. 
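The hunk below only shows a fragment of ``get_style_model_and_losses``, so here is a minimal standalone sketch of the idea just described: walk the VGG feature layers in order and re-register each one under a readable name inside a fresh ``nn.Sequential``. The insertion of the tutorial's ``ContentLoss``/``StyleLoss`` modules after the chosen conv layers is only indicated by a comment, and untrained weights are used purely to keep the sketch self-contained.

.. code-block:: python

   import torch.nn as nn
   import torchvision.models as models

   cnn = models.vgg19().features.eval()  # the tutorial uses pretrained weights

   model = nn.Sequential()
   conv_idx = 0
   for layer in cnn.children():
       if isinstance(layer, nn.Conv2d):
           conv_idx += 1
           name = f"conv_{conv_idx}"
       elif isinstance(layer, nn.ReLU):
           name = f"relu_{conv_idx}"
           layer = nn.ReLU(inplace=False)  # out-of-place, as noted above
       elif isinstance(layer, nn.MaxPool2d):
           name = f"pool_{conv_idx}"
       else:
           name = f"layer_{conv_idx}"
       # A ContentLoss / StyleLoss module would be registered right after the
       # conv layers selected as content / style layers.
       model.add_module(name, layer)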
@@ -295,8 +295,8 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std, content_losses = [] style_losses = [] - # cnn이 nn.Sequential이라고 가정하고, - # 순차적으로 활성화되어야 하는 모듈에 새로운 nn.Sequential을 만듭니다. + # ``cnn`` 이 ``nn.Sequential`` 이라고 가정하고, + # 순차적으로 활성화되어야 하는 모듈에 새로운 ``nn.Sequential`` 을 만듭니다. model = nn.Sequential(normalization) i = 0 # conv를 볼 때마다 증가 @@ -306,7 +306,7 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std, name = 'conv_{}'.format(i) elif isinstance(layer, nn.ReLU): name = 'relu_{}'.format(i) - # in-place 버전은 아래에 삽입한 Content 손실과 Style 손실와 잘 어울리지 않습니다. + # 아래에 추가한 ``ContentLoss`` 와 ``StyleLoss`` 는 in-place 버전에서는 잘 동작하지 않습니다. # 그래서 여기서는 out-of-place로 대체합니다. layer = nn.ReLU(inplace=False) elif isinstance(layer, nn.MaxPool2d): @@ -347,8 +347,11 @@ def get_style_model_and_losses(cnn, normalization_mean, normalization_std, # input_img = content_img.clone() -# 만약 화이트 노이즈(white noise)을 사용하려면 아래 주석을 제거하세요 -# input_img = torch.randn(content_img.data.size(), device=device) +# 만약 화이트 노이즈(white noise)을 사용하려면 아래 주석을 제거하세요: +# +# :: +# +# input_img = torch.randn(content_img.data.size(), device=device) # 그림에 원본 입력 이미지를 추가합니다. plt.figure() diff --git a/advanced_source/numpy_extensions_tutorial.py b/advanced_source/numpy_extensions_tutorial.py index 9475ff084..f08bf6a09 100644 --- a/advanced_source/numpy_extensions_tutorial.py +++ b/advanced_source/numpy_extensions_tutorial.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- """ -numpy 와 scipy 를 이용한 확장(Extensions) 만들기 +NumPy와 SciPy를 사용한 확장(Extension) 만들기 ===================================================== + **Author**: `Adam Paszke `_ **Updated by**: `Adam Dziedzic `_ @@ -11,7 +12,7 @@ 이번 튜토리얼에서는 두 가지 작업을 수행할 것입니다: 1. 매개 변수가 없는 신경망 계층(layer) 만들기 - - 이는 구현의 일부로 **numpy** 를 호출합니다. + - 이는 구현의 일부로 **NumPy** 를 호출합니다. 2. 학습 가능한 가중치가 있는 신경망 계층(layer) 만들기 - 이는 구현의 일부로 **Scipy** 를 호출합니다. @@ -26,7 +27,7 @@ # # 이 계층(layer)은 특별히 유용하거나 수학적으로 올바른 작업을 수행하지 않습니다. # -# 이름은 대충 BadFFTFunction으로 지었습니다. +# 이름은 대충 ``BadFFTFunction`` 으로 지었습니다. # # **계층(layer) 구현** @@ -46,7 +47,7 @@ def backward(ctx, grad_output): result = irfft2(numpy_go) return grad_output.new(result) -# 이 계층에는 매개 변수가 없으므로 nn.Module 클래스가 아닌 함수로 간단히 선언할 수 있습니다. +# 이 계층에는 매개 변수가 없으므로 ``nn.Module`` 클래스가 아닌 함수로 간단히 선언할 수 있습니다. def incorrect_fft(input): diff --git a/advanced_source/static_quantization_tutorial.rst b/advanced_source/static_quantization_tutorial.rst index 8bafc2999..fe24050e0 100644 --- a/advanced_source/static_quantization_tutorial.rst +++ b/advanced_source/static_quantization_tutorial.rst @@ -217,7 +217,7 @@ torch.ao.quantization.fuse_modules(m.conv, [str(idx), str(idx + 1)], inplace=True) 2. 헬퍼(Helper) 함수 --------------------- +---------------------- 다음으로 모델 평가를 위한 헬퍼 함수들을 만듭니다. 코드 대부분은 `여기 `_ 에서 가져왔습니다. @@ -302,7 +302,7 @@ 마지막 주요 설정 단계로서 학습과 테스트 데이터를 위한 DataLoader를 정의합니다. ImageNet 데이터 -^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^ 전체 ImageNet Dataset을 이용해서 이 튜토리얼의 코드를 실행시키기 위해, 첫번째로 `ImageNet Data `_ 의 지시를 따라 ImageNet을 다운로드합니다. 다운로드한 파일의 압축을 'data_path'에 풉니다. @@ -398,7 +398,7 @@ ImageNet 데이터 이 값이 비교를 위한 기준이 될 것입니다. 다음으로 양자화된 모델을 봅시다. 4. 학습 후 정적 양자화(post-training static quantization) --------------------------------------------------------- +----------------------------------------------------------- 학습 후 정적 양자화는 동적 양자화처럼 가중치를 float에서 int로 변환하는 것뿐만 아니라 추가적인 단계도 수행합니다. 
네트워크에 데이터 배치의 첫 번째 공급과 다른 활성값들의 @@ -459,7 +459,8 @@ x86 아키텍처에서 양자화를 위한 권장 설정을 그대로 쓰기만 per_channel_quantized_model = load_model(saved_model_dir + float_model_file) per_channel_quantized_model.eval() per_channel_quantized_model.fuse_model() - per_channel_quantized_model.qconfig = torch.ao.quantization.get_default_qconfig('fbgemm') + # 이전의 'fbgemm' 또한 여전히 사용 가능하지만, 'x86'을 기본으로 사용하는 것을 권장합니다. + per_channel_quantized_model.qconfig = torch.ao.quantization.get_default_qconfig('x86') print(per_channel_quantized_model.qconfig) torch.ao.quantization.prepare(per_channel_quantized_model, inplace=True) @@ -475,7 +476,7 @@ x86 아키텍처에서 양자화를 위한 권장 설정을 그대로 쓰기만 이제 양자화 자각 학습을 시도해 봅시다. 5. 양자화 자각 학습(Quantization-aware training) -------------------------------------------------- +--------------------------------------------------- 양자화 자각 학습(QAT)은 일반적으로 가장 높은 정확도를 제공하는 양자화 방법입니다. 모든 가중치화 활성값은 QAT로 인해 학습 도중에 순전파와 역전파를 도중 "가짜 양자화"됩니다. @@ -535,7 +536,8 @@ x86 아키텍처에서 양자화를 위한 권장 설정을 그대로 쓰기만 qat_model.fuse_model() optimizer = torch.optim.SGD(qat_model.parameters(), lr = 0.0001) - qat_model.qconfig = torch.ao.quantization.get_default_qat_qconfig('fbgemm') + # 이전의 'fbgemm' 또한 여전히 사용 가능하지만, 'x86'을 기본으로 사용하는 것을 권장합니다. + qat_model.qconfig = torch.ao.quantization.get_default_qat_qconfig('x86') 마지막으로 모델이 양자화 자각 학습을 준비하기 위해 ``prepare_qat`` 로 "가짜 양자화"를 수행합니다. diff --git a/advanced_source/super_resolution_with_onnxruntime.py b/advanced_source/super_resolution_with_onnxruntime.py index 7c2886342..5fdf00cf4 100644 --- a/advanced_source/super_resolution_with_onnxruntime.py +++ b/advanced_source/super_resolution_with_onnxruntime.py @@ -110,7 +110,7 @@ def _initialize_weights(self): # 특정 차원을 동적 차원으로 지정하지 않는 이상, ONNX로 변환된 그래프의 경우 입력값의 크기는 모든 차원에 대해 고정됩니다. # 예시에서는 모델이 항상 배치 사이즈 1을 사용하도록 변환하였지만, ``torch.onnx.export()`` 의 ``dynamic_axes`` 인자의 # 첫번째 차원은 동적 차원으로 지정합니다. 따라서 변환된 모델은 임의의 batch_size에 대해 [batch_size, 1, 224, 224] 사이즈 -# 입력값을 받을 수 있습니다. +# 입력값을 받을 수 있습니다. # # PyTorch의 변환 인터페이스에 대해 더 자세히 알고 싶다면 # `torch.onnx 문서 `__ 를 참고해주세요. @@ -137,8 +137,8 @@ def _initialize_weights(self): # # ONNX 런타임에서의 모델 결과값을 확인하기 전에 먼저 ONNX API를 사용해 ONNX 모델을 확인해보도록 하겠습니다. # 먼저, ``onnx.load("super_resolution.onnx")`` 는 저장된 모델을 읽어온 후 -# 머신러닝 모델을 취합하여 저장하고 있는 상위 파일 컨테이너인 onnx.ModelProto를 리턴합니다. -# onnx.ModelProto에 대해 더 자세한 것은 `onnx.proto 기술문서 `__ 에서 +# 머신러닝 모델을 취합하여 저장하고 있는 상위 파일 컨테이너인 ``onnx.ModelProto`` 를 리턴합니다. +# ``onnx.ModelProto`` 에 대해 더 자세한 것은 `onnx.proto 기술문서 `__ 에서 # 확인하실 수 있습니다. # ``onnx.checker.check_model(onnx_model)`` 는 모델의 구조를 확인하고 # 모델이 유효한 스키마(valid schema)를 가지고 있는지를 체크합니다. @@ -182,7 +182,7 @@ def to_numpy(tensor): ###################################################################### -# 이제 PyTorch와 ONNX 런타임에서 연산된 결과값이 서로 일치하는지 오차범위 (rtol=1e-03, atol=1e-05) +# 이제 PyTorch와 ONNX 런타임에서 연산된 결과값이 서로 일치하는지 오차범위( ``rtol=1e-03`` 와 ``atol=1e-05``) # 이내에서 확인해야 합니다. # 만약 결과가 일치하지 않는다면 ONNX 변환기에 문제가 있는 것이니 저희에게 알려주시기 바랍니다. # diff --git a/beginner_source/Intro_to_TorchScript_tutorial.py b/beginner_source/Intro_to_TorchScript_tutorial.py index 47da92ee6..832536a74 100644 --- a/beginner_source/Intro_to_TorchScript_tutorial.py +++ b/beginner_source/Intro_to_TorchScript_tutorial.py @@ -7,7 +7,7 @@ **번역**: `강준혁 `_ 이 튜토리얼은 C++와 같은 고성능 환경에서 실행될 수 있는 -PyTorch 모델(``nn.Module`` 의 하위클래스)의 중간 표현인 +PyTorch 모델( ``nn.Module`` 의 하위클래스)의 중간 표현인 TorchScript에 대한 소개입니다. 이 튜토리얼에서는 다음을 다룰 것입니다: @@ -144,7 +144,7 @@ def forward(self, x, h): ###################################################################### -# MyCell 클래스를 다시 정의했지만, 여기선 ``MyDecisionGate`` 를 정의했습니다. 
+# ``MyCell`` 클래스를 다시 정의했지만, 여기선 ``MyDecisionGate`` 를 정의했습니다. # 이 모듈은 **제어 흐름** 을 활용합니다. 제어 흐름은 루프와 ``if`` 명령문과 # 같은 것으로 구성됩니다. # @@ -271,7 +271,7 @@ def forward(self, x, h): ###################################################################### # ``.code`` 출력을 보면, ``if-else`` 분기가 어디에도 없다는 것을 알 수 있습니다! # 어째서일까요? 추적은 코드를 실행하고 *발생하는* 작업을 기록하며 정확하게 수행하는 -# 스크립트 모듈(ScriptModule)을 구성하는 일을 수행합니다. 불행하게도, 제어 흐름과 +# 스크립트 모듈( ``ScriptModule`` )을 구성하는 일을 수행합니다. 불행하게도, 제어 흐름과 # 같은 것들은 지워집니다. # # TorchScript에서 이 모듈을 어떻게 충실하게 나타낼 수 있을까요? Python 소스 코드를 diff --git a/beginner_source/audio_data_augmentation_tutorial.py b/beginner_source/audio_data_augmentation_tutorial.py deleted file mode 100644 index 5ffe96106..000000000 --- a/beginner_source/audio_data_augmentation_tutorial.py +++ /dev/null @@ -1,434 +0,0 @@ -# -*- coding: utf-8 -*- -""" -오디오 데이터 증강 -======================= - -*역자*: Lee Jong Bub - -``torchaudio`` 는 오디오 데이터를 증강시키는 다양한 방법들을 제공합니다. - -이 튜토리얼에서는 효과, 필터, -공간 임펄스 응답(RIR, Room Impulse Response)과 코덱을 적용하는 방법을 살펴보겠습니다. - -하단부에서는, 깨끗한 음성으로 부터 휴대폰 너머의 잡음이 낀 음성을 합성하겠습니다. -""" - -import torch -import torchaudio -import torchaudio.functional as F - -print(torch.__version__) -print(torchaudio.__version__) - -###################################################################### -# 준비 -# ----------- -# -# 먼저, 모듈을 불러오고 튜토리얼에 사용할 오디오 자료들을 다운로드합니다. -# - -import math - -from IPython.display import Audio -import matplotlib.pyplot as plt - -from torchaudio.utils import download_asset - -SAMPLE_WAV = download_asset("tutorial-assets/steam-train-whistle-daniel_simon.wav") -SAMPLE_RIR = download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo-8000hz.wav") -SAMPLE_SPEECH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042-8000hz.wav") -SAMPLE_NOISE = download_asset("tutorial-assets/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo-8000hz.wav") - - -###################################################################### -# 효과와 필터링 적용하기 -# ------------------------------ -# -# :py:func:`torchaudio.sox_effects` 는 ``sox`` 와 유사한 필터들을 -# 텐서 객체들과 파일 객체 오디오 소스들에 직접 적용 해줍니다. -# -# 이를 위해 두가지 함수가 사용됩니다: -# -# - :py:func:`torchaudio.sox_effects.apply_effects_tensor` 는 텐서에 -# 효과를 적용합니다. -# - :py:func:`torchaudio.sox_effects.apply_effects_file` 는 다른 오디오 소스들에 -# 효과를 적용합니다. -# -# 두 함수들은 효과의 정의를 ``List[List[str]]`` 형태로 받아들입니다. -# ``sox`` 와 작동하는 방법이 거의 유사합니다. 하지만, 한가지 유의점은 -# ``sox`` 는 자동으로 효과를 추가하지만, ``torchaudio`` 의 구현은 그렇지 않다는 점입니다. -# -# 사용 가능한 효과들의 목록을 알고싶다면, `the sox -# documentation `__ 을 참조해주세요. -# -# **Tip** 즉석으로 오디오 데이터 로드와 다시 샘플링 하고싶다면, -# 효과 ``"rate"`` 와 함께 :py:func:`torchaudio.sox_effects.apply_effects_file` 을 사용하세요. -# -# **Note** :py:func:`torchaudio.sox_effects.apply_effects_file` 는 파일 형태의 객체 또는 주소 형태의 객체를 받습니다. -# :py:func:`torchaudio.load` 와 유사하게, 오디오 포맷이 -# 파일 확장자나 헤더를 통해 추론될 수 없으면, -# 전달인자 ``format`` 을 주어, 오디오 소스의 포맷을 구체화 해줄 수 있습니다. -# -# **Note** 이 과정은 미분 불가능합니다. -# - -# 데이터를 불러옵니다. -waveform1, sample_rate1 = torchaudio.load(SAMPLE_WAV) - -# 효과들을 정의합니다. -effects = [ - ["lowpass", "-1", "300"], # 단극 저주파 통과 필터를 적용합니다. - ["speed", "0.8"], # 속도를 감소시킵니다. - # 이 부분은 샘플 레이트만 변경하기에, 이후에 - # 필수적으로 `rate` 효과를 기존 샘플 레이트로 주어야합니다. - ["rate", f"{sample_rate1}"], - ["reverb", "-w"], # 잔향은 약간의 극적인 느낌을 줍니다. -] - -# 효과들을 적용합니다. 
-waveform2, sample_rate2 = torchaudio.sox_effects.apply_effects_tensor(waveform1, sample_rate1, effects) - -print(waveform1.shape, sample_rate1) -print(waveform2.shape, sample_rate2) - -###################################################################### -# 효과가 적용되면, 프레임의 수와 채널의 수는 기존에 적용된 것들과 달라짐에 주의하세요. -# 이제 오디오를 들어봅시다. -# - -def plot_waveform(waveform, sample_rate, title="Waveform", xlim=None): - waveform = waveform.numpy() - - num_channels, num_frames = waveform.shape - time_axis = torch.arange(0, num_frames) / sample_rate - - figure, axes = plt.subplots(num_channels, 1) - if num_channels == 1: - axes = [axes] - for c in range(num_channels): - axes[c].plot(time_axis, waveform[c], linewidth=1) - axes[c].grid(True) - if num_channels > 1: - axes[c].set_ylabel(f"Channel {c+1}") - if xlim: - axes[c].set_xlim(xlim) - figure.suptitle(title) - plt.show(block=False) - -###################################################################### -# - -def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None): - waveform = waveform.numpy() - - num_channels, _ = waveform.shape - - figure, axes = plt.subplots(num_channels, 1) - if num_channels == 1: - axes = [axes] - for c in range(num_channels): - axes[c].specgram(waveform[c], Fs=sample_rate) - if num_channels > 1: - axes[c].set_ylabel(f"Channel {c+1}") - if xlim: - axes[c].set_xlim(xlim) - figure.suptitle(title) - plt.show(block=False) - -###################################################################### -# 기존: -# ~~~~~~~~~ -# - -plot_waveform(waveform1, sample_rate1, title="Original", xlim=(-0.1, 3.2)) -plot_specgram(waveform1, sample_rate1, title="Original", xlim=(0, 3.04)) -Audio(waveform1, rate=sample_rate1) - -###################################################################### -# 효과 적용 후: -# ~~~~~~~~~~~~~~~~ -# - -plot_waveform(waveform2, sample_rate2, title="Effects Applied", xlim=(-0.1, 3.2)) -plot_specgram(waveform2, sample_rate2, title="Effects Applied", xlim=(0, 3.04)) -Audio(waveform2, rate=sample_rate2) - -###################################################################### -# 좀 더 극적으로 들리지 않나요? -# - -###################################################################### -# 방 잔향 모의 실험하기 -# ----------------------------- -# -# `Convolution -# reverb `__ 는 -# 깨끗한 오디오를 다른 환경에서 생성된 것처럼 만들어주는 기술입니다. -# -# 예를들어, 공간 임펄스 응답 (RIR)을 활용하여, 깨끗한 음성을 -# 마치 회의실에서 발음된 것처럼 만들 수 있습니다. -# -# 이 과정을 위해서, RIR 데이터가 필요합니다. 다음 데이터들은 VOiCES 데이터셋에서 왔습니다. -# 하지만, 직접 녹음할 수도 있습니다. - 직접 마이크를 켜시고, 박수를 치세요! -# - -rir_raw, sample_rate = torchaudio.load(SAMPLE_RIR) -plot_waveform(rir_raw, sample_rate, title="Room Impulse Response (raw)") -plot_specgram(rir_raw, sample_rate, title="Room Impulse Response (raw)") -Audio(rir_raw, rate=sample_rate) - -###################################################################### -# 먼저, RIR을 깨끗하게 만들어줘야합니다. 주요한 임펄스를 추출하고, -# 신호 전력을 정규화 합니다. 그리고 나서 시간축을 뒤집어 줍니다. -# - -rir = rir_raw[:, int(sample_rate * 1.01) : int(sample_rate * 1.3)] -rir = rir / torch.norm(rir, p=2) -RIR = torch.flip(rir, [1]) - -plot_waveform(rir, sample_rate, title="Room Impulse Response") - -###################################################################### -# 그 후, RIR 필터와 음성 신호를 합성곱 합니다. 
-# - -speech, _ = torchaudio.load(SAMPLE_SPEECH) - -speech_ = torch.nn.functional.pad(speech, (RIR.shape[1] - 1, 0)) -augmented = torch.nn.functional.conv1d(speech_[None, ...], RIR[None, ...])[0] - -###################################################################### -# 기존: -# ~~~~~~~~~ -# - -plot_waveform(speech, sample_rate, title="Original") -plot_specgram(speech, sample_rate, title="Original") -Audio(speech, rate=sample_rate) - -###################################################################### -# RIR 적용 후: -# ~~~~~~~~~~~~ -# - -plot_waveform(augmented, sample_rate, title="RIR Applied") -plot_specgram(augmented, sample_rate, title="RIR Applied") -Audio(augmented, rate=sample_rate) - - -###################################################################### -# 배경 소음 추가하기 -# ----------------------- -# -# 오디오 데이터에 소음을 추가하기 위해서, 간단히 소음 텐서를 오디오 데이터 텐서에 더할 수 있습니다. -# 소음의 정도를 조절하는 흔한 방법은 신호 대 잡음비 (SNR)를 바꾸는 것입니다. -# [`wikipedia `__] -# -# $$ \\mathrm{SNR} = \\frac{P_{signal}}{P_{noise}} $$ -# -# $$ \\mathrm{SNR_{dB}} = 10 \\log _{{10}} \\mathrm {SNR} $$ -# - -speech, _ = torchaudio.load(SAMPLE_SPEECH) -noise, _ = torchaudio.load(SAMPLE_NOISE) -noise = noise[:, : speech.shape[1]] - -speech_rms = speech.norm(p=2) -noise_rms = noise.norm(p=2) - -snr_dbs = [20, 10, 3] -noisy_speeches = [] -for snr_db in snr_dbs: - snr = 10 ** (snr_db / 20) - scale = snr * noise_rms / speech_rms - noisy_speeches.append((scale * speech + noise) / 2) - -###################################################################### -# 배경 잡음: -# ~~~~~~~~~~~~~~~~~ -# - -plot_waveform(noise, sample_rate, title="Background noise") -plot_specgram(noise, sample_rate, title="Background noise") -Audio(noise, rate=sample_rate) - -###################################################################### -# SNR 20 dB: -# ~~~~~~~~~~ -# - -snr_db, noisy_speech = snr_dbs[0], noisy_speeches[0] -plot_waveform(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]") -plot_specgram(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]") -Audio(noisy_speech, rate=sample_rate) - -###################################################################### -# SNR 10 dB: -# ~~~~~~~~~~ -# - -snr_db, noisy_speech = snr_dbs[1], noisy_speeches[1] -plot_waveform(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]") -plot_specgram(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]") -Audio(noisy_speech, rate=sample_rate) - -###################################################################### -# SNR 3 dB: -# ~~~~~~~~~ -# - -snr_db, noisy_speech = snr_dbs[2], noisy_speeches[2] -plot_waveform(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]") -plot_specgram(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]") -Audio(noisy_speech, rate=sample_rate) - - -###################################################################### -# 코덱을 텐서 객체에 적용하기 -# ------------------------------- -# -# :py:func:`torchaudio.functional.apply_codec` 는 텐서 오브젝트에 코덱을 적용합니다. -# -# **Note** 이 과정은 미분 불가능합니다. 
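A note on the SNR mixing used in the removed background-noise section above: the dB formula is defined on a power ratio, while the code scales by L2 norms (amplitude-like quantities), hence the factor of 20 in the exponent. A minimal restatement of that logic, assuming the same mixing convention as the deleted tutorial code, is sketched below.

.. code-block:: python

   import torch

   def mix_at_snr(speech: torch.Tensor, noise: torch.Tensor, snr_db: float) -> torch.Tensor:
       # Trim the noise to the speech length, as the tutorial does.
       noise = noise[:, : speech.shape[1]]
       # SNR_dB = 10 * log10(P_signal / P_noise); with L2 norms standing in
       # for amplitudes, the corresponding amplitude ratio is 10 ** (snr_db / 20).
       snr = 10 ** (snr_db / 20)
       scale = snr * noise.norm(p=2) / speech.norm(p=2)
       # Same convention as the removed code: scale the speech, then average.
       return (scale * speech + noise) / 2

   # Example with dummy single-channel signals.
   speech = torch.randn(1, 8000)
   noise = torch.randn(1, 16000)
   noisy = mix_at_snr(speech, noise, snr_db=10)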
-# - - -waveform, sample_rate = torchaudio.load(SAMPLE_SPEECH) - -configs = [ - {"format": "wav", "encoding": "ULAW", "bits_per_sample": 8}, - {"format": "gsm"}, - {"format": "vorbis", "compression": -1}, -] -waveforms = [] -for param in configs: - augmented = F.apply_codec(waveform, sample_rate, **param) - waveforms.append(augmented) - -###################################################################### -# Original: -# ~~~~~~~~~ -# - -plot_waveform(waveform, sample_rate, title="Original") -plot_specgram(waveform, sample_rate, title="Original") -Audio(waveform, rate=sample_rate) - -###################################################################### -# 8 bit mu-law: -# ~~~~~~~~~~~~~ -# - -plot_waveform(waveforms[0], sample_rate, title="8 bit mu-law") -plot_specgram(waveforms[0], sample_rate, title="8 bit mu-law") -Audio(waveforms[0], rate=sample_rate) - -###################################################################### -# GSM-FR: -# ~~~~~~~ -# - -plot_waveform(waveforms[1], sample_rate, title="GSM-FR") -plot_specgram(waveforms[1], sample_rate, title="GSM-FR") -Audio(waveforms[1], rate=sample_rate) - -###################################################################### -# Vorbis: -# ~~~~~~~ -# - -plot_waveform(waveforms[2], sample_rate, title="Vorbis") -plot_specgram(waveforms[2], sample_rate, title="Vorbis") -Audio(waveforms[2], rate=sample_rate) - -###################################################################### -# 전화 녹음 모의 실험하기 -# --------------------------- -# -# 이전 기술들을 혼합하여, 반향 있는 방의 사람들이 이야기하는 배경에서 전화 통화하는 -# 것처럼 들리는 오디오를 모의 실험할 수 있습니다. -# - -sample_rate = 16000 -original_speech, sample_rate = torchaudio.load(SAMPLE_SPEECH) - -plot_specgram(original_speech, sample_rate, title="Original") - -# RIR 적용하기 -speech_ = torch.nn.functional.pad(original_speech, (RIR.shape[1] - 1, 0)) -rir_applied = torch.nn.functional.conv1d(speech_[None, ...], RIR[None, ...])[0] - -plot_specgram(rir_applied, sample_rate, title="RIR Applied") - -# 배경 잡음 추가하기 -# 잡음이 실제 환경에서 녹음되었기 때문에, 잡음이 환경의 음향 특징을 가지고 있다고 고려했습니다. 
-# 따라서, RIR 적용 후에 잡음을 추가했습니다 -noise, _ = torchaudio.load(SAMPLE_NOISE) -noise = noise[:, : rir_applied.shape[1]] - -snr_db = 8 -scale = (10 ** (snr_db / 20)) * noise.norm(p=2) / rir_applied.norm(p=2) -bg_added = (scale * rir_applied + noise) / 2 - -plot_specgram(bg_added, sample_rate, title="BG noise added") - -# 필터링을 적용하고 샘플 레이트 수정하기 -filtered, sample_rate2 = torchaudio.sox_effects.apply_effects_tensor( - bg_added, - sample_rate, - effects=[ - ["lowpass", "4000"], - [ - "compand", - "0.02,0.05", - "-60,-60,-30,-10,-20,-8,-5,-8,-2,-8", - "-8", - "-7", - "0.05", - ], - ["rate", "8000"], - ], -) - -plot_specgram(filtered, sample_rate2, title="Filtered") - -# 전화 코덱 적용하기 -codec_applied = F.apply_codec(filtered, sample_rate2, format="gsm") - -plot_specgram(codec_applied, sample_rate2, title="GSM Codec Applied") - - -###################################################################### -# 기존 음성: -# ~~~~~~~~~~~~~~~~ -# - -Audio(original_speech, rate=sample_rate) - -###################################################################### -# RIR 적용 후: -# ~~~~~~~~~~~~ -# - -Audio(rir_applied, rate=sample_rate) - -###################################################################### -# 배경 잡음 추가 후: -# ~~~~~~~~~~~~~~~~~~~~~~~ -# - -Audio(bg_added, rate=sample_rate) - -###################################################################### -# 필터링 적용 후: -# ~~~~~~~~~ -# - -Audio(filtered, rate=sample_rate2) - -###################################################################### -# 코덱 적용 후: -# ~~~~~~~~~~~~~ -# - -Audio(codec_applied, rate=sample_rate2) diff --git a/beginner_source/audio_data_augmentation_tutorial.rst b/beginner_source/audio_data_augmentation_tutorial.rst new file mode 100644 index 000000000..55ba024a5 --- /dev/null +++ b/beginner_source/audio_data_augmentation_tutorial.rst @@ -0,0 +1,10 @@ +Audio Data Augmentation +======================= + +This tutorial has been moved to https://pytorch.org/audio/stable/tutorials/audio_data_augmentation_tutorial.html + +It will redirect in 3 seconds. + +.. raw:: html + + diff --git a/beginner_source/audio_datasets_tutorial.py b/beginner_source/audio_datasets_tutorial.py deleted file mode 100644 index f08ed99e0..000000000 --- a/beginner_source/audio_datasets_tutorial.py +++ /dev/null @@ -1,87 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Audio Datasets -============== - -``torchaudio`` provides easy access to common, publicly accessible -datasets. Please refer to the official documentation for the list of -available datasets. -""" - -# When running this tutorial in Google Colab, install the required packages -# with the following. -# !pip install torchaudio - -import torch -import torchaudio - -print(torch.__version__) -print(torchaudio.__version__) - -###################################################################### -# Preparing data and utility functions (skip this section) -# -------------------------------------------------------- -# - -# @title Prepare data and utility functions. {display-mode: "form"} -# @markdown -# @markdown You do not need to look into this cell. -# @markdown Just execute once and you are good to go. - -# ------------------------------------------------------------------------------- -# Preparation of data and helper functions. 
-# ------------------------------------------------------------------------------- -import multiprocessing -import os - -import matplotlib.pyplot as plt -from IPython.display import Audio, display - - -_SAMPLE_DIR = "_assets" -YESNO_DATASET_PATH = os.path.join(_SAMPLE_DIR, "yes_no") -os.makedirs(YESNO_DATASET_PATH, exist_ok=True) - - -def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None): - waveform = waveform.numpy() - - num_channels, num_frames = waveform.shape - - figure, axes = plt.subplots(num_channels, 1) - if num_channels == 1: - axes = [axes] - for c in range(num_channels): - axes[c].specgram(waveform[c], Fs=sample_rate) - if num_channels > 1: - axes[c].set_ylabel(f"Channel {c+1}") - if xlim: - axes[c].set_xlim(xlim) - figure.suptitle(title) - plt.show(block=False) - - -def play_audio(waveform, sample_rate): - waveform = waveform.numpy() - - num_channels, num_frames = waveform.shape - if num_channels == 1: - display(Audio(waveform[0], rate=sample_rate)) - elif num_channels == 2: - display(Audio((waveform[0], waveform[1]), rate=sample_rate)) - else: - raise ValueError("Waveform with more than 2 channels are not supported.") - - -###################################################################### -# Here, we show how to use the -# :py:func:`torchaudio.datasets.YESNO` dataset. -# - - -dataset = torchaudio.datasets.YESNO(YESNO_DATASET_PATH, download=True) - -for i in [1, 3, 5]: - waveform, sample_rate, label = dataset[i] - plot_specgram(waveform, sample_rate, title=f"Sample {i}: {label}") - play_audio(waveform, sample_rate) diff --git a/beginner_source/audio_datasets_tutorial.rst b/beginner_source/audio_datasets_tutorial.rst new file mode 100644 index 000000000..6e9b4f4f4 --- /dev/null +++ b/beginner_source/audio_datasets_tutorial.rst @@ -0,0 +1,10 @@ +Audio Datasets +============== + +This tutorial has been moved to https://pytorch.org/audio/stable/tutorials/audio_datasets_tutorial.html + +It will redirect in 3 seconds. + +.. raw:: html + + diff --git a/beginner_source/audio_feature_augmentation_tutorial.py b/beginner_source/audio_feature_augmentation_tutorial.py deleted file mode 100644 index 3961dafbc..000000000 --- a/beginner_source/audio_feature_augmentation_tutorial.py +++ /dev/null @@ -1,168 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Audio Feature Augmentation -========================== -""" - -# When running this tutorial in Google Colab, install the required packages -# with the following. -# !pip install torchaudio librosa - -import torch -import torchaudio -import torchaudio.transforms as T - -print(torch.__version__) -print(torchaudio.__version__) - -###################################################################### -# Preparing data and utility functions (skip this section) -# -------------------------------------------------------- -# - -# @title Prepare data and utility functions. {display-mode: "form"} -# @markdown -# @markdown You do not need to look into this cell. -# @markdown Just execute once and you are good to go. -# @markdown -# @markdown In this tutorial, we will use a speech data from [VOiCES dataset](https://iqtlabs.github.io/voices/), -# @markdown which is licensed under Creative Commos BY 4.0. - -# ------------------------------------------------------------------------------- -# Preparation of data and helper functions. 
-# ------------------------------------------------------------------------------- - -import os - -import librosa -import matplotlib.pyplot as plt -import requests - - -_SAMPLE_DIR = "_assets" - -SAMPLE_WAV_SPEECH_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav" # noqa: E501 -SAMPLE_WAV_SPEECH_PATH = os.path.join(_SAMPLE_DIR, "speech.wav") - -os.makedirs(_SAMPLE_DIR, exist_ok=True) - - -def _fetch_data(): - uri = [ - (SAMPLE_WAV_SPEECH_URL, SAMPLE_WAV_SPEECH_PATH), - ] - for url, path in uri: - with open(path, "wb") as file_: - file_.write(requests.get(url).content) - - -_fetch_data() - - -def _get_sample(path, resample=None): - effects = [["remix", "1"]] - if resample: - effects.extend( - [ - ["lowpass", f"{resample // 2}"], - ["rate", f"{resample}"], - ] - ) - return torchaudio.sox_effects.apply_effects_file(path, effects=effects) - - -def get_speech_sample(*, resample=None): - return _get_sample(SAMPLE_WAV_SPEECH_PATH, resample=resample) - - -def get_spectrogram( - n_fft=400, - win_len=None, - hop_len=None, - power=2.0, -): - waveform, _ = get_speech_sample() - spectrogram = T.Spectrogram( - n_fft=n_fft, - win_length=win_len, - hop_length=hop_len, - center=True, - pad_mode="reflect", - power=power, - ) - return spectrogram(waveform) - - -def plot_spectrogram(spec, title=None, ylabel="freq_bin", aspect="auto", xmax=None): - fig, axs = plt.subplots(1, 1) - axs.set_title(title or "Spectrogram (db)") - axs.set_ylabel(ylabel) - axs.set_xlabel("frame") - im = axs.imshow(librosa.power_to_db(spec), origin="lower", aspect=aspect) - if xmax: - axs.set_xlim((0, xmax)) - fig.colorbar(im, ax=axs) - plt.show(block=False) - - -###################################################################### -# SpecAugment -# ----------- -# -# `SpecAugment `__ -# is a popular spectrogram augmentation technique. -# -# ``torchaudio`` implements :py:func:`torchaudio.transforms.TimeStretch`, -# :py:func:`torchaudio.transforms.TimeMasking` and -# :py:func:`torchaudio.transforms.FrequencyMasking`. 
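As an editorial aside (a sketch, not part of the original tutorial), the masking transforms listed above can be chained into a single SpecAugment-style module. The transform names come from the text; the spectrogram shape and mask parameters below are illustrative assumptions.

.. code:: python

   import torch
   import torchaudio.transforms as T

   torch.random.manual_seed(0)

   # Dummy power spectrogram with assumed shape (channel, freq, time).
   spec = torch.rand(1, 201, 400)

   # Chain frequency- and time-masking, as SpecAugment does.
   spec_augment = torch.nn.Sequential(
       T.FrequencyMasking(freq_mask_param=30),
       T.TimeMasking(time_mask_param=80),
   )
   augmented = spec_augment(spec)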
-# - -###################################################################### -# TimeStretch -# ----------- -# - - -spec = get_spectrogram(power=None) -stretch = T.TimeStretch() - -rate = 1.2 -spec_ = stretch(spec, rate) -plot_spectrogram(torch.abs(spec_[0]), title=f"Stretched x{rate}", aspect="equal", xmax=304) - -plot_spectrogram(torch.abs(spec[0]), title="Original", aspect="equal", xmax=304) - -rate = 0.9 -spec_ = stretch(spec, rate) -plot_spectrogram(torch.abs(spec_[0]), title=f"Stretched x{rate}", aspect="equal", xmax=304) - -###################################################################### -# TimeMasking -# ----------- -# - -torch.random.manual_seed(4) - -spec = get_spectrogram() -plot_spectrogram(spec[0], title="Original") - -masking = T.TimeMasking(time_mask_param=80) -spec = masking(spec) - -plot_spectrogram(spec[0], title="Masked along time axis") - -###################################################################### -# FrequencyMasking -# ---------------- -# - - -torch.random.manual_seed(4) - -spec = get_spectrogram() -plot_spectrogram(spec[0], title="Original") - -masking = T.FrequencyMasking(freq_mask_param=80) -spec = masking(spec) - -plot_spectrogram(spec[0], title="Masked along frequency axis") diff --git a/beginner_source/audio_feature_augmentation_tutorial.rst b/beginner_source/audio_feature_augmentation_tutorial.rst new file mode 100644 index 000000000..55d3811b3 --- /dev/null +++ b/beginner_source/audio_feature_augmentation_tutorial.rst @@ -0,0 +1,10 @@ +Audio Feature Augmentation +========================== + +This tutorial has been moved to https://pytorch.org/audio/stable/tutorials/audio_data_augmentation_tutorial.html + +It will redirect in 3 seconds. + +.. raw:: html + + diff --git a/beginner_source/audio_feature_extractions_tutorial.py b/beginner_source/audio_feature_extractions_tutorial.py deleted file mode 100644 index 822c00d97..000000000 --- a/beginner_source/audio_feature_extractions_tutorial.py +++ /dev/null @@ -1,457 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Audio Feature Extractions -========================= - -``torchaudio`` implements feature extractions commonly used in the audio -domain. They are available in ``torchaudio.functional`` and -``torchaudio.transforms``. - -``functional`` implements features as standalone functions. -They are stateless. - -``transforms`` implements features as objects, -using implementations from ``functional`` and ``torch.nn.Module``. -They can be serialized using TorchScript. -""" - -import torch -import torchaudio -import torchaudio.functional as F -import torchaudio.transforms as T - -print(torch.__version__) -print(torchaudio.__version__) - -###################################################################### -# Preparation -# ----------- -# -# .. note:: -# -# When running this tutorial in Google Colab, install the required packages -# -# .. 
code:: -# -# !pip install librosa -# -from IPython.display import Audio -import librosa -import matplotlib.pyplot as plt -from torchaudio.utils import download_asset - -torch.random.manual_seed(0) - -SAMPLE_SPEECH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav") - - -def plot_waveform(waveform, sr, title="Waveform"): - waveform = waveform.numpy() - - num_channels, num_frames = waveform.shape - time_axis = torch.arange(0, num_frames) / sr - - figure, axes = plt.subplots(num_channels, 1) - axes.plot(time_axis, waveform[0], linewidth=1) - axes.grid(True) - figure.suptitle(title) - plt.show(block=False) - - -def plot_spectrogram(specgram, title=None, ylabel="freq_bin"): - fig, axs = plt.subplots(1, 1) - axs.set_title(title or "Spectrogram (db)") - axs.set_ylabel(ylabel) - axs.set_xlabel("frame") - im = axs.imshow(librosa.power_to_db(specgram), origin="lower", aspect="auto") - fig.colorbar(im, ax=axs) - plt.show(block=False) - - -def plot_fbank(fbank, title=None): - fig, axs = plt.subplots(1, 1) - axs.set_title(title or "Filter bank") - axs.imshow(fbank, aspect="auto") - axs.set_ylabel("frequency bin") - axs.set_xlabel("mel bin") - plt.show(block=False) - - -###################################################################### -# Overview of audio features -# -------------------------- -# -# The following diagram shows the relationship between common audio features -# and torchaudio APIs to generate them. -# -# .. image:: https://download.pytorch.org/torchaudio/tutorial-assets/torchaudio_feature_extractions.png -# -# For the complete list of available features, please refer to the -# documentation. -# - - -###################################################################### -# Spectrogram -# ----------- -# -# To get the frequency make-up of an audio signal as it varies with time, -# you can use :py:func:`torchaudio.transforms.Spectrogram`. -# - -SPEECH_WAVEFORM, SAMPLE_RATE = torchaudio.load(SAMPLE_SPEECH) - -plot_waveform(SPEECH_WAVEFORM, SAMPLE_RATE, title="Original waveform") -Audio(SPEECH_WAVEFORM.numpy(), rate=SAMPLE_RATE) - - -###################################################################### -# - -n_fft = 1024 -win_length = None -hop_length = 512 - -# Define transform -spectrogram = T.Spectrogram( - n_fft=n_fft, - win_length=win_length, - hop_length=hop_length, - center=True, - pad_mode="reflect", - power=2.0, -) - -###################################################################### -# - -# Perform transform -spec = spectrogram(SPEECH_WAVEFORM) - -###################################################################### -# - -plot_spectrogram(spec[0], title="torchaudio") - -###################################################################### -# GriffinLim -# ---------- -# -# To recover a waveform from a spectrogram, you can use ``GriffinLim``. 
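A brief editorial sketch of the speed/quality trade-off: ``GriffinLim`` exposes an ``n_iter`` argument (32 by default), and fewer iterations run faster at the cost of a rougher phase estimate. The random waveform below is a placeholder assumption.

.. code:: python

   import torch
   import torchaudio.transforms as T

   torch.random.manual_seed(0)
   dummy = torch.rand(1, 16000)  # one second of placeholder audio

   spec = T.Spectrogram(n_fft=1024, hop_length=512)(dummy)

   quick = T.GriffinLim(n_fft=1024, hop_length=512, n_iter=8)(spec)   # faster, coarser phase
   slow = T.GriffinLim(n_fft=1024, hop_length=512, n_iter=64)(spec)   # slower, usually closer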
-# - -torch.random.manual_seed(0) - -n_fft = 1024 -win_length = None -hop_length = 512 - -spec = T.Spectrogram( - n_fft=n_fft, - win_length=win_length, - hop_length=hop_length, -)(SPEECH_WAVEFORM) - -###################################################################### -# - -griffin_lim = T.GriffinLim( - n_fft=n_fft, - win_length=win_length, - hop_length=hop_length, -) - -###################################################################### -# - -reconstructed_waveform = griffin_lim(spec) - -###################################################################### -# - -plot_waveform(reconstructed_waveform, SAMPLE_RATE, title="Reconstructed") -Audio(reconstructed_waveform, rate=SAMPLE_RATE) - -###################################################################### -# Mel Filter Bank -# --------------- -# -# :py:func:`torchaudio.functional.melscale_fbanks` generates the filter bank -# for converting frequency bins to mel-scale bins. -# -# Since this function does not require input audio/features, there is no -# equivalent transform in :py:func:`torchaudio.transforms`. -# - -n_fft = 256 -n_mels = 64 -sample_rate = 6000 - -mel_filters = F.melscale_fbanks( - int(n_fft // 2 + 1), - n_mels=n_mels, - f_min=0.0, - f_max=sample_rate / 2.0, - sample_rate=sample_rate, - norm="slaney", -) - -###################################################################### -# - -plot_fbank(mel_filters, "Mel Filter Bank - torchaudio") - -###################################################################### -# Comparison against librosa -# ~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# For reference, here is the equivalent way to get the mel filter bank -# with ``librosa``. -# - -mel_filters_librosa = librosa.filters.mel( - sr=sample_rate, - n_fft=n_fft, - n_mels=n_mels, - fmin=0.0, - fmax=sample_rate / 2.0, - norm="slaney", - htk=True, -).T - -###################################################################### -# - -plot_fbank(mel_filters_librosa, "Mel Filter Bank - librosa") - -mse = torch.square(mel_filters - mel_filters_librosa).mean().item() -print("Mean Square Difference: ", mse) - -###################################################################### -# MelSpectrogram -# -------------- -# -# Generating a mel-scale spectrogram involves generating a spectrogram -# and performing mel-scale conversion. In ``torchaudio``, -# :py:func:`torchaudio.transforms.MelSpectrogram` provides -# this functionality. -# - -n_fft = 1024 -win_length = None -hop_length = 512 -n_mels = 128 - -mel_spectrogram = T.MelSpectrogram( - sample_rate=sample_rate, - n_fft=n_fft, - win_length=win_length, - hop_length=hop_length, - center=True, - pad_mode="reflect", - power=2.0, - norm="slaney", - onesided=True, - n_mels=n_mels, - mel_scale="htk", -) - -melspec = mel_spectrogram(SPEECH_WAVEFORM) - -###################################################################### -# - -plot_spectrogram(melspec[0], title="MelSpectrogram - torchaudio", ylabel="mel freq") - -###################################################################### -# Comparison against librosa -# ~~~~~~~~~~~~~~~~~~~~~~~~~~ -# -# For reference, here is the equivalent means of generating mel-scale -# spectrograms with ``librosa``. 
-# - -melspec_librosa = librosa.feature.melspectrogram( - y=SPEECH_WAVEFORM.numpy()[0], - sr=sample_rate, - n_fft=n_fft, - hop_length=hop_length, - win_length=win_length, - center=True, - pad_mode="reflect", - power=2.0, - n_mels=n_mels, - norm="slaney", - htk=True, -) - -###################################################################### -# - -plot_spectrogram(melspec_librosa, title="MelSpectrogram - librosa", ylabel="mel freq") - -mse = torch.square(melspec - melspec_librosa).mean().item() -print("Mean Square Difference: ", mse) - -###################################################################### -# MFCC -# ---- -# - -n_fft = 2048 -win_length = None -hop_length = 512 -n_mels = 256 -n_mfcc = 256 - -mfcc_transform = T.MFCC( - sample_rate=sample_rate, - n_mfcc=n_mfcc, - melkwargs={ - "n_fft": n_fft, - "n_mels": n_mels, - "hop_length": hop_length, - "mel_scale": "htk", - }, -) - -mfcc = mfcc_transform(SPEECH_WAVEFORM) - -###################################################################### -# - -plot_spectrogram(mfcc[0]) - -###################################################################### -# Comparison against librosa -# ~~~~~~~~~~~~~~~~~~~~~~~~~~ -# - -melspec = librosa.feature.melspectrogram( - y=SPEECH_WAVEFORM.numpy()[0], - sr=sample_rate, - n_fft=n_fft, - win_length=win_length, - hop_length=hop_length, - n_mels=n_mels, - htk=True, - norm=None, -) - -mfcc_librosa = librosa.feature.mfcc( - S=librosa.core.spectrum.power_to_db(melspec), - n_mfcc=n_mfcc, - dct_type=2, - norm="ortho", -) - -###################################################################### -# - -plot_spectrogram(mfcc_librosa) - -mse = torch.square(mfcc - mfcc_librosa).mean().item() -print("Mean Square Difference: ", mse) - -###################################################################### -# LFCC -# ---- -# - -n_fft = 2048 -win_length = None -hop_length = 512 -n_lfcc = 256 - -lfcc_transform = T.LFCC( - sample_rate=sample_rate, - n_lfcc=n_lfcc, - speckwargs={ - "n_fft": n_fft, - "win_length": win_length, - "hop_length": hop_length, - }, -) - -lfcc = lfcc_transform(SPEECH_WAVEFORM) -plot_spectrogram(lfcc[0]) - -###################################################################### -# Pitch -# ----- -# - -pitch = F.detect_pitch_frequency(SPEECH_WAVEFORM, SAMPLE_RATE) - -###################################################################### -# - -def plot_pitch(waveform, sr, pitch): - figure, axis = plt.subplots(1, 1) - axis.set_title("Pitch Feature") - axis.grid(True) - - end_time = waveform.shape[1] / sr - time_axis = torch.linspace(0, end_time, waveform.shape[1]) - axis.plot(time_axis, waveform[0], linewidth=1, color="gray", alpha=0.3) - - axis2 = axis.twinx() - time_axis = torch.linspace(0, end_time, pitch.shape[1]) - axis2.plot(time_axis, pitch[0], linewidth=2, label="Pitch", color="green") - - axis2.legend(loc=0) - plt.show(block=False) - - -plot_pitch(SPEECH_WAVEFORM, SAMPLE_RATE, pitch) - -###################################################################### -# Kaldi Pitch (beta) -# ------------------ -# -# Kaldi Pitch feature [1] is a pitch detection mechanism tuned for automatic -# speech recognition (ASR) applications. This is a beta feature in ``torchaudio``, -# and it is available as :py:func:`torchaudio.functional.compute_kaldi_pitch`. -# -# 1. A pitch extraction algorithm tuned for automatic speech recognition -# -# Ghahremani, B. BabaAli, D. Povey, K. Riedhammer, J. Trmal and S. 
-# Khudanpur -# -# 2014 IEEE International Conference on Acoustics, Speech and Signal -# Processing (ICASSP), Florence, 2014, pp. 2494-2498, doi: -# 10.1109/ICASSP.2014.6854049. -# [`abstract `__], -# [`paper `__] -# - -pitch_feature = F.compute_kaldi_pitch(SPEECH_WAVEFORM, SAMPLE_RATE) -pitch, nfcc = pitch_feature[..., 0], pitch_feature[..., 1] - -###################################################################### -# - -def plot_kaldi_pitch(waveform, sr, pitch, nfcc): - _, axis = plt.subplots(1, 1) - axis.set_title("Kaldi Pitch Feature") - axis.grid(True) - - end_time = waveform.shape[1] / sr - time_axis = torch.linspace(0, end_time, waveform.shape[1]) - axis.plot(time_axis, waveform[0], linewidth=1, color="gray", alpha=0.3) - - time_axis = torch.linspace(0, end_time, pitch.shape[1]) - ln1 = axis.plot(time_axis, pitch[0], linewidth=2, label="Pitch", color="green") - axis.set_ylim((-1.3, 1.3)) - - axis2 = axis.twinx() - time_axis = torch.linspace(0, end_time, nfcc.shape[1]) - ln2 = axis2.plot(time_axis, nfcc[0], linewidth=2, label="NFCC", color="blue", linestyle="--") - - lns = ln1 + ln2 - labels = [l.get_label() for l in lns] - axis.legend(lns, labels, loc=0) - plt.show(block=False) - - -plot_kaldi_pitch(SPEECH_WAVEFORM, SAMPLE_RATE, pitch, nfcc) diff --git a/beginner_source/audio_feature_extractions_tutorial.rst b/beginner_source/audio_feature_extractions_tutorial.rst new file mode 100644 index 000000000..a2a8da4ab --- /dev/null +++ b/beginner_source/audio_feature_extractions_tutorial.rst @@ -0,0 +1,10 @@ +Audio Feature Extractions +========================= + +This tutorial has been moved to https://pytorch.org/audio/stable/tutorials/audio_feature_extractions_tutorial.html + +It will redirect in 3 seconds. + +.. raw:: html + + diff --git a/beginner_source/audio_io_tutorial.py b/beginner_source/audio_io_tutorial.py deleted file mode 100644 index 4917f1b10..000000000 --- a/beginner_source/audio_io_tutorial.py +++ /dev/null @@ -1,385 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Audio I/O -========= - -This tutorial shows how to use TorchAudio's basic I/O API to load audio files -into PyTorch's Tensor object, and save Tensor objects to audio files. -""" - -import torch -import torchaudio - -print(torch.__version__) -print(torchaudio.__version__) - -###################################################################### -# Preparation -# ----------- -# -# First, we import the modules and download the audio assets we use in this tutorial. -# -# .. note:: -# When running this tutorial in Google Colab, install the required packages -# with the following: -# -# .. code:: -# -# !pip install boto3 - -import io -import os -import tarfile -import tempfile - -import boto3 -import matplotlib.pyplot as plt -import requests -from botocore import UNSIGNED -from botocore.config import Config -from IPython.display import Audio -from torchaudio.utils import download_asset - -SAMPLE_GSM = download_asset("tutorial-assets/steam-train-whistle-daniel_simon.gsm") -SAMPLE_WAV = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav") -SAMPLE_WAV_8000 = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042-8000hz.wav") - - - -###################################################################### -# Querying audio metadata -# ----------------------- -# -# Function :py:func:`torchaudio.info` fetches audio metadata. -# You can provide a path-like object or file-like object. 
-# - -metadata = torchaudio.info(SAMPLE_WAV) -print(metadata) - -###################################################################### -# Where -# -# - ``sample_rate`` is the sampling rate of the audio -# - ``num_channels`` is the number of channels -# - ``num_frames`` is the number of frames per channel -# - ``bits_per_sample`` is bit depth -# - ``encoding`` is the sample coding format -# -# ``encoding`` can take on one of the following values: -# -# - ``"PCM_S"``: Signed integer linear PCM -# - ``"PCM_U"``: Unsigned integer linear PCM -# - ``"PCM_F"``: Floating point linear PCM -# - ``"FLAC"``: Flac, `Free Lossless Audio -# Codec `__ -# - ``"ULAW"``: Mu-law, -# [`wikipedia `__] -# - ``"ALAW"``: A-law -# [`wikipedia `__] -# - ``"MP3"`` : MP3, MPEG-1 Audio Layer III -# - ``"VORBIS"``: OGG Vorbis [`xiph.org `__] -# - ``"AMR_NB"``: Adaptive Multi-Rate -# [`wikipedia `__] -# - ``"AMR_WB"``: Adaptive Multi-Rate Wideband -# [`wikipedia `__] -# - ``"OPUS"``: Opus [`opus-codec.org `__] -# - ``"GSM"``: GSM-FR -# [`wikipedia `__] -# - ``"HTK"``: Single channel 16-bit PCM -# - ``"UNKNOWN"`` None of above -# - -###################################################################### -# **Note** -# -# - ``bits_per_sample`` can be ``0`` for formats with compression and/or -# variable bit rate (such as MP3). -# - ``num_frames`` can be ``0`` for GSM-FR format. -# - -metadata = torchaudio.info(SAMPLE_GSM) -print(metadata) - - -###################################################################### -# Querying file-like object -# ------------------------- -# -# :py:func:`torchaudio.info` works on file-like objects. -# - -url = "https://download.pytorch.org/torchaudio/tutorial-assets/steam-train-whistle-daniel_simon.wav" -with requests.get(url, stream=True) as response: - metadata = torchaudio.info(response.raw) -print(metadata) - -###################################################################### -# .. note:: -# -# When passing a file-like object, ``info`` does not read -# all of the underlying data; rather, it reads only a portion -# of the data from the beginning. -# Therefore, for a given audio format, it may not be able to retrieve the -# correct metadata, including the format itself. In such case, you -# can pass ``format`` argument to specify the format of the audio. - -###################################################################### -# Loading audio data -# ------------------ -# -# To load audio data, you can use :py:func:`torchaudio.load`. -# -# This function accepts a path-like object or file-like object as input. -# -# The returned value is a tuple of waveform (``Tensor``) and sample rate -# (``int``). -# -# By default, the resulting tensor object has ``dtype=torch.float32`` and -# its value range is ``[-1.0, 1.0]``. -# -# For the list of supported format, please refer to `the torchaudio -# documentation `__. 
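An editorial sketch of the ``format`` hint mentioned earlier: when the container cannot be inferred from a file-like object, it can be passed explicitly to ``torchaudio.load`` (or ``torchaudio.info``). The local file name is a placeholder assumption.

.. code:: python

   # "sample.mp3" is a placeholder; any MP3 file-like object would do.
   with open("sample.mp3", "rb") as f:
       waveform, sample_rate = torchaudio.load(f, format="mp3")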
-# - -waveform, sample_rate = torchaudio.load(SAMPLE_WAV) - - -###################################################################### -# -def plot_waveform(waveform, sample_rate): - waveform = waveform.numpy() - - num_channels, num_frames = waveform.shape - time_axis = torch.arange(0, num_frames) / sample_rate - - figure, axes = plt.subplots(num_channels, 1) - if num_channels == 1: - axes = [axes] - for c in range(num_channels): - axes[c].plot(time_axis, waveform[c], linewidth=1) - axes[c].grid(True) - if num_channels > 1: - axes[c].set_ylabel(f"Channel {c+1}") - figure.suptitle("waveform") - plt.show(block=False) - - -###################################################################### -# -plot_waveform(waveform, sample_rate) - - -###################################################################### -# -def plot_specgram(waveform, sample_rate, title="Spectrogram"): - waveform = waveform.numpy() - - num_channels, num_frames = waveform.shape - - figure, axes = plt.subplots(num_channels, 1) - if num_channels == 1: - axes = [axes] - for c in range(num_channels): - axes[c].specgram(waveform[c], Fs=sample_rate) - if num_channels > 1: - axes[c].set_ylabel(f"Channel {c+1}") - figure.suptitle(title) - plt.show(block=False) - - -###################################################################### -# -plot_specgram(waveform, sample_rate) - - -###################################################################### -# -Audio(waveform.numpy()[0], rate=sample_rate) - -###################################################################### -# Loading from file-like object -# ----------------------------- -# -# The I/O functions support file-like objects. -# This allows for fetching and decoding audio data from locations -# within and beyond the local file system. -# The following examples illustrate this. -# - -###################################################################### -# - -# Load audio data as HTTP request -url = "https://download.pytorch.org/torchaudio/tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav" -with requests.get(url, stream=True) as response: - waveform, sample_rate = torchaudio.load(response.raw) -plot_specgram(waveform, sample_rate, title="HTTP datasource") - -###################################################################### -# - -# Load audio from tar file -tar_path = download_asset("tutorial-assets/VOiCES_devkit.tar.gz") -tar_item = "VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav" -with tarfile.open(tar_path, mode="r") as tarfile_: - fileobj = tarfile_.extractfile(tar_item) - waveform, sample_rate = torchaudio.load(fileobj) -plot_specgram(waveform, sample_rate, title="TAR file") - -###################################################################### -# - -# Load audio from S3 -bucket = "pytorch-tutorial-assets" -key = "VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav" -client = boto3.client("s3", config=Config(signature_version=UNSIGNED)) -response = client.get_object(Bucket=bucket, Key=key) -waveform, sample_rate = torchaudio.load(response["Body"]) -plot_specgram(waveform, sample_rate, title="From S3") - - -###################################################################### -# Tips on slicing -# --------------- -# -# Providing ``num_frames`` and ``frame_offset`` arguments restricts -# decoding to the corresponding segment of the input. -# -# The same result can be achieved using vanilla Tensor slicing, -# (i.e. ``waveform[:, frame_offset:frame_offset+num_frames]``). 
However, -# providing ``num_frames`` and ``frame_offset`` arguments is more -# efficient. -# -# This is because the function will end data acquisition and decoding -# once it finishes decoding the requested frames. This is advantageous -# when the audio data are transferred via network as the data transfer will -# stop as soon as the necessary amount of data is fetched. -# -# The following example illustrates this. -# - -# Illustration of two different decoding methods. -# The first one will fetch all the data and decode them, while -# the second one will stop fetching data once it completes decoding. -# The resulting waveforms are identical. - -frame_offset, num_frames = 16000, 16000 # Fetch and decode the 1 - 2 seconds - -url = "https://download.pytorch.org/torchaudio/tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav" -print("Fetching all the data...") -with requests.get(url, stream=True) as response: - waveform1, sample_rate1 = torchaudio.load(response.raw) - waveform1 = waveform1[:, frame_offset : frame_offset + num_frames] - print(f" - Fetched {response.raw.tell()} bytes") - -print("Fetching until the requested frames are available...") -with requests.get(url, stream=True) as response: - waveform2, sample_rate2 = torchaudio.load(response.raw, frame_offset=frame_offset, num_frames=num_frames) - print(f" - Fetched {response.raw.tell()} bytes") - -print("Checking the resulting waveform ... ", end="") -assert (waveform1 == waveform2).all() -print("matched!") - -###################################################################### -# Saving audio to file -# -------------------- -# -# To save audio data in formats interpretable by common applications, -# you can use :py:func:`torchaudio.save`. -# -# This function accepts a path-like object or file-like object. -# -# When passing a file-like object, you also need to provide argument ``format`` -# so that the function knows which format it should use. In the -# case of a path-like object, the function will infer the format from -# the extension. If you are saving to a file without an extension, you need -# to provide argument ``format``. -# -# When saving WAV-formatted data, the default encoding for ``float32`` Tensor -# is 32-bit floating-point PCM. You can provide arguments ``encoding`` and -# ``bits_per_sample`` to change this behavior. For example, to save data -# in 16-bit signed integer PCM, you can do the following. -# -# .. note:: -# -# Saving data in encodings with a lower bit depth reduces the -# resulting file size but also precision. -# - -waveform, sample_rate = torchaudio.load(SAMPLE_WAV) - - -###################################################################### -# - -def inspect_file(path): - print("-" * 10) - print("Source:", path) - print("-" * 10) - print(f" - File size: {os.path.getsize(path)} bytes") - print(f" - {torchaudio.info(path)}") - print() - -###################################################################### -# -# Save without any encoding option. 
-# The function will pick up the encoding which -# the provided data fit -with tempfile.TemporaryDirectory() as tempdir: - path = f"{tempdir}/save_example_default.wav" - torchaudio.save(path, waveform, sample_rate) - inspect_file(path) - -###################################################################### -# -# Save as 16-bit signed integer Linear PCM -# The resulting file occupies half the storage but loses precision -with tempfile.TemporaryDirectory() as tempdir: - path = f"{tempdir}/save_example_PCM_S16.wav" - torchaudio.save(path, waveform, sample_rate, encoding="PCM_S", bits_per_sample=16) - inspect_file(path) - - -###################################################################### -# :py:func:`torchaudio.save` can also handle other formats. -# To name a few: -# - -formats = [ - "flac", - "vorbis", - "sph", - "amb", - "amr-nb", - "gsm", -] - -###################################################################### -# -waveform, sample_rate = torchaudio.load(SAMPLE_WAV_8000) -with tempfile.TemporaryDirectory() as tempdir: - for format in formats: - path = f"{tempdir}/save_example.{format}" - torchaudio.save(path, waveform, sample_rate, format=format) - inspect_file(path) - -###################################################################### -# Saving to file-like object -# -------------------------- -# -# Similar to the other I/O functions, you can save audio to file-like -# objects. When saving to a file-like object, argument ``format`` is -# required. -# - - -waveform, sample_rate = torchaudio.load(SAMPLE_WAV) - -# Saving to bytes buffer -buffer_ = io.BytesIO() -torchaudio.save(buffer_, waveform, sample_rate, format="wav") - -buffer_.seek(0) -print(buffer_.read(16)) diff --git a/beginner_source/audio_io_tutorial.rst b/beginner_source/audio_io_tutorial.rst new file mode 100644 index 000000000..3263ad93a --- /dev/null +++ b/beginner_source/audio_io_tutorial.rst @@ -0,0 +1,10 @@ +Audio I/O +========= + +This tutorial has been moved to https://pytorch.org/audio/stable/tutorials/audio_io_tutorial.html + +It will redirect in 3 seconds. + +.. raw:: html + + diff --git a/beginner_source/audio_resampling_tutorial.py b/beginner_source/audio_resampling_tutorial.py deleted file mode 100644 index 3ffd73998..000000000 --- a/beginner_source/audio_resampling_tutorial.py +++ /dev/null @@ -1,476 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Audio Resampling -================ - -This tutorial shows how to use torchaudio's resampling API. -""" - -import torch -import torchaudio -import torchaudio.functional as F -import torchaudio.transforms as T - -print(torch.__version__) -print(torchaudio.__version__) - -###################################################################### -# Preparation -# ----------- -# -# First, we import the modules and define the helper functions. -# -# .. note:: -# When running this tutorial in Google Colab, install the required packages -# with the following. -# -# .. code:: -# -# !pip install librosa - -import math -import time - -import librosa -import matplotlib.pyplot as plt -import pandas as pd -from IPython.display import Audio, display - -pd.set_option('display.max_rows', None) -pd.set_option('display.max_columns', None) - -DEFAULT_OFFSET = 201 - - -def _get_log_freq(sample_rate, max_sweep_rate, offset): - """Get freqs evenly spaced out in log-scale, between [0, max_sweep_rate // 2] - - offset is used to avoid negative infinity `log(offset + x)`. 
- - """ - start, stop = math.log(offset), math.log(offset + max_sweep_rate // 2) - return torch.exp(torch.linspace(start, stop, sample_rate, dtype=torch.double)) - offset - - -def _get_inverse_log_freq(freq, sample_rate, offset): - """Find the time where the given frequency is given by _get_log_freq""" - half = sample_rate // 2 - return sample_rate * (math.log(1 + freq / offset) / math.log(1 + half / offset)) - - -def _get_freq_ticks(sample_rate, offset, f_max): - # Given the original sample rate used for generating the sweep, - # find the x-axis value where the log-scale major frequency values fall in - time, freq = [], [] - for exp in range(2, 5): - for v in range(1, 10): - f = v * 10**exp - if f < sample_rate // 2: - t = _get_inverse_log_freq(f, sample_rate, offset) / sample_rate - time.append(t) - freq.append(f) - t_max = _get_inverse_log_freq(f_max, sample_rate, offset) / sample_rate - time.append(t_max) - freq.append(f_max) - return time, freq - - -def get_sine_sweep(sample_rate, offset=DEFAULT_OFFSET): - max_sweep_rate = sample_rate - freq = _get_log_freq(sample_rate, max_sweep_rate, offset) - delta = 2 * math.pi * freq / sample_rate - cummulative = torch.cumsum(delta, dim=0) - signal = torch.sin(cummulative).unsqueeze(dim=0) - return signal - - -def plot_sweep( - waveform, - sample_rate, - title, - max_sweep_rate=48000, - offset=DEFAULT_OFFSET, -): - x_ticks = [100, 500, 1000, 5000, 10000, 20000, max_sweep_rate // 2] - y_ticks = [1000, 5000, 10000, 20000, sample_rate // 2] - - time, freq = _get_freq_ticks(max_sweep_rate, offset, sample_rate // 2) - freq_x = [f if f in x_ticks and f <= max_sweep_rate // 2 else None for f in freq] - freq_y = [f for f in freq if f in y_ticks and 1000 <= f <= sample_rate // 2] - - figure, axis = plt.subplots(1, 1) - _, _, _, cax = axis.specgram(waveform[0].numpy(), Fs=sample_rate) - plt.xticks(time, freq_x) - plt.yticks(freq_y, freq_y) - axis.set_xlabel("Original Signal Frequency (Hz, log scale)") - axis.set_ylabel("Waveform Frequency (Hz)") - axis.xaxis.grid(True, alpha=0.67) - axis.yaxis.grid(True, alpha=0.67) - figure.suptitle(f"{title} (sample rate: {sample_rate} Hz)") - plt.colorbar(cax) - plt.show(block=True) - - -###################################################################### -# Resampling Overview -# ------------------- -# -# To resample an audio waveform from one freqeuncy to another, you can use -# :py:func:`torchaudio.transforms.Resample` or -# :py:func:`torchaudio.functional.resample`. -# ``transforms.Resample`` precomputes and caches the kernel used for resampling, -# while ``functional.resample`` computes it on the fly, so using -# ``torchaudio.transforms.Resample`` will result in a speedup when resampling -# multiple waveforms using the same parameters (see Benchmarking section). -# -# Both resampling methods use `bandlimited sinc -# interpolation `__ to compute -# signal values at arbitrary time steps. The implementation involves -# convolution, so we can take advantage of GPU / multithreading for -# performance improvements. -# -# .. note:: -# -# When using resampling in multiple subprocesses, such as data loading -# with multiple worker processes, your application might create more -# threads than your system can handle efficiently. -# Setting ``torch.set_num_threads(1)`` might help in this case. 
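A minimal editorial sketch of the kernel-caching behaviour described in this overview; the sample rates, batch size, and random waveforms are assumptions.

.. code:: python

   import torch
   import torchaudio.functional as F
   import torchaudio.transforms as T

   batch = [torch.randn(1, 48000) for _ in range(8)]

   # The transform builds its resampling kernel once and reuses it across calls.
   resampler = T.Resample(48000, 32000, dtype=torch.float32)
   out_cached = [resampler(w) for w in batch]

   # The functional API recomputes the kernel on every call.
   out_uncached = [F.resample(w, 48000, 32000) for w in batch]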
-# -# Because a finite number of samples can only represent a finite number of -# frequencies, resampling does not produce perfect results, and a variety -# of parameters can be used to control for its quality and computational -# speed. We demonstrate these properties through resampling a logarithmic -# sine sweep, which is a sine wave that increases exponentially in -# frequency over time. -# -# The spectrograms below show the frequency representation of the signal, -# where the x-axis corresponds to the frequency of the original -# waveform (in log scale), y-axis the frequency of the -# plotted waveform, and color intensity the amplitude. -# - -sample_rate = 48000 -waveform = get_sine_sweep(sample_rate) - -plot_sweep(waveform, sample_rate, title="Original Waveform") -Audio(waveform.numpy()[0], rate=sample_rate) - -###################################################################### -# -# Now we resample (downsample) it. -# -# We see that in the spectrogram of the resampled waveform, there is an -# artifact, which was not present in the original waveform. - -resample_rate = 32000 -resampler = T.Resample(sample_rate, resample_rate, dtype=waveform.dtype) -resampled_waveform = resampler(waveform) - -plot_sweep(resampled_waveform, resample_rate, title="Resampled Waveform") -Audio(resampled_waveform.numpy()[0], rate=resample_rate) - -###################################################################### -# Controling resampling quality with parameters -# --------------------------------------------- -# -# Lowpass filter width -# ~~~~~~~~~~~~~~~~~~~~ -# -# Because the filter used for interpolation extends infinitely, the -# ``lowpass_filter_width`` parameter is used to control for the width of -# the filter to use to window the interpolation. It is also referred to as -# the number of zero crossings, since the interpolation passes through -# zero at every time unit. Using a larger ``lowpass_filter_width`` -# provides a sharper, more precise filter, but is more computationally -# expensive. -# - -sample_rate = 48000 -resample_rate = 32000 - -resampled_waveform = F.resample(waveform, sample_rate, resample_rate, lowpass_filter_width=6) -plot_sweep(resampled_waveform, resample_rate, title="lowpass_filter_width=6") - -###################################################################### -# - -resampled_waveform = F.resample(waveform, sample_rate, resample_rate, lowpass_filter_width=128) -plot_sweep(resampled_waveform, resample_rate, title="lowpass_filter_width=128") - -###################################################################### -# Rolloff -# ~~~~~~~ -# -# The ``rolloff`` parameter is represented as a fraction of the Nyquist -# frequency, which is the maximal frequency representable by a given -# finite sample rate. ``rolloff`` determines the lowpass filter cutoff and -# controls the degree of aliasing, which takes place when frequencies -# higher than the Nyquist are mapped to lower frequencies. A lower rolloff -# will therefore reduce the amount of aliasing, but it will also reduce -# some of the higher frequencies. 
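A back-of-the-envelope editorial sketch (approximate, not the exact torchaudio internals): for the 48 kHz to 32 kHz downsample used below, the anti-aliasing cutoff scales roughly with ``rolloff`` times the new Nyquist frequency.

.. code:: python

   new_nyquist = 32000 / 2             # 16000 Hz
   print(0.99 * new_nyquist)           # ~15840 Hz: keeps more highs, allows more aliasing
   print(0.80 * new_nyquist)           # ~12800 Hz: removes more highs, less aliasing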
-# - - -sample_rate = 48000 -resample_rate = 32000 - -resampled_waveform = F.resample(waveform, sample_rate, resample_rate, rolloff=0.99) -plot_sweep(resampled_waveform, resample_rate, title="rolloff=0.99") - -###################################################################### -# - -resampled_waveform = F.resample(waveform, sample_rate, resample_rate, rolloff=0.8) -plot_sweep(resampled_waveform, resample_rate, title="rolloff=0.8") - - -###################################################################### -# Window function -# ~~~~~~~~~~~~~~~ -# -# By default, ``torchaudio``’s resample uses the Hann window filter, which is -# a weighted cosine function. It additionally supports the Kaiser window, -# which is a near optimal window function that contains an additional -# ``beta`` parameter that allows for the design of the smoothness of the -# filter and width of impulse. This can be controlled using the -# ``resampling_method`` parameter. -# - - -sample_rate = 48000 -resample_rate = 32000 - -resampled_waveform = F.resample(waveform, sample_rate, resample_rate, resampling_method="sinc_interpolation") -plot_sweep(resampled_waveform, resample_rate, title="Hann Window Default") - -###################################################################### -# - -resampled_waveform = F.resample(waveform, sample_rate, resample_rate, resampling_method="kaiser_window") -plot_sweep(resampled_waveform, resample_rate, title="Kaiser Window Default") - - -###################################################################### -# Comparison against librosa -# -------------------------- -# -# ``torchaudio``’s resample function can be used to produce results similar to -# that of librosa (resampy)’s kaiser window resampling, with some noise -# - -sample_rate = 48000 -resample_rate = 32000 - -###################################################################### -# kaiser_best -# ~~~~~~~~~~~ -# -resampled_waveform = F.resample( - waveform, - sample_rate, - resample_rate, - lowpass_filter_width=64, - rolloff=0.9475937167399596, - resampling_method="kaiser_window", - beta=14.769656459379492, -) -plot_sweep(resampled_waveform, resample_rate, title="Kaiser Window Best (torchaudio)") - -###################################################################### -# - -librosa_resampled_waveform = torch.from_numpy( - librosa.resample(waveform.squeeze().numpy(), orig_sr=sample_rate, target_sr=resample_rate, res_type="kaiser_best") -).unsqueeze(0) -plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Best (librosa)") - -###################################################################### -# - -mse = torch.square(resampled_waveform - librosa_resampled_waveform).mean().item() -print("torchaudio and librosa kaiser best MSE:", mse) - -###################################################################### -# kaiser_fast -# ~~~~~~~~~~~ -# -resampled_waveform = F.resample( - waveform, - sample_rate, - resample_rate, - lowpass_filter_width=16, - rolloff=0.85, - resampling_method="kaiser_window", - beta=8.555504641634386, -) -plot_sweep(resampled_waveform, resample_rate, title="Kaiser Window Fast (torchaudio)") - -###################################################################### -# - -librosa_resampled_waveform = torch.from_numpy( - librosa.resample(waveform.squeeze().numpy(), orig_sr=sample_rate, target_sr=resample_rate, res_type="kaiser_fast") -).unsqueeze(0) -plot_sweep(librosa_resampled_waveform, resample_rate, title="Kaiser Window Fast (librosa)") - 
-###################################################################### -# - -mse = torch.square(resampled_waveform - librosa_resampled_waveform).mean().item() -print("torchaudio and librosa kaiser fast MSE:", mse) - -###################################################################### -# Performance Benchmarking -# ------------------------ -# -# Below are benchmarks for downsampling and upsampling waveforms between -# two pairs of sampling rates. We demonstrate the performance implications -# that the ``lowpass_filter_wdith``, window type, and sample rates can -# have. Additionally, we provide a comparison against ``librosa``\ ’s -# ``kaiser_best`` and ``kaiser_fast`` using their corresponding parameters -# in ``torchaudio``. -# -# To elaborate on the results: -# -# - a larger ``lowpass_filter_width`` results in a larger resampling kernel, -# and therefore increases computation time for both the kernel computation -# and convolution -# - using ``kaiser_window`` results in longer computation times than the default -# ``sinc_interpolation`` because it is more complex to compute the intermediate -# window values - a large GCD between the sample and resample rate will result -# in a simplification that allows for a smaller kernel and faster kernel computation. -# - - -def benchmark_resample( - method, - waveform, - sample_rate, - resample_rate, - lowpass_filter_width=6, - rolloff=0.99, - resampling_method="sinc_interpolation", - beta=None, - librosa_type=None, - iters=5, -): - if method == "functional": - begin = time.monotonic() - for _ in range(iters): - F.resample( - waveform, - sample_rate, - resample_rate, - lowpass_filter_width=lowpass_filter_width, - rolloff=rolloff, - resampling_method=resampling_method, - ) - elapsed = time.monotonic() - begin - return elapsed / iters - elif method == "transforms": - resampler = T.Resample( - sample_rate, - resample_rate, - lowpass_filter_width=lowpass_filter_width, - rolloff=rolloff, - resampling_method=resampling_method, - dtype=waveform.dtype, - ) - begin = time.monotonic() - for _ in range(iters): - resampler(waveform) - elapsed = time.monotonic() - begin - return elapsed / iters - elif method == "librosa": - waveform_np = waveform.squeeze().numpy() - begin = time.monotonic() - for _ in range(iters): - librosa.resample(waveform_np, orig_sr=sample_rate, target_sr=resample_rate, res_type=librosa_type) - elapsed = time.monotonic() - begin - return elapsed / iters - - -###################################################################### -# - -configs = { - "downsample (48 -> 44.1 kHz)": [48000, 44100], - "downsample (16 -> 8 kHz)": [16000, 8000], - "upsample (44.1 -> 48 kHz)": [44100, 48000], - "upsample (8 -> 16 kHz)": [8000, 16000], -} - -for label in configs: - times, rows = [], [] - sample_rate = configs[label][0] - resample_rate = configs[label][1] - waveform = get_sine_sweep(sample_rate) - - # sinc 64 zero-crossings - f_time = benchmark_resample("functional", waveform, sample_rate, resample_rate, lowpass_filter_width=64) - t_time = benchmark_resample("transforms", waveform, sample_rate, resample_rate, lowpass_filter_width=64) - times.append([None, 1000 * f_time, 1000 * t_time]) - rows.append("sinc (width 64)") - - # sinc 6 zero-crossings - f_time = benchmark_resample("functional", waveform, sample_rate, resample_rate, lowpass_filter_width=16) - t_time = benchmark_resample("transforms", waveform, sample_rate, resample_rate, lowpass_filter_width=16) - times.append([None, 1000 * f_time, 1000 * t_time]) - rows.append("sinc (width 16)") - - # 
kaiser best - lib_time = benchmark_resample("librosa", waveform, sample_rate, resample_rate, librosa_type="kaiser_best") - f_time = benchmark_resample( - "functional", - waveform, - sample_rate, - resample_rate, - lowpass_filter_width=64, - rolloff=0.9475937167399596, - resampling_method="kaiser_window", - beta=14.769656459379492, - ) - t_time = benchmark_resample( - "transforms", - waveform, - sample_rate, - resample_rate, - lowpass_filter_width=64, - rolloff=0.9475937167399596, - resampling_method="kaiser_window", - beta=14.769656459379492, - ) - times.append([1000 * lib_time, 1000 * f_time, 1000 * t_time]) - rows.append("kaiser_best") - - # kaiser fast - lib_time = benchmark_resample("librosa", waveform, sample_rate, resample_rate, librosa_type="kaiser_fast") - f_time = benchmark_resample( - "functional", - waveform, - sample_rate, - resample_rate, - lowpass_filter_width=16, - rolloff=0.85, - resampling_method="kaiser_window", - beta=8.555504641634386, - ) - t_time = benchmark_resample( - "transforms", - waveform, - sample_rate, - resample_rate, - lowpass_filter_width=16, - rolloff=0.85, - resampling_method="kaiser_window", - beta=8.555504641634386, - ) - times.append([1000 * lib_time, 1000 * f_time, 1000 * t_time]) - rows.append("kaiser_fast") - - df = pd.DataFrame(times, columns=["librosa", "functional", "transforms"], index=rows) - df.columns = pd.MultiIndex.from_product([[f"{label} time (ms)"], df.columns]) - - print(f"torchaudio: {torchaudio.__version__}") - print(f"librosa: {librosa.__version__}") - display(df.round(2)) diff --git a/beginner_source/audio_resampling_tutorial.rst b/beginner_source/audio_resampling_tutorial.rst new file mode 100644 index 000000000..01210830e --- /dev/null +++ b/beginner_source/audio_resampling_tutorial.rst @@ -0,0 +1,9 @@ +Audio Resampling +================ + +This tutorial has been moved to `a new location `_ +You will be redirected in 3 seconds. + +.. raw:: html + + diff --git a/beginner_source/basics/autogradqs_tutorial.py b/beginner_source/basics/autogradqs_tutorial.py index 2ea815895..a9f162aad 100644 --- a/beginner_source/basics/autogradqs_tutorial.py +++ b/beginner_source/basics/autogradqs_tutorial.py @@ -117,9 +117,7 @@ ###################################################################### # 변화도 추적을 멈춰야 하는 이유들은 다음과 같습니다: -# - 신경망의 일부 매개변수를 **고정된 매개변수(frozen parameter)**\ 로 표시합니다. 이는 -# `사전 학습된 신경망을 미세조정 `__ -# 할 때 매우 일반적인 시나리오입니다. +# - 신경망의 일부 매개변수를 **고정된 매개변수(frozen parameter)**\ 로 표시합니다. # - 변화도를 추적하지 않는 텐서의 연산이 더 효율적이기 때문에, 순전파 단계만 수행할 때 # **연산 속도가 향상됩니다.** diff --git a/beginner_source/basics/buildmodel_tutorial.py b/beginner_source/basics/buildmodel_tutorial.py index e6dc90bc5..ae80f4e10 100644 --- a/beginner_source/basics/buildmodel_tutorial.py +++ b/beginner_source/basics/buildmodel_tutorial.py @@ -32,11 +32,17 @@ # 학습을 위한 장치 얻기 # ------------------------------------------------------------------------------------------ # -# 가능한 경우 GPU와 같은 하드웨어 가속기에서 모델을 학습하려고 합니다. -# `torch.cuda `_ 를 사용할 수 있는지 -# 확인하고 그렇지 않으면 CPU를 계속 사용합니다. - -device = "cuda" if torch.cuda.is_available() else "cpu" +# 가능한 경우 GPU 또는 MPS와 같은 하드웨어 가속기에서 모델을 학습하려고 합니다. +# `torch.cuda `_ 또는 `torch.backends.mps `_ +# 가 사용 가능한지 확인해보고, 그렇지 않으면 CPU를 계속 사용합니다. 
+ +device = ( + "cuda" + if torch.cuda.is_available() + else "mps" + if torch.backends.mps.is_available() + else "cpu" +) print(f"Using {device} device") ############################################## @@ -48,7 +54,7 @@ class NeuralNetwork(nn.Module): def __init__(self): - super(NeuralNetwork, self).__init__() + super().__init__() self.flatten = nn.Flatten() self.linear_relu_stack = nn.Sequential( nn.Linear(28*28, 512), diff --git a/beginner_source/basics/intro.py b/beginner_source/basics/intro.py index 4207bcccd..2aae4d0bb 100644 --- a/beginner_source/basics/intro.py +++ b/beginner_source/basics/intro.py @@ -36,7 +36,7 @@ 다음의 두 가지 방법으로 이 튜토리얼을 실행해볼 수 있습니다: -- **클라우드**: 시작하기 가장 쉬운 방법입니다! 각 섹션의 맨 위에는 "Run in Microsoft Learn" 링크가 있으며, 이 링크는 완전히 호스팅되는 환경에서 Microsoft Learn의 노트북을 엽니다. +- **클라우드**: 시작하기 가장 쉬운 방법입니다! 각 섹션의 맨 위에는 "Run in Microsoft Learn" 링크와 "Run in Google Colab" 링크가 있으며, 이 링크들은 각각 완전히 호스팅되는 환경에서 Microsoft Learn 또는 Google Colab의 노트북을 엽니다. - **로컬**: 먼저 로컬 컴퓨터에 PyTorch와 TorchVision을 설치해야 합니다 (`설치 방법 `_). 노트북을 내려받거나 코드를 원하는 IDE에 복사하세요. diff --git a/beginner_source/basics/optimization_tutorial.py b/beginner_source/basics/optimization_tutorial.py index b459bf649..67ade9650 100644 --- a/beginner_source/basics/optimization_tutorial.py +++ b/beginner_source/basics/optimization_tutorial.py @@ -13,7 +13,7 @@ ========================================================================== 이제 모델과 데이터가 준비되었으니, 데이터에 매개변수를 최적화하여 모델을 학습하고, 검증하고, 테스트할 차례입니다. -모델을 학습하는 과정은 반복적인 과정을 거칩니다; (*에폭(epoch)*\ 이라고 부르는) 각 반복 단계에서 모델은 출력을 추측하고, +모델을 학습하는 과정은 반복적인 과정을 거칩니다; 각 반복 단계에서 모델은 출력을 추측하고, 추측과 정답 사이의 오류(\ *손실(loss)*\ )를 계산하고, (`이전 장 `_\ 에서 본 것처럼) 매개변수에 대한 오류의 도함수(derivative)를 수집한 뒤, 경사하강법을 사용하여 이 파라미터들을 **최적화(optimize)**\ 합니다. 이 과정에 대한 자세한 설명은 `3Blue1Brown의 역전파 `__ 영상을 참고하세요. @@ -160,7 +160,7 @@ def train_loop(dataloader, model, loss_fn, optimizer): optimizer.step() if batch % 100 == 0: - loss, current = loss.item(), batch * len(X) + loss, current = loss.item(), (batch + 1) * len(X) print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]") diff --git a/beginner_source/basics/quickstart_tutorial.py b/beginner_source/basics/quickstart_tutorial.py index c7f7d9a33..fb36ac1b2 100644 --- a/beginner_source/basics/quickstart_tutorial.py +++ b/beginner_source/basics/quickstart_tutorial.py @@ -82,10 +82,16 @@ # ------------------------------------------------------------------------------------------ # PyTorch에서 신경망 모델은 `nn.Module `_ 을 # 상속받는 클래스(class)를 생성하여 정의합니다. ``__init__`` 함수에서 신경망의 계층(layer)들을 정의하고 ``forward`` 함수에서 -# 신경망에 데이터를 어떻게 전달할지 지정합니다. 가능한 경우 GPU로 신경망을 이동시켜 연산을 가속(accelerate)합니다. - -# 학습에 사용할 CPU나 GPU 장치를 얻습니다. -device = "cuda" if torch.cuda.is_available() else "cpu" +# 신경망에 데이터를 어떻게 전달할지 지정합니다. 가능한 경우 GPU 또는 MPS로 신경망을 이동시켜 연산을 가속(accelerate)합니다. + +# 학습에 사용할 CPU나 GPU, MPS 장치를 얻습니다. +device = ( + "cuda" + if torch.cuda.is_available() + else "mps" + if torch.backends.mps.is_available() + else "cpu" +) print(f"Using {device} device") # 모델을 정의합니다. @@ -148,7 +154,7 @@ def train(dataloader, model, loss_fn, optimizer): optimizer.step() if batch % 100 == 0: - loss, current = loss.item(), batch * len(X) + loss, current = loss.item(), (batch + 1) * len(X) print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]") ############################################################################## @@ -205,7 +211,7 @@ def test(dataloader, model, loss_fn): # # 모델을 불러오는 과정에는 모델 구조를 다시 만들고 상태 사전을 모델에 불러오는 과정이 포함됩니다. 
-model = NeuralNetwork() +model = NeuralNetwork().to(device) model.load_state_dict(torch.load("model.pth")) ############################################################# @@ -227,6 +233,7 @@ def test(dataloader, model, loss_fn): model.eval() x, y = test_data[0][0], test_data[0][1] with torch.no_grad(): + x = x.to(device) pred = model(x) predicted, actual = classes[pred[0].argmax(0)], classes[y] print(f'Predicted: "{predicted}", Actual: "{actual}"') diff --git a/beginner_source/basics/saveloadrun_tutorial.py b/beginner_source/basics/saveloadrun_tutorial.py index e94c0119d..cff004d14 100644 --- a/beginner_source/basics/saveloadrun_tutorial.py +++ b/beginner_source/basics/saveloadrun_tutorial.py @@ -26,14 +26,14 @@ # PyTorch 모델은 학습한 매개변수를 ``state_dict``\ 라고 불리는 내부 상태 사전(internal state dictionary)에 저장합니다. # 이 상태 값들은 ``torch.save`` 메소드를 사용하여 저장(persist)할 수 있습니다: -model = models.vgg16(pretrained=True) +model = models.vgg16(weights='IMAGENET1K_V1') torch.save(model.state_dict(), 'model_weights.pth') ########################## # 모델 가중치를 불러오기 위해서는, 먼저 동일한 모델의 인스턴스(instance)를 생성한 다음에 ``load_state_dict()`` 메소드를 사용하여 # 매개변수들을 불러옵니다. -model = models.vgg16() # 기본 가중치를 불러오지 않으므로 pretrained=True를 지정하지 않습니다. +model = models.vgg16() # 여기서는 ``weights`` 를 지정하지 않았으므로, 학습되지 않은 모델을 생성합니다. model.load_state_dict(torch.load('model_weights.pth')) model.eval() diff --git a/beginner_source/basics/tensorqs_tutorial.py b/beginner_source/basics/tensorqs_tutorial.py index 9dff0dfe4..04c4fd50c 100644 --- a/beginner_source/basics/tensorqs_tutorial.py +++ b/beginner_source/basics/tensorqs_tutorial.py @@ -142,6 +142,7 @@ # **산술 연산(Arithmetic operations)** # 두 텐서 간의 행렬 곱(matrix multiplication)을 계산합니다. y1, y2, y3은 모두 같은 값을 갖습니다. +# ``tensor.T`` 는 텐서의 전치(transpose)를 반환합니다. y1 = tensor @ tensor.T y2 = tensor.matmul(tensor.T) diff --git a/beginner_source/bettertransformer_tutorial.rst b/beginner_source/bettertransformer_tutorial.rst index 10302331b..96249d886 100644 --- a/beginner_source/bettertransformer_tutorial.rst +++ b/beginner_source/bettertransformer_tutorial.rst @@ -18,7 +18,7 @@ been updated to use the core library modules to benefit from fastpath accelerati Better Transformer offers two types of acceleration: -* Native multihead attention implementation for CPU and GPU to improvee overall execution efficiency. +* Native multihead attention (MHA) implementation for CPU and GPU to improve overall execution efficiency. * Exploiting sparsity in NLP inference. Because of variable input lengths, input tokens may contain a large number of padding tokens for which processing may be skipped, delivering significant speedups. @@ -124,6 +124,7 @@ Finally, we set the benchmark iteration count: 2.1 Run and benchmark inference on CPU with and without BT fastpath (native MHA only) We run the model on CPU, and collect profile information: + * The first run uses traditional ("slow path") execution. * The second run enables BT fastpath execution by putting the model in inference mode using `model.eval()` and disables gradient collection with `torch.no_grad()`. @@ -167,6 +168,7 @@ We disable the BT sparsity: We run the model on DEVICE, and collect profile information for native MHA execution on DEVICE: + * The first run uses traditional ("slow path") execution. * The second run enables BT fastpath execution by putting the model in inference mode using `model.eval()` and disables gradient collection with `torch.no_grad()`. 
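A minimal sketch of those two benchmark configurations, assuming the ``model`` and ``input_batch`` objects prepared earlier in the tutorial; the helper name and the profiler calls below are illustrative additions, not the tutorial's own benchmark code:

.. code-block:: python

    import torch
    from torch.profiler import profile, ProfilerActivity

    def profile_inference(model, input_batch, use_fastpath):
        if use_fastpath:
            # BT fastpath: inference mode plus disabled gradient collection.
            model.eval()
            grad_ctx = torch.no_grad()
        else:
            # Traditional "slow path": leave autograd enabled.
            grad_ctx = torch.enable_grad()
        with grad_ctx, profile(activities=[ProfilerActivity.CPU]) as prof:
            model(input_batch)  # for a Hugging Face encodings dict, call model(**input_batch)
        return prof.key_averages().table(sort_by="cpu_time_total")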
diff --git a/beginner_source/blitz/autograd_tutorial.py b/beginner_source/blitz/autograd_tutorial.py index 9948320f5..e377ce5c4 100644 --- a/beginner_source/blitz/autograd_tutorial.py +++ b/beginner_source/blitz/autograd_tutorial.py @@ -268,10 +268,6 @@ # 이러한 매개변수의 변화도가 필요하지 않다는 것을 미리 알고 있으면, 신경망 모델의 일부를 "고정(freeze)"하는 것이 유용합니다. # (이렇게 하면 autograd 연산량을 줄임으로써 성능 상의 이득을 제공합니다.) # -# DAG에서 제외하는 것이 중요한 또 다른 일반적인 사례(usecase)는 -# `미리 학습된 모델을 미세조정 `__ -# 하는 경우입니다. -# # 미세조정(finetuning)을 하는 과정에서, 새로운 정답(label)을 예측할 수 있도록 모델의 대부분을 고정한 뒤 일반적으로 분류 계층(classifier layer)만 변경합니다. # 이를 설명하기 위해 간단한 예제를 살펴보겠습니다. 이전과 마찬가지로 이미 학습된 resnet18 모델을 불러온 뒤 모든 매개변수를 고정합니다. diff --git a/beginner_source/blitz/neural_networks_tutorial.py b/beginner_source/blitz/neural_networks_tutorial.py index 8b86d65c7..bed2d8e4c 100644 --- a/beginner_source/blitz/neural_networks_tutorial.py +++ b/beginner_source/blitz/neural_networks_tutorial.py @@ -130,7 +130,7 @@ def forward(self, x): # - 신경망의 가중치를 갱신하는 것 # # 손실 함수 (Loss Function) -# ------------------------- +# --------------------------- # 손실 함수는 (output, target)을 한 쌍(pair)의 입력으로 받아, 출력(output)이 # 정답(target)으로부터 얼마나 멀리 떨어져 있는지 추정하는 값을 계산합니다. # @@ -172,7 +172,7 @@ def forward(self, x): ######################################################################## # 역전파(Backprop) -# ---------------- +# ------------------ # 오차(error)를 역전파하기 위해서는 ``loss.backward()`` 만 해주면 됩니다. # 기존에 계산된 변화도의 값을 누적 시키고 싶지 않다면 기존에 계산된 변화도를 0으로 만드는 # 작업이 필요합니다. @@ -210,7 +210,10 @@ def forward(self, x): # 실제로 많이 사용되는 가장 단순한 갱신 규칙은 확률적 경사하강법(SGD; Stochastic # Gradient Descent)입니다: # -# ``새로운 가중치(weight) = 가중치(weight) - 학습률(learning rate) * 변화도(gradient)`` +# .. code:: python +# +# # 새로운 가중치 = 가중치 - 학습률 * 변화도 +# weight = weight - learning_rate * gradient # # 간단한 Python 코드로 이를 구현해볼 수 있습니다: # @@ -223,18 +226,21 @@ def forward(self, x): # 신경망을 구성할 때 SGD, Nesterov-SGD, Adam, RMSProp 등과 같은 다양한 갱신 규칙을 # 사용하고 싶을 수 있습니다. 이를 위해서 ``torch.optim`` 라는 작은 패키지에 이러한 # 방법들을 모두 구현해두었습니다. 사용법은 매우 간단합니다: - -import torch.optim as optim - -# Optimizer를 생성합니다. -optimizer = optim.SGD(net.parameters(), lr=0.01) - -# 학습 과정(training loop)은 다음과 같습니다: -optimizer.zero_grad() # 변화도 버퍼를 0으로 -output = net(input) -loss = criterion(output, target) -loss.backward() -optimizer.step() # 업데이트 진행 +# +# .. code:: python +# +# import torch.optim as optim +# +# # Optimizer를 생성합니다. +# optimizer = optim.SGD(net.parameters(), lr=0.01) +# +# # 학습 과정(training loop)은 다음과 같습니다: +# optimizer.zero_grad() # 변화도 버퍼를 0으로 +# output = net(input) +# loss = criterion(output, target) +# loss.backward() +# optimizer.step() # 업데이트 진행 +# ############################################################### diff --git a/beginner_source/chatbot_tutorial.py b/beginner_source/chatbot_tutorial.py index e16489ede..6244d8536 100644 --- a/beginner_source/chatbot_tutorial.py +++ b/beginner_source/chatbot_tutorial.py @@ -35,28 +35,28 @@ # :align: center # :alt: bot # -# .. code:: python -# -# > hello? (안녕하세요?) -# Bot: hello . (안녕하세요.) -# > where am I? (여긴 어디죠?) -# Bot: you re in a hospital . (병원입니다.) -# > who are you? (당신은 누구시죠?) -# Bot: i m a lawyer . (변호사입니다.) -# > how are you doing? (어떻게 지내세요?) -# Bot: i m fine . (잘 지냅니다.) -# > are you my friend? (당신은 제 친구인가요?) -# Bot: no . (아뇨.) -# > you're under arrest (당신을 체포하겠습니다) -# Bot: i m trying to help you ! (난 당신을 도우려 하는 겁니다!) -# > i'm just kidding (농담이었어요) -# Bot: i m sorry . (미안하네요.) -# > where are you from? (어디서 오셨어요?) -# Bot: san francisco . (샌프란시스코요.) -# > it's time for me to leave (전 이제 가봐야겠네요) -# Bot: i know . (알겠습니다.) 
-# > goodbye (안녕히 계세요) -# Bot: goodbye . (안녕히 가세요.) +# .. code-block:: python +# +# > hello? (안녕하세요?) +# Bot: hello . (안녕하세요.) +# > where am I? (여긴 어디죠?) +# Bot: you re in a hospital . (병원입니다.) +# > who are you? (당신은 누구시죠?) +# Bot: i m a lawyer . (변호사입니다.) +# > how are you doing? (어떻게 지내세요?) +# Bot: i m fine . (잘 지냅니다.) +# > are you my friend? (당신은 제 친구인가요?) +# Bot: no . (아뇨.) +# > you're under arrest (당신을 체포하겠습니다) +# Bot: i m trying to help you ! (난 당신을 도우려 하는 겁니다!) +# > i'm just kidding (농담이었어요) +# Bot: i m sorry . (미안하네요.) +# > where are you from? (어디서 오셨어요?) +# Bot: san francisco . (샌프란시스코요.) +# > it's time for me to leave (전 이제 가봐야겠네요) +# Bot: i know . (알겠습니다.) +# > goodbye (안녕히 계세요) +# Bot: goodbye . (안녕히 가세요.) # # **이 튜토리얼의 핵심 내용** # @@ -86,7 +86,7 @@ ###################################################################### # 준비 단계 -# --------- +# ----------- # # 시작에 앞서, `여기 `__ 에서 # ZIP 파일 형태의 데이터를 내려받고, 현재 디렉토리 아래에 ``data/`` 라는 @@ -123,7 +123,7 @@ ###################################################################### # 데이터 읽기 & 전처리하기 -# ------------------------ +# -------------------------- # # 다음 단계는 데이터 파일의 형식을 재조정한 후, 우리가 작업하기 편한 # 구조로 읽어들이는 것입니다. @@ -159,21 +159,21 @@ def printLines(file, n=10): ###################################################################### # 원하는 형식의 데이터 파일로 만들기 -# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # 편의를 위해 데이터의 형식을 원하는 형태로 만들려고 합니다. 각 줄에 # *질의 문장* 과 *응답 문장* 의 쌍이 탭으로 구분되어 있게끔 하는 것입니다. # -# 다음의 함수를 통해 *utterances.jsonl* 원본 데이터 파일을 파싱하려 +# 다음의 함수를 통해 ``utterances.jsonl`` 원본 데이터 파일을 파싱하려 # 합니다. # # - ``loadLines`` 는 파일에 포함된 대사를 변환하여 항목(대사 ID ``lineID``, # 인물 ID ``characterID``, 영화 ID ``movieID``, 인물 ``character``, 대사 # 내용 ``text``)에 대한 사전 형태로 변환합니다 # - ``loadConversations`` 는 ``loadLines`` 를 통해 읽어들인 -# 대사(``lines``)의 항목(``fields``)를 *movie_conversations.txt* 에 나와 +# 대사( ``lines`` )의 항목( ``fields`` )를 *movie_conversations.txt* 에 나와 # 있는 내용에 맞춰 대화 형태로 묶습니다 -# - ``extractSentencePairs`` 는 대화(``conversations``)에서 문장 쌍을 +# - ``extractSentencePairs`` 는 대화( ``conversations`` )에서 문장 쌍을 # 추출합니다 # @@ -220,12 +220,12 @@ def extractSentencePairs(conversations): ###################################################################### -# 이제 이 함수들을 호출하여 새로운 파일인 *formatted_utterances.jsonl* 를 +# 이제 이 함수들을 호출하여 새로운 파일인 ``formatted_movie_lines.txt`` 를 # 만듭니다. # # 새 파일에 대한 경로를 정의합니다 -datafile = os.path.join(corpus, "formatted_utterances.jsonl") +datafile = os.path.join(corpus, "formatted_movie_lines.txt") delimiter = '\t' # 구분자에 대해 unescape 함수를 호출합니다 @@ -252,7 +252,7 @@ def extractSentencePairs(conversations): ###################################################################### # 데이터 읽고 정리하기 -# ~~~~~~~~~~~~~~~~~~~~ +# ~~~~~~~~~~~~~~~~~~~~~~ # # 다음에 해야 할 일은 어휘집을 만들고, 질의/응답 문장 쌍을 메모리로 # 읽어들이는 것입니다. @@ -265,9 +265,9 @@ def extractSentencePairs(conversations): # 이를 위해 우리는 ``Voc`` 라는 클래스를 만들어 단어에서 인덱스로의 # 매핑, 인덱스에서 단어로의 역 매핑, 각 단어의 등장 횟수, 전체 단어 수 # 등을 관리하려 합니다. 이 클래스는 어휘집에 새로운 단어를 추가하는 -# 메서드(``addWord``), 문장에 등장하는 모든 단어를 추가하는 -# 메서드(``addSentence``), 그리고 자주 등장하지 않는 단어를 정리하는 -# 메서드(``trim``)를 제공합니다. 단어를 정리하는 내용에 대해서는 뒤에서 +# 메서드( ``addWord``), 문장에 등장하는 모든 단어를 추가하는 +# 메서드( ``addSentence``), 그리고 자주 등장하지 않는 단어를 정리하는 +# 메서드( ``trim``)를 제공합니다. 단어를 정리하는 내용에 대해서는 뒤에서 # 좀 더 자세히 살펴보겠습니다. # @@ -332,7 +332,7 @@ def trim(self, min_count): # 합니다. 다음에는 모든 글자를 소문자로 변환하고, 알파벳도 아니고 기본적인 # 문장 부호도 아닌 글자는 제거합니다(정규화, ``normalizeString``). # 마지막으로는 학습할 때의 편의성을 위해서, 길이가 일정 기준을 초과하는, -# 즉 ``MAX_LENGTH`` 보다 긴 문장을 제거합니다(``filterPairs``). 
+# 즉 ``MAX_LENGTH`` 보다 긴 문장을 제거합니다( ``filterPairs``). # MAX_LENGTH = 10 # 고려할 문장의 최대 길이 @@ -369,7 +369,7 @@ def filterPair(p): # EOS 토큰을 위해 입력 시퀀스의 마지막 단어를 보존해야 합니다 return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH -# 조건식 filterPair에 따라 pairs를 필터링합니다 +# 조건식 ``filterPair`` 에 따라 pairs를 필터링합니다 def filterPairs(pairs): return [pair for pair in pairs if filterPair(pair)] @@ -446,7 +446,7 @@ def trimRareWords(voc, pairs, MIN_COUNT): ###################################################################### # 모델을 위한 데이터 준비하기 -# --------------------------- +# ----------------------------- # # 상당한 노력을 기울여 데이터를 전처리하고, 잘 정리하여 어휘집 객체와 # 문장 쌍의 리스트 형태로 만들어두긴 했지만, 결국 우리가 만들 모델에서 @@ -464,7 +464,7 @@ def trimRareWords(voc, pairs, MIN_COUNT): # 점에 유의해야 한다는 것을 뜻합니다. 같은 배치 안에서 크기가 다른 # 문장을 처리하기 위해서는 배치용 입력 텐서의 모양을 *(max_length, # batch_size)* 로 맞춰야 합니다. 이때 *max_length* 보다 짧은 문장에 -# 대해서는 *EOS 토큰* 뒤에 제로 토큰을 덧붙이면 됩니다. +# 대해서는 *EOS_token* 뒤에 제로 토큰을 덧붙이면 됩니다. # # 영어로 된 문장을 텐서로 변환하기 위해 단순히 그에 대응하는 인덱스를 # 사용하고(``indexesFromSentence``) 제로 토큰을 패딩한다고 해봅시다. @@ -489,7 +489,7 @@ def trimRareWords(voc, pairs, MIN_COUNT): # ``outputVar`` 함수는 ``inputVar`` 와 비슷한 작업을 수행하지만, ``lengths`` # 텐서를 반환하는 대신에 이진 마스크로 구성된 텐서와 목표 문장의 최대 # 길이를 같이 반환합니다. 이진 마스크 텐서는 출력에 해당하는 목표 텐서와 -# 그 모양이 같지만, 패딩 토큰(*PAD_token*)에 해당하는 경우에는 값이 0이며 +# 그 모양이 같지만, 패딩 토큰( *PAD_token* )에 해당하는 경우에는 값이 0이며 # 나머지 경우의 값은 1입니다. # # ``batch2TrainData`` 는 단순히 여러 쌍을 입력으로 받아서, 앞서 설명한 @@ -558,10 +558,10 @@ def batch2TrainData(voc, pair_batch): ###################################################################### # 모델 정의하기 -# ------------- +# --------------- # # Seq2Seq 모델 -# ~~~~~~~~~~~~ +# ~~~~~~~~~~~~~~ # # 우리 챗봇의 두뇌에 해당하는 모델은 sequence-to-sequence (seq2seq) # 모델입니다. seq2seq 모델의 목표는 가변 길이 시퀀스를 입력으로 받고, @@ -588,7 +588,7 @@ def batch2TrainData(voc, pair_batch): ###################################################################### # 인코더 -# ~~~~~~ +# ~~~~~~~ # # 인코더 RNN은 입력 시퀀스를 토큰 단위로(예를 들어, 단어 단위로) 한번에 # 하나씩 살펴보며 진행합니다. 그리고 각 단계마다 "출력" 벡터와 "은닉 @@ -627,7 +627,7 @@ def batch2TrainData(voc, pair_batch): # ``nn.utils.rnn.pack_padded_sequence`` 와 # ``nn.utils.rnn.pad_packed_sequence`` 를 통해 수행할 수 있습니다. # -# **계산 그래프:** +# **연산 그래프:** # # 1) 단어 인덱스를 임베딩으로 변환합니다. # 2) RNN 모듈을 위한 패딩된 배치 시퀀스를 패킹합니다. @@ -788,7 +788,7 @@ def forward(self, hidden, encoder_outputs): # 제공하려 합니다. 이는 임베딩된 단어 텐서와 GRU 출력의 모양이 둘 다 # *(1, batch_size, hidden_size)* 라는 의미입니다. # -# **계산 그래프:** +# **연산 그래프:** # # 1) 현재의 입력 단어에 대한 임베딩을 구합니다. # 2) 무방향 GRU로 포워드 패스를 수행합니다. @@ -861,10 +861,10 @@ def forward(self, input_step, last_hidden, encoder_outputs): ###################################################################### # 학습 프로시저 정의하기 -# ---------------------- +# ------------------------ # # Masked loss -# ~~~~~~~~~~~ +# ~~~~~~~~~~~~~ # # 우리는 패딩된 시퀀스 배치를 다루기 때문에 손실을 계산할 때 단순히 텐서의 # 모든 원소를 고려할 수는 없습니다. 우리는 ``maskNLLLoss`` 를 정의하여 @@ -883,7 +883,7 @@ def maskNLLLoss(inp, target, mask): ###################################################################### # 한 번의 학습 단계 -# ~~~~~~~~~~~~~~~~~ +# ~~~~~~~~~~~~~~~~~~~ # # ``train`` 함수에 학습을 한 단계(입력 배치 한 개에 대한) 진행하는 알고리즘이 # 나와 있습니다. @@ -927,7 +927,7 @@ def maskNLLLoss(inp, target, mask): # # .. warning:: # -# PyTorch의 RNN 모듈(``RNN``, ``LSTM``, ``GRU``)은 전체 입력 시퀀스(또는 +# PyTorch의 RNN 모듈( ``RNN``, ``LSTM``, ``GRU`` )은 전체 입력 시퀀스(또는 # 시퀀스의 배치)를 단순히 넣어주기만 하면 다른 비순환 레이어처럼 사용할 수 # 있습니다. 우리는 ``encoder`` 에서 ``GRU`` 레이어를 이런 식으로 사용합니다. 
# 그 안이 실제로 어떻게 되어 있는지를 살펴보면, 매 시간 단계마다 은닉 상태를 @@ -951,7 +951,7 @@ def train(input_variable, lengths, target_variable, mask, max_target_len, encode input_variable = input_variable.to(device) target_variable = target_variable.to(device) mask = mask.to(device) - # Lengths for rnn packing should always be on the cpu + # RNN 패킹의 길이는 항상 CPU에 위치해야 합니다 lengths = lengths.to("cpu") # 변수를 초기화합니다 @@ -1016,7 +1016,7 @@ def train(input_variable, lengths, target_variable, mask, max_target_len, encode ###################################################################### # 학습 단계 -# ~~~~~~~~~ +# ~~~~~~~~~~~ # # 이제 마지막으로 전체 학습 프로시저와 데이터를 하나로 엮을 때가 # 되었습니다. ``trainIters`` 함수는 주어진 모델, optimizer, 데이터 등을 @@ -1025,7 +1025,7 @@ def train(input_variable, lengths, target_variable, mask, max_target_len, encode # 함수에 옮겨 놓았기 때문입니다. # # 한 가지 주의할 점은 우리가 모델을 저장하려 할 때, 인코더와 디코더의 -# state_dicts (매개변수), optimizer의 state_dicts, 손실, 진행 단계 수 +# ``state_dicts`` (매개변수), optimizer의 ``state_dicts``, 손실, 진행 단계 수 # 등을 tarball로 만들어 저장한다는 점입니다. 모델을 이러한 방식으로 # 저장하면 checkpoint에 대해 아주 높은 수준의 유연성을 확보할 수 있게 # 됩니다. Checkpoint를 불러오고 나면, 우리는 모델 매개변수를 이용하여 @@ -1083,13 +1083,13 @@ def trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, deco ###################################################################### # 평가 정의하기 -# ------------- +# --------------- # # 모델을 학습시키고 나면 직접 봇과 대화를 나눠보고 싶어질 것입니다. 그러려면 # 먼저 모델이 인코딩된 입력을 어떻게 디코딩할지를 정의해줘야 합니다. # # 탐욕적 디코딩 -# ~~~~~~~~~~~~~ +# ~~~~~~~~~~~~~~~ # # 탐욕적 디코딩(Greedy decoding)은 우리가 학습 단계에서 teacher forcing을 # 적용하지 않았을 때 사용한 디코딩 방법입니다. 달리 말하면, 각 단계에 대해 @@ -1098,12 +1098,12 @@ def trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, deco # # 우리는 탐욕적 디코딩 연산을 수행할 수 있도록 ``GreedySearchDecoder`` # 클래스를 만들었습니다. 수행 과정에서 이 클래스의 인스턴스는 모양이 -# *(input_seq length, 1)* 인 입력 시퀀스(``input_seq``), 조종할 입력 -# 길이(``input_length``) 텐서, 그리고 응답 문장 길이의 제한을 나타내는 +# *(input_seq length, 1)* 인 입력 시퀀스( ``input_seq`` ), 조종할 입력 +# 길이( ``input_length`` ) 텐서, 그리고 응답 문장 길이의 제한을 나타내는 # ``max_length`` 를 입력으로 받습니다. 입력 시퀀서는 다음과 같은 계산 그래프에 # 의해 평가됩니다. # -# **계산 그래프:** +# **연산 그래프:** # # 1) 인코더 모델로 입력을 포워드 패스합니다. # 2) 인코더의 마지막 은닉 레이어가 디코더의 첫 번째 은닉 레이어의 입력이 되도록 준비합니다. @@ -1150,7 +1150,7 @@ def forward(self, input_seq, input_length, max_length): ###################################################################### # 내 텍스트 평가하기 -# ~~~~~~~~~~~~~~~~~~ +# ~~~~~~~~~~~~~~~~~~~~ # # 이제 디코딩 모델을 정의했으니, 문자열로 된 입력 시퀀스를 평가하는 함수를 # 작성해볼 수 있을 것입니다. ``evaluate`` 함수에 입력 시퀀스를 낮은 @@ -1231,8 +1231,8 @@ def evaluateInput(encoder, decoder, searcher, voc): # 모델을 설정합니다 model_name = 'cb_model' attn_model = 'dot' -#attn_model = 'general' -#attn_model = 'concat' +#``attn_model = 'general'`` +#``attn_model = 'concat'`` hidden_size = 500 encoder_n_layers = 2 decoder_n_layers = 2 @@ -1242,12 +1242,17 @@ def evaluateInput(encoder, decoder, searcher, voc): # 불러올 checkpoint를 설정합니다. 처음부터 시작할 때는 None으로 둡니다. loadFilename = None checkpoint_iter = 4000 -#loadFilename = os.path.join(save_dir, model_name, corpus_name, -# '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size), -# '{}_checkpoint.tar'.format(checkpoint_iter)) +############################################################# +# checkpoint로부터 불러오는 샘플 코드: +# +# .. 
code-block:: python +# +# loadFilename = os.path.join(save_dir, model_name, corpus_name, +# '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size), +# '{}_checkpoint.tar'.format(checkpoint_iter)) -# loadFilename이 제공되는 경우에는 모델을 불러옵니다 +# ``loadFilename`` 이 존재하는 경우에는 모델을 불러옵니다 if loadFilename: # 모델을 학습할 때와 같은 기기에서 불러오는 경우 checkpoint = torch.load(loadFilename) @@ -1309,7 +1314,7 @@ def evaluateInput(encoder, decoder, searcher, voc): encoder_optimizer.load_state_dict(encoder_optimizer_sd) decoder_optimizer.load_state_dict(decoder_optimizer_sd) -# cuda가 있다면 cuda를 설정합니다 +# CUDA가 있으면 CUDA를 설정합니다 for state in encoder_optimizer.state.values(): for k, v in state.items(): if isinstance(v, torch.Tensor): @@ -1329,12 +1334,12 @@ def evaluateInput(encoder, decoder, searcher, voc): ###################################################################### # 평가 수행하기 -# ~~~~~~~~~~~~~ +# ~~~~~~~~~~~~~~~ # # 여러분의 모델과 채팅을 해보고 싶다면 다음 블록을 수행하면 됩니다. # -# Dropout 레이어를 평가 모드로 설정합니다 +# Dropout 레이어를 평가( ``eval`` ) 모드로 설정합니다 encoder.eval() decoder.eval() @@ -1347,7 +1352,7 @@ def evaluateInput(encoder, decoder, searcher, voc): ###################################################################### # 맺음말 -# ------ +# -------- # # 이번 튜토리얼을 이것으로 마무리하겠습니다. 축하합니다! 여러분은 이제 생성 # 챗봇 모델을 만들기 위한 기초 지식을 습득했습니다. 만약 좀 더 관심이 있다면 diff --git a/beginner_source/colab.rst b/beginner_source/colab.rst index 4b301e250..07bf5fbcc 100644 --- a/beginner_source/colab.rst +++ b/beginner_source/colab.rst @@ -1,5 +1,29 @@ +Google Colab에서 튜토리얼 실행하기 +===================================== + +Google Colab에서 튜토리얼을 실행할 때, 튜토리얼이 제대로 동작하기 +위해서 충족해야 하는 추가적인 추가 요구 사항과 종속성(dependancy)이 +있을 수 있습니다. 이 섹션에서는 Google Colab에서 파이토치(PyTorch) 튜토리얼을 +성공적으로 실행하기 위해 다양한 설정을 구성하는 방법에 대해 설명합니다. + +Google Colab의 PyTorch 버전 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +공개(release)된지 얼마되지 않은 PyTorch 버전을 사용하는 튜토리얼을 실행하는 경우, +해당 버전이 아직 Google Colab에 반영되지 않았을 수 있습니다. +필요한 ``torch`` 와 호환되는 도메인 라이브러리(domain library)가 설치되어 있는지 +확인하려면 ``!pip list`` 를 실행하세요. + +만약 필요한 PyTorch 버전보다 낮은 버전이 설치되어 있는 경우, +다음 명령어를 실행하여 제거한 뒤, 다시 설치하세요: + +.. code-block:: python + !pip3 uninstall --yes torch torchaudio torchvision torchtext torchdata + !pip3 install torch torchaudio torchvision torchtext torchdata + + Colab에서 Google Drive의 튜토리얼 데이터 사용하기 -==================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 사용자가 Google Colab에서 튜토리얼과 관련된 노트북을 열 수 있도록 하는 새로운 기능이 튜토리얼에 추가되었습니다. 이 때, 보다 복잡한 튜토리얼을 실행하려면 diff --git a/beginner_source/data_loading_tutorial.py b/beginner_source/data_loading_tutorial.py index 2f3c7e198..12c0f9f1d 100644 --- a/beginner_source/data_loading_tutorial.py +++ b/beginner_source/data_loading_tutorial.py @@ -83,6 +83,7 @@ # 이미지와 랜드마크(landmark)를 보여주는 간단한 함수를 작성해보고, # 실제로 적용해보겠습니다. # + def show_landmarks(image, landmarks): """Show image with landmarks""" """ 랜드마크(landmark)와 이미지를 보여줍니다. """ @@ -123,7 +124,7 @@ class FaceLandmarksDataset(Dataset): def __init__(self, csv_file, root_dir, transform=None): """ - Args: + Arguments: csv_file (string): csv 파일의 경로 root_dir (string): 모든 이미지가 존재하는 디렉토리 경로 transform (callable, optional): 샘플에 적용될 Optional transform @@ -181,7 +182,7 @@ def __getitem__(self, idx): ###################################################################### # Transforms -# ---------- +# ------------ # # 위에서 볼 수 있었던 한가지 문제점은 샘플들이 다 같은 크기가 아니라는 것입니다. # 대부분의 신경망(neural networks)은 고정된 크기의 이미지라고 가정합니다. @@ -291,7 +292,7 @@ def __call__(self, sample): # .. 
note:: # 위 예시에서, `RandomCrop` 은 외부 라이브러리의 난수 생성기(random number generator; 이 경우, Numpy의 `np.random.int` )를 # 사용하고 있습니다. 이는 `DataLoader` 가 예상치 못한 동작을 하도록 할 수 있습니다. -# (https://pytorch.org/docs/stable/notes/faq.html#my-data-loader-workers-return-identical-random-numbers 를 참고하세요) +# (`여기 `_ 를 참고하세요) # 실제 상황에서는 `torch.randint` 와 같은 PyTorch가 제공하는 난수 생성기를 사용하는 것이 안전합니다. ###################################################################### @@ -398,7 +399,7 @@ def show_landmarks_batch(sample_batched): plt.title('Batch from dataloader') # Windows를 사용 중이라면, 다음 줄의 주석을 제거하고 for 반복문을 들여쓰기합니다. -# "num_workers"를 0으로 변경해야 할 수도 있습니다. +# ``num_workers`` 를 0으로 변경해야 할 수도 있습니다. # if __name__ == '__main__': for i_batch, sample_batched in enumerate(dataloader): @@ -416,7 +417,7 @@ def show_landmarks_batch(sample_batched): ###################################################################### # Afterword: torchvision -# ---------------------- +# ------------------------ # # 이번 튜토리얼에서는, 데이터셋 작성과 사용, 전이(transforms), 데이터를 불러오는 방법에 대해서 알아봤습니다. # ``torchvision`` 패키지는 몇몇의 일반적인 데이터셋과 전이(transforms)들을 제공합니다. @@ -438,21 +439,21 @@ def show_landmarks_batch(sample_batched): # 비슷하게, ``RandomHorizontalFlip`` , ``Scale`` 과 같이 ``PIL.Image`` 에서 작동하는 # 일반적인 전이(transforms)도 사용가능합니다. 이와 같이 데이터로더(dataloader)를 사용할 수 있습니다: :: # -# import torch -# from torchvision import transforms, datasets -# -# data_transform = transforms.Compose([ -# transforms.RandomSizedCrop(224), -# transforms.RandomHorizontalFlip(), -# transforms.ToTensor(), -# transforms.Normalize(mean=[0.485, 0.456, 0.406], -# std=[0.229, 0.224, 0.225]) -# ]) -# hymenoptera_dataset = datasets.ImageFolder(root='hymenoptera_data/train', -# transform=data_transform) -# dataset_loader = torch.utils.data.DataLoader(hymenoptera_dataset, -# batch_size=4, shuffle=True, -# num_workers=4) +# import torch +# from torchvision import transforms, datasets +# +# data_transform = transforms.Compose([ +# transforms.RandomSizedCrop(224), +# transforms.RandomHorizontalFlip(), +# transforms.ToTensor(), +# transforms.Normalize(mean=[0.485, 0.456, 0.406], +# std=[0.229, 0.224, 0.225]) +# ]) +# hymenoptera_dataset = datasets.ImageFolder(root='hymenoptera_data/train', +# transform=data_transform) +# dataset_loader = torch.utils.data.DataLoader(hymenoptera_dataset, +# batch_size=4, shuffle=True, +# num_workers=4) # # training code에 대한 예시를 알고 싶다면, # :doc:`transfer_learning_tutorial` 문서를 참고해주세요 diff --git a/beginner_source/dcgan_faces_tutorial.py b/beginner_source/dcgan_faces_tutorial.py index 0a2961bdb..624e47b3f 100644 --- a/beginner_source/dcgan_faces_tutorial.py +++ b/beginner_source/dcgan_faces_tutorial.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- """ DCGAN 튜토리얼 -============== +================ **저자**: `Nathan Inkawhich `_ - **번역**: `조민성 `_ +**번역**: `조민성 `_ """ @@ -121,22 +121,22 @@ # # 몇 가지 설정값들을 정의해봅시다: # -# - **dataroot** - 데이터셋 폴더의 경로입니다. 데이터셋에 관한건 다음 섹션에서 +# - ``dataroot`` - 데이터셋 폴더의 경로입니다. 데이터셋에 관한건 다음 섹션에서 # 더 자세히 설명하겠습니다. -# - **workers** - DataLoader에서 데이터를 불러올 때 사용할 쓰레드의 개수입니다. -# - **batch_size** - 학습에 사용할 배치 크기입니다. DCGAN에서는 128을 사용했습니다. -# - **image_size** - 학습에 사용되는 이미지의 크기입니다. +# - ``workers`` - DataLoader에서 데이터를 불러올 때 사용할 쓰레드의 개수입니다. +# - ``batch_size`` - 학습에 사용할 배치 크기입니다. DCGAN에서는 128을 사용했습니다. +# - ``image_size`` - 학습에 사용되는 이미지의 크기입니다. # 본 문서에서는 64x64의 크기를 기본으로 하나, 만일 다른 크기의 이미지를 사용한다면 # D와 G의 구조 역시 변경되어야 합니다. 더 자세한 정보를 위해선 # `이곳 `__ 을 확인해 보세요. -# - **nc** - 입력 이미지의 색 채널개수입니다. RGB 이미지이기 때문에 3으로 설정합니다. -# - **nz** - 잠재공간 벡터의 원소들 개수입니다. -# - **ngf** - 생성자를 통과할때 만들어질 특징 데이터의 채널개수입니다. 
-# - **ndf** - 구분자를 통과할때 만들어질 특징 데이터의 채널개수입니다. -# - **num_epochs** - 학습시킬 에폭 수입니다. 오래 학습시키는 것이 대부분 좋은 결과를 보이지만, 당연히도 시간이 오래걸리는 것이 단점입니다. -# - **lr** - 모델의 학습률입니다. DCGAN에서 사용된대로 0.0002로 설정합니다. -# - **beta1** - Adam 옵티마이저에서 사용할 beta1 하이퍼파라미터 값입니다. 역시나 논문에서 사용한대로 0.5로 설정했습니다. -# - **ngpu** - 사용가능한 GPU의 번호입니다. 0으로 두면 CPU에서 학습하고, 0보다 큰 수로 설정하면 각 숫자가 가리키는 GPU로 학습시킵니다. +# - ``nc`` - 입력 이미지의 색 채널개수입니다. RGB 이미지이기 때문에 3으로 설정합니다. +# - ``nz`` - 잠재공간 벡터의 원소들 개수입니다. +# - ``ngf`` - 생성자를 통과할때 만들어질 특징 데이터의 채널개수입니다. +# - ``ndf`` - 구분자를 통과할때 만들어질 특징 데이터의 채널개수입니다. +# - ``num_epochs`` - 학습시킬 에폭 수입니다. 오래 학습시키는 것이 대부분 좋은 결과를 보이지만, 당연히도 시간이 오래걸리는 것이 단점입니다. +# - ``lr`` - 모델의 학습률입니다. DCGAN에서 사용된대로 0.0002로 설정합니다. +# - ``beta1`` - Adam 옵티마이저에서 사용할 beta1 하이퍼파라미터 값입니다. 역시나 논문에서 사용한대로 0.5로 설정했습니다. +# - ``ngpu`` - 사용가능한 GPU의 번호입니다. 0으로 두면 CPU에서 학습하고, 0보다 큰 수로 설정하면 각 숫자가 가리키는 GPU로 학습시킵니다. # # 데이터셋의 경로 @@ -183,10 +183,10 @@ # 본 튜토리얼에서 사용할 데이터는 `Celeb-A Faces # dataset `__ 로, 해당 링크를 이용하거나 `Google # Drive `__ 에서 데이터를 받을 수 있습니다. -# 데이터를 받으면 *img_align_celeba.zip* 라는 파일을 보게될 겁니다. 다운로드가 끝나면 -# *celeba* 이라는 폴더를 새로 만들고, 해당 폴더에 해당 zip 파일을 압축해제 해주시면 됩니다. -# 압축 해제 후, 위에서 정의한 *dataroot* 변수에 방금 만든 *celeba* 폴더의 경로를 넣어주세요. -# 위의 작업이 끝나면 *celeba* 폴더의 구조는 다음과 같아야 합니다: +# 데이터를 받으면 ``img_align_celeba.zip`` 라는 파일을 보게될 겁니다. 다운로드가 끝나면 +# ``celeba`` 이라는 폴더를 새로 만들고, 해당 폴더에 해당 zip 파일을 압축해제 해주시면 됩니다. +# 압축 해제 후, 위에서 정의한 ``dataroot`` 변수에 방금 만든 ``celeba`` 폴더의 경로를 넣어주세요. +# 위의 작업이 끝나면 ``celeba`` 폴더의 구조는 다음과 같아야 합니다: # # :: # @@ -198,9 +198,10 @@ # -> 537394.jpg # ... # -# 이 과정들은 프로그램이 정상적으로 구동하기 위해서는 중요한 부분입니다. 이때 celeba 폴더안에 다시 폴더를 두는 이유는, -# ImageFolder 클래스가 데이터셋의 최상위 폴더에 서브폴더를 요구하기 때문입니다. -# 이제 데이터셋과 DataLoader의 설정을 끝냈습니다. +# 이 과정들은 프로그램이 정상적으로 구동하기 위해서는 중요한 부분입니다. +# 이때 ``celeba`` 폴더 안에 다시 폴더를 두는 이유는, +# ``ImageFolder`` 클래스가 데이터셋의 최상위 폴더에 서브폴더를 요구하기 때문입니다. +# 이제 ``Dataset`` 과 ``DataLoader`` 의 설정을 끝냈습니다. # 최종적으로 학습 데이터들을 시각화해봅시다. # @@ -230,23 +231,23 @@ ###################################################################### # 구현 -# ---- +# ------ # # 모델의 설정값들과 데이터들이 준비되었기 때문에, 드디어 모델의 구현으로 # 들어갈 수 있을 것 같습니다. 먼저 가중치 초기화에 대해 이야기 해보고, # 순서대로 생성자, 구분자, 손실 함수, 학습 방법들을 알아보겠습니다. # # 가중치 초기화 -# ~~~~~~~~~~~~~~ +# ~~~~~~~~~~~~~~~~ # -# DCGAN 논문에서는, 평균이 0이고 분산이 0.02인 정규분포을 이용해, -# 구분자와 생성자 모두 무작위 초기화를 진행하는 것이 좋다고 합니다. +# DCGAN 논문에서는, 평균이 0( ``mean=0`` )이고 분산이 0.02( ``stdev=0.02`` )인 +# 정규분포을 시용해, 구분자와 생성자 모두 무작위 초기화를 진행하는 것이 좋다고 합니다. # ``weights_init`` 함수는 매개변수로 모델을 입력받아, # 모든 합성곱 계층, 전치 합성곱 계층, 배치 정규화 계층을, 위에서 말한 조건대로 # 가중치들을 다시 초기화 시킵니다. 이 함수는 모델이 만들어지자 마자 바로 적용을 # 시키게 됩니다. -# netG와 netD에 적용시킬 커스텀 가중치 초기화 함수 +# ``netG`` 와 ``netD`` 에 적용시킬 커스텀 가중치 초기화 함수 def weights_init(m): classname = m.__class__.__name__ if classname.find('Conv') != -1: @@ -258,7 +259,7 @@ def weights_init(m): ###################################################################### # 생성자 -# ~~~~~~ +# ~~~~~~~~ # # 생성자 :math:`G` 는 잠재 공간 벡터 :math:`z` 를, 데이터 공간으로 # 변환시키도록 설계되었습니다. 우리에게 데이터라 함은 이미지이기 때문에, @@ -275,9 +276,9 @@ def weights_init(m): # .. figure:: /_static/img/dcgan_generator.png # :alt: dcgan_generator # -# 우리가 설정값 섹션에서 정의한 값들이 (*nz*, *ngf*, 그리고 -# *nc*) 생성자 모델 아키텍쳐에 어떻게 영향을 끼치는지 주목해주세요. *nz* 는 z 입력 벡터의 -# 길이, *ngf* 는 생성자를 통과하는 특징 데이터의 크기, 그리고 *nc* 는 출력 이미지의 +# 우리가 설정값 섹션에서 정의한 값들이 (``nz``, ``ngf``, 그리고 +# ``nc``) 생성자 모델 아키텍쳐에 어떻게 영향을 끼치는지 주목해주세요. ``nz`` 는 z 입력 벡터의 +# 길이, ``ngf`` 는 생성자를 통과하는 특징 데이터의 크기, 그리고 ``nc`` 는 출력 이미지의 # 채널 개수입니다 (RGB 이미지이기 때문에 3으로 설정을 했습니다). # 아래는 생성자의 코드입니다. 
# @@ -293,22 +294,22 @@ def __init__(self, ngpu): nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False), nn.BatchNorm2d(ngf * 8), nn.ReLU(True), - # 위의 계층을 통과한 데이터의 크기. (ngf*8) x 4 x 4 + # 위의 계층을 통과한 데이터의 크기. ``(ngf*8) x 4 x 4`` nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf * 4), nn.ReLU(True), - # 위의 계층을 통과한 데이터의 크기. (ngf*4) x 8 x 8 + # 위의 계층을 통과한 데이터의 크기. ``(ngf*4) x 8 x 8`` nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf * 2), nn.ReLU(True), - # 위의 계층을 통과한 데이터의 크기. (ngf*2) x 16 x 16 + # 위의 계층을 통과한 데이터의 크기. ``(ngf*2) x 16 x 16`` nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False), nn.BatchNorm2d(ngf), nn.ReLU(True), - # 위의 계층을 통과한 데이터의 크기. (ngf) x 32 x 32 + # 위의 계층을 통과한 데이터의 크기. ``(ngf) x 32 x 32`` nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False), nn.Tanh() - # 위의 계층을 통과한 데이터의 크기. (nc) x 64 x 64 + # 위의 계층을 통과한 데이터의 크기. ``(nc) x 64 x 64`` ) def forward(self, input): @@ -324,12 +325,12 @@ def forward(self, input): # 생성자를 만듭니다 netG = Generator(ngpu).to(device) -# 필요한 경우 multi-gpu를 설정 해주세요 +# 필요한 경우 multi-GPU를 설정 해주세요 if (device.type == 'cuda') and (ngpu > 1): netG = nn.DataParallel(netG, list(range(ngpu))) -# 모든 가중치의 평균을 0, 분산을 0.02로 초기화 하기 위해 -# weight_init 함수를 적용시킵니다 +# 모든 가중치의 평균을 0( ``mean=0`` ), 분산을 0.02( ``stdev=0.02`` )로 초기화하기 위해 +# ``weight_init`` 함수를 적용시킵니다 netG.apply(weights_init) # 모델의 구조를 출력합니다 @@ -340,7 +341,7 @@ def forward(self, input): # 구분자 # ~~~~~~ # -# 앞서 언급했듯, 구분자 :math:`D`는 입력 이미지가 진짜 이미지인지 (혹은 반대로 가짜 이미지인지) +# 앞서 언급했듯, 구분자 :math:`D` 는 입력 이미지가 진짜 이미지인지 (혹은 반대로 가짜 이미지인지) # 판별하는 전통적인 이진 분류 신경망으로 볼 수 있습니다. 이때 :math:`D` 는 # 3x64x64 이미지를 입력받아, Conv2d, BatchNorm2d, 그리고 LeakyReLU 계층을 통과시켜 # 데이터를 가공시키고, 마지막 출력에서 Sigmoid 함수를 이용하여 @@ -362,22 +363,22 @@ def __init__(self, ngpu): super(Discriminator, self).__init__() self.ngpu = ngpu self.main = nn.Sequential( - # 입력 데이터의 크기는 (nc) x 64 x 64 입니다 + # 입력 데이터의 크기는 ``(nc) x 64 x 64`` 입니다 nn.Conv2d(nc, ndf, 4, 2, 1, bias=False), nn.LeakyReLU(0.2, inplace=True), - # 위의 계층을 통과한 데이터의 크기. (ndf) x 32 x 32 + # 위의 계층을 통과한 데이터의 크기. ``(ndf) x 32 x 32`` nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 2), nn.LeakyReLU(0.2, inplace=True), - # 위의 계층을 통과한 데이터의 크기. (ndf*2) x 16 x 16 + # 위의 계층을 통과한 데이터의 크기. ``(ndf*2) x 16 x 16`` nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 4), nn.LeakyReLU(0.2, inplace=True), - # 위의 계층을 통과한 데이터의 크기. (ndf*4) x 8 x 8 + # 위의 계층을 통과한 데이터의 크기. ``(ndf*4) x 8 x 8`` nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False), nn.BatchNorm2d(ndf * 8), nn.LeakyReLU(0.2, inplace=True), - # 위의 계층을 통과한 데이터의 크기. (ndf*8) x 4 x 4 + # 위의 계층을 통과한 데이터의 크기. ``(ndf*8) x 4 x 4`` nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False), nn.Sigmoid() ) @@ -394,12 +395,12 @@ def forward(self, input): # 구분자를 만듭니다 netD = Discriminator(ngpu).to(device) -# 필요한 경우 multi-gpu를 설정 해주세요 +# 필요한 경우 multi-GPU를 설정 해주세요 if (device.type == 'cuda') and (ngpu > 1): netD = nn.DataParallel(netD, list(range(ngpu))) -# 모든 가중치의 평균을 0, 분산을 0.02로 초기화 하기 위해 -# weight_init 함수를 적용시킵니다 +# 모든 가중치의 평균을 0( ``mean=0`` ), 분산을 0.02( ``stdev=0.02`` )로 초기화하기 위해 +# ``weight_init`` 함수를 적용시킵니다 netD.apply(weights_init) # 모델의 구조를 출력합니다 @@ -412,7 +413,7 @@ def forward(self, input): # # :math:`D` 와 :math:`G` 의 설정을 끝냈으니, 이제 손실함수와 옵티마이저를 정하여 # 학습을 구체화시킬 시간입니다. 손실함수로는 Binary Cross Entropy loss -# (`BCELoss `__) +# (`BCELoss `__) # 를 사용할겁니다. 해당함수는 아래의 식으로 파이토치에 구현되어 있습니다: # # .. 
math:: \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad l_n = - \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log (1 - x_n) \right] @@ -434,7 +435,7 @@ def forward(self, input): # 입력하면, 그 출력값을 기반으로 생성자의 상태를 확인 할 수 있습니다. # -# BCELoss 함수의 인스턴스를 생성합니다 +# ``BCELoss`` 함수의 인스턴스를 초기화합니다 criterion = nn.BCELoss() # 생성자의 학습상태를 확인할 잠재 공간 벡터를 생성합니다 @@ -457,10 +458,10 @@ def forward(self, input): # 실제 모델을 학습시키는 방법을 알아보겠습니다. 주의를 기울일 것은, GAN을 학습시키는 건 # 관례적인 기술들의 집합이기 때문에, 잘못된 하이퍼파라미터의 설정은 # 모델의 학습을 망가뜨릴 수 있습니다. 무엇이 잘못되었는지 알아내는 것 조차도 힘들죠. -# 그러한 이유로, 본 튜토리얼에서는 Goodfellow의 논문에서 서술된 Algorithm 1을 기반으로, -# `ganhacks `__ 에서 사용된 몇가지 괜찮은 테크닉들을 +# 그러한 이유로, 본 튜토리얼에서는 `Goodfellow’s paper `__ +# 에서 서술된 Algorithm 1을 기반으로, `ganhacks `__ 에서 사용된 몇가지 괜찮은 테크닉들을 # 더할 것입니다. 앞서 몇번 설명했지만, 우리의 의도는 “진짜 혹은 가짜 이미지를 구성”하고, -# :math:`logD(G(z))` 를 최대화하는 G의 목적함수를 최적화 시키는 겁니다. 학습과정은 크게 두가지로 나눕니다. +# :math:`log(D(G(z)))` 를 최대화하는 G의 목적함수를 최적화 시키는 겁니다. 학습과정은 크게 두가지로 나눕니다. # Part 1은 구분자를, Part 2는 생성자를 업데이트하는 과정입니다. # # **Part 1 - 구분자의 학습** @@ -468,7 +469,7 @@ def forward(self, input): # 구분자의 목적은 주어진 입력값이 진짜인지 가짜인지 판별하는 것임을 상기합시다. # Goodfellow의 말을 빌리자면, 구분자는 “변화도(gradient)를 상승(ascending)시키며 훈련”하게 됩니다. # 실전적으로 얘기하면, :math:`log(D(x)) + log(1-D(G(z)))` 를 최대화시키는 것과 같습니다. -# ganhacks에서 미니 배치(mini-batch)를 분리하여 사용한 개념을 가져와서, +# `ganhacks `__ 에서 미니 배치(mini-batch)를 분리하여 사용한 개념을 가져와서, # 우리 역시 두가지 스텝으로 분리해 계산을 해보겠습니다. 먼저, # 진짜 데이터들로만 이루어진 배치를 만들어 :math:`D` 에 통과시킵니다. 그 출력값으로 (:math:`log(D(x))`) 의 손실값을 계산하고, # 역전파 과정에서의 변화도들을 계산합니다. 여기까지가 첫번째 스텝입니다. 두번째 스텝에서는, 오로지 가짜 데이터들로만 @@ -484,11 +485,11 @@ def forward(self, input): # 위해서는 : Part 1에서 한대로 구분자를 이용해 생성자의 출력값을 판별해주고, *진짜 라벨값* 을 이용해 G의 손실값을 구해줍니다. # 그러면 구해진 손실값으로 변화도를 구하고, 최종적으로는 옵티마이저를 이용해 G의 가중치들을 업데이트시켜주면 됩니다. # 언뜻 볼때는, 생성자가 만들어낸 *가짜* 이미지에 *진짜* 라벨을 사용하는것이 직관적으로 위배가 될테지만, 이렇게 라벨을 -# 바꿈으로써 :math:`log(x)` 라는 BCELoss의 일부분을 사용할 수 있게 합니다 (앞서 우리는 BCELoss에서 라벨을 이용해 원하는 로그 계산 +# 바꿈으로써 :math:`log(x)` 라는 ``BCELoss`` 의 일부분을 사용할 수 있게 합니다 (앞서 우리는 BCELoss에서 라벨을 이용해 원하는 로그 계산 # 요소를 고를 수 있음을 알아봤습니다). # # 마무리로 G의 훈련 상태를 알아보기 위하여, 몇가지 통계적인 수치들과, fixed_noise를 통과시킨 -# 결과를 화면에 출력하는 코드를 추가하겠습니다. 이때 통계적인 수치들이라 함은 : +# 결과를 화면에 출력하는 코드를 추가하겠습니다. 이때 통계적인 수치들이라 함은: # # - **Loss_D** - 진짜 데이터와 가짜 데이터들 모두에서 구해진 손실값. (:math:`log(D(x)) + log(1 - D(G(z)))`). # - **Loss_G** - 생성자의 손실값. :math:`log(D(G(z)))` @@ -589,7 +590,7 @@ def forward(self, input): ###################################################################### # 결과 -# ---- +# ------ # # 결과를 알아봅시다. 이 섹션에서는 총 세가지를 확인할겁니다. # 첫번째는 G와 D의 손실값들이 어떻게 변했는가, 두번째는 매 에폭마다 @@ -653,7 +654,7 @@ def forward(self, input): ###################################################################### # 이제 어디로 여행을 떠나볼까요? -# ------------------------------ +# -------------------------------- # # 드디어 DCGAN이 끝났습니다! 하지만 더 알아볼 것들이 많이 남아있죠. # 무엇을 더 시도해볼 수 있을까요? diff --git a/beginner_source/ddp_series_fault_tolerance.rst b/beginner_source/ddp_series_fault_tolerance.rst index e141b4a7f..2bc63d7ec 100644 --- a/beginner_source/ddp_series_fault_tolerance.rst +++ b/beginner_source/ddp_series_fault_tolerance.rst @@ -42,8 +42,8 @@ Follow along with the video below or on `youtube `__. +- You don't need to set environment variables or explicitly pass the ``rank`` and ``world_size``; ``torchrun`` assigns this along with several other `environment variables `__. - No need to call ``mp.spawn`` in your script; you only need a generic ``main()`` entrypoint, and launch the script with ``torchrun``. This way the same script can be run in non-distributed as well as single-node and multinode setups. 
- Gracefully restarting training from the last saved training snapshot @@ -117,7 +117,7 @@ Process group initialization - os.environ["MASTER_PORT"] = "12355" - init_process_group(backend="nccl", rank=rank, world_size=world_size) + init_process_group(backend="nccl") - + torch.cuda.set_device(int(os.environ["LOCAL_RANK"])) Use Torchrun-provided env variables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/beginner_source/ddp_series_multigpu.rst b/beginner_source/ddp_series_multigpu.rst index baf92d8f8..5d25bfa62 100644 --- a/beginner_source/ddp_series_multigpu.rst +++ b/beginner_source/ddp_series_multigpu.rst @@ -15,13 +15,13 @@ Authors: `Suraj Subramanian `__ - How to migrate a single-GPU training script to multi-GPU via DDP - Setting up the distributed process group - Saving and loading models in a distributed setup - + .. grid:: 1 .. grid-item:: :octicon:`code-square;1.0em;` View the code used in this tutorial on `GitHub `__ - + .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites * High-level overview of `how DDP works `__ @@ -40,11 +40,11 @@ In the `previous tutorial `__, we got a high-level overv In this tutorial, we start with a single-GPU training script and migrate that to running it on 4 GPUs on a single node. Along the way, we will talk through important concepts in distributed training while implementing them in our code. -.. note:: - If your model contains any ``BatchNorm`` layer, it needs to be converted to ``SyncBatchNorm`` to sync the running stats of ``BatchNorm`` +.. note:: + If your model contains any ``BatchNorm`` layers, it needs to be converted to ``SyncBatchNorm`` to sync the running stats of ``BatchNorm`` layers across replicas. - Use the helper function + Use the helper function `torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) `__ to convert all ``BatchNorm`` layers in the model to ``SyncBatchNorm``. @@ -57,7 +57,7 @@ Imports ~~~~~~~ - ``torch.multiprocessing`` is a PyTorch wrapper around Python's native multiprocessing -- The dsitributed process group contains all the processes that can +- The distributed process group contains all the processes that can communicate and synchronize with each other. .. code:: diff @@ -65,7 +65,7 @@ Imports import torch import torch.nn.functional as F from utils import MyTrainDataset - + + import torch.multiprocessing as mp + from torch.utils.data.distributed import DistributedSampler + from torch.nn.parallel import DistributedDataParallel as DDP @@ -83,6 +83,8 @@ Constructing the process group initializes the distributed process group. - Read more about `choosing a DDP backend `__ +- `set_device `__ + sets the default GPU for each process. This is important to prevent hangs or excessive memory utilization on `GPU:0` .. code:: diff @@ -95,6 +97,7 @@ Constructing the process group + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "12355" + init_process_group(backend="nccl", rank=rank, world_size=world_size) + + torch.cuda.set_device(rank) Constructing the DDP model @@ -123,7 +126,7 @@ Distributing input data + sampler=DistributedSampler(train_dataset), ) -- Calling the ``set_epoch()`` method on the ``DistributedSampler`` at the beginning of each epoch is necessary to make shuffling work +- Calling the ``set_epoch()`` method on the ``DistributedSampler`` at the beginning of each epoch is necessary to make shuffling work properly across multiple epochs. Otherwise, the same ordering will be used in each epoch. .. 
code:: diff @@ -138,10 +141,10 @@ Distributing input data Saving model checkpoints ~~~~~~~~~~~~~~~~~~~~~~~~ -- We only need to save model checkpoints from one process. Without this +- We only need to save model checkpoints from one process. Without this condition, each process would save its copy of the identical mode. Read more on saving and loading models with - DDP `here `__ + DDP `here `__ .. code:: diff @@ -156,7 +159,7 @@ Saving model checkpoints .. warning:: `Collective calls `__ are functions that run on all the distributed processes, and they are used to gather certain states or values to a specific process. Collective calls require all ranks to run the collective code. - In this example, `_save_checkpoint` should not have any collective calls because it is only run on the ``rank:0`` process. + In this example, `_save_checkpoint` should not have any collective calls because it is only run on the ``rank:0`` process. If you need to make any collective calls, it should be before the ``if self.gpu_id == 0`` check. @@ -167,7 +170,7 @@ Running the distributed training job ``world_size``. - ``rank`` is auto-allocated by DDP when calling `mp.spawn `__. -- ``world_size`` is the number of processes across the training job. For GPU training, +- ``world_size`` is the number of processes across the training job. For GPU training, this corresponds to the number of GPUs in use, and each process works on a dedicated GPU. .. code:: diff @@ -177,11 +180,11 @@ Running the distributed training job + ddp_setup(rank, world_size) dataset, model, optimizer = load_train_objs() train_data = prepare_dataloader(dataset, batch_size=32) - - trainer = Trainer(model, dataset, optimizer, device, save_every) - + trainer = Trainer(model, dataset, optimizer, rank, save_every) + - trainer = Trainer(model, train_data, optimizer, device, save_every) + + trainer = Trainer(model, train_data, optimizer, rank, save_every) trainer.train(total_epochs) + destroy_process_group() - + if __name__ == "__main__": import sys total_epochs = int(sys.argv[1]) @@ -198,6 +201,6 @@ Further Reading - `Fault Tolerant distributed training `__ (next tutorial in this series) - `Intro to DDP `__ (previous tutorial in this series) -- `Getting Started with DDP `__ +- `Getting Started with DDP `__ - `Process Group initialization `__ diff --git a/beginner_source/ddp_series_theory.rst b/beginner_source/ddp_series_theory.rst index 8e4935231..963f8f9c6 100644 --- a/beginner_source/ddp_series_theory.rst +++ b/beginner_source/ddp_series_theory.rst @@ -19,7 +19,7 @@ Authors: `Suraj Subramanian `__ .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites - * Familiarity with `basic non-distributed training `__ in PyTorch + * Familiarity with `basic non-distributed training `__ in PyTorch Follow along with the video below or on `youtube `__. @@ -40,7 +40,7 @@ algorithm `__ +`DataParallel `__ is an older approach to data parallelism. DP is trivially simple (with just one extra line of code) but it is much less performant. 
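That one extra line looks roughly like the sketch below; the ``nn.Linear`` stand-in model is only for illustration and is not part of this series' training script:

.. code-block:: python

    import torch
    import torch.nn as nn

    model = nn.Linear(20, 10)  # placeholder for any single-GPU model

    # DataParallel: a single process, one extra line, multithreaded scatter/gather.
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to("cuda" if torch.cuda.is_available() else "cpu")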
DDP improves upon the architecture in a few ways: @@ -54,8 +54,8 @@ DDP improves upon the architecture in a few ways: | | machines | +---------------------------------------+------------------------------+ | Slower; uses multithreading on a | Faster (no GIL contention) | -| single process and runs into GIL | because it uses | -| contention | multiprocessing | +| single process and runs into Global | because it uses | +| Interpreter Lock (GIL) contention | multiprocessing | +---------------------------------------+------------------------------+ Further Reading diff --git a/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py b/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py index 0812992f0..17b7d663b 100644 --- a/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py +++ b/beginner_source/deploy_seq2seq_hybrid_frontend_tutorial.py @@ -39,7 +39,7 @@ # the Python runtime. # # The API for converting eager-mode PyTorch programs into TorchScript is -# found in the torch.jit module. This module has two core modalities for +# found in the ``torch.jit`` module. This module has two core modalities for # converting an eager-mode model to a TorchScript graph representation: # **tracing** and **scripting**. The ``torch.jit.trace`` function takes a # module or function and a set of example inputs. It then runs the example @@ -74,18 +74,18 @@ ###################################################################### -# Acknowledgements +# Acknowledgments # ---------------- # # This tutorial was inspired by the following sources: # -# 1) Yuan-Kuei Wu’s pytorch-chatbot implementation: +# 1) Yuan-Kuei Wu's pytorch-chatbot implementation: # https://github.com/ywk991112/pytorch-chatbot # -# 2) Sean Robertson’s practical-pytorch seq2seq-translation example: +# 2) Sean Robertson's practical-pytorch seq2seq-translation example: # https://github.com/spro/practical-pytorch/tree/master/seq2seq-translation # -# 3) FloydHub’s Cornell Movie Corpus preprocessing code: +# 3) FloydHub's Cornell Movie Corpus preprocessing code: # https://github.com/floydhub/textutil-preprocess-cornell-movie-corpus # @@ -290,7 +290,7 @@ def __init__(self, hidden_size, embedding, n_layers=1, dropout=0): self.hidden_size = hidden_size self.embedding = embedding - # Initialize GRU; the input_size and hidden_size params are both set to 'hidden_size' + # Initialize GRU; the ``input_size`` and ``hidden_size`` parameters are both set to 'hidden_size' # because our input size is a word embedding with number of features == hidden_size self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout), bidirectional=True) @@ -525,7 +525,7 @@ def forward(self, input_step, last_hidden, encoder_outputs): # we can use function type annotations as introduced in `PEP # 3107 `__. In addition, # it is possible to declare arguments of different types using -# MyPy-style type annotations (see +# Mypy-style type annotations (see # `doc `__). 
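# As a small, self-contained illustration (a hypothetical helper, not one of
# the chatbot's own functions), annotations like these tell TorchScript that
# the arguments are ``int`` rather than the default ``Tensor`` type:
#
# .. code-block:: python
#
#     import torch
#
#     @torch.jit.script
#     def clip_length(length: int, max_length: int) -> int:
#         if length > max_length:
#             return max_length
#         return length
#
#     print(clip_length(12, 10))  # prints 10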
# # @@ -618,7 +618,7 @@ def evaluate(searcher, voc, sentence, max_length=MAX_LENGTH): return decoded_words -# Evaluate inputs from user input (stdin) +# Evaluate inputs from user input (``stdin``) def evaluateInput(searcher, voc): input_sentence = '' while(1): @@ -638,7 +638,7 @@ def evaluateInput(searcher, voc): except KeyError: print("Error: Encountered unknown word.") -# Normalize input sentence and call evaluate() +# Normalize input sentence and call ``evaluate()`` def evaluateExample(sentence, searcher, voc): print("> " + sentence) # Normalize sentence @@ -653,7 +653,7 @@ def evaluateExample(sentence, searcher, voc): # Load Pretrained Parameters # -------------------------- # -# Ok, its time to load our model! +# Now, let's load our model! # # Use hosted model # ~~~~~~~~~~~~~~~~ @@ -671,7 +671,7 @@ def evaluateExample(sentence, searcher, voc): # Use your own model # ~~~~~~~~~~~~~~~~~~ # -# To load your own pre-trained model: +# To load your own pretrained model: # # 1) Set the ``loadFilename`` variable to the path to the checkpoint file # that you wish to load. Note that if you followed the convention for @@ -691,9 +691,9 @@ def evaluateExample(sentence, searcher, voc): # ~~~~~~~~~~~~~~~~~~~~~~ # # Notice that we initialize and load parameters into our encoder and -# decoder models as usual. If you are using tracing mode(`torch.jit.trace`) -# for some part of your models, you must call .to(device) to set the device -# options of the models and .eval() to set the dropout layers to test mode +# decoder models as usual. If you are using tracing mode(``torch.jit.trace``) +# for some part of your models, you must call ``.to(device)`` to set the device +# options of the models and ``.eval()`` to set the dropout layers to test mode # **before** tracing the models. `TracedModule` objects do not inherit the # ``to`` or ``eval`` methods. Since in this tutorial we are only using # scripting instead of tracing, we only need to do this before we do @@ -706,7 +706,7 @@ def evaluateExample(sentence, searcher, voc): # Configure models model_name = 'cb_model' attn_model = 'dot' -#attn_model = 'general' +#``attn_model = 'general'`` #attn_model = 'concat' hidden_size = 500 encoder_n_layers = 2 @@ -717,7 +717,13 @@ def evaluateExample(sentence, searcher, voc): # If you're loading your own model # Set checkpoint to load from checkpoint_iter = 4000 -# loadFilename = os.path.join(save_dir, model_name, corpus_name, + +############################################################# +# Sample code to load from a checkpoint: +# +# .. code-block:: python +# +# loadFilename = os.path.join(save_dir, model_name, corpus_name, # '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size), # '{}_checkpoint.tar'.format(checkpoint_iter)) @@ -743,13 +749,13 @@ def evaluateExample(sentence, searcher, voc): # Initialize encoder & decoder models encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout) decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout) -# Load trained model params +# Load trained model parameters encoder.load_state_dict(encoder_sd) decoder.load_state_dict(decoder_sd) # Use appropriate device encoder = encoder.to(device) decoder = decoder.to(device) -# Set dropout layers to eval mode +# Set dropout layers to ``eval`` mode encoder.eval() decoder.eval() print('Models built and ready to go!') @@ -794,7 +800,7 @@ def evaluateExample(sentence, searcher, voc): # data-dependent control flow.
In the case of scripting, we do necessary # language changes to make sure the implementation complies with # TorchScript. We initialize the scripted searcher the same way that we -# would initialize an un-scripted variant. +# would initialize an unscripted variant. # ### Compile the whole greedy search model to TorchScript model @@ -847,7 +853,7 @@ def evaluateExample(sentence, searcher, voc): # Use appropriate device scripted_searcher.to(device) -# Set dropout layers to eval mode +# Set dropout layers to ``eval`` mode scripted_searcher.eval() # Evaluate examples @@ -855,8 +861,8 @@ def evaluateExample(sentence, searcher, voc): for s in sentences: evaluateExample(s, scripted_searcher, voc) -# Evaluate your input -#evaluateInput(traced_encoder, traced_decoder, scripted_searcher, voc) +# Evaluate your input by running +# ``evaluateInput(traced_encoder, traced_decoder, scripted_searcher, voc)`` ###################################################################### diff --git a/beginner_source/dist_overview.rst b/beginner_source/dist_overview.rst index 11b32a363..542eb31e0 100644 --- a/beginner_source/dist_overview.rst +++ b/beginner_source/dist_overview.rst @@ -3,7 +3,7 @@ PyTorch Distributed Overview **Author**: `Shen Li `_ .. note:: - |edit| View and edit this tutorial in `github `__. + |edit| View and edit this tutorial in `github `__. This is the overview page for the ``torch.distributed`` package. The goal of this page is to categorize documents into different topics and briefly @@ -126,7 +126,7 @@ DDP materials are listed below: described in the `Single-Machine Model Parallel Best Practices <../intermediate/model_parallel_tutorial.html>`__ tutorial. -3. The `Launching and configuring distributed data parallel applications `__ +3. The `Launching and configuring distributed data parallel applications `__ document shows how to use the DDP launching script. 4. The `Shard Optimizer States With ZeroRedundancyOptimizer <../recipes/zero_redundancy_optimizer.html>`__ recipe demonstrates how `ZeroRedundancyOptimizer `__ diff --git a/beginner_source/fgsm_tutorial.py b/beginner_source/fgsm_tutorial.py index 2257a24a1..45c72972f 100644 --- a/beginner_source/fgsm_tutorial.py +++ b/beginner_source/fgsm_tutorial.py @@ -13,15 +13,15 @@ 이 튜토리얼은 ML 모델들의 보안 취약점에 대한 인식을 높이고, 요즘 화두가 되고있는 적대적 머신 러닝에 대한 통찰력을 제공할 것입니다. 이미지에 눈치챌 수 없는 작은 변화(perturbation)를 추가하면 모델 성능이 크게 달라질 수 있다는 사실에 놀랄 수 있습니다. 이번 튜토리얼에서는 이미지 분류기의 예제를 통해 위 내용에 대해 살펴볼 것입니다. -특히 우리는 가장 많이 사용되는 공격 방법 중 하나인 FGSM (Fast Gradient Sign Attack)을 이용해 MNIST 분류기를 속여 볼 것입니다. - +특히 우리는 가장 많이 사용되는 공격 방법 중 하나인 FGSM (Fast Gradient Sign Attack)을 이용해 MNIST 분류기를 속여 볼 것입니다. + """ ###################################################################### # 위협 모델 # ------------ -# +# # 상황에 따라 다양한 범주의 적대적 공격이 있는데 각각 목표가 다르고 공격자가 알고 있는 정보 # 대한 가정도 다릅니다. 그러나 보통 가장 중요한 목표는 입력 데이터에 최소한의 작은 변화를 # 추가하여 이것이 의도적으로 잘못 분류되게 하는 것입니다. 공격자가 가지고 있는 정보에 대한 @@ -37,10 +37,10 @@ # # 이 경우 FGSM 공격은 *오분류* 를 목표로 하는 화이트 박스 공격입니다. # 이런 배경 정보를 갖고 공격에 대해 자세히 알아 보겠습니다. -# +# # 빠른 변화도 부호 공격 # ------------------------- -# +# # 공격 방법에 있어 초기 방식이면서 가장 유명한 방식은 *빠른 변화도 부호 공격 (FGSM)* 이라고 하며 # `적대적 예제에 대한 설명과 활용 `__ 에서 # 이안 갓펠로우가 기고하였습니다. @@ -95,16 +95,16 @@ # # 이 학습서에는 입력이 3 개이며 다음과 같이 정의됩니다: # -# - **epsilons** - 실행에 사용할 엡실론의 리스트입니다. 엡실론 0의 값은 원래 테스트 셋의 모델 성능을 +# - ``epsilons`` - 실행에 사용할 엡실론의 리스트입니다. 엡실론 0의 값은 원래 테스트 셋의 모델 성능을 # 나타내므로 목록에 유지하는 것이 중요합니다. 또한 직관적으로 엡실론이 클수록 작은 변화가 더 눈에 띄지만 # 모델 정확도를 저하 시키는 측면에서 더 효과가 있습니다. 여기서 데이터의 범위는 0-1 이기 때문에 # 엡실론의 값은 1을 초과할 수 없습니다. 
# -# - **pretrained_model** - `pytorch/examples/mnist `__ +# - ``pretrained_model`` - `pytorch/examples/mnist `__ # 를 통해 미리 학습된 MNIST 모델의 경로. # 튜토리얼을 간편하게 하려면 `여기 `__ 에서 미리 학습된 모델을 다운로드하세요. # -# - **use_cuda** - CUDA 를 사용할지 말지 정하는 이진 플래그. +# - ``use_cuda`` - CUDA 를 사용할지 말지 정하는 이진 플래그. # 본 튜토리얼에서는 CPU 시간이 오래 걸리지 않으므로 CUDA를 지원하는 GPU 의 여부는 중요하지 않습니다. # @@ -301,7 +301,7 @@ def test( model, device, test_loader, epsilon ): # :math:`\epsilon=0.2` 에서의 정확도는 :math:`\epsilon=0.15` 보다 약 25% 정도 낮습니다. # 또한, :math:`\epsilon=0.25` 와 :math:`\epsilon=0.3` 사이의 모델 정확도는 랜덤으로 # 10개중 1개를 선택했을 때의 정확도와 유사한 수준입니다. -# +# plt.figure(figsize=(5,5)) plt.plot(epsilons, accuracies, "*-") @@ -325,7 +325,7 @@ def test( model, device, test_loader, epsilon ): # 각 이미지의 위의 글자는 "원래 분류 결과 -> 적대적 분류 결과"를 나타냅니다. # :math:`\epsilon=0.15` 에서 작은 변화가 눈에 띄기 시작하고 :math:`\epsilon=0.3` 에서는 확실해 보입니다. # 그러나 모든 경우에 대해서 노이즈가 추가되었더라도 사람은 올바르게 분류를 수행할 수 있습니다. -# +# # 각 엡실론에서 적대적 샘플의 몇 가지 예를 도식화합니다 cnt = 0 diff --git a/beginner_source/flava_finetuning_tutorial.py b/beginner_source/flava_finetuning_tutorial.py index 92bc50317..12e20f475 100644 --- a/beginner_source/flava_finetuning_tutorial.py +++ b/beginner_source/flava_finetuning_tutorial.py @@ -24,7 +24,7 @@ ###################################################################### # Installation # ----------------- -# We will use TextVQA dataset and bert tokenizer from HuggingFace for this +# We will use TextVQA dataset and ``bert tokenizer`` from Hugging Face for this # tutorial. So you need to install datasets and transformers in addition to TorchMultimodal. # # .. note:: @@ -40,21 +40,21 @@ # ###################################################################### -# Steps +# Steps # ----- -# -# 1. Download the HuggingFace dataset to a directory on your computer by running the following command: -# +# +# 1. Download the Hugging Face dataset to a directory on your computer by running the following command: +# # .. code-block:: -# +# # wget http://dl.fbaipublicfiles.com/pythia/data/vocab.tar.gz # tar xf vocab.tar.gz -# +# # .. note:: # If you are running this tutorial in Google Colab, run these commands # in a new cell and prepend these commands with an exclamation mark (!) # -# +# # 2. For this tutorial, we treat VQA as a classification task where # the inputs are images and question (text) and the output is an answer class. # So we need to download the vocab file with answer classes and create the answer to @@ -62,7 +62,7 @@ # # We also load the `textvqa # dataset `__ containing 34602 training samples -# (images,questions and answers) from HuggingFace +# (images,questions and answers) from Hugging Face # # We see there are 3997 answer classes including a class representing # unknown answers. @@ -98,8 +98,8 @@ # 3. Next, we write the transform function to convert the image and text into # Tensors consumable by our model - For images, we use the transforms from # torchvision to convert to Tensor and resize to uniform sizes - For text, -# we tokenize (and pad) them using the BertTokenizer from HuggingFace - -# For answers (i.e. labels), we take the most frequently occuring answer +# we tokenize (and pad) them using the ``BertTokenizer`` from Hugging Face - +# For answers (i.e. labels), we take the most frequently occurring answer # as the label to train with: # @@ -133,8 +133,8 @@ def transform(tokenizer, input): ###################################################################### -# 4. Finally, we import the flava_model_for_classification from -# torchmultimodal. 
It loads the pretrained flava checkpoint by default and +# 4. Finally, we import the ``flava_model_for_classification`` from +# ``torchmultimodal``. It loads the pretrained FLAVA checkpoint by default and # includes a classification head. # # The model forward function passes the image through the visual encoder @@ -172,7 +172,7 @@ def transform(tokenizer, input): loss.backward() optimizer.step() print(f"Loss at step {idx} = {loss}") - if idx > MAX_STEPS-1: + if idx >= MAX_STEPS-1: break diff --git a/beginner_source/former_torchies/parallelism_tutorial.py b/beginner_source/former_torchies/parallelism_tutorial.py index 9e1c5f29b..ac7c8ec83 100644 --- a/beginner_source/former_torchies/parallelism_tutorial.py +++ b/beginner_source/former_torchies/parallelism_tutorial.py @@ -130,7 +130,7 @@ def forward(self, x): # - `포럼에서 PyTorch에 대해 얘기하기`_ # - `Slack에서 다른 사용자와 대화하기`_ # -# .. _`PyTorch로 딥러닝하기 : 60분만에 끝장내기`: https://github.com/pytorch/tutorials/blob/master/Deep%20Learning%20with%20PyTorch.ipynb +# .. _`PyTorch로 딥러닝하기 : 60분만에 끝장내기`: https://github.com/pytorch/tutorials/blob/main/Deep%20Learning%20with%20PyTorch.ipynb # .. _imagenet으로 최첨단(state-of-the-art) ResNet 신경망 학습시키기: https://github.com/pytorch/examples/tree/master/imagenet # .. _적대적 생성 신경망으로 얼굴 생성기 학습시키기: https://github.com/pytorch/examples/tree/master/dcgan # .. _순환 LSTM 네트워크를 사용해 단어 단위 언어 모델 학습시키기: https://github.com/pytorch/examples/tree/master/word_language_model diff --git a/beginner_source/hyperparameter_tuning_tutorial.py b/beginner_source/hyperparameter_tuning_tutorial.py index 7f45e0d58..3d4b57601 100644 --- a/beginner_source/hyperparameter_tuning_tutorial.py +++ b/beginner_source/hyperparameter_tuning_tutorial.py @@ -1,28 +1,38 @@ # -*- coding: utf-8 -*- """ Ray Tune을 이용한 하이퍼파라미터 튜닝 -=================================== +====================================== + **번역**: `심형준 `_ -하이퍼파라미터 튜닝은 보통의 모델과 매우 정확한 모델간의 차이를 만들어 낼 수 있습니다. + +하이퍼파라미터 튜닝은 보통의 모델과 매우 정확한 모델간의 차이를 만들어 낼 수 있습니다. 종종 다른 학습률(Learnig rate)을 선택하거나 layer size를 변경하는 것과 같은 간단한 작업만으로도 모델 성능에 큰 영향을 미치기도 합니다. + 다행히, 최적의 매개변수 조합을 찾는데 도움이 되는 도구가 있습니다. -`Ray Tune `_ 은 분산 하이퍼파라미터 튜닝을 위한 업계 표준 도구입니다. +`Ray Tune `_ 은 분산 하이퍼파라미터 튜닝을 위한 업계 표준 도구입니다. Ray Tune은 최신 하이퍼파라미터 검색 알고리즘을 포함하고 TensorBoard 및 기타 분석 라이브러리와 통합되며 기본적으로 -`Ray' 의 분산 기계 학습 엔진 -`_ 을 통해 교육을 지원합니다. +`Ray 의 분산 기계 학습 엔진 `_ 을 통해 학습을 지원합니다. + 이 튜토리얼은 Ray Tune을 파이토치 학습 workflow에 통합하는 방법을 알려줍니다. CIFAR10 이미지 분류기를 훈련하기 위해 `파이토치 문서에서 이 튜토리얼을 `_ 확장할 것입니다. + 아래와 같이 약간의 수정만 추가하면 됩니다. + 1. 함수에서 데이터 로딩 및 학습 부분을 감싸두고, 2. 일부 네트워크 파라미터를 구성 가능하게 하고, 3. 체크포인트를 추가하고 (선택 사항), 4. 모델 튜닝을 위한 검색 공간을 정의합니다. + | -이 튜토리얼을 실행하기 위해 아래의 패키지가 설치되어 있는지 확인하십시오. + +이 튜토리얼을 실행하기 위해 아래의 패키지가 설치되어 있는지 확인하세요: + - ``ray[tune]``: 배포된 하이퍼파라미터 튜닝 라이브러리 -- ``torchvision``: 데이터 트랜스포머의 경우 -설정 / Imports ---------------- +- ``torchvision``: 데이터 변형을 위해 필요 + +설정 / 불러오기 +----------------- + import들로 시작합니다. """ from functools import partial @@ -40,12 +50,12 @@ from ray.tune.schedulers import ASHAScheduler ###################################################################### -# 대부분의 import들은 파이토치 모델을 빌드하는데 필요합니다. +# 대부분의 import들은 파이토치 모델을 빌드하는데 필요합니다. # 마지막 세 개의 import들만 Ray Tune을 사용하기 위한 것입니다. # # Data loaders -# ------------ -# data loader를 자체 함수로 감싸두고 전역 데이터 디렉토리로 전달합니다. +# ------------- +# data loader를 자체 함수로 감싸두고 전역 데이터 디렉토리로 전달합니다. # 이런 식으로 서로 다른 실험들 간에 데이터 디렉토리를 공유할 수 있습니다. @@ -66,8 +76,8 @@ def load_data(data_dir="./data"): ###################################################################### # 구성 가능한 신경망 # --------------------------- -# 구성 가능한 파라미터만 튜닝이 가능합니다. 
-# 이 예시를 통해 fully connected layer 크기를 지정할 수 있습니다. +# 구성 가능한 파라미터만 튜닝이 가능합니다. +# 이 예시를 통해 fully connected layer 크기를 지정할 수 있습니다: class Net(nn.Module): @@ -92,12 +102,12 @@ def forward(self, x): ###################################################################### # 학습 함수 # ------------------ -# 흥미롭게 하기 위해 `파이토치 문서에서 `_ -# 예제에 일부를 변경하여 소개합니다. +# 흥미를 더해보고자 `파이토치 문서의 예제 `_ +# 일부를 변경하여 소개합니다. # -# 훈련 스크립트를 ``train_cifar(config, checkpoint_dir=None, data_dir=None)`` 함수로 감싸둡니다. +# 학습 스크립트를 ``train_cifar(config, checkpoint_dir=None, data_dir=None)`` 함수로 감싸둡니다. # 짐작할 수 있듯이, ``config`` 매개변수는 훈련할 하이퍼파라미터를 받습니다. ``checkpoint_dir`` 매개변수는 체크포인트를 -# 복원하는 데 사용됩니다. ``data_dir`` 은 데이터를 읽고 저장하는 디렉토리를 지정하므로, +# 복원하는 데 사용됩니다. ``data_dir`` 은 데이터를 읽고 저장하는 디렉토리를 지정하므로, # 여러 실행들이 동일한 데이터 소스를 공유할 수 있습니다. # # .. code-block:: python @@ -116,12 +126,12 @@ def forward(self, x): # # optimizer = optim.SGD(net.parameters(), lr=config["lr"], momentum=0.9) # -# 또한 학습 데이터를 학습 및 검증 세트로 나눕니다. 따라서 데이터의 80%는 모델 학습에 사용하고, +# 또한 학습 데이터를 학습 및 검증 세트로 나눕니다. 따라서 데이터의 80%는 모델 학습에 사용하고, # 나머지 20%에 대해 유효성 검사 및 손실을 계산합니다. 학습 및 테스트 세트를 반복하는 배치 크기도 구성할 수 있습니다. # # DataParallel을 이용한 GPU(다중)지원 추가 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -# 이미지 분류는 GPU를 사용할 때 이점이 많습니다. 운좋게도 Ray Tune에서 파이토치의 추상화를 계속 사용할 수 있습니다. +# 이미지 분류는 GPU를 사용할 때 이점이 많습니다. 운좋게도 Ray Tune에서 파이토치의 추상화를 계속 사용할 수 있습니다. # 따라서 여러 GPU에서 데이터 병렬 훈련을 지원하기 위해 모델을 ``nn.DataParallel`` 으로 감쌀 수 있습니다. # # .. code-block:: python @@ -133,7 +143,7 @@ def forward(self, x): # net = nn.DataParallel(net) # net.to(device) # -# ``device`` 변수를 사용하여 사용 가능한 GPU가 없을 때도 학습이 가능한지 확인합니다. +# ``device`` 변수를 사용하여 사용 가능한 GPU가 없을 때도 학습이 가능한지 확인합니다. # 파이토치는 다음과 같이 데이터를 GPU메모리에 명시적으로 보내도록 요구합니다. # # .. code-block:: python @@ -142,8 +152,8 @@ def forward(self, x): # inputs, labels = data # inputs, labels = inputs.to(device), labels.to(device) # -# 이 코드는 이제 CPU들, 단일 GPU 및 다중 GPU에 대한 학습을 지원합니다. -# 특히 Ray는 `부분GPU `_ 도 지원하므로 +# 이 코드는 이제 CPU들, 단일 GPU 및 다중 GPU에 대한 학습을 지원합니다. +# 특히 Ray는 `fractional-GPU `_ 도 지원하므로 # 모델이 GPU 메모리에 적합한 상황에서는 테스트 간에 GPU를 공유할 수 있습니다. 이는 나중에 다룰 것입니다. # # Ray Tune과 소통하기 @@ -159,17 +169,17 @@ def forward(self, x): # # tune.report(loss=(val_loss / val_steps), accuracy=correct / total) # -# 여기서 먼저 체크포인트를 저장한 다음 일부 메트릭을 Ray Tune에 다시 보냅니다. 특히, validation loss와 accuracy를 -# Ray Tune으로 다시 보냅니다. 그 후 Ray Tune은 이러한 메트릭을 사용하여 최상의 결과를 유도하는 하이퍼파라미터 구성을 +# 여기서 먼저 체크포인트를 저장한 다음 일부 메트릭을 Ray Tune에 다시 보냅니다. 특히, validation loss와 accuracy를 +# Ray Tune으로 다시 보냅니다. 그 후 Ray Tune은 이러한 메트릭을 사용하여 최상의 결과를 유도하는 하이퍼파라미터 구성을 # 결정할 수 있습니다. 이러한 메트릭들은 또한 리소스 낭비를 방지하기 위해 성능이 좋지 않은 실험을 조기에 중지하는 데 사용할 수 있습니다. # -# 체크포인트 저장은 선택사항이지만 `Population Based Training `_ +# 체크포인트 저장은 선택사항이지만 `Population Based Training `_ # 과 같은 고급 스케줄러를 사용하려면 필요합니다. 또한 체크포인트를 저장하면 나중에 학습된 모델을 로드하고 평가 세트(test set)에서 검증할 수 있습니다. # # Full training function -# ~~~~~~~~~~~~~~~~~~~~~~ +# ~~~~~~~~~~~~~~~~~~~~~~~~ # -# 전체 코드 예제는 다음과 같습니다. +# 전체 예제 코드는 다음과 같습니다. def train_cifar(config, checkpoint_dir=None, data_dir=None): @@ -264,7 +274,7 @@ def train_cifar(config, checkpoint_dir=None, data_dir=None): # # Test set 정확도(accuracy) # ----------------- -# 일반적으로 머신러닝 모델의 성능은 모델 학습에 사용되지 않은 데이터를 사용해 테스트합니다. +# 일반적으로 머신러닝 모델의 성능은 모델 학습에 사용되지 않은 데이터를 사용해 테스트합니다. # Test set 또한 함수로 감싸둘 수 있습니다. @@ -303,11 +313,11 @@ def test_accuracy(net, device="cpu"): # "batch_size": tune.choice([2, 4, 8, 16]) # } # -# ``tune.sample_from()`` 함수를 사용하면 고유한 샘플 방법을 정의하여 하이퍼파라미터를 얻을 수 있습니다. -# 이 예제에서 ``l1`` 과 ``l2`` 파라미터는 4와 256 사이의 2의 거듭제곱이어야 하므로 4, 8, 16, 32, 64, 128, 256입니다. 
+# ``tune.sample_from()`` 함수를 사용하면 고유한 샘플 방법을 정의하여 하이퍼파라미터를 얻을 수 있습니다. +# 이 예제에서 ``l1`` 과 ``l2`` 파라미터는 4와 256 사이의 2의 거듭제곱이어야 하므로 4, 8, 16, 32, 64, 128, 256입니다. # ``lr`` (학습률)은 0.0001과 0.1 사이에서 균일하게 샘플링 되어야 합니다. 마지막으로, 배치 크기는 2, 4, 8, 16중에서 선택할 수 있습니다. # -# 각 실험에서, Ray Tune은 이제 이러한 검색 공간에서 매개변수 조합을 무작위로 샘플링합니다. +# 각 실험에서, Ray Tune은 이제 이러한 검색 공간에서 매개변수 조합을 무작위로 샘플링합니다. # 그런 다음 여러 모델을 병렬로 훈련하고 이 중에서 가장 성능이 좋은 모델을 찾습니다. 또한 성능이 좋지 않은 실험을 조기에 종료하는 ``ASHAScheduler`` 를 사용합니다. # # 상수 ``data_dir`` 파라미터를 설정하기 위해 ``functools.partial`` 로 ``train_cifar`` 함수를 감싸둡니다. 또한 각 실험에 사용할 수 있는 자원들(resources)을 Ray Tune에 알릴 수 있습니다. @@ -325,7 +335,7 @@ def test_accuracy(net, device="cpu"): # progress_reporter=reporter, # checkpoint_at_end=True) # -# 파이토치 ``DataLoader`` 인스턴스의 ``num_workers`` 을 늘리기 위해 CPU 수를 지정하고 사용할 수 있습니다. +# 파이토치 ``DataLoader`` 인스턴스의 ``num_workers`` 을 늘리기 위해 CPU 수를 지정하고 사용할 수 있습니다. # 각 실험에서 선택한 수의 GPU들은 파이토치에 표시됩니다. 실험들은 요청되지 않은 GPU에 액세스할 수 없으므로 같은 자원들을 사용하는 중복된 실험에 대해 신경쓰지 않아도 됩니다. # # 부분 GPUs를 지정할 수도 있으므로, ``gpus_per_trial=0.5`` 와 같은 것 또한 가능합니다. 이후 각 실험은 GPU를 공유합니다. 사용자는 모델이 여전히 GPU메모리에 적합한지만 확인하면 됩니다. @@ -351,7 +361,7 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): grace_period=1, reduction_factor=2) reporter = CLIReporter( - # parameter_columns=["l1", "l2", "lr", "batch_size"], + # ``parameter_columns=["l1", "l2", "lr", "batch_size"]``, metric_columns=["loss", "accuracy", "training_iteration"]) result = tune.run( partial(train_cifar, data_dir=data_dir), @@ -386,6 +396,12 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): if __name__ == "__main__": + # sphinx_gallery_start_ignore + # Fixes ``AttributeError: '_LoggingTee' object has no attribute 'fileno'``. + # This is only needed to run with sphinx-build. + import sys + sys.stdout.fileno = lambda: False + # sphinx_gallery_end_ignore # You can change the number of GPUs per trial here: main(num_samples=10, max_num_epochs=10, gpus_per_trial=0) diff --git a/beginner_source/introyt/autogradyt_tutorial.py b/beginner_source/introyt/autogradyt_tutorial.py index 19c25c5e8..a2ed238e5 100644 --- a/beginner_source/introyt/autogradyt_tutorial.py +++ b/beginner_source/introyt/autogradyt_tutorial.py @@ -153,7 +153,7 @@ ####################################################################### # This ``grad_fn`` gives us a hint that when we execute the # backpropagation step and compute gradients, we’ll need to compute the -# derivative of :math:`sin(x)` for all this tensor’s inputs. +# derivative of :math:`\sin(x)` for all this tensor’s inputs. # # Let’s perform some more computations: # @@ -222,8 +222,8 @@ # out = d.sum() # # Adding a constant, as we did to compute ``d``, does not change the -# derivative. That leaves :math:`c = 2 * b = 2 * sin(a)`, the derivative -# of which should be :math:`2 * cos(a)`. Looking at the graph above, +# derivative. That leaves :math:`c = 2 * b = 2 * \sin(a)`, the derivative +# of which should be :math:`2 * \cos(a)`. Looking at the graph above, # that’s just what we see. 
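As a quick, self-contained check of the derivative worked out above (a sketch, not part of the tutorial's own code): for ``out = (2 * sin(a) + 1).sum()``, autograd should report ``a.grad`` equal to ``2 * cos(a)`` elementwise.

.. code-block:: python

    import torch

    a = torch.linspace(0., 2. * torch.pi, steps=25, requires_grad=True)
    b = torch.sin(a)
    c = 2 * b
    d = c + 1
    out = d.sum()
    out.backward()

    # Adding the constant 1 does not change the derivative, so the gradient
    # of the sum with respect to ``a`` is 2 * cos(a) elementwise.
    print(torch.allclose(a.grad, 2 * torch.cos(a.detach())))  # expected: True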
# # Be aware that only *leaf nodes* of the computation have their gradients @@ -334,7 +334,7 @@ def forward(self, x): print(model.layer2.weight.grad[0][0:10]) -optimizer.zero_grad() +optimizer.zero_grad(set_to_none=False) print(model.layer2.weight.grad[0][0:10]) diff --git a/beginner_source/introyt/captumyt.py b/beginner_source/introyt/captumyt.py index 5e1bb9201..2ff8e9e70 100644 --- a/beginner_source/introyt/captumyt.py +++ b/beginner_source/introyt/captumyt.py @@ -106,13 +106,13 @@ To install Captum in an Anaconda or pip virtual environment, use the appropriate command for your environment below: -With ``conda``: +With ``conda``:: -``conda install pytorch torchvision captum -c pytorch`` + conda install pytorch torchvision captum -c pytorch -With ``pip``: +With ``pip``:: -``pip install torch torchvision captum`` + pip install torch torchvision captum Restart this notebook in the environment you set up, and you’re ready to go! @@ -155,7 +155,7 @@ # now. # -model = models.resnet101(pretrained=True) +model = models.resnet101(weights='IMAGENET1K_V1') model = model.eval() diff --git a/beginner_source/introyt/introyt1_tutorial.py b/beginner_source/introyt/introyt1_tutorial.py index 77a72f1e1..9aa32238c 100644 --- a/beginner_source/introyt/introyt1_tutorial.py +++ b/beginner_source/introyt/introyt1_tutorial.py @@ -173,11 +173,11 @@ class LeNet(nn.Module): def __init__(self): super(LeNet, self).__init__() - # 입력 이미지 채널, 6개의 output 채널, 3x3 정방 합성곱 커널을 사용합니다. - self.conv1 = nn.Conv2d(1, 6, 3) - self.conv2 = nn.Conv2d(6, 16, 3) + # 입력 이미지 채널, 6개의 output 채널, 5x5 정방 합성곱 커널을 사용합니다. + self.conv1 = nn.Conv2d(1, 6, 5) + self.conv2 = nn.Conv2d(6, 16, 5) # 아핀 변환: y = Wx + b - self.fc1 = nn.Linear(16 * 6 * 6, 120) # 6*6 이미지 차원 + self.fc1 = nn.Linear(16 * 5 * 5, 120) # 5x5 이미지 차원 self.fc2 = nn.Linear(120, 84) self.fc3 = nn.Linear(84, 10) diff --git a/beginner_source/introyt/modelsyt_tutorial.py b/beginner_source/introyt/modelsyt_tutorial.py index 884fcbdb1..8126ce841 100644 --- a/beginner_source/introyt/modelsyt_tutorial.py +++ b/beginner_source/introyt/modelsyt_tutorial.py @@ -46,15 +46,15 @@ class is a subclass of ``torch.Tensor``, with the special behavior that import torch class TinyModel(torch.nn.Module): - + def __init__(self): super(TinyModel, self).__init__() - + self.linear1 = torch.nn.Linear(100, 200) self.activation = torch.nn.ReLU() self.linear2 = torch.nn.Linear(200, 10) self.softmax = torch.nn.Softmax() - + def forward(self, x): x = self.linear1(x) x = self.activation(x) @@ -85,19 +85,19 @@ def forward(self, x): # model, and a ``forward()`` method where the computation gets done. Note # that we can print the model, or any of its submodules, to learn about # its structure. -# +# # Common Layer Types # ------------------ -# +# # Linear Layers # ~~~~~~~~~~~~~ -# +# # The most basic type of neural network layer is a *linear* or *fully # connected* layer. This is a layer where every input influences every # output of the layer to a degree specified by the layer’s weights. If a # model has *m* inputs and *n* outputs, the weights will be an *m* x *n* # matrix. For example: -# +# lin = torch.nn.Linear(3, 2) x = torch.rand(1, 3) @@ -117,22 +117,22 @@ def forward(self, x): # If you do the matrix multiplication of ``x`` by the linear layer’s # weights, and add the biases, you’ll find that you get the output vector # ``y``. 
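That claim is easy to verify by hand. The short sketch below (illustrative, reusing the same 3-in/2-out layer shape as above) applies the weight matrix and bias manually and compares the result against the layer's own output.

.. code-block:: python

    import torch

    lin = torch.nn.Linear(3, 2)
    x = torch.rand(1, 3)

    y = lin(x)                                # the layer's output
    y_manual = x @ lin.weight.t() + lin.bias  # the same affine map, written out

    print(torch.allclose(y, y_manual))        # expected: True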
-# +# # One other important feature to note: When we checked the weights of our # layer with ``lin.weight``, it reported itself as a ``Parameter`` (which # is a subclass of ``Tensor``), and let us know that it’s tracking # gradients with autograd. This is a default behavior for ``Parameter`` # that differs from ``Tensor``. -# +# # Linear layers are used widely in deep learning models. One of the most # common places you’ll see them is in classifier models, which will # usually have one or more linear layers at the end, where the last layer # will have *n* outputs, where *n* is the number of classes the classifier # addresses. -# +# # Convolutional Layers # ~~~~~~~~~~~~~~~~~~~~ -# +# # *Convolutional* layers are built to handle data with a high degree of # spatial correlation. They are very commonly used in computer vision, # where they detect close groupings of features which the compose into @@ -140,9 +140,9 @@ def forward(self, x): # in NLP applications, where a word’s immediate context (that is, the # other words nearby in the sequence) can affect the meaning of a # sentence. -# +# # We saw convolutional layers in action in LeNet5 in an earlier video: -# +# import torch.functional as F @@ -182,7 +182,7 @@ def num_flat_features(self, x): ########################################################################## # Let’s break down what’s happening in the convolutional layers of this # model. Starting with ``conv1``: -# +# # - LeNet5 is meant to take in a 1x32x32 black & white image. **The first # argument to a convolutional layer’s constructor is the number of # input channels.** Here, it is 1. If we were building this model to @@ -198,14 +198,14 @@ def num_flat_features(self, x): # size.** Here, the “5” means we’ve chosen a 5x5 kernel. (If you want a # kernel with height different from width, you can specify a tuple for # this argument - e.g., ``(3, 5)`` to get a 3x5 convolution kernel.) -# +# # The output of a convolutional layer is an *activation map* - a spatial # representation of the presence of features in the input tensor. # ``conv1`` will give us an output tensor of 6x28x28; 6 is the number of # features, and 28 is the height and width of our map. (The 28 comes from # the fact that when scanning a 5-pixel window over a 32-pixel row, there # are only 28 valid positions.) -# +# # We then pass the output of the convolution through a ReLU activation # function (more on activation functions later), then through a max # pooling layer. The max pooling layer takes features near each other in @@ -214,14 +214,14 @@ def num_flat_features(self, x): # cell, and assigning that cell the maximum value of the 4 cells that went # into it. This gives us a lower-resolution version of the activation map, # with dimensions 6x14x14. -# +# # Our next convolutional layer, ``conv2``, expects 6 input channels # (corresponding to the 6 features sought by the first layer), has 16 # output channels, and a 3x3 kernel. It puts out a 16x12x12 activation # map, which is again reduced by a max pooling layer to 16x6x6. Prior to # passing this output to the linear layers, it is reshaped to a 16 \* 6 \* # 6 = 576-element vector for consumption by the next layer. -# +# # There are convolutional layers for addressing 1D, 2D, and 3D tensors. # There are also many more optional arguments for a conv layer # constructor, including stride length(e.g., only scanning every second or @@ -229,22 +229,22 @@ def num_flat_features(self, x): # edges of the input), and more. See the # `documentation `__ # for more information. 
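The activation-map sizes described above can be confirmed with a few lines of throwaway code. This is a sketch with a dummy 1x32x32 input (batch dimension in front), assuming the 5x5 first kernel, 3x3 second kernel, and 2x2 max pooling discussed in this section.

.. code-block:: python

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    conv1 = nn.Conv2d(1, 6, 5)
    conv2 = nn.Conv2d(6, 16, 3)

    x = torch.rand(1, 1, 32, 32)
    x = F.max_pool2d(F.relu(conv1(x)), 2)
    print(x.shape)              # torch.Size([1, 6, 14, 14])
    x = F.max_pool2d(F.relu(conv2(x)), 2)
    print(x.shape)              # torch.Size([1, 16, 6, 6])
    print(x.flatten(1).shape)   # torch.Size([1, 576]), i.e. 16 * 6 * 6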
-# +# # Recurrent Layers # ~~~~~~~~~~~~~~~~ -# +# # *Recurrent neural networks* (or *RNNs)* are used for sequential data - # anything from time-series measurements from a scientific instrument to # natural language sentences to DNA nucleotides. An RNN does this by # maintaining a *hidden state* that acts as a sort of memory for what it # has seen in the sequence so far. -# +# # The internal structure of an RNN layer - or its variants, the LSTM (long # short-term memory) and GRU (gated recurrent unit) - is moderately # complex and beyond the scope of this video, but we’ll show you what one # looks like in action with an LSTM-based part-of-speech tagger (a type of # classifier that tells you if a word is a noun, verb, etc.): -# +# class LSTMTagger(torch.nn.Module): @@ -271,7 +271,7 @@ def forward(self, sentence): ######################################################################## # The constructor has four arguments: -# +# # - ``vocab_size`` is the number of words in the input vocabulary. Each # word is a one-hot vector (or unit vector) in a # ``vocab_size``-dimensional space. @@ -281,7 +281,7 @@ def forward(self, sentence): # space, where words with similar meanings are close together in the # space. # - ``hidden_dim`` is the size of the LSTM’s memory. -# +# # The input will be a sentence with the words represented as indices of # one-hot vectors. The embedding layer will then map these down to an # ``embedding_dim``-dimensional space. The LSTM takes this sequence of @@ -290,15 +290,15 @@ def forward(self, sentence): # ``log_softmax()`` to the output of the final layer converts the output # into a normalized set of estimated probabilities that a given word maps # to a given tag. -# +# # If you’d like to see this network in action, check out the `Sequence # Models and LSTM -# Networks `__ +# Networks `__ # tutorial on pytorch.org. -# +# # Transformers # ~~~~~~~~~~~~ -# +# # *Transformers* are multi-purpose networks that have taken over the state # of the art in NLP with models like BERT. A discussion of transformer # architecture is beyond the scope of this video, but PyTorch has a @@ -312,22 +312,22 @@ def forward(self, sentence): # ``TransformerDecoderLayer``). For details, check out the # `documentation `__ # on transformer classes, and the relevant -# `tutorial `__ +# `tutorial `__ # on pytorch.org. -# +# # Other Layers and Functions # -------------------------- -# +# # Data Manipulation Layers # ~~~~~~~~~~~~~~~~~~~~~~~~ -# +# # There are other layer types that perform important functions in models, # but don’t participate in the learning process themselves. -# +# # **Max pooling** (and its twin, min pooling) reduce a tensor by combining # cells, and assigning the maximum value of the input cells to the output # cell (we saw this). For example: -# +# my_tensor = torch.rand(1, 6, 6) print(my_tensor) @@ -340,12 +340,12 @@ def forward(self, sentence): # If you look closely at the values above, you’ll see that each of the # values in the maxpooled output is the maximum value of each quadrant of # the 6x6 input. -# +# # **Normalization layers** re-center and normalize the output of one layer # before feeding it to another. Centering the and scaling the intermediate # tensors has a number of beneficial effects, such as letting you use # higher learning rates without exploding/vanishing gradients. -# +# my_tensor = torch.rand(1, 4, 4) * 20 + 5 print(my_tensor) @@ -366,22 +366,22 @@ def forward(self, sentence): # in the neighborhood of 15. 
After running it through the normalization # layer, you can see that the values are smaller, and grouped around zero # - in fact, the mean should be very small (> 1e-8). -# +# # This is beneficial because many activation functions (discussed below) # have their strongest gradients near 0, but sometimes suffer from # vanishing or exploding gradients for inputs that drive them far away # from zero. Keeping the data centered around the area of steepest # gradient will tend to mean faster, better learning and higher feasible # learning rates. -# +# # **Dropout layers** are a tool for encouraging *sparse representations* # in your model - that is, pushing it to do inference with less data. -# +# # Dropout layers work by randomly setting parts of the input tensor # *during training* - dropout layers are always turned off for inference. # This forces the model to learn against this masked or reduced dataset. # For example: -# +# my_tensor = torch.rand(1, 4, 4) @@ -394,10 +394,10 @@ def forward(self, sentence): # Above, you can see the effect of dropout on a sample tensor. You can use # the optional ``p`` argument to set the probability of an individual # weight dropping out; if you don’t it defaults to 0.5. -# +# # Activation Functions # ~~~~~~~~~~~~~~~~~~~~ -# +# # Activation functions make deep learning possible. A neural network is # really a program - with many parameters - that *simulates a mathematical # function*. If all we did was multiple tensors by layer weights @@ -406,17 +406,17 @@ def forward(self, sentence): # reduce could be reduced to a single matrix multiplication. Inserting # *non-linear* activation functions between layers is what allows a deep # learning model to simulate any function, rather than just linear ones. -# +# # ``torch.nn.Module`` has objects encapsulating all of the major # activation functions including ReLU and its many variants, Tanh, # Hardtanh, sigmoid, and more. It also includes other functions, such as # Softmax, that are most useful at the output stage of a model. -# +# # Loss Functions # ~~~~~~~~~~~~~~ -# +# # Loss functions tell us how far a model’s prediction is from the correct # answer. PyTorch contains a variety of loss functions, including common # MSE (mean squared error = L2 norm), Cross Entropy Loss and Negative # Likelihood Loss (useful for classifiers), and others. -# +# diff --git a/beginner_source/introyt/tensorboardyt_tutorial.py b/beginner_source/introyt/tensorboardyt_tutorial.py index 3b07fe0b8..8e3263204 100644 --- a/beginner_source/introyt/tensorboardyt_tutorial.py +++ b/beginner_source/introyt/tensorboardyt_tutorial.py @@ -24,14 +24,14 @@ To run this tutorial, you’ll need to install PyTorch, TorchVision, Matplotlib, and TensorBoard. -With ``conda``: +With ``conda``:: -``conda install pytorch torchvision -c pytorch`` -``conda install matplotlib tensorboard`` + conda install pytorch torchvision -c pytorch + conda install matplotlib tensorboard -With ``pip``: +With ``pip``:: -``pip install torch torchvision matplotlib tensorboard`` + pip install torch torchvision matplotlib tensorboard Once the dependencies are installed, restart this notebook in the Python environment where you installed them. @@ -39,11 +39,11 @@ Introduction ------------ - + In this notebook, we’ll be training a variant of LeNet-5 against the Fashion-MNIST dataset. Fashion-MNIST is a set of image tiles depicting various garments, with ten class labels indicating the type of garment -depicted. +depicted. 
""" @@ -68,9 +68,9 @@ ###################################################################### # Showing Images in TensorBoard # ----------------------------- -# +# # Let’s start by adding sample images from our dataset to TensorBoard: -# +# # Gather datasets and prepare them for consumption transform = transforms.Compose( @@ -127,7 +127,7 @@ def matplotlib_imshow(img, one_channel=False): # minibatch of our input data. Below, we use the ``add_image()`` call on # ``SummaryWriter`` to log the image for consumption by TensorBoard, and # we also call ``flush()`` to make sure it’s written to disk right away. -# +# # Default log_dir argument is "runs" - but it's good to be specific # torch.utils.tensorboard.SummaryWriter is imported above @@ -146,17 +146,17 @@ def matplotlib_imshow(img, one_channel=False): # If you start TensorBoard at the command line and open it in a new # browser tab (usually at `localhost:6006 `__), you should # see the image grid under the IMAGES tab. -# +# # Graphing Scalars to Visualize Training # -------------------------------------- -# +# # TensorBoard is useful for tracking the progress and efficacy of your # training. Below, we’ll run a training loop, track some metrics, and save # the data for TensorBoard’s consumption. -# +# # Let’s define a model to categorize our image tiles, and an optimizer and # loss function for training: -# +# class Net(nn.Module): def __init__(self): @@ -176,7 +176,7 @@ def forward(self, x): x = F.relu(self.fc2(x)) x = self.fc3(x) return x - + net = Net() criterion = nn.CrossEntropyLoss() @@ -186,7 +186,7 @@ def forward(self, x): ########################################################################## # Now let’s train a single epoch, and evaluate the training vs. validation # set losses every 1000 batches: -# +# print(len(validation_loader)) for epoch in range(1): # loop over the dataset multiple times @@ -206,7 +206,7 @@ def forward(self, x): print('Batch {}'.format(i + 1)) # Check against the validation set running_vloss = 0.0 - + net.train(False) # Don't need to track gradents for validation for j, vdata in enumerate(validation_loader, 0): vinputs, vlabels = vdata @@ -214,10 +214,10 @@ def forward(self, x): vloss = criterion(voutputs, vlabels) running_vloss += vloss.item() net.train(True) # Turn gradients back on for training - + avg_loss = running_loss / 1000 avg_vloss = running_vloss / len(validation_loader) - + # Log the running loss averaged per batch writer.add_scalars('Training vs. Validation Loss', { 'Training' : avg_loss, 'Validation' : avg_vloss }, @@ -231,14 +231,14 @@ def forward(self, x): ######################################################################### # Switch to your open TensorBoard and have a look at the SCALARS tab. -# +# # Visualizing Your Model # ---------------------- -# +# # TensorBoard can also be used to examine the data flow within your model. # To do this, call the ``add_graph()`` method with a model and sample # input. When you open -# +# # Again, grab a single mini-batch of images dataiter = iter(training_loader) @@ -254,10 +254,10 @@ def forward(self, x): # When you switch over to TensorBoard, you should see a GRAPHS tab. # Double-click the “NET” node to see the layers and data flow within your # model. -# +# # Visualizing Your Dataset with Embeddings # ---------------------------------------- -# +# # The 28-by-28 image tiles we’re using can be modeled as 784-dimensional # vectors (28 \* 28 = 784). It can be instructive to project this to a # lower-dimensional representation. 
The ``add_embedding()`` method will @@ -265,9 +265,9 @@ def forward(self, x): # and display them as an interactive 3D chart. The ``add_embedding()`` # method does this automatically by projecting to the three dimensions # with highest variance. -# +# # Below, we’ll take a sample of our data, and generate such an embedding: -# +# # Select a random subset of data and corresponding labels def select_n_random(data, labels, n=100): @@ -297,19 +297,19 @@ def select_n_random(data, labels, n=100): # zoom the model. Examine it at large and small scales, and see whether # you can spot patterns in the projected data and the clustering of # labels. -# +# # For better visibility, it’s recommended to: -# +# # - Select “label” from the “Color by” drop-down on the left. # - Toggle the Night Mode icon along the top to place the # light-colored images on a dark background. -# +# # Other Resources # --------------- -# +# # For more information, have a look at: -# +# # - PyTorch documentation on `torch.utils.tensorboard.SummaryWriter `__ -# - Tensorboard tutorial content in the `PyTorch.org Tutorials `__ +# - Tensorboard tutorial content in the `PyTorch.org Tutorials `__ # - For more information about TensorBoard, see the `TensorBoard # documentation `__ diff --git a/beginner_source/introyt/tensors_deeper_tutorial.py b/beginner_source/introyt/tensors_deeper_tutorial.py index 1f6d72488..8b2c1630a 100644 --- a/beginner_source/introyt/tensors_deeper_tutorial.py +++ b/beginner_source/introyt/tensors_deeper_tutorial.py @@ -544,11 +544,11 @@ print(c) # contents of c have changed assert c is d # test c & d are same object, not just containing equal values -assert id(c), old_id # make sure that our new c is the same object as the old one +assert id(c) == old_id # make sure that our new c is the same object as the old one torch.rand(2, 2, out=c) # works for creation too! print(c) # c has changed again -assert id(c), old_id # still the same object! +assert id(c) == old_id # still the same object! ########################################################################## diff --git a/beginner_source/introyt/trainingyt.py b/beginner_source/introyt/trainingyt.py index 5c374ca7a..84750da77 100644 --- a/beginner_source/introyt/trainingyt.py +++ b/beginner_source/introyt/trainingyt.py @@ -25,13 +25,13 @@ - Building models with the neural network layers and functions of the torch.nn module - The mechanics of automated gradient computation, which is central to - gradient-based model training + gradient-based model training - Using TensorBoard to visualize training progress and other activities In this video, we’ll be adding some new tools to your inventory: - We’ll get familiar with the dataset and dataloader abstractions, and how - they ease the process of feeding data to your model during a training loop + they ease the process of feeding data to your model during a training loop - We’ll discuss specific loss functions and when to use them - We’ll look at PyTorch optimizers, which implement algorithms to adjust model weights based on the outcome of a loss function @@ -42,26 +42,26 @@ Dataset and DataLoader ---------------------- - + The ``Dataset`` and ``DataLoader`` classes encapsulate the process of pulling your data from storage and exposing it to your training loop in batches. The ``Dataset`` is responsible for accessing and processing single instances of data. 
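For a concrete picture of that responsibility, here is a minimal, hypothetical ``Dataset`` (not the one used in this tutorial): it only has to implement ``__len__`` and ``__getitem__``, returning the number of samples and a single (input, label) pair respectively.

.. code-block:: python

    import torch
    from torch.utils.data import Dataset

    class RandomVectorDataset(Dataset):
        """Illustrative only: serves random feature vectors with 0/1 labels."""
        def __init__(self, n_samples=100, n_features=8):
            self.data = torch.randn(n_samples, n_features)
            self.labels = torch.randint(0, 2, (n_samples,))

        def __len__(self):
            return len(self.data)

        def __getitem__(self, idx):
            return self.data[idx], self.labels[idx]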
- + The ``DataLoader`` pulls instances of data from the ``Dataset`` (either automatically or with a sampler that you define), collects them in batches, and returns them for consumption by your training loop. The ``DataLoader`` works with all kinds of datasets, regardless of the type of data they contain. - + For this tutorial, we’ll be using the Fashion-MNIST dataset provided by TorchVision. We use ``torchvision.transforms.Normalize()`` to zero-center and normalize the distribution of the image tile content, and download both training and validation data splits. -""" +""" import torch import torchvision @@ -81,8 +81,8 @@ validation_set = torchvision.datasets.FashionMNIST('./data', train=False, transform=transform, download=True) # Create data loaders for our datasets; shuffle for training, not for validation -training_loader = torch.utils.data.DataLoader(training_set, batch_size=4, shuffle=True, num_workers=2) -validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=4, shuffle=False, num_workers=2) +training_loader = torch.utils.data.DataLoader(training_set, batch_size=4, shuffle=True) +validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=4, shuffle=False) # Class labels classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', @@ -95,7 +95,7 @@ ###################################################################### # As always, let’s visualize the data as a sanity check: -# +# import matplotlib.pyplot as plt import numpy as np @@ -123,10 +123,10 @@ def matplotlib_imshow(img, one_channel=False): ######################################################################### # The Model # --------- -# +# # The model we’ll use in this example is a variant of LeNet-5 - it should # be familiar if you’ve watched the previous videos in this series. -# +# import torch.nn as nn import torch.nn.functional as F @@ -150,7 +150,7 @@ def forward(self, x): x = F.relu(self.fc2(x)) x = self.fc3(x) return x - + model = GarmentClassifier() @@ -158,11 +158,11 @@ def forward(self, x): ########################################################################## # Loss Function # ------------- -# +# # For this example, we’ll be using a cross-entropy loss. For demonstration # purposes, we’ll create batches of dummy output and label values, run # them through the loss function, and examine the result. -# +# loss_fn = torch.nn.CrossEntropyLoss() @@ -171,7 +171,7 @@ def forward(self, x): dummy_outputs = torch.rand(4, 10) # Represents the correct class among the 10 being tested dummy_labels = torch.tensor([1, 5, 3, 7]) - + print(dummy_outputs) print(dummy_labels) @@ -182,21 +182,21 @@ def forward(self, x): ################################################################################# # Optimizer # --------- -# +# # For this example, we’ll be using simple `stochastic gradient # descent `__ with momentum. -# +# # It can be instructive to try some variations on this optimization # scheme: -# +# # - Learning rate determines the size of the steps the optimizer # takes. What does a different learning rate do to the your training # results, in terms of accuracy and convergence time? # - Momentum nudges the optimizer in the direction of strongest gradient over -# multiple steps. What does changing this value do to your results? +# multiple steps. What does changing this value do to your results? # - Try some different optimization algorithms, such as averaged SGD, Adagrad, or # Adam. How do your results differ? 
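Each of the variations suggested above amounts to a one-line swap of the optimizer constructor. The sketch below uses a stand-in ``nn.Linear`` model purely so it runs on its own; in this tutorial you would pass ``model.parameters()`` from the classifier defined earlier, and only one optimizer would be active at a time.

.. code-block:: python

    import torch.nn as nn
    import torch.optim as optim

    model = nn.Linear(4, 2)  # stand-in model, just so the sketch is runnable

    sgd_no_momentum = optim.SGD(model.parameters(), lr=0.001)
    sgd_higher_lr   = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    adagrad         = optim.Adagrad(model.parameters(), lr=0.01)
    adam            = optim.Adam(model.parameters(), lr=0.001)
    averaged_sgd    = optim.ASGD(model.parameters(), lr=0.001)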
-# +# # Optimizers specified in the torch.optim package optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) @@ -205,13 +205,13 @@ def forward(self, x): ####################################################################################### # The Training Loop # ----------------- -# +# # Below, we have a function that performs one training epoch. It # enumerates data from the DataLoader, and on each pass of the loop does # the following: -# +# # - Gets a batch of training data from the DataLoader -# - Zeros the optimizer’s gradients +# - Zeros the optimizer’s gradients # - Performs an inference - that is, gets predictions from the model for an input batch # - Calculates the loss for that set of predictions vs. the labels on the dataset # - Calculates the backward gradients over the learning weights @@ -221,32 +221,32 @@ def forward(self, x): # - It reports on the loss for every 1000 batches. # - Finally, it reports the average per-batch loss for the last # 1000 batches, for comparison with a validation run -# +# def train_one_epoch(epoch_index, tb_writer): running_loss = 0. last_loss = 0. - + # Here, we use enumerate(training_loader) instead of # iter(training_loader) so that we can track the batch # index and do some intra-epoch reporting for i, data in enumerate(training_loader): # Every data instance is an input + label pair inputs, labels = data - + # Zero your gradients for every batch! optimizer.zero_grad() - + # Make predictions for this batch outputs = model(inputs) - + # Compute the loss and its gradients loss = loss_fn(outputs, labels) loss.backward() - + # Adjust learning weights optimizer.step() - + # Gather data and report running_loss += loss.item() if i % 1000 == 999: @@ -255,24 +255,24 @@ def train_one_epoch(epoch_index, tb_writer): tb_x = epoch_index * len(training_loader) + i + 1 tb_writer.add_scalar('Loss/train', last_loss, tb_x) running_loss = 0. - + return last_loss ################################################################################## # Per-Epoch Activity # ~~~~~~~~~~~~~~~~~~ -# -# There are a couple of things we’ll want to do once per epoch: +# +# There are a couple of things we’ll want to do once per epoch: # # - Perform validation by checking our relative loss on a set of data that was not -# used for training, and report this +# used for training, and report this # - Save a copy of the model -# +# # Here, we’ll do our reporting in TensorBoard. This will require going to # the command line to start TensorBoard, and opening it in another browser # tab. -# +# # Initializing in a separate cell so we can easily add more epochs to the same run timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') @@ -285,58 +285,58 @@ def train_one_epoch(epoch_index, tb_writer): for epoch in range(EPOCHS): print('EPOCH {}:'.format(epoch_number + 1)) - + # Make sure gradient tracking is on, and do a pass over the data model.train(True) avg_loss = train_one_epoch(epoch_number, writer) - + # We don't need gradients on to do reporting model.train(False) - + running_vloss = 0.0 for i, vdata in enumerate(validation_loader): vinputs, vlabels = vdata voutputs = model(vinputs) vloss = loss_fn(voutputs, vlabels) running_vloss += vloss - + avg_vloss = running_vloss / (i + 1) print('LOSS train {} valid {}'.format(avg_loss, avg_vloss)) - + # Log the running loss averaged per batch # for both training and validation writer.add_scalars('Training vs. 
Validation Loss', { 'Training' : avg_loss, 'Validation' : avg_vloss }, epoch_number + 1) writer.flush() - + # Track best performance, and save the model's state if avg_vloss < best_vloss: best_vloss = avg_vloss model_path = 'model_{}_{}'.format(timestamp, epoch_number) torch.save(model.state_dict(), model_path) - + epoch_number += 1 ######################################################################### # To load a saved version of the model: -# -# :: -# -# saved_model = GarmentClassifier() -# saved_model.load_state_dict(torch.load(PATH)) -# +# +# .. code:: python +# +# saved_model = GarmentClassifier() +# saved_model.load_state_dict(torch.load(PATH)) +# # Once you’ve loaded the model, it’s ready for whatever you need it for - # more training, inference, or analysis. -# +# # Note that if your model has constructor parameters that affect model # structure, you’ll need to provide them and configure the model # identically to the state in which it was saved. -# +# # Other Resources # --------------- -# +# # - Docs on the `data # utilities `__, including # Dataset and DataLoader, at pytorch.org @@ -355,10 +355,10 @@ def train_one_epoch(epoch_index, tb_writer): # includes optimizers and related tools, such as learning rate # scheduling # - A detailed `tutorial on saving and loading -# models `__ +# models `__ # - The `Tutorials section of -# pytorch.org `__ contains tutorials on +# pytorch.org `__ contains tutorials on # a broad variety of training tasks, including classification in # different domains, generative adversarial networks, reinforcement -# learning, and more -# +# learning, and more +# diff --git a/beginner_source/nn_tutorial.py b/beginner_source/nn_tutorial.py index c629b4d0e..e9bf54214 100644 --- a/beginner_source/nn_tutorial.py +++ b/beginner_source/nn_tutorial.py @@ -2,16 +2,14 @@ """ `torch.nn` 이 *실제로* 무엇인가요? ===================================== -저자: Jeremy Howard, `fast.ai `_. -도움: Rachel Thomas, Francisco Ingham. - -번역: `남상호 `_ +**저자**: Jeremy Howard, `fast.ai `_. Rachel Thomas, Francisco Ingham에 감사합니다. +**번역**: `남상호 `_ """ ############################################################################### -# 이 튜토리얼을 스크립트가 아닌 노트북으로 실행하기를 권장합니다. 노트북 (.ipynb) 파일을 다운 받으시려면, +# 이 튜토리얼을 스크립트가 아닌 노트북으로 실행하기를 권장합니다. 노트북 (``.ipynb``) 파일을 다운 받으시려면, # 페이지 상단에 있는 링크를 클릭해주세요. # # PyTorch 는 여러분이 신경망(neural network)를 생성하고 학습시키는 것을 도와주기 위해서 @@ -30,7 +28,7 @@ # 이것이 어떻게 코드를 더 간결하고 유연하게 만드는지 보여줄 것입니다. # # **이 튜토리얼은 여러분이 이미 PyTorch를 설치하였고, 그리고 텐서 연산의 기초에 대해 익숙하다고 가정합니다.** -# (만약 여러분이 Numpy 배열(array) 연산에 익숙하다면, 여기에서 사용되는 PyTorch 텐서 연산도 +# (만약 여러분이 NumPy 배열(array) 연산에 익숙하다면, 여기에서 사용되는 PyTorch 텐서 연산도 # 거의 동일하다는 것을 알게 될 것입니다). # # MNIST 데이터 준비 @@ -61,7 +59,7 @@ (PATH / FILENAME).open("wb").write(content) ############################################################################### -# 이 데이터셋은 numpy 배열 포맷이고, 데이터를 직렬화하기 위한 +# 이 데이터셋은 NumPy 배열 포맷이고, 데이터를 직렬화하기 위한 # python 전용 포맷 pickle 을 이용하여 저장되어 있습니다. import pickle @@ -81,7 +79,7 @@ print(x_train.shape) ############################################################################### -# PyTorch는 numpy 배열 보다는 ``torch.tensor`` 를 사용하므로, 우리는 데이터를 변환해야 합니다. +# PyTorch는 NumPy 배열 보다는 ``torch.tensor`` 를 사용하므로, 우리는 데이터를 변환해야 합니다. 
import torch @@ -94,8 +92,8 @@ print(y_train.min(), y_train.max()) ############################################################################### -# torch.nn 없이 밑바닥부터 신경망 만들기 -# --------------------------------------------- +# ()``torch.nn`` 없이) 밑바닥부터 신경망 만들기 +# ----------------------------------------------- # # PyTorch 텐서 연산만으로 첫 모델을 만들어봅시다. # 여러분이 신경망의 기초에 대해서 이미 익숙하다고 가정합니다. @@ -113,7 +111,7 @@ # (PyTorch에서 ``_`` 다음에 오는 메소드 이름은 연산이 인플레이스(in-place)로 수행되는 것을 의미합니다.) # # .. note:: `Xavier initialisation `_ -# 기법을 이용하여 가중치를 초기화 합니다. (1/sqrt(n)을 곱해주는 것을 통해서 초기화). +# 기법을 이용하여 가중치를 초기화 합니다. (``1/sqrt(n)`` 을 곱해서 초기화). import math @@ -242,8 +240,8 @@ def accuracy(out, yb): print(loss_func(model(xb), yb), accuracy(model(xb), yb)) ############################################################################### -# torch.nn.functional 사용하기 -# ------------------------------ +# ``torch.nn.functional`` 사용하기 +# ----------------------------------- # # 이제 우리는 코드를 리팩토링(refactoring) 하겠습니다, 그럼으로써 이전과 동일하지만, # PyTorch의 ``nn`` 클래스의 장점을 활용하여 더 간결하고 유연하게 만들 것입니다. @@ -260,7 +258,7 @@ def accuracy(out, yb): # 앞으로 보시겠지만 일반적으로 라이브러리의 다른 부분을 사용하여 더 잘 처리 할 수 있습니다.) # # 만약 여러분들이 음의 로그 우도 손실과 로그 소프트맥스 (log softmax) 활성화 함수를 사용하는 경우, -# Pytorch는 이 둘을 결합하는 단일 함수인 ``F.cross_entropy`` 를 제공합니다. +# PyTorch는 이 둘을 결합하는 단일 함수인 ``F.cross_entropy`` 를 제공합니다. # 따라서 모델에서 활성화 함수를 제거할 수도 있습니다. import torch.nn.functional as F @@ -277,8 +275,8 @@ def model(xb): print(loss_func(model(xb), yb), accuracy(model(xb), yb)) ############################################################################### -# nn.Module 을 이용하여 리팩토링 하기 -# -------------------------------------- +# ``nn.Module`` 을 이용하여 리팩토링 하기 +# ----------------------------------------- # 다음으로, 더 명확하고 간결한 훈련 루프를 위해 ``nn.Module`` 및 ``nn.Parameter`` 를 사용합니다. # 우리는 ``nn.Module`` (자체가 클래스이고 상태를 추척할 수 있는) 하위 클래스(subclass)를 만듭니다. # 이 경우에는, 포워드(forward) 단계에 대한 가중치, 절편, 그리고 메소드(method) 등을 유지하는 @@ -311,7 +309,7 @@ def forward(self, xb): ############################################################################### # 이제 우리는 이전과 동일한 방식으로 손실을 계산할 수 있습니다. # 여기서 ``nn.Module`` 오브젝트들은 마치 함수처럼 사용됩니다 (즉, 이들은 *호출가능* 합니다), -# 그러나 배후에서 Pytorch 는 우리의 ``forward`` 메소드를 자동으로 호출합니다. +# 그러나 배후에서 PyTorch 는 우리의 ``forward`` 메소드를 자동으로 호출합니다. print(loss_func(model(xb), yb)) @@ -321,11 +319,11 @@ def forward(self, xb): # # :: # -# with torch.no_grad(): -# weights -= weights.grad * lr -# bias -= bias.grad * lr -# weights.grad.zero_() -# bias.grad.zero_() +# with torch.no_grad(): +# weights -= weights.grad * lr +# bias -= bias.grad * lr +# weights.grad.zero_() +# bias.grad.zero_() # # # 이제 우리는 model.parameters() 및 model.zero_grad() (모두 @@ -334,9 +332,9 @@ def forward(self, xb): # # :: # -# with torch.no_grad(): -# for p in model.parameters(): p -= p.grad * lr -# model.zero_grad() +# with torch.no_grad(): +# for p in model.parameters(): p -= p.grad * lr +# model.zero_grad() # # # 이제 이것을 나중에 다시 실행할 수 있도록 ``fit`` 함수로 작은 훈련 루프를 감쌀 것입니다. @@ -365,15 +363,15 @@ def fit(): print(loss_func(model(xb), yb)) ############################################################################### -# nn.Linear 를 이용하여 리팩토링 하기 -# ------------------------------------ +# ``nn.Linear`` 를 사용하여 리팩토링 하기 +# ----------------------------------------- # # 계속해서 코드를 리팩토링 합니다. ``self.weights`` 및 ``self.bias`` 를 수동으로 정의 및 # 초기화하고, ``xb @ self.weights + self.bias`` 를 계산하는 대신에, -# 위의 모든 것을 해줄 Pytorch 클래스인 +# 위의 모든 것을 해줄 PyTorch 클래스인 # `nn.Linear `_ 를 선형 # 레이어로 사용합니다. 
-# Pytorch 에는 다양한 유형의 코드를 크게 단순화 할 수 있는 미리 정의된 레이어가 있고 이는 또한 +# PyTorch 에는 다양한 유형의 코드를 크게 단순화 할 수 있는 미리 정의된 레이어가 있고 이는 또한 # 종종 기존 코드보다 속도를 빠르게 합니다. class Mnist_Logistic(nn.Module): @@ -398,10 +396,10 @@ def forward(self, xb): print(loss_func(model(xb), yb)) ############################################################################### -# optim 을 이용하여 리팩토링 하기 +# ``torch.optim`` 을 이용하여 리팩토링 하기 # --------------------------------- # -# Pytorch에는 다양한 최적화(optimization) 알고리즘을 가진 패키지인 ``torch.optim`` 도 있습니다. +# PyTorch에는 다양한 최적화(optimization) 알고리즘을 가진 패키지인 ``torch.optim`` 도 있습니다. # 각 매개변수를 수동으로 업데이트 하는 대신, 옵티마이저(optimizer)의 ``step`` 메소드를 사용하여 # 업데이트를 진행할 수 있습니다. # @@ -409,16 +407,16 @@ def forward(self, xb): # # :: # -# with torch.no_grad(): -# for p in model.parameters(): p -= p.grad * lr -# model.zero_grad() +# with torch.no_grad(): +# for p in model.parameters(): p -= p.grad * lr +# model.zero_grad() # # 대신에 이렇게 말이죠: # # :: # -# opt.step() -# opt.zero_grad() +# opt.step() +# opt.zero_grad() # # (``optim.zero_grad()`` 는 기울기를 0으로 재설정 해줍니다. 다음 미니 배치에 대한 # 기울기를 계산하기 전에 호출해야 합니다.) @@ -476,7 +474,7 @@ def get_model(): train_ds = TensorDataset(x_train, y_train) ############################################################################### -# 이전에는 x 및 y 값의 미니 배치를 별도로 반복해야했습니다: +# 이전에는 ``x`` 및 ``y`` 값의 미니 배치를 별도로 반복해야 했습니다: # # :: # @@ -484,7 +482,7 @@ def get_model(): # yb = y_train[start_i:end_i] # # -# 이제 이 두 단계를 함께 수행 할 수 있습니다: +# 이제 이 두 단계를 함께 수행할 수 있습니다: # # :: # @@ -506,14 +504,14 @@ def get_model(): print(loss_func(model(xb), yb)) ############################################################################### -# DataLoader 를 이용하여 리팩토링하기 +# ``DataLoader`` 를 사용하여 리팩토링하기 # ----------------------------------- # -# Pytorch 의 ``DataLoader`` 는 배치 관리를 담당합니다. +# PyTorch 의 ``DataLoader`` 는 배치 관리를 담당합니다. # 여러분들은 모든 ``Dataset`` 으로부터 ``DataLoader`` 를 생성할 수 있습니다. # ``DataLoader`` 는 배치들에 대해서 반복하기 쉽게 만들어줍니다. # ``train_ds[i*bs : i*bs+bs]`` 를 사용하는 대신, -# DataLoader 는 매 미니배치를 자동적으로 제공합니다. +# ``DataLoader`` 는 매 미니배치를 자동적으로 제공합니다. from torch.utils.data import DataLoader @@ -521,20 +519,20 @@ def get_model(): train_dl = DataLoader(train_ds, batch_size=bs) ############################################################################### -# 이전에는 루프가 다음과 같이 배치 (xb, yb)를 반복했습니다: +# 이전에는 루프가 다음과 같이 배치 ``(xb, yb)`` 를 반복했습니다: # # :: # -# for i in range((n-1)//bs + 1): -# xb,yb = train_ds[i*bs : i*bs+bs] -# pred = model(xb) +# for i in range((n-1)//bs + 1): +# xb,yb = train_ds[i*bs : i*bs+bs] +# pred = model(xb) # # 이제 (xb, yb)가 DataLoader 에서 자동으로 로드되므로 루프가 훨씬 깨끗해졌습니다: # # :: # -# for xb,yb in train_dl: -# pred = model(xb) +# for xb,yb in train_dl: +# pred = model(xb) model, opt = get_model() @@ -550,7 +548,7 @@ def get_model(): print(loss_func(model(xb), yb)) ############################################################################### -# Pytorch의 nn.Module, nn.Parameter, Dataset 및 DataLoader 덕분에 이제 훈련 루프가 +# PyTorch의 nn.Module, nn.Parameter, Dataset 및 DataLoader 덕분에 이제 훈련 루프가 # 훨씬 더 작아지고 이해하기 쉬워졌습니다. # 이제 실제로 효과적인 모델을 만드는 데 필요한 기본 기능을 추가해 보겠습니다. # @@ -673,11 +671,11 @@ def get_data(train_ds, valid_ds, bs): # 이전 섹션의 어떤 함수도 모델의 형식에 대해 가정하지 않기 때문에, # 별도의 수정없이 CNN을 학습하는 데 사용할 수 있습니다. # -# Pytorch 의 사전정의된 +# Pytorch의 사전정의된 # `Conv2d `_ 클래스를 # 컨볼루션 레이어로 사용합니다. 3개의 컨볼루션 레이어로 CNN을 정의합니다. # 각 컨볼루션 뒤에는 ReLU가 있습니다. 마지막으로 평균 풀링(average pooling)을 수행합니다. -# (``view`` 는 PyTorch의 numpy ``reshape`` 버전입니다.) +# (``view`` 는 PyTorch의 NumPy ``reshape`` 버전입니다.) 
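As a tiny illustration of ``view`` (the values here are arbitrary): it reinterprets the same underlying data with a new shape, much like NumPy's ``reshape``.

.. code-block:: python

    import torch

    t = torch.arange(6)
    print(t.view(2, 3))         # tensor([[0, 1, 2], [3, 4, 5]])
    print(t.view(-1, 1).shape)  # torch.Size([6, 1])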
class Mnist_CNN(nn.Module): def __init__(self): @@ -707,8 +705,8 @@ def forward(self, xb): fit(epochs, model, loss_func, opt, train_dl, valid_dl) ############################################################################### -# nn.Sequential -# ------------------------ +# ``nn.Sequential`` 사용하기 +# ---------------------------- # # ``torch.nn`` 에는 코드를 간단히 사용할 수 있는 또 다른 편리한 클래스인 # `Sequential `_ @@ -753,7 +751,7 @@ def preprocess(x): fit(epochs, model, loss_func, opt, train_dl, valid_dl) ############################################################################### -# DataLoader 감싸기 +# ``DataLoader`` 감싸기 # ----------------------------- # # 우리의 CNN은 상당히 간결하지만, MNIST에서만 작동합니다, 왜냐하면: @@ -813,7 +811,7 @@ def __iter__(self): # # 만약 여러분들이 운이 좋아서 CUDA 지원 GPU (대부분의 클라우드 제공 업체에서 # 시간당 약 $0.50 에 이용할 수 있습니다) 를 사용할 수 있다면, 코드 실행 속도를 높일 수 있습니다. -# 먼저 GPU가 Pytorch에서 작동하는지 확인합니다: +# 먼저 GPU가 PyTorch에서 작동하는지 확인합니다: print(torch.cuda.is_available()) @@ -864,7 +862,7 @@ def preprocess(x, y): # 그리고 ``DataLoader`` 의 각 예제를 통해 설명하겠다고 이야기했었습니다. # 이제 위의 내용들을 요약해보겠습니다: # -# - **torch.nn** +# - ``torch.nn``: # # + ``Module``: 함수처럼 동작하지만, 또한 상태(state) (예를 들어, 신경망의 레이어 가중치)를 # 포함할 수 있는 호출 가능한 오브젝트를 생성합니다. @@ -877,6 +875,6 @@ def preprocess(x, y): # 상태를 저장하지않는(non-stateful) 버전의 레이어를 포함합니다. # - ``torch.optim``: 역전파 단계에서 ``Parameter`` 의 가중치를 업데이트하는, # ``SGD`` 와 같은 옵티마이저를 포함합니다. -# - ``Dataset``: ``TensorDataset`` 과 같이 Pytorch와 함께 제공되는 클래스를 포함하여 ``__len__`` 및 +# - ``Dataset``: ``TensorDataset`` 과 같이 PyTorch와 함께 제공되는 클래스를 포함하여 ``__len__`` 및 # ``__getitem__`` 이 있는 객체의 추상 인터페이스 # - ``DataLoader``: 모든 종류의 ``Dataset`` 을 기반으로 데이터의 배치들을 출력하는 반복자(iterator)를 생성합니다. diff --git a/beginner_source/saving_loading_models.py b/beginner_source/saving_loading_models.py index d89f84efd..9633ad899 100644 --- a/beginner_source/saving_loading_models.py +++ b/beginner_source/saving_loading_models.py @@ -148,8 +148,8 @@ # PyTorch 버전 1.6에서는 ``torch.save`` 가 새로운 Zip파일-기반의 파일 # 포맷을 사용하도록 변경되었습니다. ``torch.load`` 는 예전 방식의 파일들을 # 읽어올 수 있도록 하고 있습니다. 어떤 이유에서든 ``torch.save`` 가 예전 -# 방식을 사용하도록 하고 싶다면, ``_use_new_zipfile_serialization=False`` 을 -# kwarg로 전달하세요. +# 방식을 사용하도록 하고 싶다면, ``kwarg`` 매개변수로 +# ``_use_new_zipfile_serialization=False`` 을 전달하세요. # # 추론을 위해 모델을 저장할 때는 그 모델의 학습된 매개변수만 저장하면 됩니다. # ``torch.save()`` 를 사용하여 모델의 *state_dict* 를 저장하는 것이 나중에 모델을 @@ -471,7 +471,7 @@ # # 모델에서 사용하는 input Tensor들은 input = input.to(device) 을 호출해야 합니다. # # CPU에서 학습한 모델을 GPU에서 불러올 때는 ``torch.load()`` 함수의 -# ``map_location`` 인자에 *cuda:device_id* 을 설정합니다. 이렇게 하면 모델이 해당 +# ``map_location`` 인자에 ``cuda:device_id`` 을 설정합니다. 이렇게 하면 모델이 해당 # GPU 장치에 불러와집니다. 다음으로 ``model.to(torch.device('cuda'))`` 을 호출하여 # 모델의 매개변수 Tensor들을 CUDA Tensor들로 변환해야 합니다. 마지막으로 모든 # 모델 입력에 ``.to(torch.device('cuda'))`` 을 사용하여 CUDA 최적화된 모델을 위한 diff --git a/beginner_source/t5_tutorial.py b/beginner_source/t5_tutorial.py new file mode 100644 index 000000000..8f77cd278 --- /dev/null +++ b/beginner_source/t5_tutorial.py @@ -0,0 +1,456 @@ +""" +T5-Base Model for Summarization, Sentiment Classification, and Translation +========================================================================== + +**Authors**: `Pendo Abbo `__, `Joe Cummings `__ + +""" + +###################################################################### +# Overview +# -------- +# +# This tutorial demonstrates how to use a pretrained T5 Model for summarization, sentiment classification, and +# translation tasks. We will demonstrate how to use the torchtext library to: +# +# 1. Build a text preprocessing pipeline for a T5 model +# 2. 
Instantiate a pretrained T5 model with base configuration +# 3. Read in the CNNDM, IMDB, and Multi30k datasets and preprocess their texts in preparation for the model +# 4. Perform text summarization, sentiment classification, and translation +# +# .. note:: +# This tutorial requires PyTorch 2.0.0 or later. +# +####################################################################### +# Data Transformation +# ------------------- +# +# The T5 model does not work with raw text. Instead, it requires the text to be transformed into numerical form +# in order to perform training and inference. The following transformations are required for the T5 model: +# +# 1. Tokenize text +# 2. Convert tokens into (integer) IDs +# 3. Truncate the sequences to a specified maximum length +# 4. Add end-of-sequence (EOS) and padding token IDs +# +# T5 uses a ``SentencePiece`` model for text tokenization. Below, we use a pretrained ``SentencePiece`` model to build +# the text preprocessing pipeline using torchtext's T5Transform. Note that the transform supports both +# batched and non-batched text input (for example, one can either pass a single sentence or a list of sentences), however the T5 model expects the input to be batched. +# + +from torchtext.models import T5Transform + +padding_idx = 0 +eos_idx = 1 +max_seq_len = 512 +t5_sp_model_path = "https://download.pytorch.org/models/text/t5_tokenizer_base.model" + +transform = T5Transform( + sp_model_path=t5_sp_model_path, + max_seq_len=max_seq_len, + eos_idx=eos_idx, + padding_idx=padding_idx, +) + +####################################################################### +# Alternatively, we can also use the transform shipped with the pretrained models that does all of the above out-of-the-box +# +# .. code-block:: +# +# from torchtext.models import T5_BASE_GENERATION +# transform = T5_BASE_GENERATION.transform() +# + + +###################################################################### +# Model Preparation +# ----------------- +# +# torchtext provides SOTA pretrained models that can be used directly for NLP tasks or fine-tuned on downstream tasks. Below +# we use the pretrained T5 model with standard base configuration to perform text summarization, sentiment classification, and +# translation. For additional details on available pretrained models, see `the torchtext documentation `__ +# +# +from torchtext.models import T5_BASE_GENERATION + + +t5_base = T5_BASE_GENERATION +transform = t5_base.transform() +model = t5_base.get_model() +model.eval() + + +####################################################################### +# Using ``GenerationUtils`` +# ------------------------- +# +# We can use torchtext's ``GenerationUtils`` to produce an output sequence based on the input sequence provided. This calls on the +# model's encoder and decoder, and iteratively expands the decoded sequences until the end-of-sequence token is generated +# for all sequences in the batch. The ``generate`` method shown below uses greedy search to generate the sequences. Beam search and +# other decoding strategies are also supported. +# +# +from torchtext.prototype.generate import GenerationUtils + +sequence_generator = GenerationUtils(model) + + +####################################################################### +# Datasets +# -------- +# torchtext provides several standard NLP datasets. For a complete list, refer to the documentation +# at https://pytorch.org/text/stable/datasets.html. 
These datasets are built using composable torchdata +# datapipes and hence support standard flow-control and mapping/transformation using user defined +# functions and transforms. +# +# Below we demonstrate how to preprocess the CNNDM dataset to include the prefix necessary for the +# model to identify the task it is performing. The CNNDM dataset has a train, validation, and test +# split. Below we demo on the test split. +# +# The T5 model uses the prefix "summarize" for text summarization. For more information on task +# prefixes, please visit Appendix D of the `T5 Paper `__ +# +# .. note:: +# Using datapipes is still currently subject to a few caveats. If you wish +# to extend this example to include shuffling, multi-processing, or +# distributed learning, please see :ref:`this note ` +# for further instructions. + +from functools import partial + +from torch.utils.data import DataLoader +from torchtext.datasets import CNNDM + +cnndm_batch_size = 5 +cnndm_datapipe = CNNDM(split="test") +task = "summarize" + + +def apply_prefix(task, x): + return f"{task}: " + x[0], x[1] + + +cnndm_datapipe = cnndm_datapipe.map(partial(apply_prefix, task)) +cnndm_datapipe = cnndm_datapipe.batch(cnndm_batch_size) +cnndm_datapipe = cnndm_datapipe.rows2columnar(["article", "abstract"]) +cnndm_dataloader = DataLoader(cnndm_datapipe, shuffle=True, batch_size=None) + +####################################################################### +# Alternately, we can also use batched API, for example, apply the prefix on the whole batch: +# +# .. code-block:: +# +# def batch_prefix(task, x): +# return { +# "article": [f'{task}: ' + y for y in x["article"]], +# "abstract": x["abstract"] +# } +# +# cnndm_batch_size = 5 +# cnndm_datapipe = CNNDM(split="test") +# task = 'summarize' +# +# cnndm_datapipe = cnndm_datapipe.batch(cnndm_batch_size).rows2columnar(["article", "abstract"]) +# cnndm_datapipe = cnndm_datapipe.map(partial(batch_prefix, task)) +# cnndm_dataloader = DataLoader(cnndm_datapipe, batch_size=None) +# + +####################################################################### +# We can also load the IMDB dataset, which will be used to demonstrate sentiment classification using the T5 model. +# This dataset has a train and test split. Below we demo on the test split. +# +# The T5 model was trained on the SST2 dataset (also available in torchtext) for sentiment classification using the +# prefix ``sst2 sentence``. Therefore, we will use this prefix to perform sentiment classification on the IMDB dataset. +# + +from torchtext.datasets import IMDB + +imdb_batch_size = 3 +imdb_datapipe = IMDB(split="test") +task = "sst2 sentence" +labels = {"1": "negative", "2": "positive"} + + +def process_labels(labels, x): + return x[1], labels[str(x[0])] + + +imdb_datapipe = imdb_datapipe.map(partial(process_labels, labels)) +imdb_datapipe = imdb_datapipe.map(partial(apply_prefix, task)) +imdb_datapipe = imdb_datapipe.batch(imdb_batch_size) +imdb_datapipe = imdb_datapipe.rows2columnar(["text", "label"]) +imdb_dataloader = DataLoader(imdb_datapipe, batch_size=None) + +####################################################################### +# Finally, we can also load the Multi30k dataset to demonstrate English to German translation using the T5 model. +# This dataset has a train, validation, and test split. Below we demo on the test split. +# +# The T5 model uses the prefix "translate English to German" for this task. 
+ +from torchtext.datasets import Multi30k + +multi_batch_size = 5 +language_pair = ("en", "de") +multi_datapipe = Multi30k(split="test", language_pair=language_pair) +task = "translate English to German" + +multi_datapipe = multi_datapipe.map(partial(apply_prefix, task)) +multi_datapipe = multi_datapipe.batch(multi_batch_size) +multi_datapipe = multi_datapipe.rows2columnar(["english", "german"]) +multi_dataloader = DataLoader(multi_datapipe, batch_size=None) + +####################################################################### +# Generate Summaries +# ------------------ +# +# We can put all of the components together to generate summaries on the first batch of articles in the CNNDM test set +# using a beam size of 1. +# + +batch = next(iter(cnndm_dataloader)) +input_text = batch["article"] +target = batch["abstract"] +beam_size = 1 + +model_input = transform(input_text) +model_output = sequence_generator.generate(model_input, eos_idx=eos_idx, num_beams=beam_size) +output_text = transform.decode(model_output.tolist()) + +for i in range(cnndm_batch_size): + print(f"Example {i+1}:\n") + print(f"prediction: {output_text[i]}\n") + print(f"target: {target[i]}\n\n") + + +####################################################################### +# Summarization Output (Might vary since we shuffle the dataloader) +# -------------------- +# +# .. code-block:: +# +# Example 1: +# +# prediction: the 24-year-old has been tattooed for over a decade . he has landed in australia +# to start work on a new campaign . he says he is 'taking it in your stride' to be honest . +# +# target: London-based model Stephen James Hendry famed for his full body tattoo . The supermodel +# is in Sydney for a new modelling campaign . Australian fans understood to have already located +# him at his hotel . The 24-year-old heartthrob is recently single . +# +# +# Example 2: +# +# prediction: a stray pooch has used up at least three of her own after being hit by a +# car and buried in a field . the dog managed to stagger to a nearby farm, dirt-covered +# and emaciated, where she was found . she suffered a dislocated jaw, leg injuries and a +# caved-in sinus cavity -- and still requires surgery to help her breathe . +# +# target: Theia, a bully breed mix, was apparently hit by a car, whacked with a hammer +# and buried in a field . "She's a true miracle dog and she deserves a good life," says +# Sara Mellado, who is looking for a home for Theia . +# +# +# Example 3: +# +# prediction: mohammad Javad Zarif arrived in Iran on a sunny friday morning . he has gone +# a long way to bring Iran in from the cold and allow it to rejoin the international +# community . but there are some facts about him that are less well-known . +# +# target: Mohammad Javad Zarif has spent more time with John Kerry than any other +# foreign minister . He once participated in a takeover of the Iranian Consulate in San +# Francisco . The Iranian foreign minister tweets in English . +# +# +# Example 4: +# +# prediction: five americans were monitored for three weeks after being exposed to Ebola in +# west africa . one of the five had a heart-related issue and has been discharged but hasn't +# left the area . they are clinicians for Partners in Health, a Boston-based aid group . +# +# target: 17 Americans were exposed to the Ebola virus while in Sierra Leone in March . +# Another person was diagnosed with the disease and taken to hospital in Maryland . +# National Institutes of Health says the patient is in fair condition after weeks of +# treatment . 
+# +# +# Example 5: +# +# prediction: the student was identified during an investigation by campus police and +# the office of student affairs . he admitted to placing the noose on the tree early +# Wednesday morning . the incident is one of several recent racist events to affect +# college students . +# +# target: Student is no longer on Duke University campus and will face disciplinary +# review . School officials identified student during investigation and the person +# admitted to hanging the noose, Duke says . The noose, made of rope, was discovered on +# campus about 2 a.m. +# + + +####################################################################### +# Generate Sentiment Classifications +# ---------------------------------- +# +# Similarly, we can use the model to generate sentiment classifications on the first batch of reviews from the IMDB test set +# using a beam size of 1. +# + +batch = next(iter(imdb_dataloader)) +input_text = batch["text"] +target = batch["label"] +beam_size = 1 + +model_input = transform(input_text) +model_output = sequence_generator.generate(model_input, eos_idx=eos_idx, num_beams=beam_size) +output_text = transform.decode(model_output.tolist()) + +for i in range(imdb_batch_size): + print(f"Example {i+1}:\n") + print(f"input_text: {input_text[i]}\n") + print(f"prediction: {output_text[i]}\n") + print(f"target: {target[i]}\n\n") + + +####################################################################### +# Sentiment Output +# ---------------- +# +# :: +# +# Example 1: +# +# input_text: sst2 sentence: I love sci-fi and am willing to put up with a lot. Sci-fi +# movies/TV are usually underfunded, under-appreciated and misunderstood. I tried to like +# this, I really did, but it is to good TV sci-fi as Babylon 5 is to Star Trek (the original). +# Silly prosthetics, cheap cardboard sets, stilted dialogues, CG that doesn't match the +# background, and painfully one-dimensional characters cannot be overcome with a 'sci-fi' +# setting. (I'm sure there are those of you out there who think Babylon 5 is good sci-fi TV. +# It's not. It's clichéd and uninspiring.) While US viewers might like emotion and character +# development, sci-fi is a genre that does not take itself seriously (cf. Star Trek). It may +# treat important issues, yet not as a serious philosophy. It's really difficult to care about +# the characters here as they are not simply foolish, just missing a spark of life. Their +# actions and reactions are wooden and predictable, often painful to watch. The makers of Earth +# KNOW it's rubbish as they have to always say "Gene Roddenberry's Earth..." otherwise people +# would not continue watching. Roddenberry's ashes must be turning in their orbit as this dull, +# cheap, poorly edited (watching it without advert breaks really brings this home) trudging +# Trabant of a show lumbers into space. Spoiler. So, kill off a main character. And then bring +# him back as another actor. Jeeez. Dallas all over again. +# +# prediction: negative +# +# target: negative +# +# +# Example 2: +# +# input_text: sst2 sentence: Worth the entertainment value of a rental, especially if you like +# action movies. This one features the usual car chases, fights with the great Van Damme kick +# style, shooting battles with the 40 shell load shotgun, and even terrorist style bombs. All +# of this is entertaining and competently handled but there is nothing that really blows you +# away if you've seen your share before.

The plot is made interesting by the +# inclusion of a rabbit, which is clever but hardly profound. Many of the characters are +# heavily stereotyped -- the angry veterans, the terrified illegal aliens, the crooked cops, +# the indifferent feds, the bitchy tough lady station head, the crooked politician, the fat +# federale who looks like he was typecast as the Mexican in a Hollywood movie from the 1940s. +# All passably acted but again nothing special.

I thought the main villains were +# pretty well done and fairly well acted. By the end of the movie you certainly knew who the +# good guys were and weren't. There was an emotional lift as the really bad ones got their just +# deserts. Very simplistic, but then you weren't expecting Hamlet, right? The only thing I found +# really annoying was the constant cuts to VDs daughter during the last fight scene.

+# Not bad. Not good. Passable 4. +# +# prediction: positive +# +# target: negative +# +# +# Example 3: +# +# input_text: sst2 sentence: its a totally average film with a few semi-alright action sequences +# that make the plot seem a little better and remind the viewer of the classic van dam films. +# parts of the plot don't make sense and seem to be added in to use up time. the end plot is that +# of a very basic type that doesn't leave the viewer guessing and any twists are obvious from the +# beginning. the end scene with the flask backs don't make sense as they are added in and seem to +# have little relevance to the history of van dam's character. not really worth watching again, +# bit disappointed in the end production, even though it is apparent it was shot on a low budget +# certain shots and sections in the film are of poor directed quality. +# +# prediction: negative +# +# target: negative +# + + +####################################################################### +# Generate Translations +# --------------------- +# +# Finally, we can also use the model to generate English to German translations on the first batch of examples from the Multi30k +# test set. +# + +batch = next(iter(multi_dataloader)) +input_text = batch["english"] +target = batch["german"] + +model_input = transform(input_text) +model_output = sequence_generator.generate(model_input, eos_idx=eos_idx, num_beams=beam_size) +output_text = transform.decode(model_output.tolist()) + +for i in range(multi_batch_size): + print(f"Example {i+1}:\n") + print(f"input_text: {input_text[i]}\n") + print(f"prediction: {output_text[i]}\n") + print(f"target: {target[i]}\n\n") + + +####################################################################### +# Translation Output +# ------------------ +# +# :: +# +# Example 1: +# +# input_text: translate English to German: A man in an orange hat starring at something. +# +# prediction: Ein Mann in einem orangen Hut, der an etwas schaut. +# +# target: Ein Mann mit einem orangefarbenen Hut, der etwas anstarrt. +# +# +# Example 2: +# +# input_text: translate English to German: A Boston Terrier is running on lush green grass in front of a white fence. +# +# prediction: Ein Boston Terrier läuft auf üppigem grünem Gras vor einem weißen Zaun. +# +# target: Ein Boston Terrier läuft über saftig-grünes Gras vor einem weißen Zaun. +# +# +# Example 3: +# +# input_text: translate English to German: A girl in karate uniform breaking a stick with a front kick. +# +# prediction: Ein Mädchen in Karate-Uniform bricht einen Stöck mit einem Frontkick. +# +# target: Ein Mädchen in einem Karateanzug bricht ein Brett mit einem Tritt. +# +# +# Example 4: +# +# input_text: translate English to German: Five people wearing winter jackets and helmets stand in the snow, with snowmobiles in the background. +# +# prediction: Fünf Menschen mit Winterjacken und Helmen stehen im Schnee, mit Schneemobilen im Hintergrund. +# +# target: Fünf Leute in Winterjacken und mit Helmen stehen im Schnee mit Schneemobilen im Hintergrund. +# +# +# Example 5: +# +# input_text: translate English to German: People are fixing the roof of a house. +# +# prediction: Die Leute fixieren das Dach eines Hauses. +# +# target: Leute Reparieren das Dach eines Hauses. 
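+#
+
+#######################################################################
+# As an optional sanity check, we could score this batch of generated translations
+# against the reference translations using ``bleu_score`` from
+# ``torchtext.data.metrics``. This is only a minimal sketch: the whitespace
+# tokenization below is a rough approximation, and a single small batch is not a
+# meaningful evaluation of translation quality.
+#
+# .. code-block::
+#
+#    from torchtext.data.metrics import bleu_score
+#
+#    candidate_corpus = [prediction.split() for prediction in output_text]
+#    references_corpus = [[reference.split()] for reference in target]
+#    print(f"BLEU on this batch: {bleu_score(candidate_corpus, references_corpus):.4f}")
+#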
+# diff --git a/beginner_source/template_tutorial.py b/beginner_source/template_tutorial.py new file mode 100644 index 000000000..75e8a551f --- /dev/null +++ b/beginner_source/template_tutorial.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- + +""" +Template Tutorial +================= + +**Author:** `FirstName LastName `_ + +.. grid:: 2 + + .. grid-item-card:: :octicon:`mortar-board;1em;` What you will learn + + * Item 1 + * Item 2 + * Item 3 + + .. grid-item-card:: :octicon:`list-unordered;1em;` Prerequisites + + * PyTorch v2.0.0 + * GPU ??? + * Other items 3 + +If you have a video, add it here like this: + +.. raw:: html + +
+ +
+ +To test your tutorial locally, you can do one of the following: + +* You can control specific files that generate the results by using + ``GALLERY_PATTERN`` environment variable. The GALLERY_PATTERN variable + respects regular expressions. + For example to run only ``neural_style_transfer_tutorial.py``, + use the following command: + + .. code-block:: sh + + GALLERY_PATTERN="neural_style_transfer_tutorial.py" make html + + or + + .. code-block:: sh + + GALLERY_PATTERN="neural_style_transfer_tutorial.py" sphinx-build . _build + +* Make a copy of this repository and add only your + tutorial to the `beginner_source` directory removing all other tutorials. + Then run ``make html``. + +Verify that all outputs were generated correctly in the created HTML. +""" + +######################################################################### +# Overview +# -------- +# +# Describe Why is this topic important? Add Links to relevant research papers. +# +# This tutorial walks you through the process of.... +# +# Steps +# ----- +# +# Example code (the output below is generated automatically): +# +import torch +x = torch.rand(5, 3) +print(x) + +###################################################################### +# (Optional) Additional Exercises +# ------------------------------- +# +# Add additional practice exercises for users to test their knowledge. +# Example: `NLP from Scratch `__. +# + +###################################################################### +# Conclusion +# ---------- +# +# Summarize the steps and concepts covered. Highlight key takeaways. +# +# Further Reading +# --------------- +# +# * Link1 +# * Link2 + diff --git a/beginner_source/text_sentiment_ngrams_tutorial.py b/beginner_source/text_sentiment_ngrams_tutorial.py index 98bc04ebc..b6ec7b11e 100644 --- a/beginner_source/text_sentiment_ngrams_tutorial.py +++ b/beginner_source/text_sentiment_ngrams_tutorial.py @@ -166,7 +166,7 @@ class TextClassificationModel(nn.Module): def __init__(self, vocab_size, embed_dim, num_class): super(TextClassificationModel, self).__init__() - self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=True) + self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=False) self.fc = nn.Linear(embed_dim, num_class) self.init_weights() @@ -183,7 +183,7 @@ def forward(self, text, offsets): ###################################################################### # 인스턴스 생성하기 -# ----------------- +# ------------------- # # ``AG_NEWS`` 데이터셋에는 4종류의 레이블이 존재하므로 클래스의 개수도 4개입니다. # @@ -208,7 +208,7 @@ def forward(self, text, offsets): ###################################################################### # 모델을 학습하고 결과를 평가하는 함수 정의하기 -# --------------------------------------------- +# ----------------------------------------------- # @@ -251,7 +251,7 @@ def evaluate(dataloader): ###################################################################### # 데이터셋을 분할하고 모델 수행하기 -# --------------------------------- +# ----------------------------------- # # 원본 ``AG_NEWS`` 에는 검증용 데이터가 포함되어 있지 않기 때문에, 우리는 학습 # 데이터를 학습 및 검증 데이터로 분할하려 합니다. 이때 데이터를 분할하는 @@ -331,7 +331,7 @@ def evaluate(dataloader): ###################################################################### # 임의의 뉴스로 평가하기 -# ---------------------- +# ------------------------ # # 현재까지 최고의 모델로 골프 뉴스를 테스트해보겠습니다. 
# diff --git a/beginner_source/transfer_learning_tutorial.py b/beginner_source/transfer_learning_tutorial.py index 8dca57e8c..13691ae24 100644 --- a/beginner_source/transfer_learning_tutorial.py +++ b/beginner_source/transfer_learning_tutorial.py @@ -2,6 +2,7 @@ """ 컴퓨터 비전(Vision)을 위한 전이학습(Transfer Learning) ======================================================= + **Author**: `Sasank Chilamkurthy `_ **번역**: `박정환 `_ @@ -103,7 +104,7 @@ # 데이터 증가를 이해하기 위해 일부 학습용 이미지를 시각화해보겠습니다. def imshow(inp, title=None): - """Imshow for Tensor.""" + """tensor를 입력받아 일반적인 이미지로 보여줍니다.""" inp = inp.numpy().transpose((1, 2, 0)) mean = np.array([0.485, 0.456, 0.406]) std = np.array([0.229, 0.224, 0.225]) @@ -126,7 +127,7 @@ def imshow(inp, title=None): ###################################################################### # 모델 학습하기 -# -------------- +# --------------- # # 이제 모델을 학습하기 위한 일반 함수를 작성해보겠습니다. 여기서는 다음 내용들을 # 설명합니다: @@ -240,15 +241,15 @@ def visualize_model(model, num_images=6): ###################################################################### # 합성곱 신경망 미세조정(finetuning) -# ---------------------------------- +# ------------------------------------ # # 미리 학습한 모델을 불러온 후 마지막의 완전히 연결된 계층을 초기화합니다. # -model_ft = models.resnet18(pretrained=True) +model_ft = models.resnet18(weights='IMAGENET1K_V1') num_ftrs = model_ft.fc.in_features # 여기서 각 출력 샘플의 크기는 2로 설정합니다. -# 또는, nn.Linear(num_ftrs, len (class_names))로 일반화할 수 있습니다. +# 또는, ``nn.Linear(num_ftrs, len (class_names))`` 로 일반화할 수 있습니다. model_ft.fc = nn.Linear(num_ftrs, 2) model_ft = model_ft.to(device) @@ -290,7 +291,7 @@ def visualize_model(model, num_images=6): # 에서 확인할 수 있습니다. # -model_conv = torchvision.models.resnet18(pretrained=True) +model_conv = torchvision.models.resnet18(weights='IMAGENET1K_V1') for param in model_conv.parameters(): param.requires_grad = False diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index d39dfe7c1..5fe3bc27e 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -1,6 +1,6 @@ """ -nn.Transformer 와 TorchText 로 시퀀스-투-시퀀스(Sequence-to-Sequence) 모델링하기 -================================================================================= +``nn.Transformer`` 와 torchtext로 시퀀스-투-시퀀스(Sequence-to-Sequence) 모델링하기 +===================================================================================== 이 튜토리얼에서는 `nn.Transformer `__ 모듈을 @@ -42,6 +42,8 @@ # import math +import os +from tempfile import TemporaryDirectory from typing import Tuple import torch @@ -73,12 +75,12 @@ def init_weights(self) -> None: def forward(self, src: Tensor, src_mask: Tensor) -> Tensor: """ - Args: - src: Tensor, shape [seq_len, batch_size] - src_mask: Tensor, shape [seq_len, seq_len] + Arguments: + src: Tensor, shape ``[seq_len, batch_size]`` + src_mask: Tensor, shape ``[seq_len, seq_len]`` Returns: - output Tensor of shape [seq_len, batch_size, ntoken] + output Tensor of shape ``[seq_len, batch_size, ntoken]`` """ src = self.encoder(src) * math.sqrt(self.d_model) src = self.pos_encoder(src) @@ -88,7 +90,7 @@ def forward(self, src: Tensor, src_mask: Tensor) -> Tensor: def generate_square_subsequent_mask(sz: int) -> Tensor: - """Generates an upper-triangular matrix of -inf, with zeros on diag.""" + """Generates an upper-triangular matrix of ``-inf``, with zeros on ``diag``.""" return torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1) @@ -113,8 +115,8 @@ def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000): def forward(self, x: Tensor) -> 
Tensor: """ - Args: - x: Tensor, shape [seq_len, batch_size, embedding_dim] + Arguments: + x: Tensor, shape ``[seq_len, batch_size, embedding_dim]`` """ x = x + self.pe[:x.size(0)] return self.dropout(x) @@ -172,7 +174,7 @@ def data_process(raw_text_iter: dataset.IterableDataset) -> Tensor: data = [torch.tensor(vocab(tokenizer(item)), dtype=torch.long) for item in raw_text_iter] return torch.cat(tuple(filter(lambda t: t.numel() > 0, data))) -# train_iter was "consumed" by the process of building the vocab, +# ``train_iter`` was "consumed" by the process of building the vocab, # so we have to create it again train_iter, val_iter, test_iter = WikiText2() train_data = data_process(train_iter) @@ -182,15 +184,15 @@ def data_process(raw_text_iter: dataset.IterableDataset) -> Tensor: device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') def batchify(data: Tensor, bsz: int) -> Tensor: - """Divides the data into bsz separate sequences, removing extra elements + """Divides the data into ``bsz`` separate sequences, removing extra elements that wouldn't cleanly fit. - Args: - data: Tensor, shape [N] + Arguments: + data: Tensor, shape ``[N]`` bsz: int, batch size Returns: - Tensor of shape [N // bsz, bsz] + Tensor of shape ``[N // bsz, bsz]`` """ seq_len = data.size(0) // bsz data = data[:seq_len * bsz] @@ -225,13 +227,13 @@ def batchify(data: Tensor, bsz: int) -> Tensor: bptt = 35 def get_batch(source: Tensor, i: int) -> Tuple[Tensor, Tensor]: """ - Args: - source: Tensor, shape [full_seq_len, batch_size] + Arguments: + source: Tensor, shape ``[full_seq_len, batch_size]`` i: int Returns: - tuple (data, target), where data has shape [seq_len, batch_size] and - target has shape [seq_len * batch_size] + tuple ``(data, target)``, where data has shape ``[seq_len, batch_size]`` and + target has shape ``[seq_len * batch_size]`` """ seq_len = min(bptt, len(source) - 1 - i) data = source[i:i+seq_len] @@ -247,21 +249,21 @@ def get_batch(source: Tensor, i: int) -> Tuple[Tensor, Tensor]: ###################################################################### # 모델의 하이퍼파라미터(hyperparameter)는 아래와 같이 정의됩니다. -# 단어 사이즈는 단어 오브젝트의 길이와 일치 합니다. +# 어휘집( ``vocab`` )의 크기는 단어 오브젝트의 길이와 일치 합니다. 
# ntokens = len(vocab) # 단어 사전(어휘집)의 크기 emsize = 200 # 임베딩 차원 -d_hid = 200 # nn.TransformerEncoder 에서 피드포워드 네트워크(feedforward network) 모델의 차원 -nlayers = 2 # nn.TransformerEncoder 내부의 nn.TransformerEncoderLayer 개수 -nhead = 2 # nn.MultiheadAttention의 헤드 개수 +d_hid = 200 # ``nn.TransformerEncoder`` 에서 피드포워드 네트워크(feedforward network) 모델의 차원 +nlayers = 2 # ``nn.TransformerEncoder`` 내부의 nn.TransformerEncoderLayer 개수 +nhead = 2 # ``nn.MultiheadAttention`` 의 헤드 개수 dropout = 0.2 # 드랍아웃(dropout) 확률 model = TransformerModel(ntokens, emsize, nhead, d_hid, nlayers, dropout).to(device) ###################################################################### # 모델 실행하기 -# ------------- +# --------------- # @@ -335,24 +337,27 @@ def evaluate(model: nn.Module, eval_data: Tensor) -> float: best_val_loss = float('inf') epochs = 3 -best_model = None -for epoch in range(1, epochs + 1): - epoch_start_time = time.time() - train(model) - val_loss = evaluate(model, val_data) - val_ppl = math.exp(val_loss) - elapsed = time.time() - epoch_start_time - print('-' * 89) - print(f'| end of epoch {epoch:3d} | time: {elapsed:5.2f}s | ' - f'valid loss {val_loss:5.2f} | valid ppl {val_ppl:8.2f}') - print('-' * 89) +with TemporaryDirectory() as tempdir: + best_model_params_path = os.path.join(tempdir, "best_model_params.pt") - if val_loss < best_val_loss: - best_val_loss = val_loss - best_model = copy.deepcopy(model) + for epoch in range(1, epochs + 1): + epoch_start_time = time.time() + train(model) + val_loss = evaluate(model, val_data) + val_ppl = math.exp(val_loss) + elapsed = time.time() - epoch_start_time + print('-' * 89) + print(f'| end of epoch {epoch:3d} | time: {elapsed:5.2f}s | ' + f'valid loss {val_loss:5.2f} | valid ppl {val_ppl:8.2f}') + print('-' * 89) - scheduler.step() + if val_loss < best_val_loss: + best_val_loss = val_loss + torch.save(model.state_dict(), best_model_params_path) + + scheduler.step() + model.load_state_dict(torch.load(best_model_params_path)) # load best model states ###################################################################### @@ -360,9 +365,9 @@ def evaluate(model: nn.Module, eval_data: Tensor) -> float: # ------------------------------------------------- # -test_loss = evaluate(best_model, test_data) +test_loss = evaluate(model, test_data) test_ppl = math.exp(test_loss) print('=' * 89) print(f'| End of training | test loss {test_loss:5.2f} | ' f'test ppl {test_ppl:8.2f}') -print('=' * 89) +print('=' * 89) \ No newline at end of file diff --git a/beginner_source/translation_transformer.py b/beginner_source/translation_transformer.py index 68a75c70d..768228523 100644 --- a/beginner_source/translation_transformer.py +++ b/beginner_source/translation_transformer.py @@ -1,5 +1,5 @@ """ -nn.Transformer와 torchtext로 언어 번역하기 +``nn.Transformer`` 와 torchtext로 언어 번역하기 ====================================================== 이 튜토리얼에서는, @@ -41,13 +41,17 @@ token_transform = {} vocab_transform = {} - +################################################################################### # 출발어(source)와 목적어(target)의 토크나이저(tokenizer)를 생성합니다. # 아래 필요 사항(dependency)을 모두 설치해주세요. -# pip install -U torchdata -# pip install -U spacy -# python -m spacy download en_core_web_sm -# python -m spacy download de_core_news_sm +# +# .. 
code-block:: python +# +# pip install -U torchdata +# pip install -U spacy +# python -m spacy download en_core_web_sm +# python -m spacy download de_core_news_sm + token_transform[SRC_LANGUAGE] = get_tokenizer('spacy', language='de_core_news_sm') token_transform[TGT_LANGUAGE] = get_tokenizer('spacy', language='en_core_web_sm') @@ -73,9 +77,9 @@ def yield_tokens(data_iter: Iterable, language: str) -> List[str]: specials=special_symbols, special_first=True) -# UNK_IDX를 기본 인덱스로 설정합니다. 이 인덱스는 토큰을 찾지 못하는 경우에 반환됩니다. +# ``UNK_IDX`` 를 기본 인덱스로 설정합니다. 이 인덱스는 토큰을 찾지 못하는 경우에 반환됩니다. # 만약 기본 인덱스를 설정하지 않으면 어휘집(Vocabulary)에서 토큰을 찾지 못하는 경우 -# RuntimeError가 발생합니다. +# ``RuntimeError`` 가 발생합니다. for ln in [SRC_LANGUAGE, TGT_LANGUAGE]: vocab_transform[ln].set_default_index(UNK_IDX) @@ -183,7 +187,7 @@ def decode(self, tgt: Tensor, memory: Tensor, tgt_mask: Tensor): ###################################################################### -# 학습하는 동안, 모델이 예측하는 동안 정답(이후 출현하는 단어)을 보지 못하도록 하는 +# 학습하는 동안, 모델이 예측할 때 정답(이후 출현하는 단어)을 보지 못하도록 하는 # 후속 단어 마스크(subsequent word mask)가 필요합니다. 또한, 출발어와 도착어의 패딩(padding) 토큰들 # 또한 숨겨야 합니다. 아래에 두 가지 모두를 처리할 함수를 정의해보겠습니다. # diff --git a/beginner_source/vt_tutorial.py b/beginner_source/vt_tutorial.py index 483a6d6c1..7ded2f824 100644 --- a/beginner_source/vt_tutorial.py +++ b/beginner_source/vt_tutorial.py @@ -44,16 +44,20 @@ # DeiT를 활용한 이미지 분류 # ------------------------------- # -# DeiT를 사용하여 이미지를 분류하는 방법에 대한 자세한 정보는 DeiT 저장소에 README를 참고하시길 바랍니다. +# DeiT를 사용하여 이미지를 분류하는 방법에 대한 자세한 정보는 DeiT 저장소의 ``README.md`` 를 참고하시길 바랍니다. # 빠른 테스트를 위해서, 먼저 필요한 패키지들을 # 설치합니다: # -# pip install torch torchvision timm pandas requests +# .. code-block:: python +# +# pip install torch torchvision timm pandas requests ####################################################### # Google Colab에서는 아래와 같이 실행합니다: - -# !pip install timm pandas requests +# +# .. code-block:: python +# +# !pip install timm pandas requests ############################# # 그런 다음 아래 스크립트를 실행합니다: @@ -89,7 +93,7 @@ ###################################################################### # ImageNet 목록에 따라 `라벨(labels) 파일 `_ -# 클래스 인덱스의 출력은 269여야 하며, 이는 ‘timber wolf, grey wolf, gray wolf, Canis lupus’에 매핑됩니다. +# 클래스 인덱스의 출력은 269여야 하며, 이는 ``timber wolf, grey wolf, gray wolf, Canis lupus`` 에 매핑됩니다. # # 이제 DeiT 모델을 사용하여 이미지들을 분류할 수 있음을 확인했습니다. # iOS 및 Android 앱에서 실행할 수 있도록 모델을 수정하는 방법을 살펴보겠습니다. @@ -112,7 +116,7 @@ ###################################################################### -# 약 346MB 크기의 스크립팅된 모델 파일 fbdeit_scripted.pt가 생성됩니다. +# 약 346MB 크기의 스크립팅된 모델 파일 ``fbdeit_scripted.pt`` 가 생성됩니다. # # @@ -131,8 +135,10 @@ # 아래의 코드를 실행시켜 봅시다. # +# 서버 추론을 위해 'x86'을, 모바일 추론을 위해 ``qnnpack`` 을 사용합니다. +# (이전의 'fbgemm' 또한 여전히 사용 가능하지만, 'x86'을 기본으로 사용하는 것을 권장합니다.) # 서버 추론을 위해 'fbgemm'을, 모바일 추론을 위해 'qnnpack'을 사용해 봅시다. -backend = "fbgemm" # 이 주피터 노트북에서는 양자화된 모델의 더 느린 추론 속도를 일으키는 qnnpack으로 대체되었습니다. +backend = "x86" # 이 주피터 노트북에서는 양자화된 모델의 더 느린 추론 속도를 일으키는 ``qnnpack`` 으로 대체되었습니다. model.qconfig = torch.quantization.get_default_qconfig(backend) torch.backends.quantized.engine = backend @@ -142,7 +148,7 @@ ###################################################################### -# fbdeit_quantized_scripted.pt 모델의 스크립팅과 양자화가 적용된 버전이 만들어졌습니다. +# ``fbdeit_quantized_scripted.pt`` 모델의 스크립팅과 양자화가 적용된 버전이 만들어졌습니다. # 모델의 크기는 단지 89MB 입니다. # 양자화가 적용되지 않은 모델의 크기인 346MB보다 74%나 감소했습니다! 
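#
# As a quick way to verify the size reduction quoted above (346MB down to roughly
# 89MB, about 74% smaller), one could compare the saved TorchScript files directly.
# This is a minimal sketch and assumes both files were saved to the current working
# directory, as in the steps above:
#
# .. code-block:: python
#
#    import os
#
#    original_mb = os.path.getsize("fbdeit_scripted.pt") / 1e6
#    quantized_mb = os.path.getsize("fbdeit_quantized_scripted.pt") / 1e6
#    print(f"{original_mb:.0f}MB -> {quantized_mb:.0f}MB "
#          f"({100 * (1 - quantized_mb / original_mb):.0f}% smaller)")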
# @@ -170,7 +176,7 @@ ###################################################################### -# 생성된 fbdeit_optimized_scripted_quantized.pt 파일은 +# 생성된 ``fbdeit_optimized_scripted_quantized.pt`` 파일은 # 양자화되고 스크립트되지만 최적화되지 않은 모델과 크기가 거의 같습니다. # 추론 결과는 동일하게 유지됩니다. # diff --git a/conf.py b/conf.py index e66bd0bea..193a08a3a 100644 --- a/conf.py +++ b/conf.py @@ -30,7 +30,7 @@ import os import sys sys.path.insert(0, os.path.abspath('.')) -sys.path.insert(0, os.path.abspath('./.build')) +sys.path.insert(0, os.path.abspath('./.build')) # pytorch/tutorials의 .jenkins/ 의 일부 파일들을 .build/ 에 복사하여 사용 import pytorch_sphinx_theme import torch import glob @@ -38,7 +38,7 @@ from custom_directives import IncludeDirective, GalleryItemDirective, CustomGalleryItemDirective, CustomCalloutItemDirective, CustomCardItemDirective import distutils.file_util import re -from validate_tutorials_built import NOT_RUN +from get_sphinx_filenames import SPHINX_SHOULD_RUN import plotly.io as pio pio.renderers.default = 'sphinx_gallery' @@ -80,6 +80,8 @@ intersphinx_mapping = { "torch": ("https://pytorch.org/docs/stable/", None), + "tensordict": ("https://pytorch-labs.github.io/tensordict/", None), + "torchrl": ("https://pytorch.org/rl/", None), "torchaudio": ("https://pytorch.org/audio/stable/", None), "torchtext": ("https://pytorch.org/text/stable/", None), "torchvision": ("https://pytorch.org/vision/stable/", None), @@ -107,10 +109,12 @@ 'examples_dirs': ['beginner_source', 'intermediate_source', 'advanced_source', 'recipes_source', 'prototype_source'], 'gallery_dirs': ['beginner', 'intermediate', 'advanced', 'recipes', 'prototype'], - 'filename_pattern': '.py', - 'ignore_pattern': re.compile(f"({'|'.join(NOT_RUN)}).py$"), + 'filename_pattern': re.compile(SPHINX_SHOULD_RUN), 'promote_jupyter_magic': True, - 'backreferences_dir': None + 'backreferences_dir': None, + 'first_notebook_cell': ("# Google Colab에서 노트북을 실행하실 때에는 \n" + "# https://tutorials.pytorch.kr/beginner/colab 를 참고하세요.\n" + "%matplotlib inline") } if os.getenv('GALLERY_PATTERN'): @@ -153,7 +157,7 @@ # General information about the project. project = 'PyTorch Tutorials' -copyright = '2022, PyTorch & 파이토치 한국 사용자 모임(PyTorch Korea User Group)' +copyright = '2018-2023, PyTorch & 파이토치 한국 사용자 모임(PyTorch Korea User Group)' author = 'PyTorch contributors' # The version info for the project you're documenting, acts as replacement for @@ -230,6 +234,7 @@ 'collapse_navigation': False, 'display_version': True, 'logo_only': False, + 'navigation_with_keys': True, } diff --git a/distributed/home.rst b/distributed/home.rst index aac2a1df4..09008a50e 100644 --- a/distributed/home.rst +++ b/distributed/home.rst @@ -27,7 +27,7 @@ Learn DDP .. grid-item-card:: :octicon:`file-code;1em` DDP Intro Video Tutorials - :link: https://pytorch.org/tutorials/beginner/ddp_series_intro.html?utm_source=distr_landing&utm_medium=ddp_series_intro + :link: https://tutorials.pytorch.kr/beginner/ddp_series_intro.html?utm_source=distr_landing&utm_medium=ddp_series_intro :link-type: url A step-by-step video series on how to get started with @@ -37,7 +37,7 @@ Learn DDP .. 
grid-item-card:: :octicon:`file-code;1em` Getting Started with Distributed Data Parallel - :link: https://pytorch.org/tutorials/intermediate/ddp_tutorial.html?utm_source=distr_landing&utm_medium=intermediate_ddp_tutorial + :link: https://tutorials.pytorch.kr/intermediate/ddp_tutorial.html?utm_source=distr_landing&utm_medium=intermediate_ddp_tutorial :link-type: url This tutorial provides a short and gentle intro to the PyTorch @@ -48,10 +48,10 @@ Learn DDP .. grid-item-card:: :octicon:`file-code;1em` Distributed Training with Uneven Inputs Using the Join Context Manager - :link: https://pytorch.org/tutorials/advanced/generic_join.html?utm_source=distr_landing&utm_medium=generic_join + :link: https://tutorials.pytorch.kr/advanced/generic_join.html?utm_source=distr_landing&utm_medium=generic_join :link-type: url - This tutorial describes the Join context manager and + This tutorial describes the Join context manager and demonstrates it's use with DistributedData Parallel. +++ :octicon:`code;1em` Code @@ -65,7 +65,7 @@ Learn FSDP .. grid-item-card:: :octicon:`file-code;1em` Getting Started with FSDP - :link: https://pytorch.org/tutorials/intermediate/FSDP_tutorial.html?utm_source=distr_landing&utm_medium=FSDP_getting_started + :link: https://tutorials.pytorch.kr/intermediate/FSDP_tutorial.html?utm_source=distr_landing&utm_medium=FSDP_getting_started :link-type: url This tutorial demonstrates how you can perform distributed training @@ -75,7 +75,7 @@ Learn FSDP .. grid-item-card:: :octicon:`file-code;1em` FSDP Advanced - :link: https://pytorch.org/tutorials/intermediate/FSDP_adavnced_tutorial.html?utm_source=distr_landing&utm_medium=FSDP_advanced + :link: https://tutorials.pytorch.kr/intermediate/FSDP_adavnced_tutorial.html?utm_source=distr_landing&utm_medium=FSDP_advanced :link-type: url In this tutorial, you will learn how to fine-tune a HuggingFace (HF) T5 @@ -92,7 +92,7 @@ Learn RPC .. grid-item-card:: :octicon:`file-code;1em` Getting Started with Distributed RPC Framework - :link: https://pytorch.org/tutorials/intermediate/rpc_tutorial.html?utm_source=distr_landing&utm_medium=rpc_getting_started + :link: https://tutorials.pytorch.kr/intermediate/rpc_tutorial.html?utm_source=distr_landing&utm_medium=rpc_getting_started :link-type: url This tutorial demonstrates how to get started with RPC-based distributed @@ -102,7 +102,7 @@ Learn RPC .. grid-item-card:: :octicon:`file-code;1em` Implementing a Parameter Server Using Distributed RPC Framework - :link: https://pytorch.org/tutorials/intermediate/rpc_param_server_tutorial.html?utm_source=distr_landing&utm_medium=rpc_param_server_tutorial + :link: https://tutorials.pytorch.kr/intermediate/rpc_param_server_tutorial.html?utm_source=distr_landing&utm_medium=rpc_param_server_tutorial :link-type: url This tutorial walks you through a simple example of implementing a @@ -112,7 +112,7 @@ Learn RPC .. grid-item-card:: :octicon:`file-code;1em` Implementing Batch RPC Processing Using Asynchronous Executions - :link: https://pytorch.org/tutorials/intermediate/rpc_async_execution.html?utm_source=distr_landing&utm_medium=rpc_async_execution + :link: https://tutorials.pytorch.kr/intermediate/rpc_async_execution.html?utm_source=distr_landing&utm_medium=rpc_async_execution :link-type: url In this tutorial you will build batch-processing RPC applications @@ -124,7 +124,7 @@ Learn RPC .. 
grid-item-card:: :octicon:`file-code;1em` Combining Distributed DataParallel with Distributed RPC Framework - :link: https://pytorch.org/tutorials/advanced/rpc_ddp_tutorial.html?utm_source=distr_landing&utm_medium=rpc_plus_ddp + :link: https://tutorials.pytorch.kr/advanced/rpc_ddp_tutorial.html?utm_source=distr_landing&utm_medium=rpc_plus_ddp :link-type: url In this tutorial you will learn how to combine distributed data @@ -141,7 +141,7 @@ Custom Extensions .. grid-item-card:: :octicon:`file-code;1em` Customize Process Group Backends Using Cpp Extensions - :link: https://pytorch.org/tutorials/intermediate/process_group_cpp_extension_tutorial.html?utm_source=distr_landing&utm_medium=custom_extensions_cpp + :link: https://tutorials.pytorch.kr/intermediate/process_group_cpp_extension_tutorial.html?utm_source=distr_landing&utm_medium=custom_extensions_cpp :link-type: url In this tutorial you will learn to implement a custom `ProcessGroup` diff --git a/docs/_downloads/09dab7b70298bcb798ab79840558b800/maskedtensor_sparsity.ipynb b/docs/_downloads/09dab7b70298bcb798ab79840558b800/maskedtensor_sparsity.ipynb index f213f30f8..a739336da 100644 --- a/docs/_downloads/09dab7b70298bcb798ab79840558b800/maskedtensor_sparsity.ipynb +++ b/docs/_downloads/09dab7b70298bcb798ab79840558b800/maskedtensor_sparsity.ipynb @@ -22,7 +22,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Before working on this tutorial, please make sure to review our\n`MaskedTensor Overview tutorial `.\n\n## Introduction\n\nSparsity has been an area of rapid growth and importance within PyTorch; if any sparsity terms are confusing below,\nplease refer to the [sparsity tutorial](https://pytorch.org/docs/stable/sparse.html)_ for additional details.\n\nSparse storage formats have been proven to be powerful in a variety of ways. As a primer, the first use case\nmost practitioners think about is when the majority of elements are equal to zero (a high degree of sparsity),\nbut even in cases of lower sparsity, certain formats (e.g. BSR) can take advantage of substructures within a matrix.\n\n

Note: At the moment, MaskedTensor supports COO and CSR tensors with plans to support additional formats\n (such as BSR and CSC) in the future. If you have any requests for additional formats,\n please file a feature request [here](https://github.com/pytorch/pytorch/issues)_!
\n\n## Principles\n\nWhen creating a :class:`MaskedTensor` with sparse tensors, there are a few principles that must be observed:\n\n1. ``data`` and ``mask`` must have the same storage format, whether that's :attr:`torch.strided`, :attr:`torch.sparse_coo`, or :attr:`torch.sparse_csr`\n2. ``data`` and ``mask`` must have the same size, indicated by :func:`size()`\n\n\n## Sparse COO tensors\n\nIn accordance with Principle #1, a sparse COO MaskedTensor is created by passing in two sparse COO tensors,\nwhich can be initialized by any of its constructors, for example :func:`torch.sparse_coo_tensor`.\n\nAs a recap of [sparse COO tensors](https://pytorch.org/docs/stable/sparse.html#sparse-coo-tensors)_, the COO format\nstands for \"coordinate format\", where the specified elements are stored as tuples of their indices and the\ncorresponding values. That is, the following are provided:\n\n* ``indices``: array of size ``(ndim, nse)`` and dtype ``torch.int64``\n* ``values``: array of size `(nse,)` with any integer or floating point dtype\n\nwhere ``ndim`` is the dimensionality of the tensor and ``nse`` is the number of specified elements.\n\nFor both sparse COO and CSR tensors, you can construct a :class:`MaskedTensor` by doing either:\n\n1. ``masked_tensor(sparse_tensor_data, sparse_tensor_mask)``\n2. ``dense_masked_tensor.to_sparse_coo()`` or ``dense_masked_tensor.to_sparse_csr()``\n\nThe second method is easier to illustrate so we've shown that below, but for more on the first and the nuances behind\nthe approach, please read the `Sparse COO Appendix `.\n\n\n" + "Before working on this tutorial, please make sure to review our\n`MaskedTensor Overview tutorial `.\n\n## Introduction\n\nSparsity has been an area of rapid growth and importance within PyTorch; if any sparsity terms are confusing below,\nplease refer to the [sparsity tutorial](https://pytorch.org/docs/stable/sparse.html)_ for additional details.\n\nSparse storage formats have been proven to be powerful in a variety of ways. As a primer, the first use case\nmost practitioners think about is when the majority of elements are equal to zero (a high degree of sparsity),\nbut even in cases of lower sparsity, certain formats (e.g. BSR) can take advantage of substructures within a matrix.\n\n

Note: At the moment, MaskedTensor supports COO and CSR tensors with plans to support additional formats\n (such as BSR and CSC) in the future. If you have any requests for additional formats,\n please file a feature request [here](https://github.com/pytorch/pytorch/issues)_!
\n\n## Principles\n\nWhen creating a :class:`MaskedTensor` with sparse tensors, there are a few principles that must be observed:\n\n1. ``data`` and ``mask`` must have the same storage format, whether that's :attr:`torch.strided`, :attr:`torch.sparse_coo`, or :attr:`torch.sparse_csr`\n2. ``data`` and ``mask`` must have the same size, indicated by :func:`size()`\n\n\n## Sparse COO tensors\n\nIn accordance with Principle #1, a sparse COO MaskedTensor is created by passing in two sparse COO tensors,\nwhich can be initialized by any of its constructors, for example :func:`torch.sparse_coo_tensor`.\n\nAs a recap of [sparse COO tensors](https://pytorch.org/docs/stable/sparse.html#sparse-coo-tensors)_, the COO format\nstands for \"coordinate format\", where the specified elements are stored as tuples of their indices and the\ncorresponding values. That is, the following are provided:\n\n* ``indices``: array of size ``(ndim, nse)`` and dtype ``torch.int64``\n* ``values``: array of size `(nse,)` with any integer or floating point dtype\n\nwhere ``ndim`` is the dimensionality of the tensor and ``nse`` is the number of specified elements.\n\nFor both sparse COO and CSR tensors, you can construct a :class:`MaskedTensor` by doing either:\n\n1. ``masked_tensor(sparse_tensor_data, sparse_tensor_mask)``\n2. ``dense_masked_tensor.to_sparse_coo()`` or ``dense_masked_tensor.to_sparse_csr()``\n\nThe second method is easier to illustrate so we've shown that below, but for more on the first and the nuances behind\nthe approach, please read the `Sparse COO Appendix `.\n\n\n" ] }, { @@ -76,7 +76,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Binary\n[Binary operators](https://pytorch.org/docs/master/masked.html#unary-operators)_ are also supported, but the\ninput masks from the two masked tensors must match. For more information on why this decision was made, please\nfind our [MaskedTensor: Advanced Semantics tutorial](https://pytorch.org/tutorials/prototype/maskedtensor_advanced_semantics.html)_.\n\nPlease find an example below:\n\n\n" + "### Binary\n[Binary operators](https://pytorch.org/docs/master/masked.html#unary-operators)_ are also supported, but the\ninput masks from the two masked tensors must match. For more information on why this decision was made, please\nfind our [MaskedTensor: Advanced Semantics tutorial](https://tutorials.pytorch.kr/prototype/maskedtensor_advanced_semantics.html)_.\n\nPlease find an example below:\n\n\n" ] }, { @@ -296,7 +296,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Conclusion\nIn this tutorial, we have introduced how to use :class:`MaskedTensor` with sparse COO and CSR formats and\ndiscussed some of the subtleties under the hood in case users decide to access the underlying data structures\ndirectly. Sparse storage formats and masked semantics indeed have strong synergies, so much so that they are\nsometimes used as proxies for each other (as we will see in the next tutorial). 
In the future, we certainly plan\nto invest and continue developing in this direction.\n\n## Further Reading\n\nTo continue learning more, you can find our\n[Efficiently writing \"sparse\" semantics for Adagrad with MaskedTensor tutorial](https://pytorch.org/tutorials/prototype/maskedtensor_adagrad.html)_\nto see an example of how MaskedTensor can simplify existing workflows with native masking semantics.\n\n\n" + "## Conclusion\nIn this tutorial, we have introduced how to use :class:`MaskedTensor` with sparse COO and CSR formats and\ndiscussed some of the subtleties under the hood in case users decide to access the underlying data structures\ndirectly. Sparse storage formats and masked semantics indeed have strong synergies, so much so that they are\nsometimes used as proxies for each other (as we will see in the next tutorial). In the future, we certainly plan\nto invest and continue developing in this direction.\n\n## Further Reading\n\nTo continue learning more, you can find our\n[Efficiently writing \"sparse\" semantics for Adagrad with MaskedTensor tutorial](https://tutorials.pytorch.kr/prototype/maskedtensor_adagrad.html)_\nto see an example of how MaskedTensor can simplify existing workflows with native masking semantics.\n\n\n" ] } ], diff --git a/docs/_downloads/25b0b65f5731eb063aab5b9034092772/maskedtensor_adagrad.py b/docs/_downloads/25b0b65f5731eb063aab5b9034092772/maskedtensor_adagrad.py index 445da1e0e..97c7483f3 100644 --- a/docs/_downloads/25b0b65f5731eb063aab5b9034092772/maskedtensor_adagrad.py +++ b/docs/_downloads/25b0b65f5731eb063aab5b9034092772/maskedtensor_adagrad.py @@ -7,8 +7,8 @@ ###################################################################### # Before working through this tutorial, please review the MaskedTensor -# `Overview `__ and -# `Sparsity `__ tutorials. +# `Overview `__ and +# `Sparsity `__ tutorials. # # Introduction and Motivation # --------------------------- @@ -212,7 +212,7 @@ def _make_sparse(grad, grad_indices, values): # --------------- # # To continue learning more, you can find our final review (for now) on -# `MaskedTensor Advanced Semantics `__ +# `MaskedTensor Advanced Semantics `__ # to see some of the differences in design decisions between :class:`MaskedTensor` and NumPy's MaskedArray, as well # as reduction semantics. # diff --git a/docs/_downloads/38991cbc7763ed7e0f1b711da737b391/tuning_guide.ipynb b/docs/_downloads/38991cbc7763ed7e0f1b711da737b391/tuning_guide.ipynb index 15641278c..143452576 100644 --- a/docs/_downloads/38991cbc7763ed7e0f1b711da737b391/tuning_guide.ipynb +++ b/docs/_downloads/38991cbc7763ed7e0f1b711da737b391/tuning_guide.ipynb @@ -111,7 +111,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Enable channels_last memory format for computer vision models\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nPyTorch 1.5 introduced support for ``channels_last`` memory format for\nconvolutional networks. This format is meant to be used in conjunction with\n`AMP `_ to further accelerate\nconvolutional neural networks with\n`Tensor Cores `_.\n\nSupport for ``channels_last`` is experimental, but it's expected to work for\nstandard computer vision models (e.g. ResNet-50, SSD). 
To convert models to\n``channels_last`` format follow\n`Channels Last Memory Format Tutorial `_.\nThe tutorial includes a section on\n`converting existing models `_.\n\n" + "Enable channels_last memory format for computer vision models\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\nPyTorch 1.5 introduced support for ``channels_last`` memory format for\nconvolutional networks. This format is meant to be used in conjunction with\n`AMP `_ to further accelerate\nconvolutional neural networks with\n`Tensor Cores `_.\n\nSupport for ``channels_last`` is experimental, but it's expected to work for\nstandard computer vision models (e.g. ResNet-50, SSD). To convert models to\n``channels_last`` format follow\n`Channels Last Memory Format Tutorial `_.\nThe tutorial includes a section on\n`converting existing models `_.\n\n" ] }, { @@ -368,7 +368,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Use mixed precision and AMP\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\nMixed precision leverages\n`Tensor Cores `_\nand offers up to 3x overall speedup on Volta and newer GPU architectures. To\nuse Tensor Cores AMP should be enabled and matrix/tensor dimensions should\nsatisfy requirements for calling kernels that use Tensor Cores.\n\nTo use Tensor Cores:\n\n* set sizes to multiples of 8 (to map onto dimensions of Tensor Cores)\n\n * see\n `Deep Learning Performance Documentation\n `_\n for more details and guidelines specific to layer type\n * if layer size is derived from other parameters rather than fixed, it can\n still be explicitly padded e.g. vocabulary size in NLP models\n\n* enable AMP\n\n * Introduction to Mixed Precision Training and AMP:\n `video `_,\n `slides `_\n * native PyTorch AMP is available starting from PyTorch 1.6:\n `documentation `_,\n `examples `_,\n `tutorial `_\n\n\n\n" + "Use mixed precision and AMP\n~~~~~~~~~~~~~~~~~~~~~~~~~~~\nMixed precision leverages\n`Tensor Cores `_\nand offers up to 3x overall speedup on Volta and newer GPU architectures. To\nuse Tensor Cores AMP should be enabled and matrix/tensor dimensions should\nsatisfy requirements for calling kernels that use Tensor Cores.\n\nTo use Tensor Cores:\n\n* set sizes to multiples of 8 (to map onto dimensions of Tensor Cores)\n\n * see\n `Deep Learning Performance Documentation\n `_\n for more details and guidelines specific to layer type\n * if layer size is derived from other parameters rather than fixed, it can\n still be explicitly padded e.g. vocabulary size in NLP models\n\n* enable AMP\n\n * Introduction to Mixed Precision Training and AMP:\n `video `_,\n `slides `_\n * native PyTorch AMP is available starting from PyTorch 1.6:\n `documentation `_,\n `examples `_,\n `tutorial `_\n\n\n\n" ] }, { diff --git a/docs/_downloads/4b012086d592d0c5d81c7f66a6259343/maskedtensor_advanced_semantics.ipynb b/docs/_downloads/4b012086d592d0c5d81c7f66a6259343/maskedtensor_advanced_semantics.ipynb index c2b3a23fa..bf5782669 100644 --- a/docs/_downloads/4b012086d592d0c5d81c7f66a6259343/maskedtensor_advanced_semantics.ipynb +++ b/docs/_downloads/4b012086d592d0c5d81c7f66a6259343/maskedtensor_advanced_semantics.ipynb @@ -22,7 +22,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Before working on this tutorial, please make sure to review our\n`MaskedTensor Overview tutorial `.\n\nThe purpose of this tutorial is to help users understand how some of the advanced semantics work\nand how they came to be. We will focus on two particular ones:\n\n*. 
Differences between MaskedTensor and [NumPy's MaskedArray](https://numpy.org/doc/stable/reference/maskedarray.html)_ \n*. Reduction semantics\n\n## Preparation\n\n\n" + "Before working on this tutorial, please make sure to review our\n`MaskedTensor Overview tutorial `.\n\nThe purpose of this tutorial is to help users understand how some of the advanced semantics work\nand how they came to be. We will focus on two particular ones:\n\n*. Differences between MaskedTensor and [NumPy's MaskedArray](https://numpy.org/doc/stable/reference/maskedarray.html)_ \n*. Reduction semantics\n\n## Preparation\n\n\n" ] }, { @@ -94,7 +94,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note that the mask is `mt0.get_mask() & mt1.get_mask()` since :class:`MaskedTensor`'s mask is the inverse of NumPy's.\n\n\n## Reduction Semantics\n\nRecall in [MaskedTensor's Overview tutorial](https://pytorch.org/tutorials/prototype/maskedtensor_overview.html)_\nwe discussed \"Implementing missing torch.nan* ops\". Those are examples of reductions -- operators that remove one\n(or more) dimensions from a Tensor and then aggregate the result. In this section, we will use reduction semantics\nto motivate our strict requirements around matching masks from above.\n\nFundamentally, :class:`MaskedTensor`s perform the same reduction operation while ignoring the masked out\n(unspecified) values. By way of example:\n\n\n" + "Note that the mask is `mt0.get_mask() & mt1.get_mask()` since :class:`MaskedTensor`'s mask is the inverse of NumPy's.\n\n\n## Reduction Semantics\n\nRecall in [MaskedTensor's Overview tutorial](https://tutorials.pytorch.kr/prototype/maskedtensor_overview.html)_\nwe discussed \"Implementing missing torch.nan* ops\". Those are examples of reductions -- operators that remove one\n(or more) dimensions from a Tensor and then aggregate the result. In this section, we will use reduction semantics\nto motivate our strict requirements around matching masks from above.\n\nFundamentally, :class:`MaskedTensor`s perform the same reduction operation while ignoring the masked out\n(unspecified) values. By way of example:\n\n\n" ] }, { diff --git a/docs/_downloads/5b467300752c6d9599d51103ddd06f4b/maskedtensor_overview.py b/docs/_downloads/5b467300752c6d9599d51103ddd06f4b/maskedtensor_overview.py index 288286936..e65e86d41 100644 --- a/docs/_downloads/5b467300752c6d9599d51103ddd06f4b/maskedtensor_overview.py +++ b/docs/_downloads/5b467300752c6d9599d51103ddd06f4b/maskedtensor_overview.py @@ -328,6 +328,6 @@ # =============== # # To continue learning more, you can find our -# `MaskedTensor Sparsity tutorial `__ +# `MaskedTensor Sparsity tutorial `__ # to see how MaskedTensor enables sparsity and the different storage formats we currently support. 
# diff --git a/docs/_downloads/7f8a3da4497ba6bec12ef2e8d4d82051/maskedtensor_overview.ipynb b/docs/_downloads/7f8a3da4497ba6bec12ef2e8d4d82051/maskedtensor_overview.ipynb index b6850cd6d..53f1a216a 100644 --- a/docs/_downloads/7f8a3da4497ba6bec12ef2e8d4d82051/maskedtensor_overview.ipynb +++ b/docs/_downloads/7f8a3da4497ba6bec12ef2e8d4d82051/maskedtensor_overview.ipynb @@ -343,7 +343,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This is a similar problem to safe softmax where `0/0 = nan` when what we really want is an undefined value.\n\n## Conclusion\n\nIn this tutorial, we've introduced what MaskedTensors are, demonstrated how to use them, and motivated their\nvalue through a series of examples and issues that they've helped resolve.\n\n## Further Reading\n\nTo continue learning more, you can find our\n[MaskedTensor Sparsity tutorial](https://pytorch.org/tutorials/prototype/maskedtensor_sparsity.html)_\nto see how MaskedTensor enables sparsity and the different storage formats we currently support.\n\n\n" + "This is a similar problem to safe softmax where `0/0 = nan` when what we really want is an undefined value.\n\n## Conclusion\n\nIn this tutorial, we've introduced what MaskedTensors are, demonstrated how to use them, and motivated their\nvalue through a series of examples and issues that they've helped resolve.\n\n## Further Reading\n\nTo continue learning more, you can find our\n[MaskedTensor Sparsity tutorial](https://tutorials.pytorch.kr/prototype/maskedtensor_sparsity.html)_\nto see how MaskedTensor enables sparsity and the different storage formats we currently support.\n\n\n" ] } ], diff --git a/docs/_downloads/88355d650eb3d5ee6afedaebb57fb9b3/modelsyt_tutorial.py b/docs/_downloads/88355d650eb3d5ee6afedaebb57fb9b3/modelsyt_tutorial.py index 884fcbdb1..8126ce841 100644 --- a/docs/_downloads/88355d650eb3d5ee6afedaebb57fb9b3/modelsyt_tutorial.py +++ b/docs/_downloads/88355d650eb3d5ee6afedaebb57fb9b3/modelsyt_tutorial.py @@ -46,15 +46,15 @@ class is a subclass of ``torch.Tensor``, with the special behavior that import torch class TinyModel(torch.nn.Module): - + def __init__(self): super(TinyModel, self).__init__() - + self.linear1 = torch.nn.Linear(100, 200) self.activation = torch.nn.ReLU() self.linear2 = torch.nn.Linear(200, 10) self.softmax = torch.nn.Softmax() - + def forward(self, x): x = self.linear1(x) x = self.activation(x) @@ -85,19 +85,19 @@ def forward(self, x): # model, and a ``forward()`` method where the computation gets done. Note # that we can print the model, or any of its submodules, to learn about # its structure. -# +# # Common Layer Types # ------------------ -# +# # Linear Layers # ~~~~~~~~~~~~~ -# +# # The most basic type of neural network layer is a *linear* or *fully # connected* layer. This is a layer where every input influences every # output of the layer to a degree specified by the layer’s weights. If a # model has *m* inputs and *n* outputs, the weights will be an *m* x *n* # matrix. For example: -# +# lin = torch.nn.Linear(3, 2) x = torch.rand(1, 3) @@ -117,22 +117,22 @@ def forward(self, x): # If you do the matrix multiplication of ``x`` by the linear layer’s # weights, and add the biases, you’ll find that you get the output vector # ``y``. -# +# # One other important feature to note: When we checked the weights of our # layer with ``lin.weight``, it reported itself as a ``Parameter`` (which # is a subclass of ``Tensor``), and let us know that it’s tracking # gradients with autograd. 
This is a default behavior for ``Parameter`` # that differs from ``Tensor``. -# +# # Linear layers are used widely in deep learning models. One of the most # common places you’ll see them is in classifier models, which will # usually have one or more linear layers at the end, where the last layer # will have *n* outputs, where *n* is the number of classes the classifier # addresses. -# +# # Convolutional Layers # ~~~~~~~~~~~~~~~~~~~~ -# +# # *Convolutional* layers are built to handle data with a high degree of # spatial correlation. They are very commonly used in computer vision, # where they detect close groupings of features which the compose into @@ -140,9 +140,9 @@ def forward(self, x): # in NLP applications, where a word’s immediate context (that is, the # other words nearby in the sequence) can affect the meaning of a # sentence. -# +# # We saw convolutional layers in action in LeNet5 in an earlier video: -# +# import torch.functional as F @@ -182,7 +182,7 @@ def num_flat_features(self, x): ########################################################################## # Let’s break down what’s happening in the convolutional layers of this # model. Starting with ``conv1``: -# +# # - LeNet5 is meant to take in a 1x32x32 black & white image. **The first # argument to a convolutional layer’s constructor is the number of # input channels.** Here, it is 1. If we were building this model to @@ -198,14 +198,14 @@ def num_flat_features(self, x): # size.** Here, the “5” means we’ve chosen a 5x5 kernel. (If you want a # kernel with height different from width, you can specify a tuple for # this argument - e.g., ``(3, 5)`` to get a 3x5 convolution kernel.) -# +# # The output of a convolutional layer is an *activation map* - a spatial # representation of the presence of features in the input tensor. # ``conv1`` will give us an output tensor of 6x28x28; 6 is the number of # features, and 28 is the height and width of our map. (The 28 comes from # the fact that when scanning a 5-pixel window over a 32-pixel row, there # are only 28 valid positions.) -# +# # We then pass the output of the convolution through a ReLU activation # function (more on activation functions later), then through a max # pooling layer. The max pooling layer takes features near each other in @@ -214,14 +214,14 @@ def num_flat_features(self, x): # cell, and assigning that cell the maximum value of the 4 cells that went # into it. This gives us a lower-resolution version of the activation map, # with dimensions 6x14x14. -# +# # Our next convolutional layer, ``conv2``, expects 6 input channels # (corresponding to the 6 features sought by the first layer), has 16 # output channels, and a 3x3 kernel. It puts out a 16x12x12 activation # map, which is again reduced by a max pooling layer to 16x6x6. Prior to # passing this output to the linear layers, it is reshaped to a 16 \* 6 \* # 6 = 576-element vector for consumption by the next layer. -# +# # There are convolutional layers for addressing 1D, 2D, and 3D tensors. # There are also many more optional arguments for a conv layer # constructor, including stride length(e.g., only scanning every second or @@ -229,22 +229,22 @@ def num_flat_features(self, x): # edges of the input), and more. See the # `documentation `__ # for more information. -# +# # Recurrent Layers # ~~~~~~~~~~~~~~~~ -# +# # *Recurrent neural networks* (or *RNNs)* are used for sequential data - # anything from time-series measurements from a scientific instrument to # natural language sentences to DNA nucleotides. 
An RNN does this by # maintaining a *hidden state* that acts as a sort of memory for what it # has seen in the sequence so far. -# +# # The internal structure of an RNN layer - or its variants, the LSTM (long # short-term memory) and GRU (gated recurrent unit) - is moderately # complex and beyond the scope of this video, but we’ll show you what one # looks like in action with an LSTM-based part-of-speech tagger (a type of # classifier that tells you if a word is a noun, verb, etc.): -# +# class LSTMTagger(torch.nn.Module): @@ -271,7 +271,7 @@ def forward(self, sentence): ######################################################################## # The constructor has four arguments: -# +# # - ``vocab_size`` is the number of words in the input vocabulary. Each # word is a one-hot vector (or unit vector) in a # ``vocab_size``-dimensional space. @@ -281,7 +281,7 @@ def forward(self, sentence): # space, where words with similar meanings are close together in the # space. # - ``hidden_dim`` is the size of the LSTM’s memory. -# +# # The input will be a sentence with the words represented as indices of # one-hot vectors. The embedding layer will then map these down to an # ``embedding_dim``-dimensional space. The LSTM takes this sequence of @@ -290,15 +290,15 @@ def forward(self, sentence): # ``log_softmax()`` to the output of the final layer converts the output # into a normalized set of estimated probabilities that a given word maps # to a given tag. -# +# # If you’d like to see this network in action, check out the `Sequence # Models and LSTM -# Networks `__ +# Networks `__ # tutorial on pytorch.org. -# +# # Transformers # ~~~~~~~~~~~~ -# +# # *Transformers* are multi-purpose networks that have taken over the state # of the art in NLP with models like BERT. A discussion of transformer # architecture is beyond the scope of this video, but PyTorch has a @@ -312,22 +312,22 @@ def forward(self, sentence): # ``TransformerDecoderLayer``). For details, check out the # `documentation `__ # on transformer classes, and the relevant -# `tutorial `__ +# `tutorial `__ # on pytorch.org. -# +# # Other Layers and Functions # -------------------------- -# +# # Data Manipulation Layers # ~~~~~~~~~~~~~~~~~~~~~~~~ -# +# # There are other layer types that perform important functions in models, # but don’t participate in the learning process themselves. -# +# # **Max pooling** (and its twin, min pooling) reduce a tensor by combining # cells, and assigning the maximum value of the input cells to the output # cell (we saw this). For example: -# +# my_tensor = torch.rand(1, 6, 6) print(my_tensor) @@ -340,12 +340,12 @@ def forward(self, sentence): # If you look closely at the values above, you’ll see that each of the # values in the maxpooled output is the maximum value of each quadrant of # the 6x6 input. -# +# # **Normalization layers** re-center and normalize the output of one layer # before feeding it to another. Centering the and scaling the intermediate # tensors has a number of beneficial effects, such as letting you use # higher learning rates without exploding/vanishing gradients. -# +# my_tensor = torch.rand(1, 4, 4) * 20 + 5 print(my_tensor) @@ -366,22 +366,22 @@ def forward(self, sentence): # in the neighborhood of 15. After running it through the normalization # layer, you can see that the values are smaller, and grouped around zero # - in fact, the mean should be very small (> 1e-8). 
-# +# # This is beneficial because many activation functions (discussed below) # have their strongest gradients near 0, but sometimes suffer from # vanishing or exploding gradients for inputs that drive them far away # from zero. Keeping the data centered around the area of steepest # gradient will tend to mean faster, better learning and higher feasible # learning rates. -# +# # **Dropout layers** are a tool for encouraging *sparse representations* # in your model - that is, pushing it to do inference with less data. -# +# # Dropout layers work by randomly setting parts of the input tensor # *during training* - dropout layers are always turned off for inference. # This forces the model to learn against this masked or reduced dataset. # For example: -# +# my_tensor = torch.rand(1, 4, 4) @@ -394,10 +394,10 @@ def forward(self, sentence): # Above, you can see the effect of dropout on a sample tensor. You can use # the optional ``p`` argument to set the probability of an individual # weight dropping out; if you don’t it defaults to 0.5. -# +# # Activation Functions # ~~~~~~~~~~~~~~~~~~~~ -# +# # Activation functions make deep learning possible. A neural network is # really a program - with many parameters - that *simulates a mathematical # function*. If all we did was multiple tensors by layer weights @@ -406,17 +406,17 @@ def forward(self, sentence): # reduce could be reduced to a single matrix multiplication. Inserting # *non-linear* activation functions between layers is what allows a deep # learning model to simulate any function, rather than just linear ones. -# +# # ``torch.nn.Module`` has objects encapsulating all of the major # activation functions including ReLU and its many variants, Tanh, # Hardtanh, sigmoid, and more. It also includes other functions, such as # Softmax, that are most useful at the output stage of a model. -# +# # Loss Functions # ~~~~~~~~~~~~~~ -# +# # Loss functions tell us how far a model’s prediction is from the correct # answer. PyTorch contains a variety of loss functions, including common # MSE (mean squared error = L2 norm), Cross Entropy Loss and Negative # Likelihood Loss (useful for classifiers), and others. -# +# diff --git a/docs/_downloads/8c575aa36ad9a61584ec0ddf11cbe84d/fx_profiling_tutorial.py b/docs/_downloads/8c575aa36ad9a61584ec0ddf11cbe84d/fx_profiling_tutorial.py index 06726e4dd..8b4faf72f 100644 --- a/docs/_downloads/8c575aa36ad9a61584ec0ddf11cbe84d/fx_profiling_tutorial.py +++ b/docs/_downloads/8c575aa36ad9a61584ec0ddf11cbe84d/fx_profiling_tutorial.py @@ -218,7 +218,7 @@ def summary(self, should_sort : bool = False) -> str: # https://github.com/pytorch/pytorch/issues/51393 # * BatchNorm2d also takes up significant time. We can continue this # line of thinking and optimize this in the Conv-BN Fusion with FX -# `tutorial `_. +# `tutorial `_. # # # Conclusion diff --git a/docs/_downloads/8c82db84c10318a94cbe213adb618139/tuning_guide.py b/docs/_downloads/8c82db84c10318a94cbe213adb618139/tuning_guide.py index 86d0d4cf4..b969fea32 100644 --- a/docs/_downloads/8c82db84c10318a94cbe213adb618139/tuning_guide.py +++ b/docs/_downloads/8c82db84c10318a94cbe213adb618139/tuning_guide.py @@ -137,9 +137,9 @@ def fused_gelu(x): # Support for ``channels_last`` is experimental, but it's expected to work for # standard computer vision models (e.g. ResNet-50, SSD). To convert models to # ``channels_last`` format follow -# `Channels Last Memory Format Tutorial `_. +# `Channels Last Memory Format Tutorial `_. 
# The tutorial includes a section on -# `converting existing models `_. +# `converting existing models `_. ############################################################################### # Checkpoint intermediate buffers @@ -363,7 +363,7 @@ def fused_gelu(x): # * native PyTorch AMP is available starting from PyTorch 1.6: # `documentation `_, # `examples `_, -# `tutorial `_ +# `tutorial `_ # # diff --git a/docs/_downloads/939d5d91f629a3f2d79e69156ffbcaad/maskedtensor_sparsity.py b/docs/_downloads/939d5d91f629a3f2d79e69156ffbcaad/maskedtensor_sparsity.py index 74024f8e2..0ef0b8f5b 100644 --- a/docs/_downloads/939d5d91f629a3f2d79e69156ffbcaad/maskedtensor_sparsity.py +++ b/docs/_downloads/939d5d91f629a3f2d79e69156ffbcaad/maskedtensor_sparsity.py @@ -7,7 +7,7 @@ ###################################################################### # Before working on this tutorial, please make sure to review our -# `MaskedTensor Overview tutorial `. +# `MaskedTensor Overview tutorial `. # # Introduction # ------------ @@ -117,7 +117,7 @@ # ^^^^^^ # `Binary operators `__ are also supported, but the # input masks from the two masked tensors must match. For more information on why this decision was made, please -# find our `MaskedTensor: Advanced Semantics tutorial `__. +# find our `MaskedTensor: Advanced Semantics tutorial `__. # # Please find an example below: # @@ -310,6 +310,6 @@ # --------------- # # To continue learning more, you can find our -# `Efficiently writing "sparse" semantics for Adagrad with MaskedTensor tutorial `__ +# `Efficiently writing "sparse" semantics for Adagrad with MaskedTensor tutorial `__ # to see an example of how MaskedTensor can simplify existing workflows with native masking semantics. # diff --git a/docs/_downloads/945dab6b984b8789385e32187d4a8964/fx_profiling_tutorial.ipynb b/docs/_downloads/945dab6b984b8789385e32187d4a8964/fx_profiling_tutorial.ipynb index 5c4727071..495ee721c 100644 --- a/docs/_downloads/945dab6b984b8789385e32187d4a8964/fx_profiling_tutorial.ipynb +++ b/docs/_downloads/945dab6b984b8789385e32187d4a8964/fx_profiling_tutorial.ipynb @@ -158,7 +158,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "There are two things we should call out here:\n\n* MaxPool2d takes up the most time. This is a known issue:\n https://github.com/pytorch/pytorch/issues/51393\n* BatchNorm2d also takes up significant time. We can continue this\n line of thinking and optimize this in the Conv-BN Fusion with FX\n [tutorial](https://pytorch.org/tutorials/intermediate/fx_conv_bn_fuser.html). \n\n\n## Conclusion\nAs we can see, using FX we can easily capture PyTorch programs (even\nones we don't have the source code for!) in a machine-interpretable\nformat and use that for analysis, such as the performance analysis\nwe've done here. FX opens up an exiciting world of possibilities for\nworking with PyTorch programs.\n\nFinally, since FX is still in beta, we would be happy to hear any\nfeedback you have about using it. Please feel free to use the\nPyTorch Forums (https://discuss.pytorch.org/) and the issue tracker\n(https://github.com/pytorch/pytorch/issues) to provide any feedback\nyou might have.\n\n" + "There are two things we should call out here:\n\n* MaxPool2d takes up the most time. This is a known issue:\n https://github.com/pytorch/pytorch/issues/51393\n* BatchNorm2d also takes up significant time. We can continue this\n line of thinking and optimize this in the Conv-BN Fusion with FX\n [tutorial](https://tutorials.pytorch.kr/intermediate/fx_conv_bn_fuser.html). 
\n\n\n## Conclusion\nAs we can see, using FX we can easily capture PyTorch programs (even\nones we don't have the source code for!) in a machine-interpretable\nformat and use that for analysis, such as the performance analysis\nwe've done here. FX opens up an exiciting world of possibilities for\nworking with PyTorch programs.\n\nFinally, since FX is still in beta, we would be happy to hear any\nfeedback you have about using it. Please feel free to use the\nPyTorch Forums (https://discuss.pytorch.org/) and the issue tracker\n(https://github.com/pytorch/pytorch/issues) to provide any feedback\nyou might have.\n\n" ] } ], diff --git a/docs/_downloads/9c494adfa705afca5e6375d6cb339a14/maskedtensor_adagrad.ipynb b/docs/_downloads/9c494adfa705afca5e6375d6cb339a14/maskedtensor_adagrad.ipynb index 537d92c70..7f58469f1 100644 --- a/docs/_downloads/9c494adfa705afca5e6375d6cb339a14/maskedtensor_adagrad.ipynb +++ b/docs/_downloads/9c494adfa705afca5e6375d6cb339a14/maskedtensor_adagrad.ipynb @@ -22,7 +22,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Before working through this tutorial, please review the MaskedTensor\n[Overview](https://pytorch.org/tutorials/prototype/maskedtensor_overview.html)_ and\n[Sparsity](https://pytorch.org/tutorials/prototype/maskedtensor_sparsity.html)_ tutorials.\n\n## Introduction and Motivation\n[Issue 1369](https://github.com/pytorch/pytorch/issues/1369)_ discussed the additional lines of code\nthat were introduced while writing \"sparse\" semantics for Adagrad, but really,\nthe code uses sparsity as a proxy for masked semantics rather than the intended use case of sparsity:\na compression and optimization technique.\nPreviously, we worked around the lack of formal masked semantics by introducing one-off semantics and operators\nwhile forcing users to be aware of storage details such as indices and values.\n\nNow that we have masked semantics, we are better equipped to point out when sparsity is used as a semantic extension.\nWe'll also compare and contrast this with equivalent code written using MaskedTensor.\nIn the end the code snippets are repeated without additional comments to show the difference in brevity.\n\n## Preparation\n\n\n" + "Before working through this tutorial, please review the MaskedTensor\n[Overview](https://tutorials.pytorch.kr/prototype/maskedtensor_overview.html)_ and\n[Sparsity](https://tutorials.pytorch.kr/prototype/maskedtensor_sparsity.html)_ tutorials.\n\n## Introduction and Motivation\n[Issue 1369](https://github.com/pytorch/pytorch/issues/1369)_ discussed the additional lines of code\nthat were introduced while writing \"sparse\" semantics for Adagrad, but really,\nthe code uses sparsity as a proxy for masked semantics rather than the intended use case of sparsity:\na compression and optimization technique.\nPreviously, we worked around the lack of formal masked semantics by introducing one-off semantics and operators\nwhile forcing users to be aware of storage details such as indices and values.\n\nNow that we have masked semantics, we are better equipped to point out when sparsity is used as a semantic extension.\nWe'll also compare and contrast this with equivalent code written using MaskedTensor.\nIn the end the code snippets are repeated without additional comments to show the difference in brevity.\n\n## Preparation\n\n\n" ] }, { @@ -123,7 +123,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Conclusion\n\nIn this tutorial, we've discussed how native masked semantics can enable a cleaner developer 
experience for\nAdagrad's existing implementation in PyTorch, which used sparsity as a proxy for writing masked semantics.\nBut more importantly, allowing masked semantics to be a first class citizen through MaskedTensor\nremoves the reliance on sparsity or unreliable hacks to mimic masking, thereby allowing for proper independence\nand development, while enabling sparse semantics, such as this one.\n\n## Further Reading\n\nTo continue learning more, you can find our final review (for now) on\n[MaskedTensor Advanced Semantics](https://pytorch.org/tutorials/prototype/maskedtensor_advanced_semantics.html)_\nto see some of the differences in design decisions between :class:`MaskedTensor` and NumPy's MaskedArray, as well\nas reduction semantics.\n\n\n" + "## Conclusion\n\nIn this tutorial, we've discussed how native masked semantics can enable a cleaner developer experience for\nAdagrad's existing implementation in PyTorch, which used sparsity as a proxy for writing masked semantics.\nBut more importantly, allowing masked semantics to be a first class citizen through MaskedTensor\nremoves the reliance on sparsity or unreliable hacks to mimic masking, thereby allowing for proper independence\nand development, while enabling sparse semantics, such as this one.\n\n## Further Reading\n\nTo continue learning more, you can find our final review (for now) on\n[MaskedTensor Advanced Semantics](https://tutorials.pytorch.kr/prototype/maskedtensor_advanced_semantics.html)_\nto see some of the differences in design decisions between :class:`MaskedTensor` and NumPy's MaskedArray, as well\nas reduction semantics.\n\n\n" ] } ], diff --git a/docs/_downloads/ba6d64f1f8bd0d6b3c21839705dc840a/tensorboardyt_tutorial.py b/docs/_downloads/ba6d64f1f8bd0d6b3c21839705dc840a/tensorboardyt_tutorial.py index 3b07fe0b8..7903e898f 100644 --- a/docs/_downloads/ba6d64f1f8bd0d6b3c21839705dc840a/tensorboardyt_tutorial.py +++ b/docs/_downloads/ba6d64f1f8bd0d6b3c21839705dc840a/tensorboardyt_tutorial.py @@ -39,11 +39,11 @@ Introduction ------------ - + In this notebook, we’ll be training a variant of LeNet-5 against the Fashion-MNIST dataset. Fashion-MNIST is a set of image tiles depicting various garments, with ten class labels indicating the type of garment -depicted. +depicted. """ @@ -68,9 +68,9 @@ ###################################################################### # Showing Images in TensorBoard # ----------------------------- -# +# # Let’s start by adding sample images from our dataset to TensorBoard: -# +# # Gather datasets and prepare them for consumption transform = transforms.Compose( @@ -127,7 +127,7 @@ def matplotlib_imshow(img, one_channel=False): # minibatch of our input data. Below, we use the ``add_image()`` call on # ``SummaryWriter`` to log the image for consumption by TensorBoard, and # we also call ``flush()`` to make sure it’s written to disk right away. -# +# # Default log_dir argument is "runs" - but it's good to be specific # torch.utils.tensorboard.SummaryWriter is imported above @@ -146,17 +146,17 @@ def matplotlib_imshow(img, one_channel=False): # If you start TensorBoard at the command line and open it in a new # browser tab (usually at `localhost:6006 `__), you should # see the image grid under the IMAGES tab. -# +# # Graphing Scalars to Visualize Training # -------------------------------------- -# +# # TensorBoard is useful for tracking the progress and efficacy of your # training. Below, we’ll run a training loop, track some metrics, and save # the data for TensorBoard’s consumption. 
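###############################################################################
# A minimal sketch of the scalar-logging pattern described above (the log
# directory, tag name, and fake loss values are placeholders for illustration,
# not taken from the patch):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/logging_demo')   # events are written under ./runs/logging_demo
for step in range(100):
    fake_loss = 1.0 / (step + 1)              # stand-in for a real training loss
    writer.add_scalar('Loss/train', fake_loss, step)
writer.flush()                                # make sure the events are on disk for TensorBoard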
-# +# # Let’s define a model to categorize our image tiles, and an optimizer and # loss function for training: -# +# class Net(nn.Module): def __init__(self): @@ -176,7 +176,7 @@ def forward(self, x): x = F.relu(self.fc2(x)) x = self.fc3(x) return x - + net = Net() criterion = nn.CrossEntropyLoss() @@ -186,7 +186,7 @@ def forward(self, x): ########################################################################## # Now let’s train a single epoch, and evaluate the training vs. validation # set losses every 1000 batches: -# +# print(len(validation_loader)) for epoch in range(1): # loop over the dataset multiple times @@ -206,7 +206,7 @@ def forward(self, x): print('Batch {}'.format(i + 1)) # Check against the validation set running_vloss = 0.0 - + net.train(False) # Don't need to track gradents for validation for j, vdata in enumerate(validation_loader, 0): vinputs, vlabels = vdata @@ -214,10 +214,10 @@ def forward(self, x): vloss = criterion(voutputs, vlabels) running_vloss += vloss.item() net.train(True) # Turn gradients back on for training - + avg_loss = running_loss / 1000 avg_vloss = running_vloss / len(validation_loader) - + # Log the running loss averaged per batch writer.add_scalars('Training vs. Validation Loss', { 'Training' : avg_loss, 'Validation' : avg_vloss }, @@ -231,14 +231,14 @@ def forward(self, x): ######################################################################### # Switch to your open TensorBoard and have a look at the SCALARS tab. -# +# # Visualizing Your Model # ---------------------- -# +# # TensorBoard can also be used to examine the data flow within your model. # To do this, call the ``add_graph()`` method with a model and sample # input. When you open -# +# # Again, grab a single mini-batch of images dataiter = iter(training_loader) @@ -254,10 +254,10 @@ def forward(self, x): # When you switch over to TensorBoard, you should see a GRAPHS tab. # Double-click the “NET” node to see the layers and data flow within your # model. -# +# # Visualizing Your Dataset with Embeddings # ---------------------------------------- -# +# # The 28-by-28 image tiles we’re using can be modeled as 784-dimensional # vectors (28 \* 28 = 784). It can be instructive to project this to a # lower-dimensional representation. The ``add_embedding()`` method will @@ -265,9 +265,9 @@ def forward(self, x): # and display them as an interactive 3D chart. The ``add_embedding()`` # method does this automatically by projecting to the three dimensions # with highest variance. -# +# # Below, we’ll take a sample of our data, and generate such an embedding: -# +# # Select a random subset of data and corresponding labels def select_n_random(data, labels, n=100): @@ -297,19 +297,19 @@ def select_n_random(data, labels, n=100): # zoom the model. Examine it at large and small scales, and see whether # you can spot patterns in the projected data and the clustering of # labels. -# +# # For better visibility, it’s recommended to: -# +# # - Select “label” from the “Color by” drop-down on the left. # - Toggle the Night Mode icon along the top to place the # light-colored images on a dark background. 
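###############################################################################
# A rough sketch of what "projecting to the three dimensions with highest
# variance" means. ``add_embedding()`` does this internally, so none of this is
# required in practice; ``torch.pca_lowrank`` is used here only as an assumed
# stand-in for illustration, and the random features are placeholders.

import torch

features = torch.rand(100, 784)               # 100 flattened 28x28 image tiles
centered = features - features.mean(dim=0)
U, S, V = torch.pca_lowrank(centered, q=3, center=False)
projected = centered @ V                      # (100, 3) coordinates, like the PROJECTOR view
print(projected.shape)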
-# +# # Other Resources # --------------- -# +# # For more information, have a look at: -# +# # - PyTorch documentation on `torch.utils.tensorboard.SummaryWriter `__ -# - Tensorboard tutorial content in the `PyTorch.org Tutorials `__ +# - Tensorboard tutorial content in the `PyTorch.org Tutorials `__ # - For more information about TensorBoard, see the `TensorBoard # documentation `__ diff --git a/docs/_downloads/d58dcaa8f603e286a2501a4e2a87d1b7/maskedtensor_advanced_semantics.py b/docs/_downloads/d58dcaa8f603e286a2501a4e2a87d1b7/maskedtensor_advanced_semantics.py index 7a0233042..ccc3d90af 100644 --- a/docs/_downloads/d58dcaa8f603e286a2501a4e2a87d1b7/maskedtensor_advanced_semantics.py +++ b/docs/_downloads/d58dcaa8f603e286a2501a4e2a87d1b7/maskedtensor_advanced_semantics.py @@ -6,14 +6,14 @@ """ ###################################################################### -# +# # Before working on this tutorial, please make sure to review our -# `MaskedTensor Overview tutorial `. +# `MaskedTensor Overview tutorial `. # # The purpose of this tutorial is to help users understand how some of the advanced semantics work # and how they came to be. We will focus on two particular ones: # -# *. Differences between MaskedTensor and `NumPy's MaskedArray `__ +# *. Differences between MaskedTensor and `NumPy's MaskedArray `__ # *. Reduction semantics # # Preparation @@ -89,7 +89,7 @@ # Reduction Semantics # ------------------- # -# Recall in `MaskedTensor's Overview tutorial `__ +# Recall in `MaskedTensor's Overview tutorial `__ # we discussed "Implementing missing torch.nan* ops". Those are examples of reductions -- operators that remove one # (or more) dimensions from a Tensor and then aggregate the result. In this section, we will use reduction semantics # to motivate our strict requirements around matching masks from above. @@ -167,4 +167,4 @@ # the associative property amongst binary operations), which in turn can necessitate the user # to be more intentional with their code at times, but we believe this to be the better move. # If you have any thoughts on this, please `let us know `__! -# +# diff --git a/docs/_downloads/e2e556f6b4693c2cef716dd7f40caaf6/tensorboardyt_tutorial.ipynb b/docs/_downloads/e2e556f6b4693c2cef716dd7f40caaf6/tensorboardyt_tutorial.ipynb index bd9f38633..2259bff64 100644 --- a/docs/_downloads/e2e556f6b4693c2cef716dd7f40caaf6/tensorboardyt_tutorial.ipynb +++ b/docs/_downloads/e2e556f6b4693c2cef716dd7f40caaf6/tensorboardyt_tutorial.ipynb @@ -141,7 +141,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now if you switch to TensorBoard and select the PROJECTOR tab, you\nshould see a 3D representation of the projection. You can rotate and\nzoom the model. 
Examine it at large and small scales, and see whether\nyou can spot patterns in the projected data and the clustering of\nlabels.\n\nFor better visibility, it\u2019s recommended to:\n\n- Select \u201clabel\u201d from the \u201cColor by\u201d drop-down on the left.\n- Toggle the Night Mode icon along the top to place the\n light-colored images on a dark background.\n\n## Other Resources\n\nFor more information, have a look at:\n\n- PyTorch documentation on [torch.utils.tensorboard.SummaryWriter](https://pytorch.org/docs/stable/tensorboard.html?highlight=summarywriter)_\n- Tensorboard tutorial content in the [PyTorch.org Tutorials](https://pytorch.org/tutorials/)_ \n- For more information about TensorBoard, see the [TensorBoard\n documentation](https://www.tensorflow.org/tensorboard)_\n\n" + "Now if you switch to TensorBoard and select the PROJECTOR tab, you\nshould see a 3D representation of the projection. You can rotate and\nzoom the model. Examine it at large and small scales, and see whether\nyou can spot patterns in the projected data and the clustering of\nlabels.\n\nFor better visibility, it\u2019s recommended to:\n\n- Select \u201clabel\u201d from the \u201cColor by\u201d drop-down on the left.\n- Toggle the Night Mode icon along the top to place the\n light-colored images on a dark background.\n\n## Other Resources\n\nFor more information, have a look at:\n\n- PyTorch documentation on [torch.utils.tensorboard.SummaryWriter](https://pytorch.org/docs/stable/tensorboard.html?highlight=summarywriter)_\n- Tensorboard tutorial content in the [PyTorch.org Tutorials](https://tutorials.pytorch.kr/)_ \n- For more information about TensorBoard, see the [TensorBoard\n documentation](https://www.tensorflow.org/tensorboard)_\n\n" ] } ], diff --git a/docs/_downloads/fe726e041160526cf828806536922cf6/modelsyt_tutorial.ipynb b/docs/_downloads/fe726e041160526cf828806536922cf6/modelsyt_tutorial.ipynb index d8cfcad55..1b6dde6b4 100644 --- a/docs/_downloads/fe726e041160526cf828806536922cf6/modelsyt_tutorial.ipynb +++ b/docs/_downloads/fe726e041160526cf828806536922cf6/modelsyt_tutorial.ipynb @@ -87,7 +87,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The constructor has four arguments:\n\n- ``vocab_size`` is the number of words in the input vocabulary. Each\n word is a one-hot vector (or unit vector) in a\n ``vocab_size``-dimensional space.\n- ``tagset_size`` is the number of tags in the output set.\n- ``embedding_dim`` is the size of the *embedding* space for the\n vocabulary. An embedding maps a vocabulary onto a low-dimensional\n space, where words with similar meanings are close together in the\n space.\n- ``hidden_dim`` is the size of the LSTM\u2019s memory.\n\nThe input will be a sentence with the words represented as indices of\none-hot vectors. The embedding layer will then map these down to an\n``embedding_dim``-dimensional space. The LSTM takes this sequence of\nembeddings and iterates over it, fielding an output vector of length\n``hidden_dim``. 
The final linear layer acts as a classifier; applying\n``log_softmax()`` to the output of the final layer converts the output\ninto a normalized set of estimated probabilities that a given word maps\nto a given tag.\n\nIf you\u2019d like to see this network in action, check out the [Sequence\nModels and LSTM\nNetworks](https://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html)_\ntutorial on pytorch.org.\n\n### Transformers\n\n*Transformers* are multi-purpose networks that have taken over the state\nof the art in NLP with models like BERT. A discussion of transformer\narchitecture is beyond the scope of this video, but PyTorch has a\n``Transformer`` class that allows you to define the overall parameters\nof a transformer model - the number of attention heads, the number of\nencoder & decoder layers, dropout and activation functions, etc. (You\ncan even build the BERT model from this single class, with the right\nparameters!) The ``torch.nn.Transformer`` class also has classes to\nencapsulate the individual components (``TransformerEncoder``,\n``TransformerDecoder``) and subcomponents (``TransformerEncoderLayer``,\n``TransformerDecoderLayer``). For details, check out the\n[documentation](https://pytorch.org/docs/stable/nn.html#transformer-layers)_\non transformer classes, and the relevant\n[tutorial](https://pytorch.org/tutorials/beginner/transformer_tutorial.html)_\non pytorch.org.\n\n## Other Layers and Functions\n\n### Data Manipulation Layers\n\nThere are other layer types that perform important functions in models,\nbut don\u2019t participate in the learning process themselves.\n\n**Max pooling** (and its twin, min pooling) reduce a tensor by combining\ncells, and assigning the maximum value of the input cells to the output\ncell (we saw this). For example:\n\n\n" + "The constructor has four arguments:\n\n- ``vocab_size`` is the number of words in the input vocabulary. Each\n word is a one-hot vector (or unit vector) in a\n ``vocab_size``-dimensional space.\n- ``tagset_size`` is the number of tags in the output set.\n- ``embedding_dim`` is the size of the *embedding* space for the\n vocabulary. An embedding maps a vocabulary onto a low-dimensional\n space, where words with similar meanings are close together in the\n space.\n- ``hidden_dim`` is the size of the LSTM\u2019s memory.\n\nThe input will be a sentence with the words represented as indices of\none-hot vectors. The embedding layer will then map these down to an\n``embedding_dim``-dimensional space. The LSTM takes this sequence of\nembeddings and iterates over it, fielding an output vector of length\n``hidden_dim``. The final linear layer acts as a classifier; applying\n``log_softmax()`` to the output of the final layer converts the output\ninto a normalized set of estimated probabilities that a given word maps\nto a given tag.\n\nIf you\u2019d like to see this network in action, check out the [Sequence\nModels and LSTM\nNetworks](https://tutorials.pytorch.kr/beginner/nlp/sequence_models_tutorial.html)_\ntutorial on pytorch.org.\n\n### Transformers\n\n*Transformers* are multi-purpose networks that have taken over the state\nof the art in NLP with models like BERT. A discussion of transformer\narchitecture is beyond the scope of this video, but PyTorch has a\n``Transformer`` class that allows you to define the overall parameters\nof a transformer model - the number of attention heads, the number of\nencoder & decoder layers, dropout and activation functions, etc. 
(You\ncan even build the BERT model from this single class, with the right\nparameters!) The ``torch.nn.Transformer`` class also has classes to\nencapsulate the individual components (``TransformerEncoder``,\n``TransformerDecoder``) and subcomponents (``TransformerEncoderLayer``,\n``TransformerDecoderLayer``). For details, check out the\n[documentation](https://pytorch.org/docs/stable/nn.html#transformer-layers)_\non transformer classes, and the relevant\n[tutorial](https://tutorials.pytorch.kr/beginner/transformer_tutorial.html)_\non pytorch.org.\n\n## Other Layers and Functions\n\n### Data Manipulation Layers\n\nThere are other layer types that perform important functions in models,\nbut don\u2019t participate in the learning process themselves.\n\n**Max pooling** (and its twin, min pooling) reduce a tensor by combining\ncells, and assigning the maximum value of the input cells to the output\ncell (we saw this). For example:\n\n\n" ] }, { diff --git a/docs/advanced/generic_join.html b/docs/advanced/generic_join.html index 781034f19..edd9629c1 100644 --- a/docs/advanced/generic_join.html +++ b/docs/advanced/generic_join.html @@ -6,39 +6,39 @@ - + - + - + - + - + - + - + - + Distributed Training with Uneven Inputs Using the Join Context Manager — 파이토치 한국어 튜토리얼 (PyTorch tutorials in Korean) - - - + + + - - - - - - - - + + + + + + + + @@ -57,9 +57,9 @@ - + + - @@ -116,9 +116,9 @@ - - + + @@ -584,7 +584,7 @@

[Generated HTML doc pages: "Distributed Training with Uneven Inputs Using the Join Context Manager" and "Advanced Model Training with Fully Sharded Data Parallel (FSDP)", both under the "Distributed and Parallel Training Tutorials" breadcrumb. These hunks touch only page boilerplate: head link and script tags, breadcrumb and footer markup, and the community prompt ("Have more questions or something to improve? Join the community!"). The stripped tag fragments carry no further recoverable content.]