From 23df384d431c82731c74dd03661f460dc6b91808 Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Mon, 2 Feb 2026 18:03:53 +0530 Subject: [PATCH 1/2] Fix spelling mistakes in documentation and source code comments Fix ~219 spelling errors across 92 files including documentation (.rst, .md), source code comments/docstrings, and corresponding test fixtures. Key corrections: compatibility, maintenance, dependency, environment, comparison, simultaneously, unnecessary, bootstrapping, and many others. No code logic changes - only human-readable text in comments, docstrings, and documentation. Test fixtures updated where source string changes affect serialized output. Identified using codespell with project-specific ignore list. Fixes https://github.com/aboutcode-org/scancode-toolkit/issues/4719 Signed-off-by: Mrityunjay Raj --- .github/ISSUE_TEMPLATE/--feature-request.md | 2 +- .github/PULL_REQUEST_TEMPLATE.md | 2 +- AUTHORS.rst | 1 + CHANGELOG.rst | 40 +++++++++---------- README.rst | 6 +-- ROADMAP-ABOUTCODE.rst | 6 +-- .../scancode-license-detection-updates.rst | 6 +-- .../contribute/contributing-code.rst | 12 +++--- .../contribute/contributing-docs.rst | 6 +-- .../getting-started/contribute/roadmap.rst | 4 +- .../install-scancode-from-source.rst | 2 +- .../how-to-add-new-license-detection-rule.rst | 2 +- .../how-to-generate-attribution-docs.rst | 2 +- .../how-to-install-new-license-plugin.rst | 2 +- .../scancode-cli/cli-basic-options.rst | 4 +- .../scancode-cli/cli-core-options.rst | 2 +- .../scancode-cli/cli-help-text-options.rst | 2 +- .../scancode-cli/cli-post-scan-options.rst | 2 +- .../reference/scancode-supported-packages.rst | 2 +- .../rst-snippets/cli-post-scan-options.rst | 2 +- .../note-snippets/cli-output-samples.rst | 2 +- .../note-snippets/cli-post-scan-plugins.rst | 2 +- .../tutorials/adding-a-post-scan-plugin.rst | 2 +- etc/ci/macports-ci | 2 +- etc/release/scancode_release_tests.py | 2 +- etc/scripts/gen_pypi_simple.py | 2 +- 
etc/scripts/gen_requirements.py | 2 +- etc/scripts/gen_requirements_dev.py | 2 +- etc/scripts/licenses/buildrules.py | 2 +- etc/scripts/licenses/synclic.py | 8 ++-- etc/scripts/utils_dejacode.py | 2 +- etc/scripts/utils_requirements.py | 2 +- etc/scripts/utils_thirdparty.py | 12 +++--- src/cluecode/copyrights.py | 20 +++++----- src/formattedcode/output_csv.py | 4 +- src/formattedcode/output_cyclonedx.py | 2 +- src/formattedcode/output_html.py | 2 +- src/licensedcode/README.rst | 4 +- .../additional_license_location_provider.py | 4 +- src/licensedcode/cache.py | 6 +-- src/licensedcode/detection.py | 14 +++---- src/licensedcode/index.py | 4 +- src/licensedcode/licenses_reference.py | 4 +- src/licensedcode/match.py | 18 ++++----- src/licensedcode/match_seq.py | 2 +- src/licensedcode/match_set.py | 4 +- src/licensedcode/match_spdx_lid.py | 16 ++++---- src/licensedcode/match_unknown.py | 2 +- src/licensedcode/models.py | 8 ++-- src/licensedcode/plugin_license.py | 2 +- src/licensedcode/plugin_license_policy.py | 2 +- src/licensedcode/query.py | 16 ++++---- src/licensedcode/required_phrases.py | 6 +-- src/licensedcode/stopwords.py | 2 +- src/licensedcode/tokenize.py | 4 +- src/packagedcode/README.rst | 2 +- src/packagedcode/alpine.py | 8 ++-- src/packagedcode/cache.py | 4 +- src/packagedcode/cargo.py | 2 +- src/packagedcode/chef.py | 2 +- src/packagedcode/conda.py | 2 +- src/packagedcode/debian.py | 6 +-- src/packagedcode/debian_copyright.py | 4 +- src/packagedcode/freebsd.py | 2 +- src/packagedcode/golang.py | 2 +- src/packagedcode/jar_manifest.py | 2 +- src/packagedcode/licensing.py | 2 +- src/packagedcode/maven.py | 4 +- src/packagedcode/misc.py | 2 +- src/packagedcode/models.py | 4 +- src/packagedcode/npm.py | 2 +- src/packagedcode/pubspec.py | 4 +- src/packagedcode/pypi.py | 4 +- src/packagedcode/pyrpm.py | 6 +-- src/packagedcode/recognize.py | 2 +- src/packagedcode/rpm.py | 2 +- src/packagedcode/rpm_installed.py | 6 +-- src/packagedcode/rubygems.py | 4 +- 
src/packagedcode/spec.py | 4 +- src/scancode/cli.py | 6 +-- src/scancode_config.py | 6 +-- src/summarycode/classify.py | 2 +- src/summarycode/copyright_tallies.py | 4 +- src/summarycode/facet.py | 2 +- src/summarycode/summarizer.py | 2 +- src/summarycode/tallies.py | 12 +++--- src/summarycode/todo.py | 8 ++-- src/textcode/analysis.py | 2 +- src/textcode/gibberish.py | 4 +- src/textcode/strings.py | 8 ++-- tests/packagedcode/data/plugin/help.txt | 2 +- .../data/plugin/plugins_list_linux.txt | 2 +- ...ti-orig-tarball-package-expected-diag.json | 2 +- 93 files changed, 220 insertions(+), 219 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/--feature-request.md b/.github/ISSUE_TEMPLATE/--feature-request.md index 5663c1c3b80..5d4370ce803 100644 --- a/.github/ISSUE_TEMPLATE/--feature-request.md +++ b/.github/ISSUE_TEMPLATE/--feature-request.md @@ -7,7 +7,7 @@ assignees: '' --- - + + diff --git a/AUTHORS.rst b/AUTHORS.rst index 75b0533f921..b81d5e9dd21 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -54,6 +54,7 @@ The following organizations or individuals have contributed to ScanCode: - Michael Rupprecht @michaelrup - Mike Rombout @mrombout - Mrinal Paliwal @mnpw +- Mrityunjay Raj @mr-raj12 - nexB Inc. @nexB - Nirmal Sarswat @vivonk - Nisha Kumar @nishakm diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d115cda4b80..d1d5da36341 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -98,7 +98,7 @@ This is a patch release with bugfixes and improvements:: improvements by removing unnecessary regex substitutions. https://github.com/aboutcode-org/scancode-toolkit/issues/4063 -- Fix misc scancode doumentation related issues +- Fix misc scancode documentation related issues https://github.com/aboutcode-org/scancode-toolkit/pull/4457 https://github.com/aboutcode-org/scancode-toolkit/pull/4458 https://github.com/aboutcode-org/scancode-toolkit/pull/4462 @@ -127,7 +127,7 @@ from ``4.0.0`` to ``4.1.0``. The changes in Output Data Structure are: directories. 
This is returned optionally with the ``--info`` plugin. - A new resource level attribute ``is_community`` is added, which is - True from commonly used files used for community/project maintainence. + True from commonly used files used for community/project maintenance. This is returned optionally with the ``--classify`` plugin. These are the details for the most important changes introduced:: @@ -340,7 +340,7 @@ v32.2.1 - 2024-07-02 - Add support for poetry packages, with poetry specific pyproject.toml support, poetry.lock and package assembly support. Also add support for parsing and storing resolved packages and dependency relationships - required to statically resolve poetry dependecy graphs. + required to statically resolve poetry dependency graphs. See https://github.com/nexB/scancode-toolkit/issues/2109 - Add support for pyproject.toml files in python projects. @@ -378,7 +378,7 @@ v32.2.0 - 2024-06-19 - Fix a bug in dependency parsing (we were not returning any dependencies) - Also support getting dependency versions from workspace - Support more attributes from cargo - - Better handle workspace data thorugh extra_data attribute + - Better handle workspace data through extra_data attribute See https://github.com/nexB/scancode-toolkit/pull/3783 - We now support parsing the Swift manifest JSON dump and the @@ -455,7 +455,7 @@ Changes in Output Data Structure: - in package ``license_detections`` and ``other_license_detections`` - ``matches`` for ``license_detections`` everywhere - - Adds all rule atrribute level info in codebase level ``todo`` + - Adds all rule attribute level info in codebase level ``todo`` data, to assist in review. This includes length, text, notes, referenced_filenames, and the boolean attributes (like is_license_notice, is_license_intro etc, as applicable). @@ -815,7 +815,7 @@ License detection: license ``key`` of a matched license expression. 
We now report instead one single match for each matched license expression, and list the license keys as a ``licenses`` attribute. This avoids data duplication. - Inside each match, we list each match and matched rule attributred directly + Inside each match, we list each match and matched rule attributed directly avoiding nesting. See `license updates doc `_ for examples and details. @@ -841,7 +841,7 @@ License detection: - There is an ``--additional-directory`` option with the ``scancode-reindex-licenses`` command to add the licenses from a directory. - - There is also a ``--only-builtin`` option to use ony builtin licenses + - There is also a ``--only-builtin`` option to use only builtin licenses ignoring any additional license plugins. - See https://github.com/nexB/scancode-toolkit/issues/480 for more details. @@ -887,7 +887,7 @@ v31.2.6 - 2023-04-25 This is a minor hotfix release. -This fix a crash when parsing a .deb Dbeian package filename +This fix a crash when parsing a .deb Debian package filename reported in https://github.com/nexB/scancode-toolkit/issues/3259 @@ -1005,7 +1005,7 @@ Important API changes: instances that can be aggregating data from multiple manifests. - There is a a new top-level "dependencies" attribute that contains each - dependency instance, these can be standalone or releated to a package. + dependency instance, these can be standalone or related to a package. These contain a new "extra_data" object. - There is a new resource-level attribute "for_packages" which refers to @@ -1035,9 +1035,9 @@ Important API changes: - ``DatafileHandler.assemble()``, ``DatafileHandler.assemble_from_many()``, and the other ``.assemble()`` methods from the other Package handlers from packagedcode, have been updated to yield Package items before Dependency or - Resource items. This is particulary important in the case where we are calling + Resource items. 
This is particularly important in the case where we are calling the ``assemble()`` method outside of the scancode-toolkit context, where we - need to ensure that a Package exists before we assocate a Resource or + need to ensure that a Package exists before we associate a Resource or Dependency to it. Copyright detection: @@ -1373,7 +1373,7 @@ v30.0.0 - 2021-09-23 This is a major release with new features, and several bug fixes and improvements including major updates to the license detection. -We have droped using calendar-based versions and are now switched back to semver +We have dropped using calendar-based versions and are now switched back to semver versioning. To ensure that there is no ambiguity, the new major version has been updated from 21 to 30. The primary reason is that calver was not helping integrators to track major version changes like semver does. @@ -1940,7 +1940,7 @@ v3.2.0rc1 (2020-09-08) - Improve license detection #1999 - Bryan Sutula - Correct CC0 license #1984 - Carmen Bianca Bakker - Add documentation for the usage of `cpp_includes` plugin - Chin Yeung Li - - Improve andling of npm package-lock.json #1993 - Chin Yeung Li + - Improve handling of npm package-lock.json #1993 - Chin Yeung Li - Add new license detection rules - Gaupeng - Improve documentation - Issei Horie - Improve consolidation plugin - Jono Yang @JonoYang @@ -2027,7 +2027,7 @@ Other features and fixes: v3.1.0 (2019-08-12) ------------------- - - Add partial suport for Python 3.6+ #295 @Abhishek-Dev09 + - Add partial support for Python 3.6+ #295 @Abhishek-Dev09 - Add plugin to collect dwarf references #1167 @licodeli - Add fingerprint plugin #1651 @arnav-mandal1234 - Add summary and consolidation plugin #1673 @@ -2208,8 +2208,8 @@ v2.9.6 (2018-10-25) - Add declared license normalization #1092 - Add new and improved license rules - - Add mising and clean up ABOUT files for all embedded third-party libraries - - Improve npm package.json handling (better keuword support) + - 
Add missing and clean up ABOUT files for all embedded third-party libraries + - Improve npm package.json handling (better keyword support) - Update thirdparty libraries #1224 Credits: Many thanks to everyone that contributed to this release with code and bug reports @@ -2288,7 +2288,7 @@ Misc: - Add facet, classification and summarizer plugins #357 - Fix file counts #1055 - Fix corrupted license cache error - - Upgrade all thridparty libraries #1070 + - Upgrade all third party libraries #1070 - De-vendor prebuilt binaries to ease packaging for Linux distros #469 Credits: Many thanks to everyone that contributed to this release with code and bug reports @@ -2429,8 +2429,8 @@ Licenses: words compared to the number of words in the matched rule. - The license cache is not checked anymore for consistency once created which improved startup times. (unless you are using a Git checkout and you are - developping with a SCANCODE_DEV_MODE tag file present) - - License catagory names have been improved + developing with a SCANCODE_DEV_MODE tag file present) + - License category names have been improved Copyrights: - Copyright detection in binary files has been improved @@ -2453,7 +2453,7 @@ Misc: - Move essential configuration to a scancode_config.py module - Only read a few pages from PDF files by default - Improve handling of files with weird characters in their names on all OSses - - Improve detection of archive vs. comrpessed files + - Improve detection of archive vs. compressed files - Make all copyright tests data driven using YAML files like for license tests diff --git a/README.rst b/README.rst index c35b142d8a2..cfa3df9065a 100644 --- a/README.rst +++ b/README.rst @@ -31,7 +31,7 @@ Build and tests status ====================== We run 30,000+ tests on each commit on multiple CIs to ensure a good platform -compabitility with multiple versions of Windows, Linux and macOS. +compatibility with multiple versions of Windows, Linux and macOS. 
+------------+--------------+-------------------------+----------------------------+ | **Azure** | **RTD Build**| **GitHub actions Docs** | **GitHub actions Release** | @@ -64,7 +64,7 @@ Benefits of ScanCode - ScanCode detects licenses, copyrights, package manifests, direct dependencies, and more both in **source code** and **binary** files and is considered as the - best-in-class and reference tool in this domain, re-used as the core tools for + best-in-class and reference tool in this domain, reused as the core tools for software composition data collection by several open source tools. - ScanCode provides the **most accurate license detection engine** and does a @@ -92,7 +92,7 @@ If you have a specific problem, suggestion or bug, please submit a For quick questions or socializing, join the AboutCode community discussions on `Slack `_. -Interested in commercial suppport? Contact the `AboutCode team `_. +Interested in commercial support? Contact the `AboutCode team `_. License ======= diff --git a/ROADMAP-ABOUTCODE.rst b/ROADMAP-ABOUTCODE.rst index be4800281ef..900403875e9 100644 --- a/ROADMAP-ABOUTCODE.rst +++ b/ROADMAP-ABOUTCODE.rst @@ -94,7 +94,7 @@ License detection quality improvements Improve package detection ~~~~~~~~~~~~~~~~~~~~~~~~~~ -- Create synthethic, private packages from non-packaged files based on license and copyright +- Create synthetic, private packages from non-packaged files based on license and copyright - Create simplified purl-only lightweight package detection - Evolve model for dependencies towards requirements and true dependencies - Track private non-published packages @@ -142,7 +142,7 @@ PURLDB: PurlDB - MatchCode matching engine - embed a SCIO with a matching pipeline for match a whole codebase at once - - expore new endpoint for matching whole codebase + - explore new endpoint for matching whole codebase - support multiple SCIO workers for indexing - implement proper ranking of matched code results - refactor directory matching to be 
a pre-matching step to file matching @@ -162,7 +162,7 @@ VCIO: VulnerableCode.io PURL: purl and vers specs -------------------------- -- Merge and advertize vers spec. +- Merge and advertise vers spec. - Standardize purl with ECMA diff --git a/docs/source/explanation/scancode-license-detection-updates.rst b/docs/source/explanation/scancode-license-detection-updates.rst index 88715aec887..e1824b1aea0 100644 --- a/docs/source/explanation/scancode-license-detection-updates.rst +++ b/docs/source/explanation/scancode-license-detection-updates.rst @@ -381,7 +381,7 @@ report here). We are now just reporting a flat mapping here, and all the rule details are also not present in the license match, and only available as an optional reference. -See this before/after comparision to see how the license data in results has +See this before/after comparison to see how the license data in results has evolved. Before:: @@ -536,12 +536,12 @@ This is now default with the CLI option ``--license``, which references from the match License-level Data and LicenseDB-level Data, and removes the actual data from the matches, and adds them to two top-level lists. -Comparision: Before/After license references +Comparison: Before/After license references ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To compare how the license output data changes between when license references are not collected vs when they are collected (which is default from version 32.x), check out the before/after -comparision below. +comparison below. Before:: diff --git a/docs/source/getting-started/contribute/contributing-code.rst b/docs/source/getting-started/contribute/contributing-code.rst index fcb6f84f4f5..273b4a5b234 100644 --- a/docs/source/getting-started/contribute/contributing-code.rst +++ b/docs/source/getting-started/contribute/contributing-code.rst @@ -8,7 +8,7 @@ TL;DR: - Contributions comes as bugs/questions/issues and as pull requests. - Source code and runtime data are in the /src/ directory. 
- Test code and test data are in the /tests/ directory. -- Datasets (inluding licenses) and test data are in /data/ sub-directories. +- Datasets (including licenses) and test data are in /data/ sub-directories. - We use DCO signoff in commit messages, like Linux does. - Porting ScanCode to other OS (FreeBSD is supported, etc.) is possible. Enter an issue for help. @@ -62,7 +62,7 @@ Running tests ------------- ScanCode comes with over 29,000 unit tests to ensure detection accuracy and -stability across Linux, Windows and macOS OSes: we kinda love tests, do we? +stability across Linux, Windows and macOS OSes: we kind of love tests, do we? We use pytest to run the tests: call the ``pytest`` script to run the whole test suite. This is installed with the ``pytest`` package which is installed @@ -99,7 +99,7 @@ Another useful option after a test run with some failures is to re-run only the failed tests with the ``--lf`` option, for instance: ``pytest -vvs --lf`` would only run only test functions that failed in the previous run. -Because we have a lot of tests (over 29,000), we organized theses in test suites +Because we have a lot of tests (over 29,000), we organized these in test suites using pytest markers that are defined in the ``conftest.py`` pytest plugin. These are enabled by adding a ``--test-suite`` option to the pytest command. @@ -109,8 +109,8 @@ These are enabled by adding a ``--test-suite`` option to the pytest command. extensive data-driven and data validations (for package, copyright and license detection) -In some cases we need to regenerate test data when expected behavious/result data -structures change, and we have an environement variable to regenerate test data. +In some cases we need to regenerate test data when expected behaviour/result data +structures change, and we have an environment variable to regenerate test data. 
`SCANCODE_REGEN_TEST_FIXTURES` is present in `scancode_config` and this can be set to regenerate test data for specific tests like this: @@ -124,7 +124,7 @@ If test data is regenerated, it is important to review the diff for test files a carefully go through all of it to make sure there are no unintended changes there, and then commit all the regenerated test data. -To help debug in scancode, we use logging. There are different environement variables +To help debug in scancode, we use logging. There are different environment variables you need to set to turn on logging. In packagedcode:: ``SCANCODE_DEBUG_PACKAGE=yes pytest -vvs tests/packagedcode/ --lf`` diff --git a/docs/source/getting-started/contribute/contributing-docs.rst b/docs/source/getting-started/contribute/contributing-docs.rst index 49b469cbcb4..09020d66fd0 100644 --- a/docs/source/getting-started/contribute/contributing-docs.rst +++ b/docs/source/getting-started/contribute/contributing-docs.rst @@ -128,7 +128,7 @@ AboutCode documentation uses `Intersphinx `_ to link to other Sphinx Documentations, to maintain links to other Aboutcode Projects. -To link sections in the same documentation, standart reST labels are used. Refer +To link sections in the same documentation, standard reST labels are used. Refer `Cross-Referencing `_ for more information. @@ -143,7 +143,7 @@ For example:: It refers to the section itself, see :ref:`my-reference-label`. -Now, using Intersphinx, you can create these labels in one Sphinx Documentation and then referance +Now, using Intersphinx, you can create these labels in one Sphinx Documentation and then reference these labels from another Sphinx Documentation, hosted in different locations. You just have to add the following in the ``conf.py`` file for your Sphinx Documentation, where you @@ -183,7 +183,7 @@ For more information, refer this tutorial named .. 
_contributing-docs-style-conventions: -Style Conventions for the Documentaion +Style Conventions for the Documentation -------------------------------------- 1. Headings diff --git a/docs/source/getting-started/contribute/roadmap.rst b/docs/source/getting-started/contribute/roadmap.rst index c66dc989a06..1dea5a3ff90 100644 --- a/docs/source/getting-started/contribute/roadmap.rst +++ b/docs/source/getting-started/contribute/roadmap.rst @@ -65,7 +65,7 @@ Core features ^^^^^^^^^^^^^ - |white_check_mark| pre scan filtering (ignore binaries, etc) -- |white_check_mark| pre/post/ouput plugins! (worked as part of the GSoC by @yadsharaf ) +- |white_check_mark| pre/post/output plugins! (worked as part of the GSoC by @yadsharaf ) - |white_check_mark| scan plugins (e.g. plugins that run a scan to collect data) - |white_check_mark| support Python 3 #295 - |clock1030| transparent archive extraction (as opposed to on-demand with extractcode) @@ -110,7 +110,7 @@ Packaging - |white_large_square| simpler installation, automated installer - |white_check_mark| distro-friendly packaging -- |clock1030| unbundle and package as multiple libaries (commoncode, extractcode, etc) +- |clock1030| unbundle and package as multiple libraries (commoncode, extractcode, etc) Documentation ^^^^^^^^^^^^^ diff --git a/docs/source/getting-started/installation/install-scancode-from-source.rst b/docs/source/getting-started/installation/install-scancode-from-source.rst index 26983b7f112..a60c3e9da2c 100644 --- a/docs/source/getting-started/installation/install-scancode-from-source.rst +++ b/docs/source/getting-started/installation/install-scancode-from-source.rst @@ -58,4 +58,4 @@ it is recommended to run the help command. For use in development, run instead ``configure --dev``. If your encounter issues while configuring a previous version, use ``configure --clean`` to - clean and reset your enviroment. After that, run ``configure`` again. + clean and reset your environment. 
After that, run ``configure`` again. diff --git a/docs/source/how-to-guides/how-to-add-new-license-detection-rule.rst b/docs/source/how-to-guides/how-to-add-new-license-detection-rule.rst index 4b68b412f9e..d6f07b5ac5c 100644 --- a/docs/source/how-to-guides/how-to-add-new-license-detection-rule.rst +++ b/docs/source/how-to-guides/how-to-add-new-license-detection-rule.rst @@ -82,5 +82,5 @@ and other options. .. note:: - Add rules in a local developement installation and run `scancode-reindex-licenses` + Add rules in a local development installation and run `scancode-reindex-licenses` to make sure we reindex the rules and this validates the new licenses. diff --git a/docs/source/how-to-guides/how-to-generate-attribution-docs.rst b/docs/source/how-to-guides/how-to-generate-attribution-docs.rst index 310f3524aff..9a70219db31 100644 --- a/docs/source/how-to-guides/how-to-generate-attribution-docs.rst +++ b/docs/source/how-to-guides/how-to-generate-attribution-docs.rst @@ -4,7 +4,7 @@ How to generate attribution doc from a ScanCode scan ==================================================== Users can use an Open Source Project "AboutCode Toolkit" to generate -attrbution document from a ScanCode scan. +attribution document from a ScanCode scan. Read more about AboutCode Toolkit here: https://aboutcode-toolkit.readthedocs.io/. diff --git a/docs/source/how-to-guides/how-to-install-new-license-plugin.rst b/docs/source/how-to-guides/how-to-install-new-license-plugin.rst index f993419f6ff..ab5113f04cd 100644 --- a/docs/source/how-to-guides/how-to-install-new-license-plugin.rst +++ b/docs/source/how-to-guides/how-to-install-new-license-plugin.rst @@ -1,6 +1,6 @@ .. 
_how-to-install-new-license-plugin: -How to install external licenses to use in license dectection +How to install external licenses to use in license detection ============================================================= Users can install external licenses and rules in the form of: diff --git a/docs/source/reference/scancode-cli/cli-basic-options.rst b/docs/source/reference/scancode-cli/cli-basic-options.rst index d200d7bb9a4..274c517e1ca 100644 --- a/docs/source/reference/scancode-cli/cli-basic-options.rst +++ b/docs/source/reference/scancode-cli/cli-basic-options.rst @@ -172,7 +172,7 @@ Quick reference It adds the following attributes to the top-level in results: - 1. ``packages``: This is a mapping of package data with all the atrributes + 1. ``packages``: This is a mapping of package data with all the attributes present in file level ``package_data`` with the following extra attributes: ``package_uid``, ``datafile_paths`` and ``datasource_ids``. @@ -409,7 +409,7 @@ Quick reference ``--email`` ----------- - The ``--email`` option detects and reports email adresses present in scanned files. + The ``--email`` option detects and reports email addresses present in scanned files. It adds the ``emails`` attribute to the file data with the following attributes: ``email`` with the actual email that was present in the file, ``start_line`` and diff --git a/docs/source/reference/scancode-cli/cli-core-options.rst b/docs/source/reference/scancode-cli/cli-core-options.rst index e945d245da0..60af54f93b4 100644 --- a/docs/source/reference/scancode-cli/cli-core-options.rst +++ b/docs/source/reference/scancode-cli/cli-core-options.rst @@ -129,7 +129,7 @@ Comparing progress message options ``--max_depth INT`` ------------------- - Normally, the scan takes place upto the maximum level of nesting of directories possible. But + Normally, the scan takes place up to the maximum level of nesting of directories possible. 
But using the ``--max-depth`` option, you can specify the maximum level of directories to scan, including and below the root location. This can reduce the time taken for the scan when deeper directories are not relevant. diff --git a/docs/source/reference/scancode-cli/cli-help-text-options.rst b/docs/source/reference/scancode-cli/cli-help-text-options.rst index a3bfb44f776..f571bad35dd 100644 --- a/docs/source/reference/scancode-cli/cli-help-text-options.rst +++ b/docs/source/reference/scancode-cli/cli-help-text-options.rst @@ -584,7 +584,7 @@ for ScanCode Version 32.0.0. help_group: post-scan, name: license_policy: --license-policy help: Load a License Policy file and apply it to the scan at the Resource level. doc: - Add the "license_policy" attribute to a resouce if it contains a + Add the "license_policy" attribute to a resource if it contains a detected license key that is found in the license_policy.yml file diff --git a/docs/source/reference/scancode-cli/cli-post-scan-options.rst b/docs/source/reference/scancode-cli/cli-post-scan-options.rst index 690b4d27a22..c568cda1a0a 100644 --- a/docs/source/reference/scancode-cli/cli-post-scan-options.rst +++ b/docs/source/reference/scancode-cli/cli-post-scan-options.rst @@ -816,7 +816,7 @@ To see all plugins available via command line help, use ``--plugins``. scancode -clipeu --json-pp strapi.json strapi-main/ --tallies --facet dev="*.js" --facet dev="*.ts" --tallies-by-facet - We have used the `github:strapi/strapi `_ project to generate exmaple results for + We have used the `github:strapi/strapi `_ project to generate example results for this CLI option. .. 
include:: /rst-snippets/note-snippets/cli-pre-scan-facet-core.rst diff --git a/docs/source/reference/scancode-supported-packages.rst b/docs/source/reference/scancode-supported-packages.rst index d2ad15803f4..8a2f082ebe7 100644 --- a/docs/source/reference/scancode-supported-packages.rst +++ b/docs/source/reference/scancode-supported-packages.rst @@ -491,7 +491,7 @@ parsers in scancode-toolkit during documentation builds. - ``go_mod`` - Go - https://go.dev/ref/mod - * - Go module cheksums file + * - Go module checksums file - ``*/go.sum`` - ``golang`` - ``linux``, ``win``, ``mac`` diff --git a/docs/source/rst-snippets/cli-post-scan-options.rst b/docs/source/rst-snippets/cli-post-scan-options.rst index a38d90366cd..37b279f378e 100644 --- a/docs/source/rst-snippets/cli-post-scan-options.rst +++ b/docs/source/rst-snippets/cli-post-scan-options.rst @@ -54,7 +54,7 @@ --todo Identify license and package detections which needs review as there are potential issues with the detections. - Lists all the unique ambigious detections with references + Lists all the unique ambiguous detections with references to the file path and line numbers, detection details and review comments to help explain the detection issue. diff --git a/docs/source/rst-snippets/note-snippets/cli-output-samples.rst b/docs/source/rst-snippets/note-snippets/cli-output-samples.rst index 62375486c7a..98b42d33137 100644 --- a/docs/source/rst-snippets/note-snippets/cli-output-samples.rst +++ b/docs/source/rst-snippets/note-snippets/cli-output-samples.rst @@ -1,6 +1,6 @@ .. note:: - You can Output Scan Results in two different file formats simultaniously in one Scan. An + You can Output Scan Results in two different file formats simultaneously in one Scan. An example - ``scancode -clpieu --json-pp output.json --html output.html samples``. .. 
note:: diff --git a/docs/source/rst-snippets/note-snippets/cli-post-scan-plugins.rst b/docs/source/rst-snippets/note-snippets/cli-post-scan-plugins.rst index 0ea9d030cbf..2d10ecd939c 100644 --- a/docs/source/rst-snippets/note-snippets/cli-post-scan-plugins.rst +++ b/docs/source/rst-snippets/note-snippets/cli-post-scan-plugins.rst @@ -1,6 +1,6 @@ .. note:: - Plugins that are shown by using ``--plugins`` inlcude the following: + Plugins that are shown by using ``--plugins`` include the following: #. Post-Scan Plugins (and, the following) #. Pre-Scan Plugins diff --git a/docs/source/tutorials/adding-a-post-scan-plugin.rst b/docs/source/tutorials/adding-a-post-scan-plugin.rst index ffeeafef371..1c14af77296 100644 --- a/docs/source/tutorials/adding-a-post-scan-plugin.rst +++ b/docs/source/tutorials/adding-a-post-scan-plugin.rst @@ -30,7 +30,7 @@ Here are the major types of plugins: 3. Post-scan plugins (`scancode_post_scan` in entry points) - These are mainly data processing, summerizing and reporting plugins which + These are mainly data processing, summarizing and reporting plugins which depend on all the results for the scan plugins. These add new codebase level or file-level attributes, and even removes/modifies data as required for consolidation or summarization. 
The base plugin class to be extended is ``PostScanPlugin`` diff --git a/etc/ci/macports-ci b/etc/ci/macports-ci index ac474e4e42e..b4351ef1ae9 100644 --- a/etc/ci/macports-ci +++ b/etc/ci/macports-ci @@ -190,7 +190,7 @@ do # this check confirms that ports were installed # notice that port -N selfupdate && break is not sufficient as a test # (sometime it returns a success even though ports have not been installed) -# for some misterious reasons, running without "-d" does not work in some case +# for some mysterious reasons, running without "-d" does not work in some case sudo port -d -N selfupdate 2>&1 | grep -v DEBUG | awk '{if($1!="x")print}' port info xdrfile > /dev/null && break || true sleep 5 diff --git a/etc/release/scancode_release_tests.py b/etc/release/scancode_release_tests.py index 5b641bd28a2..7091486febf 100755 --- a/etc/release/scancode_release_tests.py +++ b/etc/release/scancode_release_tests.py @@ -21,7 +21,7 @@ def run_app_smoke_tests(app_archive): """ Run basic "smoke" scancode tests for the app release archive `app_archive` """ - # Extract app archive which has this namin pattern: + # Extract app archive which has this naming pattern: # scancode-toolki-21.1.21_py36-linux.tar.xz # or scancode-toolkit-21.1.21_py36-windows.zip # We split the name on "_" to extract the laft hand side which is name of diff --git a/etc/scripts/gen_pypi_simple.py b/etc/scripts/gen_pypi_simple.py index 89d0626527c..a85a15f6782 100644 --- a/etc/scripts/gen_pypi_simple.py +++ b/etc/scripts/gen_pypi_simple.py @@ -177,7 +177,7 @@ def simple_index_entry(self, base_url): def build_pypi_index(directory, base_url="https://thirdparty.aboutcode.org/pypi"): """ Create the a PyPI simple directory index using a ``directory`` directory of wheels and sdists in - the direvctory at ``directory``/simple/ populated with the proper PyPI simple index directory + the directory at ``directory``/simple/ populated with the proper PyPI simple index directory structure crafted using symlinks. 
WARNING: The ``directory``/simple/ directory is removed if it exists. NOTE: in addition to the a diff --git a/etc/scripts/gen_requirements.py b/etc/scripts/gen_requirements.py index 1b87944239e..626b7011493 100755 --- a/etc/scripts/gen_requirements.py +++ b/etc/scripts/gen_requirements.py @@ -15,7 +15,7 @@ """ Utilities to manage requirements files. NOTE: this should use ONLY the standard library and not import anything else -because this is used for boostrapping with no requirements installed. +because this is used for bootstrapping with no requirements installed. """ diff --git a/etc/scripts/gen_requirements_dev.py b/etc/scripts/gen_requirements_dev.py index 85482056598..c005e57416c 100755 --- a/etc/scripts/gen_requirements_dev.py +++ b/etc/scripts/gen_requirements_dev.py @@ -15,7 +15,7 @@ """ Utilities to manage requirements files. NOTE: this should use ONLY the standard library and not import anything else -because this is used for boostrapping with no requirements installed. +because this is used for bootstrapping with no requirements installed. """ diff --git a/etc/scripts/licenses/buildrules.py b/etc/scripts/licenses/buildrules.py index 6def6781a9f..cdc526dedd0 100644 --- a/etc/scripts/licenses/buildrules.py +++ b/etc/scripts/licenses/buildrules.py @@ -24,7 +24,7 @@ Note that the .yml data files are validated and the script will report errors and stop if a rule data is not valid. 
-Typicaly validation errors include: +Typically validation errors include: - missing license expressions - unknown license keys - presence of multiple is_license_xxx: flags (only one is allowed) diff --git a/etc/scripts/licenses/synclic.py b/etc/scripts/licenses/synclic.py index e3208430f81..f6f9c063467 100644 --- a/etc/scripts/licenses/synclic.py +++ b/etc/scripts/licenses/synclic.py @@ -131,7 +131,7 @@ def get_licenses( """ Return a mapping of key -> ScanCode License objects either fetched externally or loaded from the existing `self.original_dir` - If ``force_refetch`` the licenses are always refected. Otherwise if + If ``force_refetch`` the licenses are always refetched. Otherwise if `self.original_dir` exists, they are loaded from there. """ if not use_cache: @@ -460,7 +460,7 @@ def build_license(self, mapping, skip_oddities=True, scancode_licenses=None): text = (mapping.get("licenseText") or mapping.get("licenseExceptionText")).strip() if not text: - raise Exception(f"Missing text fpr SPDX {spdx_license_key}") + raise Exception(f"Missing text for SPDX {spdx_license_key}") lic = License( key=key, @@ -554,7 +554,7 @@ def fetch_licenses(self, scancode_licenses, per_page=100, max_fetch=None, **kwar def fetch_license_data(self, per_page=100, max_fetch=None, **kwargs): """ - Yield mappings of license daa fetched from the API. + Yield mappings of license data fetched from the API. """ api_url = f"{self.api_base_url}/licenses/" for licenses in call_deja_api(api_url, self.api_key, paginate=per_page): @@ -693,7 +693,7 @@ def patch_spdx_license(self, api_url, license_key, spdx_license_key): def call_deja_api(api_url, api_key, paginate=0, params=None): """ - Yield result mappings from the reponses of calling the API at + Yield result mappings from the responses of calling the API at `api_url` with `api_key` . Raise an exception on failure. 
Pass `headers` and `params` mappings to the diff --git a/etc/scripts/utils_dejacode.py b/etc/scripts/utils_dejacode.py index b6bff5186c5..417e0a3f2b6 100644 --- a/etc/scripts/utils_dejacode.py +++ b/etc/scripts/utils_dejacode.py @@ -86,7 +86,7 @@ def update_with_dejacode_data(distribution): def update_with_dejacode_about_data(distribution): """ - Update the Distribution `distribution` wiht ABOUT code data fetched from + Update the Distribution `distribution` with ABOUT code data fetched from DejaCode. Return True if data was updated. """ package_data = get_package_data(distribution) diff --git a/etc/scripts/utils_requirements.py b/etc/scripts/utils_requirements.py index 4bdc96c6978..be64da9c12b 100755 --- a/etc/scripts/utils_requirements.py +++ b/etc/scripts/utils_requirements.py @@ -15,7 +15,7 @@ """ Utilities to manage requirements files and call pip. NOTE: this should use ONLY the standard library and not import anything else -because this is used for boostrapping with no requirements installed. +because this is used for bootstrapping with no requirements installed. """ diff --git a/etc/scripts/utils_thirdparty.py b/etc/scripts/utils_thirdparty.py index 6731264707c..86a7c67e538 100755 --- a/etc/scripts/utils_thirdparty.py +++ b/etc/scripts/utils_thirdparty.py @@ -73,7 +73,7 @@ (e.g., platfiorm and ABI tags.) and is represented by the "tags" it supports. - A plain LinksRepository which is just a collection of URLs scrape from a web - page such as HTTP diretory listing. It is used either with pip "--find-links" + page such as HTTP directory listing. It is used either with pip "--find-links" option or to fetch ABOUT and LICENSE files. 
- A PypiSimpleRepository is a PyPI "simple" index where a HTML page is listing @@ -267,7 +267,7 @@ def download_wheel(name, version, environment, dest_dir=THIRDPARTY_DIR, repos=tu fetched_wheel_filenames.append(fetched_wheel_filename) if fetched_wheel_filenames: - # do not futher fetch from other repos if we find in first, typically PyPI + # do not further fetch from other repos if we find in first, typically PyPI break return fetched_wheel_filenames @@ -307,7 +307,7 @@ def download_sdist(name, version, dest_dir=THIRDPARTY_DIR, repos=tuple()): fetched_sdist_filename = package.sdist.download(dest_dir=dest_dir) if fetched_sdist_filename: - # do not futher fetch from other repos if we find in first, typically PyPI + # do not further fetch from other repos if we find in first, typically PyPI break return fetched_sdist_filename @@ -1646,7 +1646,7 @@ def _get_package_versions_map(self, name): self.fetched_package_normalized_names.add(normalized_name) try: links = self.fetch_links(normalized_name=normalized_name) - # note that thsi is sorted so the mapping is also sorted + # note that this is sorted so the mapping is also sorted versions = { package.version: package for package in PypiPackage.packages_from_many_paths_or_urls(paths_or_urls=links) @@ -2024,7 +2024,7 @@ def get_other_dists(_package, _dist): local_dist.fetch_license_files(dest_dir=dest_dir, use_cached_index=use_cached_index) continue - # lets try to get from another dist of the same local package + # let's try to get from another dist of the same local package for otherd in get_other_dists(local_package, local_dist): updated = local_dist.update_from_other_dist(otherd) if updated and local_dist.has_key_metadata(): @@ -2064,7 +2064,7 @@ def get_other_dists(_package, _dist): ) continue - # lets try to fetch remotely + # let's try to fetch remotely local_dist.load_remote_about_data() # if has key data we may look to improve later, but we can move on diff --git a/src/cluecode/copyrights.py 
b/src/cluecode/copyrights.py index 6d17467acf6..b48848c556f 100644 --- a/src/cluecode/copyrights.py +++ b/src/cluecode/copyrights.py @@ -844,7 +844,7 @@ def build_detection_from_node( # Landroid/icu/impl/IDNA2003 : treat as JUNK (r'^[^\\/]+[\\/][^\\/]+[\\/].*$', 'JUNK'), - # Combo of many (3+) letters and punctuations groups without spaces is likely junk + # Combo of many (3+) letters and punctuation groups without spaces is likely junk # "AEO>>,o>>'!xeoI?o?O1/4thuA/" # (r'((\w+\W+){3,})+', 'JUNK'), @@ -1130,7 +1130,7 @@ def build_detection_from_node( # this trigger otherwise "copyright ownership. The ASF" in Apache license headers (r'^[Oo]wnership\.?$', 'JUNK'), - # exceptions to composed proper namess, mostly debian copyright/control tag-related + # exceptions to composed proper names, mostly debian copyright/control tag-related # FIXME: may be lowercase instead? (r'^Title:?$', 'JUNK'), (r'^Debianized-By:?$', 'JUNK'), @@ -2004,7 +2004,7 @@ def build_detection_from_node( # affiliates or "and its affiliate(s)." (r'^[Aa]ffiliate(s|\(s\))?\.?$', 'NNP'), - # OU as in Org unit, found in some certficates + # OU as in Org unit, found in some certificates (r'^OU$', 'OU'), # Various rare company names/suffix @@ -2047,15 +2047,15 @@ def build_detection_from_node( (r'^\(?[Rr]ecoded$', 'AUTH2'), (r'^\(?[Mm]odified$', 'AUTH2'), (r'^\(?[Cc]reated$', 'AUTH2'), - # written is often mispelled + # written is often misspelled (r'^\(?[Ww]ritt?e[dn]$', 'AUTH2'), - # rewritten is often mispelled + # rewritten is often misspelled (r'^\(?[Rr]ewritt?e[dn]$', 'AUTH2'), (r'^\(?[Mm]aintained$', 'AUTH2'), (r'^\(?[Dd]eveloped$', 'AUTH2'), (r'^\(?[Au]thored$', 'AUTH2'), - # commiters is interesting, and so a tag of its own + # committers is interesting, and so a tag of its own (r'[Cc]ommitters\.?,?', 'COMMIT'), # same for maintainers, developers, admins. @@ -2323,7 +2323,7 @@ def build_detection_from_node( # !$? (r'^\!\$\?$', 'JUNK'), - # things composed only of non-word letters (e.g. 
junk punctuations) + # things composed only of non-word letters (e.g. junk punctuation) # but keeping _ ? and () and - as parts of words (r'^[^\w\?\-\(\)]{3,10}$', 'JUNK'), @@ -4018,7 +4018,7 @@ def remove_dupe_copyright_words(c): def remove_some_extra_words_and_punct(c): """ - Remove misc junk including some punctuations + Remove misc junk including some punctuation """ c = c.replace('

', ' ') c = c.replace(' list(purl) dependencies_by_dependent = defaultdict(set) - # ist of error messages for a given "ref" + # list of error messages for a given "ref" warnings_by_dependent = defaultdict(list) for dependency in package.get('dependencies', []): diff --git a/src/formattedcode/output_html.py b/src/formattedcode/output_html.py index a2c236b2adb..1552cdfba4a 100644 --- a/src/formattedcode/output_html.py +++ b/src/formattedcode/output_html.py @@ -238,7 +238,7 @@ def generate_output(results, license_references, version, template): if results: converted[path] = sorted(results, key=itemgetter('start')) - # TODO: this is klunky: we need to drop templates entirely or we + # TODO: this is clunky: we need to drop templates entirely or we # should rather just pass a the list of files from the scan # results and let the template handle this rather than # denormalizing the list here?? diff --git a/src/licensedcode/README.rst b/src/licensedcode/README.rst index 601b2823e02..c03e67a76c7 100644 --- a/src/licensedcode/README.rst +++ b/src/licensedcode/README.rst @@ -102,7 +102,7 @@ Matching pipeline The matching pipeline consist of: - we start with matching the whole query at once against hashes on the whole text - looked up agains a mapping of hash to license rule. We exit if we have a match. + looked up against a mapping of hash to license rule. We exit if we have a match. - then we match the whole query for exact matches using an automaton (Aho-Corasick). We exit if we have a match. @@ -129,7 +129,7 @@ The matching pipeline consist of: in this step is also made much smaller by the pre-matching done using sets. - finally all the collected matches are merged, refined and filtered to yield the - final results. The merging considers the ressemblance, containment and overlap + final results. The merging considers the resemblance, containment and overlap between scanned texts and the matched texts and several secondary factors. 
Filtering is based on the density and length of matches as well as the number of good or frequent tokens matched. diff --git a/src/licensedcode/additional_license_location_provider.py b/src/licensedcode/additional_license_location_provider.py index 778958cbdd3..cf1ff8acf13 100644 --- a/src/licensedcode/additional_license_location_provider.py +++ b/src/licensedcode/additional_license_location_provider.py @@ -46,7 +46,7 @@ class AdditionalLicenseLocationProviderPlugin(object): Base plugin class for plugins that provide path locations for one or more keys such as the path location to a native binary executable or related system files. - A plugin is configured as it own package with proper environemnt markers + A plugin is configured as its own package with proper environment markers """ # name string under which this plugin is registered. @@ -58,7 +58,7 @@ def get_locations(self): """ Return a mapping of {key: location} where location is an absolute path to a file or directory referenced by a known key. The location should - exist on a given platorm/OS where this plgin can be installed. + exist on a given platform/OS where this plugin can be installed. """ raise NotImplementedError diff --git a/src/licensedcode/cache.py b/src/licensedcode/cache.py index 65b0fe59ce1..bb83b08befd 100644 --- a/src/licensedcode/cache.py +++ b/src/licensedcode/cache.py @@ -38,7 +38,7 @@ class LicenseCache: """ - Represent cachable/pickable LicenseIndex and index-related objects. + Represent cacheable/picklable LicenseIndex and index-related objects. """ def __init__( @@ -275,7 +275,7 @@ def build_index( def build_licensing(licenses_db=None): """ - Return a `license_expression.Licensing` objet built from a `licenses_db` + Return a `license_expression.Licensing` object built from a `licenses_db` mapping of {key: License} or the standard license db.
""" from license_expression import LicenseSymbolLike @@ -482,7 +482,7 @@ def get_licenses_db(): def get_licensing(): """ - Return a license_expression.Licensing objet built from the all the licenses. + Return a license_expression.Licensing object built from the all the licenses. """ return get_cache().licensing diff --git a/src/licensedcode/detection.py b/src/licensedcode/detection.py index 34cbe582e63..3228984b40c 100644 --- a/src/licensedcode/detection.py +++ b/src/licensedcode/detection.py @@ -334,7 +334,7 @@ def _identifier(self): @property def identifier_with_expression(self): """ - Return an identifer for a license detection with the license expression + Return an identifier for a license detection with the license expression and an UUID created from the detection contents. """ id_safe_expression = python_safe_name(s=str(self.license_expression)) @@ -437,7 +437,7 @@ def append( ) if not self.matches: - # first match is always an ovveride + # first match is always an override combine_license = False override_license = True @@ -648,7 +648,7 @@ def identifier(self): def from_dict(cls, license_match_mapping): """ Return a LicenseMatchFromResult object from a ``license_match_mapping`` - LicenseMatch data mappping. + LicenseMatch data mapping. """ rule = Rule.from_match_data(license_match_mapping) matched_text = license_match_mapping.get("matched_text") or None @@ -673,7 +673,7 @@ def from_dict(cls, license_match_mapping): def from_dicts(cls, license_match_mappings): """ Return a LicenseMatchFromResult object from a ``license_match_mapping`s` - list of LicenseMatch data mapppings. + list of LicenseMatch data mappings. 
""" return [LicenseMatchFromResult.from_dict(lmm) for lmm in license_match_mappings] @@ -1423,7 +1423,7 @@ def use_referenced_license_expression(referenced_license_expression, license_det Reference: https://github.com/nexB/scancode-toolkit/issues/3547 """ - # TODO: Also determing if referenced matches could be added but + # TODO: Also determining if referenced matches could be added but # resulting license expression should not be modified. if referenced_license_expression is None or not license_detection: @@ -1696,7 +1696,7 @@ def get_license_keys_from_detections(license_detections, licensing=Licensing()): def can_ignore_ambiguous_detection(license_detection): """ - Return True if the license_detection is not an ambigious detection + Return True if the license_detection is not an ambiguous detection which needs to be reviewed. A few cases are: 1. All the locations of the license detection are community files """ @@ -1847,7 +1847,7 @@ def group_matches(license_matches, lines_threshold=LINES_THRESHOLD): group_of_license_matches = [license_match] # If the current match is a license clue, we send this as a - # seperate group + # separate group elif license_match.rule.is_license_clue: yield group_of_license_matches yield [license_match] diff --git a/src/licensedcode/index.py b/src/licensedcode/index.py index a791024c13e..a934182a31f 100644 --- a/src/licensedcode/index.py +++ b/src/licensedcode/index.py @@ -1048,7 +1048,7 @@ def match_query( ): qry.subtract(mtch.qspan) - # Check if we have some matchable left: do not match futher if we do + # Check if we have some matchable left: do not match further if we do # not need to collect qspans matched exactly e.g. with coverage 100% # this coverage check is because we have provision to match # fragments (unused for now). 
@@ -1208,7 +1208,7 @@ class Matcher(NamedTuple): name: str # function to call, passing a query function: Callable - # whether to inlude low tokens when checking if there are matchable left + # whether to include low tokens when checking if there are matchable left include_low: bool # True if matching should continue after this matcher continue_matching: bool = True diff --git a/src/licensedcode/licenses_reference.py b/src/licensedcode/licenses_reference.py index 471d4b50de2..d583a8fa17e 100644 --- a/src/licensedcode/licenses_reference.py +++ b/src/licensedcode/licenses_reference.py @@ -150,7 +150,7 @@ def collect_references_from_packages(codebase): license_keys.update(licensing.license_keys(expression)) for rule in rules_by_identifier.values(): - # TODO: consider using the expresion object directly instead + # TODO: consider using the expression object directly instead expo = rule.license_expression license_keys.update(licensing.license_keys(expo)) @@ -182,7 +182,7 @@ def collect_references_from_files(codebase): rules_by_identifier.update(rules_by_id) for rule in rules_by_identifier.values(): - # TODO: consider using the expresion object directly instead + # TODO: consider using the expression object directly instead expo = rule.license_expression license_keys.update(licensing.license_keys(expo)) diff --git a/src/licensedcode/match.py b/src/licensedcode/match.py index 90eba30d55e..1540a354509 100644 --- a/src/licensedcode/match.py +++ b/src/licensedcode/match.py @@ -529,7 +529,7 @@ def qmagnitude(self): def is_continuous(self): """ Return True if the all the matched tokens of this match are continuous - without any extra unmatched known or unkwown words, or stopwords. + without any extra unmatched known or unknown words, or stopwords. 
""" return ( self.len() == self.qregion_len() == self.qmagnitude() @@ -1527,7 +1527,7 @@ def restore_non_overlapping(matches, discarded): """ Return a tuple of (matches, discarded) sequences of LicenseMatch given `matches` and `discarded` sequences of LicenseMatch. Reintegrate as matches - these that may have been filtered too agressively. + these that may have been filtered too aggressively. """ all_matched_qspans = Span().union(*(m.qspan for m in matches)) @@ -1634,7 +1634,7 @@ def filter_matches_to_spurious_single_token( A "spurious" single token match is a match to a single token that is surrounded on both sides by at least `unknown_count` tokens that are either unknown tokens, short tokens composed of a single character, tokens - composed only of digits or several punctuations and stopwords. + composed only of digits or several punctuation marks and stopwords. """ from licensedcode.match_seq import MATCH_SEQ if not query: @@ -1653,7 +1653,7 @@ def filter_matches_to_spurious_single_token( kept_append(match) continue - # always keep extact matches + # always keep exact matches if match.matcher != MATCH_SEQ: kept_append(match) continue @@ -1851,7 +1851,7 @@ def filter_invalid_matches_to_single_word_gibberish( - the matched rule "is_license_reference" or "is_license_clue" - the matched rule has a low relevance, e.g., under 75 - the matched text has either: - - one or more leading or trailing punctuations (except for +) + - one or more leading or trailing punctuation (except for +) unless this has a high relevance and the rule is contained as-is in the matched text (considering case) - mixed upper and lower case characters (but not a Title case) unless @@ -2339,7 +2339,7 @@ def get_matching_regions( Two consecutive region Spans are such that: - - there are no overlaping matches between them + - there are no overlapping matches between them - there are at least ``min_tokens_gap`` unmatched tokens between them - OR there are at least ``min_lines_gap`` unmatched lines 
between them """ @@ -2985,7 +2985,7 @@ def _tokenize_matched_text( # 2. to ensure the number of tokens is the same in both # tokenizers (though, of course, the case will differ as the - # regular query tokenizer ignores case and punctuations). + # regular query tokenizer ignores case and punctuation). # NOTE: we have a rare Unicode bug/issue because of some Unicode # codepoint such as some Turkish characters that decompose to @@ -3236,8 +3236,8 @@ def get_full_qspan_matched_text( - ``match_start_line`` is the match start_line - ``match_end_line`` is the match= end_line - The returned strings contains the full text including punctuations and - spaces that are not participating in the match proper including punctuations. + The returned strings contains the full text including punctuation and + spaces that are not participating in the match proper including punctuation. If ``whole_lines`` is True, the unmatched part at the start of the first matched line and the unmatched part at the end of the last matched lines are diff --git a/src/licensedcode/match_seq.py b/src/licensedcode/match_seq.py index c3904550098..d1abece8eba 100644 --- a/src/licensedcode/match_seq.py +++ b/src/licensedcode/match_seq.py @@ -57,7 +57,7 @@ def match_sequence( """ Return a list of LicenseMatch by matching the `query_run` tokens sequence starting at `start_offset` against the `idx` index for the candidate `rule`. - Stop processing when reachin the deadline time. + Stop processing when reaching the deadline time. """ if not rule: return [] diff --git a/src/licensedcode/match_set.py b/src/licensedcode/match_set.py index 105eeb9319e..e54b60dcfa5 100644 --- a/src/licensedcode/match_set.py +++ b/src/licensedcode/match_set.py @@ -120,7 +120,7 @@ def multisets_intersector(qmset, imset): """ Return the intersection of a query and index token ids multisets. For a token id present in both multisets, the intersection value is the smaller of - the occurence count in the query and rule for this token. 
+ the occurrence count in the query and rule for this token. Optimized for defaultdicts. """ # NOTE: Using a Counter is less efficient @@ -139,7 +139,7 @@ def multisets_intersector(qmset, imset): def multiset_counter(mset): """ - Return the sum of occurences of elements present in a token ids multiset, + Return the sum of occurrences of elements present in a token ids multiset, aka. the multiset cardinality. """ return sum(mset.values()) diff --git a/src/licensedcode/match_spdx_lid.py b/src/licensedcode/match_spdx_lid.py index 545fe1c13b7..36f14c83ac1 100644 --- a/src/licensedcode/match_spdx_lid.py +++ b/src/licensedcode/match_spdx_lid.py @@ -27,12 +27,12 @@ Matching strategy for license expressions and "SPDX-License-Identifier:" expression tags. This is also for spdx license-expressions with other prefix strings (example: NuGet License URLs). -The matching aproach is a tad different: +The matching approach is a tad different: First, we do not run this matcher against whole queries. Instead the matchable text is collected during the query processing as Query.spdx_lines for any line -that starts withs these tokens ['spdx', 'license', 'identifier'] or ['spdx', -'licence', 'identifier'] begining with the first, second or third token position +that starts with these tokens ['spdx', 'license', 'identifier'] or ['spdx', +'licence', 'identifier'] beginning with the first, second or third token position in a line. 
Then the words after "SPDX-license-identifier" are parsed as if they were an @@ -245,7 +245,7 @@ def _parse_expression(text, licensing, expression_symbols, unknown_symbol): updated = expression.subs(old_expressions_subs) # collect known symbols and build substitution table: replace known symbols - # with a symbol wrapping a known license and unkown symbols with the + # with a symbol wrapping a known license and unknown symbols with the # unknown-spdx symbol symbols_table = {} @@ -254,7 +254,7 @@ def _get_matching_symbol(_symbol): for symbol in licensing.license_symbols(updated, unique=True, decompose=False): if isinstance(symbol, LicenseWithExceptionSymbol): - # we have two symbols:make a a new symbo, from that + # we have two symbols: make a new symbol from that new_with = LicenseWithExceptionSymbol( license_symbol=_get_matching_symbol(symbol.license_symbol), exception_symbol=_get_matching_symbol(symbol.exception_symbol) @@ -344,7 +344,7 @@ def prepare_text(text): """ Return a 2-tuple of (`prefix`, `expression_text`) built from `text` where the `expression_text` is prepared to be suitable for SPDX license identifier - detection stripped from leading and trailing punctuations, normalized for + detection stripped from leading and trailing punctuation, normalized for spaces and separateed from an SPDX-License-Identifier `prefix`. """ if is_markup_text(text): @@ -358,7 +358,7 @@ def prepare_text(text): def clean_text(text): """ Return a text suitable for SPDX license identifier detection cleaned from - certain leading and trailing punctuations and normalized for spaces. + certain leading and trailing punctuation and normalized for spaces. 
""" if is_markup_text(text): text = demarkup_text(text) @@ -370,7 +370,7 @@ def clean_text(text): text = ' '.join(text.split()) punctuation_spaces = "!\"#$%&'*,-./:;<=>?@[\\]^_`{|}~\t\r\n " - # remove significant expression punctuations in wrong spot: closing parens + # remove significant expression punctuation in wrong spot: closing parens # at head and opening parens or + at tail. leading_punctuation_spaces = punctuation_spaces + ")+" trailng_punctuation_spaces = punctuation_spaces + "(" diff --git a/src/licensedcode/match_unknown.py b/src/licensedcode/match_unknown.py index 5529352f403..b2e00ff349b 100644 --- a/src/licensedcode/match_unknown.py +++ b/src/licensedcode/match_unknown.py @@ -219,7 +219,7 @@ def get_tokens(_toks): if len(qspan) < unknown_ngram_length * 4 or len(hispan) < 5: if TRACE: - print('match_unknowns: Skipping weak unkown match', text) + print('match_unknowns: Skipping weak unknown match', text) return match = LicenseMatch( diff --git a/src/licensedcode/models.py b/src/licensedcode/models.py index 354d93f52d3..70af7fd4c57 100644 --- a/src/licensedcode/models.py +++ b/src/licensedcode/models.py @@ -1391,7 +1391,7 @@ class BasicRule: 'short license references such as a license bare name or license ' 'or a URL provide a weaker clue and level of confidence when ' 'detected because they are shorter and may not always represent a ' - 'clear licensing statment or notice. Mutually exclusive from any other ' + 'clear licensing statement or notice. Mutually exclusive from any other ' 'is_license_* flag') ) @@ -1585,7 +1585,7 @@ class BasicRule: help='Flag set to True if this rule is deleted, ' 'and not to be used anymore in license detection. ' 'This happens usually when a rule is renamed/assigned ' - 'to a seperate license-expression, promoted to being a ' + 'to a separate license-expression, promoted to being a ' 'license text or just plain retired. 
This is used to ' 'preserve the link to the rule, and therefore make links ' 'to rules as permanent.') @@ -2369,7 +2369,7 @@ def build_required_phrase_spans(self): def compute_thresholds(self, small_rule=SMALL_RULE, tiny_rule=TINY_RULE): """ Compute and set thresholds either considering the occurrence of all - tokens or the occurence of unique tokens. + tokens or the occurrence of unique tokens. """ min_cov, self.min_matched_length, self.min_high_matched_length = ( compute_thresholds_occurences( @@ -2713,7 +2713,7 @@ def compute_thresholds_unique( class SynthethicRule(Rule): """ - A specialized rule subclass for synthethic rules generated at runtime. + A specialized rule subclass for synthetic rules generated at runtime. They do not have backing files. """ diff --git a/src/licensedcode/plugin_license.py b/src/licensedcode/plugin_license.py index 717253c4baa..7b7c0dc952f 100644 --- a/src/licensedcode/plugin_license.py +++ b/src/licensedcode/plugin_license.py @@ -67,7 +67,7 @@ class LicenseScanner(ScanPlugin): ('license_detections', attr.ib(default=attr.Factory(list))), # license matches that are not proper detections and potentially # just clues to licenses or likely false positives, and are not - # inlcuded in computing the detected license expression for the resource + # included in computing the detected license expression for the resource ('license_clues', attr.ib(default=attr.Factory(list))), # Percentage of file words detected as license text or notice. 
('percentage_of_license_text', attr.ib(default=0)), diff --git a/src/licensedcode/plugin_license_policy.py b/src/licensedcode/plugin_license_policy.py index e8eda59d5a3..de0ba75bb09 100644 --- a/src/licensedcode/plugin_license_policy.py +++ b/src/licensedcode/plugin_license_policy.py @@ -64,7 +64,7 @@ def validate_policy_path(ctx, param, value): @post_scan_impl class LicensePolicy(PostScanPlugin): """ - Add the "license_policy" attribute to a resouce if it contains a + Add the "license_policy" attribute to a resource if it contains a detected license key that is found in the license_policy.yml file """ diff --git a/src/licensedcode/query.py b/src/licensedcode/query.py index 57f4ce82c3a..2d6c17fd898 100644 --- a/src/licensedcode/query.py +++ b/src/licensedcode/query.py @@ -34,7 +34,7 @@ is important to the overall speed and accuracy of license detection: since the most costly parts of detection is done query run by query run, and sequence alignment is performed on the best ranking candidates from a probalistic -ranking, the defintion of what chunk should be matched matters a lot. +ranking, the definition of what chunk should be matched matters a lot. If too small, chunking would favour alignment against smaller rules and increase the processing time as more alignments would need to be computed. If too big, @@ -97,7 +97,7 @@ def logger_debug(*args): def logger_debug(*args): return printer(' '.join(isinstance(a, str) and a or repr(a) for a in args)) -# for the cases of very long lines, we break texts in abritrary pseudo lines of +# for the cases of very long lines, we break texts in arbitrary pseudo lines of # up to 25 tokens (aka. words) each to avoid getting huge query runs for texts # on a single line (e.g. minified JS or CSS). 
MAX_TOKEN_PER_LINE = 25 @@ -388,7 +388,7 @@ def tokens_by_line( # absolute position in a query, including only known tokens known_pos = -1 - # flag ifset to True when we have found the first known token globally + # flag set to True when we have found the first known token globally # across all query lines started = False @@ -448,7 +448,7 @@ def tokens_by_line( if TRACE_STOP_AND_UNKNOWN: logger_debug(f' STOPWORD token: known_pos: -1') else: - # here we have a new unknwon token positioned right after + # here we have a new unknown token positioned right after # the current known_pos stopwords_by_pos[known_pos] += 1 stopwords_pos_add(known_pos) @@ -470,7 +470,7 @@ def tokens_by_line( logger_debug(f' UNKNOWN token: known_pos: -1') else: - # here we have a new unknwon token positioned right after + # here we have a new unknown token positioned right after # the current known_pos unknowns_by_pos[known_pos] += 1 unknowns_pos_add(known_pos) @@ -485,7 +485,7 @@ # ONLY collect as SPDX a line that starts with SPDX License # Identifier. There are cases where this prefix does not start as - # the firt tokens such as when we have one or two words (such as a + # the first tokens such as when we have one or two words (such as a # comment indicator DNL, REM etc.) that start the line and then and # an SPDX license identifier. spdx_start_offset = None @@ -546,7 +546,7 @@ def tokenize_and_build_runs(self, tokens_by_line, line_threshold=4): def refine_runs(self): # TODO: move me to the approximate matching loop so that this is done - # only if neeed rebreak query runs based on potential rule boundaries + # only if needed rebreak query runs based on potential rule boundaries query_runs = list(chain.from_iterable( break_on_boundaries(qr) for qr in self.query_runs)) @@ -798,7 +798,7 @@ def is_digits_only(self): def is_matchable(self, include_low=False, qspans=None): """ Return True if this query run has some matchable high token positions.
- Optinally if `include_low`m include low tokens. + Optionally if `include_low`, include low tokens. If a list of `qspans` is provided, their positions are also subtracted. """ if include_low: diff --git a/src/licensedcode/required_phrases.py b/src/licensedcode/required_phrases.py index f99af2c09da..06dca7d6ffc 100644 --- a/src/licensedcode/required_phrases.py +++ b/src/licensedcode/required_phrases.py @@ -334,7 +334,7 @@ def get_updatable_rules_by_expression(license_expression=None, simple_expression """ Return a mapping of rules_by_expression, filtered for an optional ``license_expression``. The rules are suitable to receive required phrase updates - If simple_expression is True, only consider lincense rules with a single license key. + If simple_expression is True, only consider license rules with a single license key. """ rules_by_expression = get_base_rules_by_expression(license_expression) @@ -580,7 +580,7 @@ def update_rules_using_license_attributes( licenses_by_key = get_licenses_db() - # license expression is alway a single key here + # license expression is always a single key here for license_key, rules in rules_by_expression.items(): licence_object = licenses_by_key[license_key] if verbose: @@ -855,7 +855,7 @@ def generate_new_required_phrase_rules( update_only=False, ): """ - Create new rules created from collecting unique required phrases accross all rules. + Create new rules created from collecting unique required phrases across all rules. As a side effect, also update existing rules matched to a required phrase text with the "is_required_phrase" flag. 
diff --git a/src/licensedcode/stopwords.py b/src/licensedcode/stopwords.py index e3863e67baa..eb2b6bb06aa 100644 --- a/src/licensedcode/stopwords.py +++ b/src/licensedcode/stopwords.py @@ -64,7 +64,7 @@ 'para', 'ulink', -# Some HTML punctuations and entities all as &emdash; +# Some HTML punctuation and entities all as &emdash; 'bdquo', 'bull', diff --git a/src/licensedcode/tokenize.py b/src/licensedcode/tokenize.py index bea07dd5a21..1ea210aebf0 100644 --- a/src/licensedcode/tokenize.py +++ b/src/licensedcode/tokenize.py @@ -69,7 +69,7 @@ def query_lines( else: yield line_number, line.rstrip('\n') + '\n' -# Split on whitespace and punctuations: keep only characters and numbers and + +# Split on whitespace and punctuation: keep only characters and numbers and + # when in the middle or end of a word. Keeping the trailing + is important for # licenses name such as GPL2+. The use a double negation "not non word" meaning # "words" to define the character ranges @@ -349,7 +349,7 @@ def query_tokenizer(text): def matched_query_text_tokenizer(text): """ Return an iterable of tokens and non-tokens punctuation from a unicode query - text keeping everything (including punctuations, line endings, etc.) + text keeping everything (including punctuation, line endings, etc.) The returned iterable contains 2-tuples of: - True if the string is a text token or False if this is not (such as punctuation, spaces, etc). diff --git a/src/packagedcode/README.rst b/src/packagedcode/README.rst index 6daec5a5043..8b302f24248 100644 --- a/src/packagedcode/README.rst +++ b/src/packagedcode/README.rst @@ -35,7 +35,7 @@ Taking Python as a main example a package can exist in multiple forms: extracting name, version, authorship, declared licensing and declared dependencies as found in the any of the package descriptor files (e.g. a `setup.py` file, `requirements` file(s) or any of the `*-dist-info` or `*-egg-info` dir files such as -a `metadata.json`). 
Other package datafile formats have their own metatada that may be more or +a `metadata.json`). Other package datafile formats have their own metadata that may be more or less comprehensive in the breadth and depth of information they offer (e.g. `.nuspec`, `package.json`, `bower.json`, Godeps, etc...). These metadata include the declared dependencies (and in some cases the fully resolved dependencies too such as diff --git a/src/packagedcode/alpine.py b/src/packagedcode/alpine.py index 506e9d0c14b..fc68a8d2037 100644 --- a/src/packagedcode/alpine.py +++ b/src/packagedcode/alpine.py @@ -1170,7 +1170,7 @@ def checksums_handler(value, checksum_name, **kwargs): def get_source_entries(source): """ Yield source file tuples as (file_name, URL) where URL is None if this is a - lcoal file (i.e. for patches) given an APKBUILD ``source`` attribue string. + local file (i.e. for patches) given an APKBUILD ``source`` attribute string. See https://wiki.alpinelinux.org/wiki/APKBUILD_Reference: The shape of this is one entry per line:: @@ -1325,7 +1325,7 @@ def source_handler(value, **kwargs): # TODO: Checksum # also in APKBUILD we have sha512sums # For example: C:Q1sVrQyQ5Ek9/clI1rkKjgINqJNu8= - # like for the file checksums "Z", Q means base64 encoding adn 1 means SHA1 + # like for the file checksums "Z", Q means base64 encoding and 1 means SHA1 # The content used for this SHA1 is TBD. # ('C', 'checksum'), @@ -1555,7 +1555,7 @@ def normalize_and_cleanup_declared_license(declared): def apply_syntax_fixes(s): """ - Fix the expression string ``s`` by aplying replacement for various quirks to get clean license + Fix the expression string ``s`` by applying replacement for various quirks to get clean license expression syntax. 
""" for src, tgt in EXPRESSION_SYNTAX_FIXES.items(): @@ -1724,7 +1724,7 @@ def apply_expressions_mapping(expression): """ Return a new license expression string from an ``expression`` string replacing subexpressions using the DECLARED_TO_SPDX_SUBS expression - subsitution table. + substitution table. """ licensing = Licensing() diff --git a/src/packagedcode/cache.py b/src/packagedcode/cache.py index 52703717e39..7f784ea91dc 100644 --- a/src/packagedcode/cache.py +++ b/src/packagedcode/cache.py @@ -45,7 +45,7 @@ @attr.s class PkgManifestPatternsCache: """ - Represent cachable package manifest regex patterns, prematchers + Represent cacheable package manifest regex patterns, prematchers and mappings from regex patterns to datasource IDs for all datafile handlers. """ @@ -164,7 +164,7 @@ def get_prematchers_from_glob_pattern(pattern): @attr.s class AcceleratedPattern(): regex :str = attr.ib(default=None) # regular expression string - prematchers :list[str] = attr.ib(default=[]) # list of prematcher strinsg for this regex + prematchers :list[str] = attr.ib(default=[]) # list of prematcher strings for this regex handler_datasource_ids :list[str] = attr.ib(default=[]) # handler diff --git a/src/packagedcode/cargo.py b/src/packagedcode/cargo.py index 3b2d342d828..29f2f1b8780 100644 --- a/src/packagedcode/cargo.py +++ b/src/packagedcode/cargo.py @@ -217,7 +217,7 @@ def parse(cls, location, package_only=False): categories = core_package_data.get('categories') or [] keywords.extend(categories) - # cargo dependencies are complex and can be overriden at multiple levels + # cargo dependencies are complex and can be overridden at multiple levels dependencies = [] for key, value in package_data_toml.items(): if key.endswith('dependencies'): diff --git a/src/packagedcode/chef.py b/src/packagedcode/chef.py index d5df6a2f6c4..16bd5709236 100644 --- a/src/packagedcode/chef.py +++ b/src/packagedcode/chef.py @@ -154,7 +154,7 @@ def assemble(cls, package_data, resource, codebase, 
package_adder): ) -# TODO: implemet me: extract and parse and register +# TODO: implement me: extract and parse and register class ChefCookbookHandler(BaseChefMetadataHandler): datasource_id = 'chef_cookbook_tarball' path_patterns = ('*.tgz',) diff --git a/src/packagedcode/conda.py b/src/packagedcode/conda.py index 19e0462f883..72f19684ac2 100644 --- a/src/packagedcode/conda.py +++ b/src/packagedcode/conda.py @@ -206,7 +206,7 @@ def get_and_assmeble_from_meta_yaml(cls, package, resource, codebase, package_ad @classmethod def assemble_from_meta_yaml_only(cls, package_data, resource, codebase, package_adder=models.add_to_package): """ - Assemble and yeild package, dependencies and the meta YAML `resource` from + Assemble and yield package, dependencies and the meta YAML `resource` from it's `package_data`, and also assign resources to the package. """ if not package_data.purl: diff --git a/src/packagedcode/debian.py b/src/packagedcode/debian.py index 173e3342107..3ae2aa00ab3 100644 --- a/src/packagedcode/debian.py +++ b/src/packagedcode/debian.py @@ -161,7 +161,7 @@ class DebianControlFileInSourceHandler(models.DatafileHandler): @classmethod def parse(cls, location, package_only=False): - # NOTE: a control file in a source repo or debina.tar tarball can contain more than one package + # NOTE: a control file in a source repo or debian.tar tarball can contain more than one package debian_packages = [] for debian_data in get_paragraphs_data_from_file(location=location): debian_packages.append( @@ -604,7 +604,7 @@ def parse_debian_files_list(location, datasource_id, package_type): line = line.strip() if not line or line.startswith('#'): continue - # for a plain file lits, the md5sum will be empty + # for a plain file list, the md5sum will be empty md5sum, _, path = line.partition(' ') path = path.strip() md5sum = md5sum and md5sum.strip() or None @@ -744,7 +744,7 @@ def build_package_data(debian_data, datasource_id, package_type='deb', distro=No def 
populate_debian_namespace(packages): """ For an iterable of debian `packages`, populate the - most frequently occuring namespace, or the default + most frequently occurring namespace, or the default namespace 'debian' in packages without namespace. """ if not packages: diff --git a/src/packagedcode/debian_copyright.py b/src/packagedcode/debian_copyright.py index 7f08592fe8d..8b6eb3d5114 100644 --- a/src/packagedcode/debian_copyright.py +++ b/src/packagedcode/debian_copyright.py @@ -243,7 +243,7 @@ def assign_package_to_resources(cls, package, resource, codebase, package_adder) return cls.assign_package_to_resources(package, root, codebase, package_adder) -# TODO: distiguish the cased of an installed package vs. the case of an extracted .deb +# TODO: distinguish the case of an installed package vs. the case of an extracted .deb class DebianCopyrightFileInPackageHandler(BaseDebianCopyrightFileHandler): datasource_id = 'debian_copyright_in_package' description = 'Debian machine readable file in source' @@ -1527,7 +1527,7 @@ def license_nameless_paragraphs(self): @property def is_all_licenses_used(self): - # FIXME: If `lgpl` isn't used but there is `lgpl+` somwhere this + # FIXME: If `lgpl` isn't used but there is `lgpl+` somewhere this # wouldn't detect correctly. Could be parsed, normalized and the # individual components could be matched exactly. 
diff --git a/src/packagedcode/freebsd.py b/src/packagedcode/freebsd.py index d170e8f8222..0d37804d669 100644 --- a/src/packagedcode/freebsd.py +++ b/src/packagedcode/freebsd.py @@ -212,6 +212,6 @@ def arch_mapper(arch, package): """ # the 'arch' field allows us to craft a binary download_url # FIXME: due to the rolling-release nature of binary ports, some download URLs - # will lead to 404 errors if a newer release of a particular port is availible + # will lead to 404 errors if a newer release of a particular port is available package.download_url = f'https://pkg.freebsd.org/{arch}/latest/All/{package.name}-{package.version}.txz' return package diff --git a/src/packagedcode/golang.py b/src/packagedcode/golang.py index c45be7a5e3a..7381354ba18 100644 --- a/src/packagedcode/golang.py +++ b/src/packagedcode/golang.py @@ -108,7 +108,7 @@ class GoSumHandler(BaseGoModuleHandler): path_patterns = ('*/go.sum',) default_package_type = 'golang' default_primary_language = 'Go' - description = 'Go module cheksums file' + description = 'Go module checksums file' documentation_url = 'https://go.dev/ref/mod#go-sum-files' @classmethod diff --git a/src/packagedcode/jar_manifest.py b/src/packagedcode/jar_manifest.py index 48df5373eb1..33e59d8f741 100644 --- a/src/packagedcode/jar_manifest.py +++ b/src/packagedcode/jar_manifest.py @@ -388,7 +388,7 @@ def parse_scm_connection(scm_connection): def get_datasource_id(package_type): """ Get the corresponding `datasource_id` for the given - `package_type`. This is a seperate function to avoid + `package_type`. This is a separate function to avoid cyclic imports. """ from packagedcode.maven import JavaJarManifestHandler diff --git a/src/packagedcode/licensing.py b/src/packagedcode/licensing.py index 75682f1db46..d7a4275fb3e 100644 --- a/src/packagedcode/licensing.py +++ b/src/packagedcode/licensing.py @@ -711,7 +711,7 @@ def is_declared_license_not_fully_matched(matches): # the query object should be the same for all matches. 
Is this always true?? for mt in matches: if mt.query != query: - # FIXME: the expception may be swallowed in callers!!! + # FIXME: the exception may be swallowed in callers!!! raise Exception( 'Inconsistent package.extracted_license_statement: text with multiple "queries".' 'Please report this issue to the scancode-toolkit team.\n' diff --git a/src/packagedcode/maven.py b/src/packagedcode/maven.py index a007a361213..d5957fc84d2 100644 --- a/src/packagedcode/maven.py +++ b/src/packagedcode/maven.py @@ -43,7 +43,7 @@ Support for Maven POMs including resolution of variables using Maven properties when possible. -We have seen Maven pom in three layout syles: +We have seen Maven pom in three layout styles: First case: a pom.xml inside a META-INF directory such as in /META-INF/maven/log4j/log4j/pom.xml possibly with a pom.properties @@ -1375,7 +1375,7 @@ def get_license_detections_for_extracted_license_statement( # We can detect each license item individually and check if the unknown was detected # in the name, URL or comment field. # name, URL, comments - # name unknwon: keep that unknown in all cases + # name unknown: keep that unknown in all cases # URL or comments with unknown, but name not unknown: we want to combine the unknown # matches with the correct name match diff --git a/src/packagedcode/misc.py b/src/packagedcode/misc.py index f4b7289d69f..85efdbe1d97 100644 --- a/src/packagedcode/misc.py +++ b/src/packagedcode/misc.py @@ -10,7 +10,7 @@ from packagedcode import models """ -Various package data file formats to implment. +Various package data file formats to implement. """ # Package types diff --git a/src/packagedcode/models.py b/src/packagedcode/models.py index 0c4ffb9e56e..ad546dbe7ce 100644 --- a/src/packagedcode/models.py +++ b/src/packagedcode/models.py @@ -1273,7 +1273,7 @@ def assemble_from_many( Like in ``DatafileHandler.assemble()``, Package items must be yielded before Dependency or Resource items. 
This is to ensure that a Package is created before we associate a Resource or Dependency to a Package. This - is particulary important in the case where we are calling the + is particularly important in the case where we are calling the ``assemble()`` method outside of the scancode-toolkit context, as ``assemble()`` can call ``assemble_from_many()``. @@ -1309,7 +1309,7 @@ def assemble_from_many( else: # FIXME: What is the package_data is NOT for the same package as package? # FIXME: What if the update did not do anything? (it does return True or False) - # FIXME: There we would be missing out packges AND/OR errors + # FIXME: There we would be missing out packages AND/OR errors package.update( package_data=package_data, datafile_path=resource.path, diff --git a/src/packagedcode/npm.py b/src/packagedcode/npm.py index 7618e830c19..ae4a457726b 100644 --- a/src/packagedcode/npm.py +++ b/src/packagedcode/npm.py @@ -1594,7 +1594,7 @@ def split_scoped_package_name(name): if not name: return None, None - # this should never happen: wee only have a scope. + # this should never happen: we only have a scope. # TODO: raise an exception? 
if is_scoped_package(name) and '/' not in name: return name, None diff --git a/src/packagedcode/pubspec.py b/src/packagedcode/pubspec.py index bbc36038c0d..8fcdd2e9dde 100644 --- a/src/packagedcode/pubspec.py +++ b/src/packagedcode/pubspec.py @@ -18,7 +18,7 @@ See https://dart.dev/tools/pub/pubspec -API has theses URLs: +API has these URLs: is limited and only returns all versions of a package - feeds https://pub.dev/feed.atom - all packages, paginated: https://pub.dev/api/packages @@ -109,7 +109,7 @@ def collect_locks(locks_data): sdks: dart: ">=2.12.0 <3.0.0" """ - # FIXME: we treat all as nno optioanl for now + # FIXME: we treat all as non optional for now sdks = locks_data.get('sdks') or {} for name, version in sdks.items(): dep = build_dep( diff --git a/src/packagedcode/pypi.py b/src/packagedcode/pypi.py index b5588ed7ca9..3294e170639 100644 --- a/src/packagedcode/pypi.py +++ b/src/packagedcode/pypi.py @@ -1611,7 +1611,7 @@ def get_description(metainfo, location=None): def clean_description(description): """ Return a cleaned description text, removing extra leading whitespaces if - needed. Some metadata formats padd each description line with 8 spaces. Some + needed. Some metadata formats pad each description line with 8 spaces. Some do not. We check first and cleanup if needed. """ # TODO: verify what is the impact of Description-Content-Type: if any @@ -2380,7 +2380,7 @@ def add_url(_url, _utype=None, _attribute=None): def find_pattern(location, pattern): """ - Search the file at `location` for a patern regex on a single line and return + Search the file at `location` for a pattern regex on a single line and return this or None if not found. Reads the supplied location as text without importing it. 
diff --git a/src/packagedcode/pyrpm.py b/src/packagedcode/pyrpm.py index 09c5b91be49..e005e7d3399 100644 --- a/src/packagedcode/pyrpm.py +++ b/src/packagedcode/pyrpm.py @@ -43,7 +43,7 @@ # the first 4 bytes of an RPM RPM_LEAD_MAGIC_NUMBER = b'\xed\xab\xee\xdb' -# the start of the header (there are some data we ignore before taht) +# the start of the header (there are some data we ignore before that) RPM_HEADER_MAGIC_NUMBER = b'\x8e\xad\xe8' RPMTAG_MIN_NUMBER = 1000 @@ -434,7 +434,7 @@ def read_lead(self): def read_sigheader(self): """ Read signature header - ATN: this will not return any usefull information + ATN: this will not return any useful information besides the file offset """ start = find_magic_number(self.rpmfile) @@ -467,7 +467,7 @@ def read_headers(self, offset): # lets find the start of the header self.rpmfile.seek(offset) start = find_magic_number(self.rpmfile) - # go back to the begining of the header + # go back to the beginning of the header self.rpmfile.seek(start) header = self.rpmfile.read(16) header = self.read_header(header) diff --git a/src/packagedcode/recognize.py b/src/packagedcode/recognize.py index 7744a550eff..025bcc5492c 100644 --- a/src/packagedcode/recognize.py +++ b/src/packagedcode/recognize.py @@ -154,7 +154,7 @@ def _parse( raise except Exception as e: - # We should continue when an Exception has occured when trying to + # We should continue when an Exception has occurred when trying to # recognize a package if TRACE: import traceback diff --git a/src/packagedcode/rpm.py b/src/packagedcode/rpm.py index 4cce7e8d1f3..46662f87ac3 100644 --- a/src/packagedcode/rpm.py +++ b/src/packagedcode/rpm.py @@ -559,7 +559,7 @@ def parse(cls, location, package_only=False): def get_digest_algo(rpm_tags): """ - Return a string representing a digest algorightm given an ``rpm_tags`` + Return a string representing a digest algorithm given an ``rpm_tags`` RPMtags object """ fda = rpm_tags.files_digest_algo diff --git 
a/src/packagedcode/rpm_installed.py b/src/packagedcode/rpm_installed.py index 8e6a12261c5..14a50c22b71 100644 --- a/src/packagedcode/rpm_installed.py +++ b/src/packagedcode/rpm_installed.py @@ -44,7 +44,7 @@ def parse_rpm_xmlish(location, datasource_id, package_type, package_only=False): if not location or not os.path.exists(location): return - # there are smetimes weird encodings. We avoid issues there + # there are sometimes weird encodings. We avoid issues there with open(location, 'rb') as f: rpms = as_unicode(f.read()) @@ -171,7 +171,7 @@ def build_package(rpm_tags, datasource_id, package_type, package_namespace=None, # and handlers MUST accept **kwargs as they also receive the whole current data # being processed so far as kwargs. -# TODO: process lists in a more explict way +# TODO: process lists in a more explicit way # Most handler do not use it, but parallel list handlers (such as for files) use # this to process such lists by accumulating data passed around @@ -241,7 +241,7 @@ def basename_handler(value, **kwargs): def infer_digest_algo(digest): """ - Given a ``digest`` string, return an inferred digest algorightm. + Given a ``digest`` string, return an inferred digest algorithm. We assume hex encoding for now (base64 with or without padding is common these days) diff --git a/src/packagedcode/rubygems.py b/src/packagedcode/rubygems.py index e80295c48af..13dd032d3cf 100644 --- a/src/packagedcode/rubygems.py +++ b/src/packagedcode/rubygems.py @@ -391,7 +391,7 @@ def extract_gem_metadata(location): """ Return the string content of the metadata of a .gem archive file at ``location`` or None. - This performs an extracion to a temp directory. + This performs an extraction to a temp directory. 
""" extract_loc = None try: @@ -710,7 +710,7 @@ def party_mapper(role, names=[], emails=[]): def get_parties(gem_data): """ - Return a lits of Party from a mapping of ``gem_data`` + Return a list of Party from a mapping of ``gem_data`` """ parties = [] authors = gem_data.get('author') or [] diff --git a/src/packagedcode/spec.py b/src/packagedcode/spec.py index 95dfdba7dd9..11b05b3d67d 100644 --- a/src/packagedcode/spec.py +++ b/src/packagedcode/spec.py @@ -154,7 +154,7 @@ def parse_spec(location, package_type): # description can be in single or multi-lines # There are many different ways to write description. - # we reparse for multline + # we reparse for multiline description = spec_data.get("description") if description: if '<<-' in description: @@ -256,7 +256,7 @@ def get_cleaned_string(string): def get_multiline_description(description_start, lines): """ Return a multiline description given the ``description_start`` start of the - decsription and a ``lines`` list. These are common in .podspec. + description and a ``lines`` list. These are common in .podspec. https://guides.cocoapods.org/syntax/podspec.html#description description is in the form: diff --git a/src/scancode/cli.py b/src/scancode/cli.py index 1376c6cfee9..58e2306db56 100644 --- a/src/scancode/cli.py +++ b/src/scancode/cli.py @@ -437,7 +437,7 @@ def scancode( its `setup` method is called if it is enabled. - `pre-scan`: each enabled pre-scan plugin `process_codebase(codebase)` - method is called to update/transforme the whole codebase. + method is called to update/transform the whole codebase. - `scan`: the codebase is walked and each enabled scan plugin `get_scanner()` scanner function is called once for each codebase @@ -467,7 +467,7 @@ def scancode( - `timeout`: float: intterup the scan of a file if it does not finish within `timeout` seconds. This applied to each file and scan individually (e.g. 
if the license scan is interrupted they other scans may complete, each - withing the timeout) + within the timeout) - `quiet` and `verbose`: boolean flags: Do not display any message if `quiet` is True. Otherwise, display extra verbose messages if `quiet` is @@ -768,7 +768,7 @@ def echo_func(*_args, **_kwargs): plugins_to_setup.extend(all_enabled_plugins_by_qname.values()) if TRACE_DEEP: - logger_debug('scancode: plugins_to_setup: includng enabled:', plugins_to_setup) + logger_debug('scancode: plugins_to_setup: including enabled:', plugins_to_setup) ######################################################################## # Setup enabled and required plugins diff --git a/src/scancode_config.py b/src/scancode_config.py index 6f5ef644377..8bec2b28f49 100644 --- a/src/scancode_config.py +++ b/src/scancode_config.py @@ -36,7 +36,7 @@ def _create_dir(location): """ Create directory and all sub-directories recursively at `location`. Raise Exceptions if it fails to create the directory. - NOTE: this is essentailly a copy of commoncode.fileutils.create_dir() + NOTE: this is essentially a copy of commoncode.fileutils.create_dir() """ if exists(location): @@ -156,7 +156,7 @@ def _create_dir(location): # variables # - scancode_cache_dir: for long-lived caches which are installation-specific: -# this is for cached data which are infrequently written to and mostly readed, +# this is for cached data which are infrequently written to and mostly read, # such as the license index cache. The same location is used across runs of # a given version of ScanCode """ @@ -180,7 +180,7 @@ def _create_dir(location): # we pre-build the index and bundle this with the the deployed release # therefore we use package data -# .... but we accept this to be overriden with and env variable +# .... 
but we accept this to be overridden with an env variable std_license_cache_dir = join(scancode_src_dir, 'licensedcode', 'data', 'cache') __env_license_cache_dir = os.getenv('SCANCODE_LICENSE_INDEX_CACHE') licensedcode_cache_dir = (__env_license_cache_dir or std_license_cache_dir) diff --git a/src/summarycode/classify.py b/src/summarycode/classify.py index e07d6604826..c8e261de02b 100644 --- a/src/summarycode/classify.py +++ b/src/summarycode/classify.py @@ -95,7 +95,7 @@ def get_relative_path(root_path, path): ) # Community files are usually files used for FOSS project and community -# maintainence purposes. We want to detect these as in the context of +# maintenance purposes. We want to detect these as in the context of # licenses as these files don't have interesting license detections, or # license detection issues are not important to review for these files. # this is similar to `key` files, which also has a lot of community info diff --git a/src/summarycode/copyright_tallies.py b/src/summarycode/copyright_tallies.py index 48f5e22445f..f9a577b0109 100644 --- a/src/summarycode/copyright_tallies.py +++ b/src/summarycode/copyright_tallies.py @@ -159,7 +159,7 @@ class Text(object): key = attr.attrib() # original text for a copyright holder original = attr.attrib() - # count of occurences of a text + # count of occurrences of a text count = attr.attrib(default=1) def normalize(self): @@ -335,7 +335,7 @@ def tally(summary_texts): def cluster(texts): """ Given a `texts` iterable of Text objects, group these objects when they have the - same key. Yield a tuple of (Text object, count of its occurences). + same key. Yield a tuple of (Text object, count of its occurrences). 
""" clusters = defaultdict(list) for text in texts: diff --git a/src/summarycode/facet.py b/src/summarycode/facet.py index 97f1498065e..32c1e8b1de5 100644 --- a/src/summarycode/facet.py +++ b/src/summarycode/facet.py @@ -42,7 +42,7 @@ def logger_debug(*args): A facet is defined by zero or more glob/fnmatch expressions. Multiple facets can be assigned to a file. The facets definition is a list of (facet, pattern) and a -file is assigned all the facets that have a pattern defintion that match their +file is assigned all the facets that have a pattern definition that match their path. Once all files have been assigned a facet, files without a facet are assigned to diff --git a/src/summarycode/summarizer.py b/src/summarycode/summarizer.py index 3319670bfc8..d0ac026893f 100644 --- a/src/summarycode/summarizer.py +++ b/src/summarycode/summarizer.py @@ -162,7 +162,7 @@ def get_declared_holders(codebase, holders_tallies): detected from key files. A declared holder is a copyright holder present in the key files who has the - highest amount of refrences throughout the codebase. + highest amount of references throughout the codebase. """ entry_by_holders = { fingerprints.generate(entry['value']): entry for entry in holders_tallies if entry['value'] diff --git a/src/summarycode/tallies.py b/src/summarycode/tallies.py index b488686ad00..cf334ac7bf6 100644 --- a/src/summarycode/tallies.py +++ b/src/summarycode/tallies.py @@ -155,7 +155,7 @@ def compute_codebase_tallies(codebase, keep_details, **kwargs): def license_tallies(resource, children, keep_details=False): """ Populate a license_expressions list of mappings such as - {value: "expression", count: "count of occurences"} + {value: "expression", count: "count of occurrences"} sorted by decreasing count. 
""" LIC_EXP = 'detected_license_expression' @@ -210,7 +210,7 @@ def license_tallies(resource, children, keep_details=False): def tally_licenses(license_expressions): """ Given a list of license expressions, return a mapping of {expression: count - of occurences} + of occurrences} """ # TODO: we could normalize and/or sort each license_expression before # summarization and consider other equivalence or containment checks @@ -220,7 +220,7 @@ def tally_licenses(license_expressions): def language_tallies(resource, children, keep_details=False): """ Populate a programming_language tallies list of mappings such as - {value: "programming_language", count: "count of occurences"} + {value: "programming_language", count: "count of occurrences"} sorted by decreasing count. """ PROG_LANG = 'programming_language' @@ -252,9 +252,9 @@ def language_tallies(resource, children, keep_details=False): def tally_languages(languages): """ Given a list of languages, return a mapping of {language: count - of occurences} + of occurrences} """ - # TODO: consider aggregating related langauges (C/C++, etc) + # TODO: consider aggregating related languages (C/C++, etc) return Counter(languages) @@ -271,7 +271,7 @@ def tally_languages(languages): def tally_values(values, attribute): """ Given a list of `values` for a given `attribute`, return a mapping of - {value: count of occurences} using a tallier specific to the attribute. + {value: count of occurrences} using a tallier specific to the attribute. 
""" if attribute not in TALLYABLE_ATTRS: return {} diff --git a/src/summarycode/todo.py b/src/summarycode/todo.py index 8ed5559083a..c1595853293 100644 --- a/src/summarycode/todo.py +++ b/src/summarycode/todo.py @@ -105,8 +105,8 @@ def process_codebase(self, codebase, **kwargs): license_text_diagnostics = kwargs.get("license_text_diagnostics") if not license_diagnostics or not license_text or not license_text_diagnostics: usage_suggestion_message = ( - "The --todo option, when paired with --license option should be used with the folowing " - "additional CLI options for maximum benifit: [`--license-text`, `--license-text-diagnostics`," + "The --todo option, when paired with --license option should be used with the following " + "additional CLI options for maximum benefit: [`--license-text`, `--license-text-diagnostics`," "--license-diagnostics`] as these show additional diagnostic information to help review the issues." ) warnings.simplefilter('always', ToDoPluginUsageWarning) @@ -118,7 +118,7 @@ def process_codebase(self, codebase, **kwargs): if not has_packages and not has_licenses: usage_suggestion_message = ( - "The --todo option should be used with atleast one of the license [`--license`], " + "The --todo option should be used with at least one of the license [`--license`], " "or package [`--package`] options." ) warnings.simplefilter('always', ToDoPluginUsageWarning) @@ -399,7 +399,7 @@ class ReviewComments(Enum): IMPERFECT_COVERAGE = ( "The license detection likely is not conclusive as there was " "license matches with low score or coverage, and so this needs " - "review. scancode would likely benifit from a license rule addition " + "review. scancode would likely benefit from a license rule addition " "from this case, so please report this to scancode-toolkit github issues." 
) LOW_RELEVANCE = ( diff --git a/src/textcode/analysis.py b/src/textcode/analysis.py index 84bea54c824..5e78b2fdc78 100644 --- a/src/textcode/analysis.py +++ b/src/textcode/analysis.py @@ -209,7 +209,7 @@ def break_numbered_unicode_text_lines( splitter = re.compile(split).split for line_number, line in numbered_lines: if len(line) > max_len: - # spli then reassemble in more reasonable chunks + # split then reassemble in more reasonable chunks splitted = splitter(line) chunks = (splitted[i:i + chunk_len] for i in range(0, len(splitted), chunk_len)) for chunk in chunks: diff --git a/src/textcode/gibberish.py b/src/textcode/gibberish.py index 0af96532c15..7cad096969e 100644 --- a/src/textcode/gibberish.py +++ b/src/textcode/gibberish.py @@ -5,7 +5,7 @@ # 12Jun2017 Petr Janata - added srcfile and outfile # 17Jun2107 Petr Janata - expanded set of accepted characters to include digits and hyphen # -# whch is based off of: +# which is based off of: # https://raw.githubusercontent.com/rrenaud/Gibberish-Detector/aa1d4e4555362b3dada97ebe6ecc23a84fc470fe/gib_detect_train.py # @@ -88,7 +88,7 @@ def train(self, bigfile=big_file_path, goodfile=good_file_path, for j in range(len(row)): row[j] = math.log(row[j] / s) - # Find the probability of generating a few arbitrarily choosen good and + # Find the probability of generating a few arbitrarily chosen good and # bad phrases. 
good_probs = [self.avg_transition_prob(l, counts) for l in open(goodfile, encoding='utf-8')] bad_probs = [self.avg_transition_prob(l, counts) for l in open(badfile, encoding='utf-8')] diff --git a/src/textcode/strings.py b/src/textcode/strings.py index db9e81bc36f..3781e0a969d 100644 --- a/src/textcode/strings.py +++ b/src/textcode/strings.py @@ -139,9 +139,9 @@ def clean_string(s, min_len=MIN_LEN, junk=JUNK): """ Yield cleaned strings from string s if it passes some validity tests: * not made of white spaces - * with a minimum length ignoring spaces and punctuations + * with a minimum length ignoring spaces and punctuation * not made of only two repeated character - * not made of only of digits, punctuations and whitespaces + * not made of only of digits, punctuation and whitespaces """ s = s.strip() @@ -150,7 +150,7 @@ def valid(st): return (st and len(st) >= min_len # ignore character repeats, e.g need more than two unique characters and len(set(st.lower())) > 1 - # ignore string made only of digits, spaces or punctuations + # ignore string made only of digits, spaces or punctuation and not all(c in junk for c in st)) if valid(s): @@ -165,7 +165,7 @@ def valid(st): def is_file(s): """ Return True if s looks like a file name. 
- Exmaple: dsdsd.dll + Example: dsdsd.dll """ filename = re.compile('^[\\w_\\-]+\\.\\w{1,4}$', re.IGNORECASE).match return filename(s) diff --git a/tests/packagedcode/data/plugin/help.txt b/tests/packagedcode/data/plugin/help.txt index 7128425a969..7af4bf3de4c 100755 --- a/tests/packagedcode/data/plugin/help.txt +++ b/tests/packagedcode/data/plugin/help.txt @@ -437,7 +437,7 @@ Package type: golang datasource_id: go_sum documentation URL: https://go.dev/ref/mod#go-sum-files primary language: Go - description: Go module cheksums file + description: Go module checksums file path_patterns: '*/go.sum' -------------------------------------------- Package type: golang diff --git a/tests/packagedcode/data/plugin/plugins_list_linux.txt b/tests/packagedcode/data/plugin/plugins_list_linux.txt index eb4763d6c7e..1674c9d9f33 100755 --- a/tests/packagedcode/data/plugin/plugins_list_linux.txt +++ b/tests/packagedcode/data/plugin/plugins_list_linux.txt @@ -458,7 +458,7 @@ Package type: golang datasource_id: go_sum documentation URL: https://go.dev/ref/mod#go-sum-files primary language: Go - description: Go module cheksums file + description: Go module checksums file path_patterns: '*/go.sum' -------------------------------------------- Package type: golang diff --git a/tests/summarycode/data/todo/todo_present/README.multi-orig-tarball-package-expected-diag.json b/tests/summarycode/data/todo/todo_present/README.multi-orig-tarball-package-expected-diag.json index 165178b3913..16a2d97be71 100644 --- a/tests/summarycode/data/todo/todo_present/README.multi-orig-tarball-package-expected-diag.json +++ b/tests/summarycode/data/todo/todo_present/README.multi-orig-tarball-package-expected-diag.json @@ -3,7 +3,7 @@ { "detection_id": "borceux-d99f172d-bc25-b4f7-b6bf-cccec1995ce5", "review_comments": { - "imperfect-match-coverage": "The license detection likely is not conclusive as there was license matches with low score or coverage, and so this needs review. 
scancode would likely benifit from a license rule addition from this case, so please report this to scancode-toolkit github issues." + "imperfect-match-coverage": "The license detection likely is not conclusive as there was license matches with low score or coverage, and so this needs review. scancode would likely benefit from a license rule addition from this case, so please report this to scancode-toolkit github issues." }, "detection": { "license_expression": "borceux", From 247a94eaab9c29afffa8199a1dbc3e8ec5351a3b Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Wed, 4 Feb 2026 13:31:16 +0530 Subject: [PATCH 2/2] Fix RST title underline length in contributing-docs.rst Extend the section underline to match the corrected title length after the spelling fix (Documentaion -> Documentation). Sphinx requires the underline to be at least as long as the title text. Signed-off-by: Mrityunjay Raj --- docs/source/getting-started/contribute/contributing-docs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/getting-started/contribute/contributing-docs.rst b/docs/source/getting-started/contribute/contributing-docs.rst index 09020d66fd0..640844c30f2 100644 --- a/docs/source/getting-started/contribute/contributing-docs.rst +++ b/docs/source/getting-started/contribute/contributing-docs.rst @@ -184,7 +184,7 @@ For more information, refer this tutorial named .. _contributing-docs-style-conventions: Style Conventions for the Documentation --------------------------------------- +---------------------------------------- 1. Headings