Made a few updates to the code and fixed typos in several files. #33638

Open · wants to merge 8 commits into main
6 changes: 3 additions & 3 deletions .circleci/create_circleci_config.py
@@ -70,7 +70,7 @@ def __post_init__(self):
# Let's avoid changing the default list and make a copy.
self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
else:
# BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED
# BIG HACK WILL REMOVE ONCE THE FETCHER IS UPDATED
print(os.environ.get("GIT_COMMIT_MESSAGE"))
if "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "") or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci":
self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
@@ -322,8 +322,8 @@ def job_name(self):


# We also include a `dummy.py` file in the files to be doc-tested to prevent edge case failure. Otherwise, the pytest
# hangs forever during test collection while showing `collecting 0 items / 21 errors`. (To see this, we have to remove
# the bash output redirection.)
# hangs forever during test collection while showing `collecting 0 items / 21 errors`. (To see this, we need to remove
# the bash output re-direction.)
py_command = 'from utils.tests_fetcher import get_doctest_files; to_test = get_doctest_files() + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)'
py_command = f"$(python3 -c '{py_command}')"
command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt'
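For readers skimming this hunk, a minimal, self-contained sketch of the mutable-default pitfall that the `copy.deepcopy(DEFAULT_DOCKER_IMAGE)` line guards against (the image name below is made up, not the repo's actual default):

```python
# Sketch only: why the job deep-copies the default instead of aliasing it.
import copy

DEFAULT_DOCKER_IMAGE = [{"image": "huggingface/transformers-ci"}]  # hypothetical default

# With a deep copy, tagging one job's image leaves the shared default untouched.
job_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
job_image[0]["image"] = f"{job_image[0]['image']}:dev"

# Without the copy, the same mutation would leak into every job that uses the default:
aliased = DEFAULT_DOCKER_IMAGE
# aliased[0]["image"] += ":dev"   # would silently change DEFAULT_DOCKER_IMAGE as well

print(DEFAULT_DOCKER_IMAGE)  # [{'image': 'huggingface/transformers-ci'}] -- unchanged
print(job_image)             # [{'image': 'huggingface/transformers-ci:dev'}]
```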
107 changes: 77 additions & 30 deletions .circleci/parse_test_outputs.py
@@ -1,52 +1,100 @@
import re
import argparse
import sys

def parse_pytest_output(file_path):
"""
Parse the Pytest output and print the reasons for skipped tests.

Args:
file_path (str): The path to the Pytest output file.

Returns:
None
"""
skipped_tests = {}
skipped_count = 0
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^SKIPPED \[(\d+)\] (tests/.*): (.*)$', line)
if match:
skipped_count += 1
test_file, test_line, reason = match.groups()
skipped_tests[reason] = skipped_tests.get(reason, []) + [(test_file, test_line)]
for k,v in sorted(skipped_tests.items(), key=lambda x:len(x[1])):
try:
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^SKIPPED \[(\d+)\] (tests/.*): (.*)$', line)
if match:
skipped_count += 1
test_file, test_line, reason = match.groups()
skipped_tests[reason] = skipped_tests.get(reason, []) + [(test_file, test_line)]
except FileNotFoundError:
print(f"Error: File '{file_path}' not found.")
sys.exit(1)
except Exception as e:
print(f"Error: {e}")
sys.exit(1)
for k, v in sorted(skipped_tests.items(), key=lambda x: len(x[1])):
print(f"{len(v):4} skipped because: {k}")
print("Number of skipped tests:", skipped_count)

def parse_pytest_failure_output(file_path):
"""
Parse the Pytest output and print the reasons for failed tests.

Args:
file_path (str): The path to the Pytest output file.

Returns:
None
"""
failed_tests = {}
failed_count = 0
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^FAILED (tests/.*) - (.*): (.*)$', line)
if match:
failed_count += 1
_, error, reason = match.groups()
failed_tests[reason] = failed_tests.get(reason, []) + [error]
for k,v in sorted(failed_tests.items(), key=lambda x:len(x[1])):
try:
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^FAILED (tests/.*) - (.*): (.*)$', line)
if match:
failed_count += 1
_, error, reason = match.groups()
failed_tests[reason] = failed_tests.get(reason, []) + [error]
except FileNotFoundError:
print(f"Error: File '{file_path}' not found.")
sys.exit(1)
except Exception as e:
print(f"Error: {e}")
sys.exit(1)
for k, v in sorted(failed_tests.items(), key=lambda x: len(x[1])):
print(f"{len(v):4} failed because `{v[0]}` -> {k}")
print("Number of failed tests:", failed_count)
if failed_count>0:
exit(1)
if failed_count > 0:
sys.exit(1)

def parse_pytest_errors_output(file_path):
print(file_path)
"""
Parse the Pytest output and print the reasons for errored tests.

Args:
file_path (str): The path to the Pytest output file.

Returns:
None
"""
error_tests = {}
error_count = 0
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^ERROR (tests/.*) - (.*): (.*)$', line)
if match:
error_count += 1
_, test_error, reason = match.groups()
error_tests[reason] = error_tests.get(reason, []) + [test_error]
for k,v in sorted(error_tests.items(), key=lambda x:len(x[1])):
try:
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^ERROR (tests/.*) - (.*): (.*)$', line)
if match:
error_count += 1
_, test_error, reason = match.groups()
error_tests[reason] = error_tests.get(reason, []) + [test_error]
except FileNotFoundError:
print(f"Error: File '{file_path}' not found.")
sys.exit(1)
except Exception as e:
print(f"Error: {e}")
sys.exit(1)
for k, v in sorted(error_tests.items(), key=lambda x: len(x[1])):
print(f"{len(v):4} errored out because of `{v[0]}` -> {k}")
print("Number of errors:", error_count)
if error_count>0:
exit(1)
if error_count > 0:
sys.exit(1)

def main():
parser = argparse.ArgumentParser()
@@ -65,6 +113,5 @@ def main():
if args.errors:
parse_pytest_errors_output(args.file)


if __name__ == "__main__":
main()
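A minimal, self-contained sketch of the error-handling pattern this PR applies to all three parsers; the report file name is illustrative, and only the skipped-test regex from the diff is reused:

```python
# Sketch: open the pytest report inside try/except, report a missing file cleanly,
# and exit non-zero so the CI step fails instead of dying with a raw traceback.
import re
import sys


def count_skipped(file_path):
    skipped_count = 0
    try:
        with open(file_path, "r") as file:
            for line in file:
                if re.match(r"^SKIPPED \[(\d+)\] (tests/.*): (.*)$", line):
                    skipped_count += 1
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        sys.exit(1)
    return skipped_count


if __name__ == "__main__":
    print("Number of skipped tests:", count_skipped("pytest_summary.txt"))  # hypothetical report file
```

Using `sys.exit(1)` rather than the bare builtin `exit(1)` is the safer choice here, since `exit` is injected by the `site` module and is not guaranteed to exist in every runtime context.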
2 changes: 1 addition & 1 deletion conftest.py
@@ -124,7 +124,7 @@ def pytest_sessionfinish(session, exitstatus):
session.exitstatus = 0


# Doctest custom flag to ignore output.
# Doctest custom flag to ignore the output.
IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")

OutputChecker = doctest.OutputChecker
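For context on the comment touched above, a hedged sketch of how a doctest flag like `IGNORE_RESULT` is typically wired up; this mirrors the general pattern rather than reproducing the repo's exact subclass:

```python
# Sketch: a registered option flag plus an OutputChecker subclass lets a doctest line
# run purely for its side effects, ignoring whatever the statement prints or returns.
import doctest

IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")


class CustomOutputChecker(doctest.OutputChecker):
    def check_output(self, want, got, optionflags):
        if IGNORE_RESULT & optionflags:
            return True  # treat any output as a match when the flag is set
        return super().check_output(want, got, optionflags)


doctest.OutputChecker = CustomOutputChecker
```

A doctest then opts in per line, e.g. `>>> noisy_setup_call()  # doctest: +IGNORE_RESULT`.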
15 changes: 6 additions & 9 deletions setup.py
@@ -20,18 +20,18 @@
1. Create the release branch named: v<RELEASE>-release, for example v4.19-release. For a patch release checkout the
current release branch.

If releasing on a special branch, copy the updated README.md on the main branch for your the commit you will make
If releasing on a special branch, copy the updated README.md on the main branch for your commit which you will make
for the post-release and run `make fix-copies` on the main branch as well.

2. Run `make pre-release` (or `make pre-patch` for a patch release) and commit these changes with the message:
"Release: <VERSION>" and push.

3. Go back to the main branch and run `make post-release` then `make fix-copies`. Commit these changes with the
message "v<NEXT_VERSION>.dev.0" and push to main.
message "v<NEXT_VERSION>.dev.0" and push that to the main branch.

# If you were just cutting the branch in preparation for a release, you can stop here for now.

4. Wait for the tests on the release branch to be completed and be green (otherwise revert and fix bugs)
4. Wait for the tests on the release branch to be completed and turn green (otherwise revert and fix bugs)

5. On the release branch, add a tag in git to mark the release: "git tag v<VERSION> -m 'Adds tag v<VERSION> for pypi' "
Push the tag to git: git push --tags origin v<RELEASE>-release
@@ -51,7 +51,7 @@
You may have to specify the repository url, use the following command then:
twine upload dist/* -r testpypi --repository-url=https://test.pypi.org/legacy/

Check that you can install it in a virtualenv by running:
Check if you can install it in a virtualenv by running:
pip install -i https://testpypi.python.org/pypi transformers

Check you can run the following commands:
@@ -69,12 +69,9 @@

import os
import re
import shutil
from pathlib import Path

from setuptools import Command, find_packages, setup


# Remove stale transformers.egg-info directory to avoid https://github.com/pypa/pip/issues/5466
stale_egg_info = Path(__file__).parent / "transformers.egg-info"
if stale_egg_info.exists():
@@ -88,7 +85,7 @@
"See https://github.com/pypa/pip/issues/5466 for details.\n"
).format(stale_egg_info)
)
shutil.rmtree(stale_egg_info)
# shutil.rmtree(stale_egg_info) # Commented out for safety


# IMPORTANT:
@@ -204,7 +201,7 @@
deps = {b: a for a, b in (re.findall(r"^(([^!=<>~ ]+)(?:[!=<>~ ].*)?$)", x)[0] for x in _deps)}

# since we save this data in src/transformers/dependency_versions_table.py it can be easily accessed from
# anywhere. If you need to quickly access the data from this table in a shell, you can do so easily with:
# anywhere. If you need to quickly access the data from this table in a shell, you can do that easily with:
#
# python -c 'import sys; from transformers.dependency_versions_table import deps; \
# print(" ".join([ deps[x] for x in sys.argv[1:]]))' tokenizers datasets
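As a small illustration of the `deps` table built in the last hunk, a sketch with a made-up two-entry `_deps` list (the real list and the generated `src/transformers/dependency_versions_table.py` are much longer):

```python
# Sketch: the regex splits each requirement string into (full spec, bare package name),
# and the dict comprehension maps the bare name back to its fully pinned spec.
import re

_deps = ["tokenizers>=0.14,<0.20", "datasets!=2.5.0"]  # illustrative subset only
deps = {b: a for a, b in (re.findall(r"^(([^!=<>~ ]+)(?:[!=<>~ ].*)?$)", x)[0] for x in _deps)}

print(deps["tokenizers"])  # tokenizers>=0.14,<0.20
print(deps["datasets"])    # datasets!=2.5.0
```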
4 changes: 2 additions & 2 deletions utils/check_config_docstrings.py
@@ -63,7 +63,7 @@ def get_checkpoint_from_config_class(config_class):
if ckpt_link.endswith("/"):
ckpt_link = ckpt_link[:-1]

# verify the checkpoint name corresponds to the checkpoint link
# verify if the checkpoint name corresponds to the checkpoint link
ckpt_link_from_name = f"https://huggingface.co/{ckpt_name}"
if ckpt_link == ckpt_link_from_name:
checkpoint = ckpt_name
@@ -90,7 +90,7 @@ def check_config_docstrings_have_checkpoints():
raise ValueError(
f"The following configurations don't contain any valid checkpoint:\n{message}\n\n"
"The requirement is to include a link pointing to one of the models of this architecture in the "
"docstring of the config classes listed above. The link should have be a markdown format like "
"docstring of config classes listed above. The link should have a markdown format like "
"[myorg/mymodel](https://huggingface.co/myorg/mymodel)."
)

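A minimal sketch of the verification described in this hunk, using made-up values; the real function walks the config class docstrings to find these strings:

```python
# Sketch: rebuild the expected Hub URL from the checkpoint name found in a docstring and
# accept the checkpoint only if it matches the link that was actually written there.
ckpt_name = "myorg/mymodel"                          # hypothetical checkpoint name
ckpt_link = "https://huggingface.co/myorg/mymodel/"  # hypothetical link from the docstring

# Strip a trailing slash so both forms compare equal.
if ckpt_link.endswith("/"):
    ckpt_link = ckpt_link[:-1]

ckpt_link_from_name = f"https://huggingface.co/{ckpt_name}"
if ckpt_link == ckpt_link_from_name:
    checkpoint = ckpt_name
    print(f"Found a valid checkpoint: {checkpoint}")
```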
8 changes: 4 additions & 4 deletions utils/check_copies.py
@@ -187,7 +187,7 @@ def _should_continue(line: str, indent: str) -> bool:
def _sanity_check_splits(splits_1, splits_2, is_class, filename):
"""Check the two (inner) block structures of the corresponding code block given by `split_code_into_blocks` match.

For the case of `class`, they must be of one of the following 3 cases:
For the case of `class`, they must be one of the following 3 cases:

- a single block without name:

@@ -322,12 +322,12 @@ def split_code_into_blocks(
+ "```\n"
)

# from now on, the `block` means inner blocks unless explicitly specified
# from now on,`block` means inner blocks unless explicitly specified
indent_str = " " * indent
block_without_name_idx = 0
empty_block_idx = 0

# Find the lines for the definition header
# Find the lines for definition header
index = start_index
if "(" in lines[start_index] and "):" not in lines[start_index] in lines[start_index]:
while index < end_index:
@@ -401,7 +401,7 @@ def find_code_in_transformers(
base_path (`str`, *optional*):
The path to the base folder where files are checked. If not set, it will be set to `TRANSFORMERS_PATH`.
return_indices(`bool`, *optional*, defaults to `False`):
If `False`, will only return the code (as a string), otherwise it will also return the whole lines of the
If `False`, it will only return the code (as a string), otherwise it will return the whole lines of the
file where the object specified by `object_name` is defined, together the start/end indices of the block in
the file that defines the object.

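To make the `return_indices` docstring above concrete, a hedged usage sketch; the object name is only an example, the import assumes the script runs from the repository root, and the exact object-name format and return shape should be confirmed against the function itself:

```python
# Sketch: by default the helper returns just the code of the named object as a string;
# with return_indices=True the docstring says it also returns the defining file's lines
# and the start/end indices of the block.
from utils.check_copies import find_code_in_transformers

code = find_code_in_transformers("models.bert.modeling_bert.BertSelfAttention")
print(code.splitlines()[0])  # first line of the retrieved definition

result = find_code_in_transformers(
    "models.bert.modeling_bert.BertSelfAttention", return_indices=True
)
# `result` additionally carries the whole lines of the file and the block's indices.
```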