Made a few updates to the code and fixed typos in several files. #33638

Open · wants to merge 8 commits into main
6 changes: 3 additions & 3 deletions .circleci/create_circleci_config.py
@@ -70,7 +70,7 @@ def __post_init__(self):
# Let's avoid changing the default list and make a copy.
self.docker_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
else:
# BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED
# BIG HACK WILL REMOVE ONCE THE FETCHER IS UPDATED
print(os.environ.get("GIT_COMMIT_MESSAGE"))
if "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "") or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci":
self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
@@ -322,8 +322,8 @@ def job_name(self):


# We also include a `dummy.py` file in the files to be doc-tested to prevent edge case failure. Otherwise, the pytest
# hangs forever during test collection while showing `collecting 0 items / 21 errors`. (To see this, we have to remove
# the bash output redirection.)
# hangs forever during test collection while showing `collecting 0 items / 21 errors`. (To see this, we need to remove
# the bash output re-direction.)
py_command = 'from utils.tests_fetcher import get_doctest_files; to_test = get_doctest_files() + ["dummy.py"]; to_test = " ".join(to_test); print(to_test)'
py_command = f"$(python3 -c '{py_command}')"
command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt'
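For readers skimming this hunk, a minimal, self-contained sketch of the mutable-default pitfall that the `copy.deepcopy(DEFAULT_DOCKER_IMAGE)` line guards against (the image name below is made up, not the repo's actual default):

```python
# Sketch only: why the job deep-copies the default instead of aliasing it.
import copy

DEFAULT_DOCKER_IMAGE = [{"image": "huggingface/transformers-ci"}]  # hypothetical default

# With a deep copy, tagging one job's image leaves the shared default untouched.
job_image = copy.deepcopy(DEFAULT_DOCKER_IMAGE)
job_image[0]["image"] = f"{job_image[0]['image']}:dev"

# Without the copy, the same mutation would leak into every job that uses the default:
aliased = DEFAULT_DOCKER_IMAGE
# aliased[0]["image"] += ":dev"   # would silently change DEFAULT_DOCKER_IMAGE as well

print(DEFAULT_DOCKER_IMAGE)  # [{'image': 'huggingface/transformers-ci'}] -- unchanged
print(job_image)             # [{'image': 'huggingface/transformers-ci:dev'}]
```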
107 changes: 77 additions & 30 deletions .circleci/parse_test_outputs.py
@@ -1,52 +1,100 @@
import re
import argparse
import sys

def parse_pytest_output(file_path):
"""
Parse the Pytest output and print the reasons for skipped tests.

Args:
file_path (str): The path to the Pytest output file.

Returns:
None
"""
skipped_tests = {}
skipped_count = 0
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^SKIPPED \[(\d+)\] (tests/.*): (.*)$', line)
if match:
skipped_count += 1
test_file, test_line, reason = match.groups()
skipped_tests[reason] = skipped_tests.get(reason, []) + [(test_file, test_line)]
for k,v in sorted(skipped_tests.items(), key=lambda x:len(x[1])):
try:
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^SKIPPED \[(\d+)\] (tests/.*): (.*)$', line)
if match:
skipped_count += 1
test_file, test_line, reason = match.groups()
skipped_tests[reason] = skipped_tests.get(reason, []) + [(test_file, test_line)]
except FileNotFoundError:
print(f"Error: File '{file_path}' not found.")
sys.exit(1)
except Exception as e:
print(f"Error: {e}")
sys.exit(1)
for k, v in sorted(skipped_tests.items(), key=lambda x: len(x[1])):
print(f"{len(v):4} skipped because: {k}")
print("Number of skipped tests:", skipped_count)

def parse_pytest_failure_output(file_path):
"""
Parse the Pytest output and print the reasons for failed tests.

Args:
file_path (str): The path to the Pytest output file.

Returns:
None
"""
failed_tests = {}
failed_count = 0
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^FAILED (tests/.*) - (.*): (.*)$', line)
if match:
failed_count += 1
_, error, reason = match.groups()
failed_tests[reason] = failed_tests.get(reason, []) + [error]
for k,v in sorted(failed_tests.items(), key=lambda x:len(x[1])):
try:
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^FAILED (tests/.*) - (.*): (.*)$', line)
if match:
failed_count += 1
_, error, reason = match.groups()
failed_tests[reason] = failed_tests.get(reason, []) + [error]
except FileNotFoundError:
print(f"Error: File '{file_path}' not found.")
sys.exit(1)
except Exception as e:
print(f"Error: {e}")
sys.exit(1)
for k, v in sorted(failed_tests.items(), key=lambda x: len(x[1])):
print(f"{len(v):4} failed because `{v[0]}` -> {k}")
print("Number of failed tests:", failed_count)
if failed_count>0:
exit(1)
if failed_count > 0:
sys.exit(1)

def parse_pytest_errors_output(file_path):
print(file_path)
"""
Parse the Pytest output and print the reasons for errored tests.

Args:
file_path (str): The path to the Pytest output file.

Returns:
None
"""
error_tests = {}
error_count = 0
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^ERROR (tests/.*) - (.*): (.*)$', line)
if match:
error_count += 1
_, test_error, reason = match.groups()
error_tests[reason] = error_tests.get(reason, []) + [test_error]
for k,v in sorted(error_tests.items(), key=lambda x:len(x[1])):
try:
with open(file_path, 'r') as file:
for line in file:
match = re.match(r'^ERROR (tests/.*) - (.*): (.*)$', line)
if match:
error_count += 1
_, test_error, reason = match.groups()
error_tests[reason] = error_tests.get(reason, []) + [test_error]
except FileNotFoundError:
print(f"Error: File '{file_path}' not found.")
sys.exit(1)
except Exception as e:
print(f"Error: {e}")
sys.exit(1)
for k, v in sorted(error_tests.items(), key=lambda x: len(x[1])):
print(f"{len(v):4} errored out because of `{v[0]}` -> {k}")
print("Number of errors:", error_count)
if error_count>0:
exit(1)
if error_count > 0:
sys.exit(1)

def main():
parser = argparse.ArgumentParser()
@@ -65,6 +113,5 @@ def main():
if args.errors:
parse_pytest_errors_output(args.file)


if __name__ == "__main__":
main()
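A minimal, self-contained sketch of the error-handling pattern this PR applies to all three parsers; the report file name is illustrative, and only the skipped-test regex from the diff is reused:

```python
# Sketch: open the pytest report inside try/except, report a missing file cleanly,
# and exit non-zero so the CI step fails instead of dying with a raw traceback.
import re
import sys


def count_skipped(file_path):
    skipped_count = 0
    try:
        with open(file_path, "r") as file:
            for line in file:
                if re.match(r"^SKIPPED \[(\d+)\] (tests/.*): (.*)$", line):
                    skipped_count += 1
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        sys.exit(1)
    return skipped_count


if __name__ == "__main__":
    print("Number of skipped tests:", count_skipped("pytest_summary.txt"))  # hypothetical report file
```

Using `sys.exit(1)` rather than the bare builtin `exit(1)` is the safer choice here, since `exit` is injected by the `site` module and is not guaranteed to exist in every runtime context.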
2 changes: 1 addition & 1 deletion conftest.py
@@ -124,7 +124,7 @@ def pytest_sessionfinish(session, exitstatus):
session.exitstatus = 0


# Doctest custom flag to ignore output.
# Doctest custom flag to ignore the output.
IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")

OutputChecker = doctest.OutputChecker
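For context on the comment touched above, a hedged sketch of how a doctest flag like `IGNORE_RESULT` is typically wired up; this mirrors the general pattern rather than reproducing the repo's exact subclass:

```python
# Sketch: a registered option flag plus an OutputChecker subclass lets a doctest line
# run purely for its side effects, ignoring whatever the statement prints or returns.
import doctest

IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")


class CustomOutputChecker(doctest.OutputChecker):
    def check_output(self, want, got, optionflags):
        if IGNORE_RESULT & optionflags:
            return True  # treat any output as a match when the flag is set
        return super().check_output(want, got, optionflags)


doctest.OutputChecker = CustomOutputChecker
```

A doctest then opts in per line, e.g. `>>> noisy_setup_call()  # doctest: +IGNORE_RESULT`.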
15 changes: 6 additions & 9 deletions setup.py
@@ -20,18 +20,18 @@
1. Create the release branch named: v<RELEASE>-release, for example v4.19-release. For a patch release checkout the
current release branch.

If releasing on a special branch, copy the updated README.md on the main branch for your the commit you will make
If releasing on a special branch, copy the updated README.md on the main branch for your commit which you will make
for the post-release and run `make fix-copies` on the main branch as well.

2. Run `make pre-release` (or `make pre-patch` for a patch release) and commit these changes with the message:
"Release: <VERSION>" and push.

3. Go back to the main branch and run `make post-release` then `make fix-copies`. Commit these changes with the
message "v<NEXT_VERSION>.dev.0" and push to main.
message "v<NEXT_VERSION>.dev.0" and push that to the main branch.

# If you were just cutting the branch in preparation for a release, you can stop here for now.

4. Wait for the tests on the release branch to be completed and be green (otherwise revert and fix bugs)
4. Wait for the tests on the release branch to be completed and turn green (otherwise revert and fix bugs)

5. On the release branch, add a tag in git to mark the release: "git tag v<VERSION> -m 'Adds tag v<VERSION> for pypi' "
Push the tag to git: git push --tags origin v<RELEASE>-release
@@ -51,7 +51,7 @@
You may have to specify the repository url, use the following command then:
twine upload dist/* -r testpypi --repository-url=https://test.pypi.org/legacy/

Check that you can install it in a virtualenv by running:
Check if you can install it in a virtualenv by running:
pip install -i https://testpypi.python.org/pypi transformers

Check you can run the following commands:
@@ -69,12 +69,9 @@

import os
import re
import shutil
from pathlib import Path

from setuptools import Command, find_packages, setup


# Remove stale transformers.egg-info directory to avoid https://github.com/pypa/pip/issues/5466
stale_egg_info = Path(__file__).parent / "transformers.egg-info"
if stale_egg_info.exists():
@@ -88,7 +85,7 @@
"See https://github.com/pypa/pip/issues/5466 for details.\n"
).format(stale_egg_info)
)
shutil.rmtree(stale_egg_info)
# shutil.rmtree(stale_egg_info) # Commented out for safety


# IMPORTANT:
@@ -204,7 +201,7 @@
deps = {b: a for a, b in (re.findall(r"^(([^!=<>~ ]+)(?:[!=<>~ ].*)?$)", x)[0] for x in _deps)}

# since we save this data in src/transformers/dependency_versions_table.py it can be easily accessed from
# anywhere. If you need to quickly access the data from this table in a shell, you can do so easily with:
# anywhere. If you need to quickly access the data from this table in a shell, you can do that easily with:
#
# python -c 'import sys; from transformers.dependency_versions_table import deps; \
# print(" ".join([ deps[x] for x in sys.argv[1:]]))' tokenizers datasets
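As a small illustration of the `deps` table built in the last hunk, a sketch with a made-up two-entry `_deps` list (the real list and the generated `src/transformers/dependency_versions_table.py` are much longer):

```python
# Sketch: the regex splits each requirement string into (full spec, bare package name),
# and the dict comprehension maps the bare name back to its fully pinned spec.
import re

_deps = ["tokenizers>=0.14,<0.20", "datasets!=2.5.0"]  # illustrative subset only
deps = {b: a for a, b in (re.findall(r"^(([^!=<>~ ]+)(?:[!=<>~ ].*)?$)", x)[0] for x in _deps)}

print(deps["tokenizers"])  # tokenizers>=0.14,<0.20
print(deps["datasets"])    # datasets!=2.5.0
```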
4 changes: 2 additions & 2 deletions utils/check_config_docstrings.py
@@ -63,7 +63,7 @@ def get_checkpoint_from_config_class(config_class):
if ckpt_link.endswith("/"):
ckpt_link = ckpt_link[:-1]

# verify the checkpoint name corresponds to the checkpoint link
# verify if the checkpoint name corresponds to the checkpoint link
ckpt_link_from_name = f"https://huggingface.co/{ckpt_name}"
if ckpt_link == ckpt_link_from_name:
checkpoint = ckpt_name
@@ -90,7 +90,7 @@ def check_config_docstrings_have_checkpoints():
raise ValueError(
f"The following configurations don't contain any valid checkpoint:\n{message}\n\n"
"The requirement is to include a link pointing to one of the models of this architecture in the "
"docstring of the config classes listed above. The link should have be a markdown format like "
"docstring of config classes listed above. The link should have a markdown format like "
"[myorg/mymodel](https://huggingface.co/myorg/mymodel)."
)

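A minimal sketch of the verification described in this hunk, using made-up values; the real function walks the config class docstrings to find these strings:

```python
# Sketch: rebuild the expected Hub URL from the checkpoint name found in a docstring and
# accept the checkpoint only if it matches the link that was actually written there.
ckpt_name = "myorg/mymodel"                          # hypothetical checkpoint name
ckpt_link = "https://huggingface.co/myorg/mymodel/"  # hypothetical link from the docstring

# Strip a trailing slash so both forms compare equal.
if ckpt_link.endswith("/"):
    ckpt_link = ckpt_link[:-1]

ckpt_link_from_name = f"https://huggingface.co/{ckpt_name}"
if ckpt_link == ckpt_link_from_name:
    checkpoint = ckpt_name
    print(f"Found a valid checkpoint: {checkpoint}")
```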
8 changes: 4 additions & 4 deletions utils/check_copies.py
@@ -187,7 +187,7 @@ def _should_continue(line: str, indent: str) -> bool:
def _sanity_check_splits(splits_1, splits_2, is_class, filename):
"""Check the two (inner) block structures of the corresponding code block given by `split_code_into_blocks` match.

For the case of `class`, they must be of one of the following 3 cases:
For the case of `class`, they must be one of the following 3 cases:

- a single block without name:

@@ -322,12 +322,12 @@ def split_code_into_blocks(
+ "```\n"
)

# from now on, the `block` means inner blocks unless explicitly specified
# from now on,`block` means inner blocks unless explicitly specified
indent_str = " " * indent
block_without_name_idx = 0
empty_block_idx = 0

# Find the lines for the definition header
# Find the lines for definition header
index = start_index
if "(" in lines[start_index] and "):" not in lines[start_index] in lines[start_index]:
while index < end_index:
@@ -401,7 +401,7 @@ def find_code_in_transformers(
base_path (`str`, *optional*):
The path to the base folder where files are checked. If not set, it will be set to `TRANSFORMERS_PATH`.
return_indices(`bool`, *optional*, defaults to `False`):
If `False`, will only return the code (as a string), otherwise it will also return the whole lines of the
If `False`, it will only return the code (as a string), otherwise it will return the whole lines of the
file where the object specified by `object_name` is defined, together the start/end indices of the block in
the file that defines the object.

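To make the `return_indices` docstring above concrete, a hedged usage sketch; the object name is only an example, the import assumes the script runs from the repository root, and the exact object-name format and return shape should be confirmed against the function itself:

```python
# Sketch: by default the helper returns just the code of the named object as a string;
# with return_indices=True the docstring says it also returns the defining file's lines
# and the start/end indices of the block.
from utils.check_copies import find_code_in_transformers

code = find_code_in_transformers("models.bert.modeling_bert.BertSelfAttention")
print(code.splitlines()[0])  # first line of the retrieved definition

result = find_code_in_transformers(
    "models.bert.modeling_bert.BertSelfAttention", return_indices=True
)
# `result` additionally carries the whole lines of the file and the block's indices.
```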