Skip to content

Commit 2e999d0

Browse files
authored
Merge pull request josegonzalez#474 from mwtzzz/retry_logic
update retry logic and logging
2 parents 81a72ac + 44b0003 commit 2e999d0

File tree

5 files changed

+277
-75
lines changed

5 files changed

+277
-75
lines changed

README.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,8 @@ CLI Help output::
152152
--throttle-limit to be set)
153153
--exclude [EXCLUDE ...]
154154
names of repositories to exclude
155-
155+
--retries MAX_RETRIES
156+
maximum number of retries for API calls (default: 5)
156157

157158
Usage Details
158159
=============

github_backup/github_backup.py

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,6 @@ def __init__(self, message, dmca_url=None):
7474
" 3. Debian/Ubuntu: apt-get install ca-certificates\n\n"
7575
)
7676

77-
# Retry configuration
78-
MAX_RETRIES = 5
79-
8077

8178
def logging_subprocess(
8279
popenargs, stdout_log_level=logging.DEBUG, stderr_log_level=logging.ERROR, **kwargs
@@ -144,6 +141,17 @@ def mask_password(url, secret="*****"):
144141
return url.replace(parsed.password, secret)
145142

146143

144+
def non_negative_int(value):
145+
"""Argparse type validator for non-negative integers."""
146+
try:
147+
ivalue = int(value)
148+
except ValueError:
149+
raise argparse.ArgumentTypeError(f"'{value}' is not a valid integer")
150+
if ivalue < 0:
151+
raise argparse.ArgumentTypeError(f"{value} must be 0 or greater")
152+
return ivalue
153+
154+
147155
def parse_args(args=None):
148156
parser = argparse.ArgumentParser(description="Backup a github account")
149157
parser.add_argument("user", metavar="USER", type=str, help="github username")
@@ -468,6 +476,13 @@ def parse_args(args=None):
468476
parser.add_argument(
469477
"--exclude", dest="exclude", help="names of repositories to exclude", nargs="*"
470478
)
479+
parser.add_argument(
480+
"--retries",
481+
dest="max_retries",
482+
type=non_negative_int,
483+
default=5,
484+
help="maximum number of retries for API calls (default: 5)",
485+
)
471486
return parser.parse_args(args)
472487

473488

@@ -622,7 +637,7 @@ def retrieve_data(args, template, query_args=None, paginated=True):
622637
def _extract_next_page_url(link_header):
623638
for link in link_header.split(","):
624639
if 'rel="next"' in link:
625-
return link[link.find("<") + 1:link.find(">")]
640+
return link[link.find("<") + 1 : link.find(">")]
626641
return None
627642

628643
def fetch_all() -> Generator[dict, None, None]:
@@ -631,7 +646,7 @@ def fetch_all() -> Generator[dict, None, None]:
631646
while True:
632647
# FIRST: Fetch response
633648

634-
for attempt in range(MAX_RETRIES):
649+
for attempt in range(args.max_retries + 1):
635650
request = _construct_request(
636651
per_page=per_page if paginated else None,
637652
query_args=query_args,
@@ -640,7 +655,7 @@ def fetch_all() -> Generator[dict, None, None]:
640655
as_app=args.as_app,
641656
fine=args.token_fine is not None,
642657
)
643-
http_response = make_request_with_retry(request, auth)
658+
http_response = make_request_with_retry(request, auth, args.max_retries)
644659

645660
match http_response.getcode():
646661
case 200:
@@ -654,10 +669,10 @@ def fetch_all() -> Generator[dict, None, None]:
654669
TimeoutError,
655670
) as e:
656671
logger.warning(f"{type(e).__name__} reading response")
657-
if attempt < MAX_RETRIES - 1:
672+
if attempt < args.max_retries:
658673
delay = calculate_retry_delay(attempt, {})
659674
logger.warning(
660-
f"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RETRIES})"
675+
f"Retrying read in {delay:.1f}s (attempt {attempt + 1}/{args.max_retries + 1})"
661676
)
662677
time.sleep(delay)
663678
continue # Next retry attempt
@@ -683,10 +698,10 @@ def fetch_all() -> Generator[dict, None, None]:
683698
)
684699
else:
685700
logger.error(
686-
f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}"
701+
f"Failed to read response after {args.max_retries + 1} attempts for {next_url or template}"
687702
)
688703
raise Exception(
689-
f"Failed to read response after {MAX_RETRIES} attempts for {next_url or template}"
704+
f"Failed to read response after {args.max_retries + 1} attempts for {next_url or template}"
690705
)
691706

692707
# SECOND: Process and paginate
@@ -718,7 +733,7 @@ def fetch_all() -> Generator[dict, None, None]:
718733
return list(fetch_all())
719734

720735

721-
def make_request_with_retry(request, auth):
736+
def make_request_with_retry(request, auth, max_retries=5):
722737
"""Make HTTP request with automatic retry for transient errors."""
723738

724739
def is_retryable_status(status_code, headers):
@@ -730,40 +745,49 @@ def is_retryable_status(status_code, headers):
730745
return int(headers.get("x-ratelimit-remaining", 1)) < 1
731746
return False
732747

733-
for attempt in range(MAX_RETRIES):
748+
for attempt in range(max_retries + 1):
734749
try:
735750
return urlopen(request, context=https_ctx)
736751

737752
except HTTPError as exc:
738753
# HTTPError can be used as a response-like object
739754
if not is_retryable_status(exc.code, exc.headers):
755+
logger.error(
756+
f"API Error: {exc.code} {exc.reason} for {request.full_url}"
757+
)
740758
raise # Non-retryable error
741759

742-
if attempt >= MAX_RETRIES - 1:
743-
logger.error(f"HTTP {exc.code} failed after {MAX_RETRIES} attempts")
760+
if attempt >= max_retries:
761+
logger.error(
762+
f"HTTP {exc.code} failed after {max_retries + 1} attempts for {request.full_url}"
763+
)
744764
raise
745765

746766
delay = calculate_retry_delay(attempt, exc.headers)
747767
logger.warning(
748-
f"HTTP {exc.code}, retrying in {delay:.1f}s "
749-
f"(attempt {attempt + 1}/{MAX_RETRIES})"
768+
f"HTTP {exc.code} ({exc.reason}), retrying in {delay:.1f}s "
769+
f"(attempt {attempt + 1}/{max_retries + 1}) for {request.full_url}"
750770
)
751771
if auth is None and exc.code in (403, 429):
752772
logger.info("Hint: Authenticate to raise your GitHub rate limit")
753773
time.sleep(delay)
754774

755775
except (URLError, socket.error) as e:
756-
if attempt >= MAX_RETRIES - 1:
757-
logger.error(f"Connection error failed after {MAX_RETRIES} attempts: {e}")
776+
if attempt >= max_retries:
777+
logger.error(
778+
f"Connection error failed after {max_retries + 1} attempts: {e} for {request.full_url}"
779+
)
758780
raise
759781
delay = calculate_retry_delay(attempt, {})
760782
logger.warning(
761783
f"Connection error: {e}, retrying in {delay:.1f}s "
762-
f"(attempt {attempt + 1}/{MAX_RETRIES})"
784+
f"(attempt {attempt + 1}/{max_retries + 1}) for {request.full_url}"
763785
)
764786
time.sleep(delay)
765787

766-
raise Exception(f"Request failed after {MAX_RETRIES} attempts") # pragma: no cover
788+
raise Exception(
789+
f"Request failed after {max_retries + 1} attempts"
790+
) # pragma: no cover
767791

768792

769793
def _construct_request(per_page, query_args, template, auth, as_app=None, fine=False):
@@ -1579,9 +1603,7 @@ def filter_repositories(args, unfiltered_repositories):
15791603
repositories = [r for r in repositories if not r.get("archived")]
15801604
if args.starred_skip_size_over is not None:
15811605
if args.starred_skip_size_over <= 0:
1582-
logger.warning(
1583-
"--starred-skip-size-over must be greater than 0, ignoring"
1584-
)
1606+
logger.warning("--starred-skip-size-over must be greater than 0, ignoring")
15851607
else:
15861608
size_limit_kb = args.starred_skip_size_over * 1024
15871609
filtered = []
@@ -1590,7 +1612,9 @@ def filter_repositories(args, unfiltered_repositories):
15901612
size_mb = r.get("size", 0) / 1024
15911613
logger.info(
15921614
"Skipping starred repo {0} ({1:.0f} MB) due to --starred-skip-size-over {2}".format(
1593-
r.get("full_name", r.get("name")), size_mb, args.starred_skip_size_over
1615+
r.get("full_name", r.get("name")),
1616+
size_mb,
1617+
args.starred_skip_size_over,
15941618
)
15951619
)
15961620
else:

tests/test_http_451.py

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def test_repository_unavailable_error_raised(self):
2121
args.osx_keychain_item_account = None
2222
args.throttle_limit = None
2323
args.throttle_pause = 0
24+
args.max_retries = 5
2425

2526
mock_response = Mock()
2627
mock_response.getcode.return_value = 451
@@ -30,18 +31,26 @@ def test_repository_unavailable_error_raised(self):
3031
"block": {
3132
"reason": "dmca",
3233
"created_at": "2024-11-12T14:38:04Z",
33-
"html_url": "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md"
34-
}
34+
"html_url": "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md",
35+
},
3536
}
3637
mock_response.read.return_value = json.dumps(dmca_data).encode("utf-8")
3738
mock_response.headers = {"x-ratelimit-remaining": "5000"}
3839
mock_response.reason = "Unavailable For Legal Reasons"
3940

40-
with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
41+
with patch(
42+
"github_backup.github_backup.make_request_with_retry",
43+
return_value=mock_response,
44+
):
4145
with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
42-
github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")
43-
44-
assert exc_info.value.dmca_url == "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md"
46+
github_backup.retrieve_data(
47+
args, "https://api.github.com/repos/test/dmca/issues"
48+
)
49+
50+
assert (
51+
exc_info.value.dmca_url
52+
== "https://github.com/github/dmca/blob/master/2024/11/2024-11-04-source-code.md"
53+
)
4554
assert "451" in str(exc_info.value)
4655

4756
def test_repository_unavailable_error_without_dmca_url(self):
@@ -54,16 +63,22 @@ def test_repository_unavailable_error_without_dmca_url(self):
5463
args.osx_keychain_item_account = None
5564
args.throttle_limit = None
5665
args.throttle_pause = 0
66+
args.max_retries = 5
5767

5868
mock_response = Mock()
5969
mock_response.getcode.return_value = 451
6070
mock_response.read.return_value = b'{"message": "Blocked"}'
6171
mock_response.headers = {"x-ratelimit-remaining": "5000"}
6272
mock_response.reason = "Unavailable For Legal Reasons"
6373

64-
with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
74+
with patch(
75+
"github_backup.github_backup.make_request_with_retry",
76+
return_value=mock_response,
77+
):
6578
with pytest.raises(github_backup.RepositoryUnavailableError) as exc_info:
66-
github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")
79+
github_backup.retrieve_data(
80+
args, "https://api.github.com/repos/test/dmca/issues"
81+
)
6782

6883
assert exc_info.value.dmca_url is None
6984
assert "451" in str(exc_info.value)
@@ -78,16 +93,22 @@ def test_repository_unavailable_error_with_malformed_json(self):
7893
args.osx_keychain_item_account = None
7994
args.throttle_limit = None
8095
args.throttle_pause = 0
96+
args.max_retries = 5
8197

8298
mock_response = Mock()
8399
mock_response.getcode.return_value = 451
84100
mock_response.read.return_value = b"invalid json {"
85101
mock_response.headers = {"x-ratelimit-remaining": "5000"}
86102
mock_response.reason = "Unavailable For Legal Reasons"
87103

88-
with patch("github_backup.github_backup.make_request_with_retry", return_value=mock_response):
104+
with patch(
105+
"github_backup.github_backup.make_request_with_retry",
106+
return_value=mock_response,
107+
):
89108
with pytest.raises(github_backup.RepositoryUnavailableError):
90-
github_backup.retrieve_data(args, "https://api.github.com/repos/test/dmca/issues")
109+
github_backup.retrieve_data(
110+
args, "https://api.github.com/repos/test/dmca/issues"
111+
)
91112

92113

93114
if __name__ == "__main__":

tests/test_pagination.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def mock_args():
4949
args.osx_keychain_item_account = None
5050
args.throttle_limit = None
5151
args.throttle_pause = 0
52+
args.max_retries = 5
5253
return args
5354

5455

0 commit comments

Comments
 (0)