Skip to content

Commit 5061b3d

Browse files
committed
Fix wrong offset_data being used for sparse files with an additional size PAX keyword
1 parent e18829a commit 5061b3d

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

Lib/tarfile.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1541,6 +1541,12 @@ def _proc_pax(self, tarfile):
15411541
except HeaderError as e:
15421542
raise SubsequentHeaderError(str(e)) from None
15431543

1544+
# offset_data needs to be stored in case "size" is in pax_headers and
1545+
# the next TAR offset needs to be recomputed. next.offset_data may get
1546+
# overwritten when parsing sparse files and therefore cannot be used
1547+
# directly for the recomputation.
1548+
next_offset_data = next.offset_data
1549+
15441550
# Process GNU sparse information.
15451551
if "GNU.sparse.map" in pax_headers:
15461552
# GNU extended sparse format version 0.1.
@@ -1563,9 +1569,10 @@ def _proc_pax(self, tarfile):
15631569
# If the extended header replaces the size field,
15641570
# we need to recalculate the offset where the next
15651571
# header starts.
1566-
offset = next.offset_data
1572+
offset = next_offset_data
15671573
if next.isreg() or next.type not in SUPPORTED_TYPES:
1568-
offset += next._block(next.size)
1574+
# Do not use next.size here because it may contain the real size for sparse files.
1575+
offset += next._block(int(pax_headers["size"]))
15691576
tarfile.offset = offset
15701577

15711578
return next
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix wrong `offset_data` being used for sparse files with an additional
2+
`size` PAX keyword.

0 commit comments

Comments
 (0)