@@ -235,8 +235,60 @@ def extract_zipfile(self, zipfile_path: str, unpack_dir: str) -> None:
235
235
236
236
def extract_tarfile(self, tarfile_path: str, unpack_dir: str) -> None:
    """Unpack the tar archive at ``tarfile_path`` into ``unpack_dir``.

    The archive is opened with ``'r:*'`` so the compression format is
    detected automatically. A validation pass runs over the archive
    before ``extractall`` is called.
    """
    with tarfile.open(tarfile_path, 'r:*') as archive:
        # Python 3.12+ accepts a `filter` argument, and passing 'data'
        # would give us this protection for free. Older interpreters
        # have no such hook into `extractall`, so we run our own
        # validation pass up front to make sure no member tries to land
        # outside of `unpack_dir`. The checks are roughly modelled on
        # `data_filter()` from Python 3.12.
        self._validate_safe_extract(archive, unpack_dir)
        archive.extractall(unpack_dir)
239
247
248
def _validate_safe_extract(
    self, tar: tarfile.TarFile, unpack_dir: str
) -> None:
    """Run the per-member safety check over every entry of ``tar``.

    Each member is handed to ``_validate_single_tar_member`` together
    with ``unpack_dir``; any exception raised there propagates to the
    caller. An archive with no members passes trivially.
    """
    for entry in tar:
        self._validate_single_tar_member(entry, unpack_dir)
255
+
256
def _validate_single_tar_member(
    self,
    member: tarfile.TarInfo,
    unpack_dir: str
) -> None:
    """Ensure extracting ``member`` cannot write outside ``unpack_dir``.

    :param member: The tar entry to check.
    :param unpack_dir: Directory the archive is going to be extracted to.
    :raises RuntimeError: If the member name is absolute after leading
        separators are stripped, if the member resolves to a location
        outside ``unpack_dir``, or if it is a symlink/hard link whose
        target is absolute or resolves outside ``unpack_dir``.
    """
    name = member.name
    dest_path = os.path.realpath(unpack_dir)
    # Leading separators are stripped so the name is treated as relative
    # to the destination directory.
    if name.startswith(('/', os.sep)):
        name = member.path.lstrip('/' + os.sep)
    # Anything still absolute after stripping is rejected outright.
    if os.path.isabs(name):
        raise RuntimeError(f"Absolute path in tarfile not allowed: {name}")
    target_path = os.path.realpath(os.path.join(dest_path, name))
    # Check we don't escape the destination dir, e.g `../../foo`
    if os.path.commonpath([target_path, dest_path]) != dest_path:
        raise RuntimeError(
            f"Tar member outside destination dir: {target_path}")
    # If we're dealing with a member that's some type of link, ensure
    # it doesn't point to anything outside of the destination dir.
    if member.islnk() or member.issym():
        # `os.path.isabs` is the predicate wanted here; `os.path.abspath`
        # returns a non-empty (always truthy) string, which would reject
        # every link, including safe relative ones.
        if os.path.isabs(member.linkname):
            raise RuntimeError(f"Symlink to abspath: {member.linkname}")
        if member.issym():
            # A symlink target is resolved relative to the directory
            # that contains the link itself.
            target_path = os.path.join(
                dest_path,
                os.path.dirname(name),
                member.linkname,
            )
        else:
            # A hard link target is resolved relative to the
            # destination directory.
            target_path = os.path.join(
                dest_path,
                member.linkname)
        target_path = os.path.realpath(target_path)
        if os.path.commonpath([target_path, dest_path]) != dest_path:
            raise RuntimeError(
                f"Symlink outside of dest dir: {target_path}")
291
+
240
292
def directory_exists(self, path: str) -> bool:
    """Return whether ``path`` refers to an existing directory."""
    is_directory = os.path.isdir(path)
    return is_directory
242
294
0 commit comments