Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pylint] re-enable consider-using-{with, f-string} #3729

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 22 additions & 21 deletions plugins_overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
PLUGDIR = 'sos/report/plugins'

plugs_data = {} # the map of all plugins data to collect
plugcontent = '' # content of plugin file just being processed


# method to parse an item of a_s_c/a_c_o/.. methods
Expand All @@ -42,11 +41,11 @@ def add_valid_item(dest, item):
return


# method to find in `plugcontent` all items of given method (a_c_s/a_c_o/..)
# method to find all items of given method (a_c_s/a_c_o/..) in plugin content,
# split by comma; add each valid item to the `dest` list
def add_all_items(method, dest, wrapopen=r'\(', wrapclose=r'\)'):
def add_all_items(method, dest, plugfd, wrapopen=r'\(', wrapclose=r'\)'):
regexp = f"{method}{wrapopen}(.*?){wrapclose}"
for match in re.findall(regexp, plugcontent,
for match in re.findall(regexp, plugfd,
flags=re.MULTILINE | re.DOTALL):
# tuple of distros ended by either (class|from|import)
if isinstance(match, tuple):
Expand Down Expand Up @@ -90,23 +89,25 @@ def add_all_items(method, dest, wrapopen=r'\(', wrapclose=r'\)'):
'journals': [],
'env': [],
}
plugcontent = open(
os.path.join(PLUGDIR, plugfile)).read().replace('\n', '')
add_all_items(
"from sos.report.plugins import ",
plugs_data[plugname]['distros'],
wrapopen='',
wrapclose='(class|from|import)'
)
add_all_items("profiles = ", plugs_data[plugname]['profiles'], wrapopen='')
add_all_items("packages = ", plugs_data[plugname]['packages'], wrapopen='')
add_all_items("add_copy_spec", plugs_data[plugname]['copyspecs'])
add_all_items("add_forbidden_path", plugs_data[plugname]['forbidden'])
add_all_items("add_cmd_output", plugs_data[plugname]['commands'])
add_all_items("collect_cmd_output", plugs_data[plugname]['commands'])
add_all_items("add_service_status", plugs_data[plugname]['service_status'])
add_all_items("add_journal", plugs_data[plugname]['journals'])
add_all_items("add_env_var", plugs_data[plugname]['env'])
with open(os.path.join(PLUGDIR, plugfile)).read().replace('\n', '') as pfd:
add_all_items(
"from sos.report.plugins import ", plugs_data[plugname]['distros'],
pfd, wrapopen='', wrapclose='(class|from|import)'
)
add_all_items("profiles = ", plugs_data[plugname]['profiles'],
pfd, wrapopen='')
add_all_items("packages = ", plugs_data[plugname]['packages'],
pfd, wrapopen='')
add_all_items("add_copy_spec", plugs_data[plugname]['copyspecs'], pfd)
add_all_items("add_forbidden_path",
plugs_data[plugname]['forbidden'], pfd)
add_all_items("add_cmd_output", plugs_data[plugname]['commands'], pfd)
add_all_items("collect_cmd_output",
plugs_data[plugname]['commands'], pfd)
add_all_items("add_service_status",
plugs_data[plugname]['service_status'], pfd)
add_all_items("add_journal", plugs_data[plugname]['journals'], pfd)
add_all_items("add_env_var", plugs_data[plugname]['env'], pfd)

# print output; if "csv" is cmdline argument, print in CSV format, else JSON
if (len(sys.argv) > 1) and (sys.argv[1] == "csv"):
Expand Down
1 change: 0 additions & 1 deletion pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ disable=
W0719, # broad-exception-raised
W1203, # logging-fstring-interpolation
W1406, # redundant-u-string-prefix
R1732, # consider-using-with
W1514, # unspecified-encoding
W0107, # unnecessary-pass
W0718, # broad-exception-caught
Expand Down
31 changes: 15 additions & 16 deletions sos/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,22 +738,21 @@ def _build_archive(self, method):
kwargs = {'compresslevel': 6}
else:
kwargs = {'preset': 3}
tar = tarfile.open(self._archive_name, mode=f"w:{_comp_mode}",
**kwargs)
# add commonly reviewed files first, so that they can be more easily
# read from memory without needing to extract the whole archive
for _content in ['version.txt', 'sos_reports', 'sos_logs']:
if not os.path.exists(os.path.join(self._archive_root, _content)):
continue
tar.add(
os.path.join(self._archive_root, _content),
arcname=f"{self._name}/{_content}"
)
# we need to pass the absolute path to the archive root but we
# want the names used in the archive to be relative.
tar.add(self._archive_root, arcname=self._name,
filter=self.copy_permissions_filter)
tar.close()
with tarfile.open(self._archive_name, mode=f"w:{_comp_mode}",
**kwargs) as tar:
# Add commonly reviewed files first, so that they can be more
# easily read from memory without needing to extract
# the whole archive
for _content in ['version.txt', 'sos_reports', 'sos_logs']:
if os.path.exists(os.path.join(self._archive_root, _content)):
tar.add(
os.path.join(self._archive_root, _content),
arcname=f"{self._name}/{_content}"
)
# we need to pass the absolute path to the archive root but we
# want the names used in the archive to be relative.
tar.add(self._archive_root, arcname=self._name,
filter=self.copy_permissions_filter)
self._suffix += f".{_comp_mode}"
return self.name()

Expand Down
28 changes: 14 additions & 14 deletions sos/cleaner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,20 +782,20 @@ def obfuscate_file(self, filename, short_name=None, arc_name=None):
return 0
self.log_debug(f"Obfuscating {short_name or filename}",
caller=arc_name)
tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
with open(filename, 'r', errors='replace') as fname:
for line in fname:
try:
line, count = self.obfuscate_line(line, _parsers)
subs += count
tfile.write(line)
except Exception as err:
self.log_debug(f"Unable to obfuscate {short_name}: "
f"{err}", caller=arc_name)
tfile.seek(0)
if subs:
shutil.copyfile(tfile.name, filename)
tfile.close()
with tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir) \
as tfile:
with open(filename, 'r', errors='replace') as fname:
for line in fname:
try:
line, count = self.obfuscate_line(line, _parsers)
subs += count
tfile.write(line)
except Exception as err:
self.log_debug(f"Unable to obfuscate {short_name}:"
f"{err}", caller=arc_name)
tfile.seek(0)
if subs:
shutil.copyfile(tfile.name, filename)

_ob_short_name = self.obfuscate_string(short_name.split('/')[-1])
_ob_filename = short_name.replace(short_name.split('/')[-1],
Expand Down
40 changes: 25 additions & 15 deletions sos/cleaner/archives/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,27 @@
# process for extraction if this method is a part of the SoSObfuscationArchive
# class. So, the simplest solution is to remove it from the class.
def extract_archive(archive_path, tmpdir):
archive = tarfile.open(archive_path)
path = os.path.join(tmpdir, 'cleaner')
# set extract filter since python 3.12 (see PEP-706 for more)
# Because python 3.10 and 3.11 raises false alarms as exceptions
# (see #3330 for examples), we can't use data filter but must
# fully trust the archive (legacy behaviour)
archive.extraction_filter = getattr(tarfile, 'fully_trusted_filter',
(lambda member, path: member))
archive.extractall(path)
archive.close()
return os.path.join(path, archive.name.split('/')[-1].split('.tar')[0])
with tarfile.open(archive_path) as archive:
path = os.path.join(tmpdir, 'cleaner')
# set extract filter since python 3.12 (see PEP-706 for more)
# Because python 3.10 and 3.11 raises false alarms as exceptions
# (see #3330 for examples), we can't use data filter but must
# fully trust the archive (legacy behaviour)
archive.extraction_filter = getattr(tarfile, 'fully_trusted_filter',
(lambda member, path: member))

# Guard against "Arbitrary file write during tarfile extraction"
Copy link
Contributor Author

@pponnuvel pponnuvel Jul 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extracting files one-by-one here. I had to check that the extracted file paths don't contain relative path components and stay within the target directory, to handle "Arbitrary file write during tarfile extraction".

https://codeql.github.com/codeql-query-help/python/py-tarslip/

#3729 (review)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting: I was afraid of a negative runtime impact of this change, so I ran cleaner on some sosreport tarball twice: once without the patch and once with the patch. The difference was statistically insignificant (a fraction of a second out of a 3m cleaner run).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I think that's reasonable.

If we want to fix "Arbitrary file write during tarfile extraction", I think this is the only way (check each file and confirm). The alternative is to ignore it altogether (i.e. keep this code as before, without this change) if we think the source tar file is always trustworthy.

Copy link
Contributor Author

@pponnuvel pponnuvel Aug 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at https://github.com/python/cpython/blob/main/Lib/tarfile.py#L2265, extract() and extractall() pretty much work in the same way.

The major difference between previous and the change in this PR is one additional syscall (getcwd) due to os.path.abspath call. Cost of a single getcwd call varies depending on few factors.

A quick test on my system:

ponnuvel@magicbox:~$ cat t.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>

int main() {
    struct timespec start, end;
    char cwd[1024];

    if (clock_gettime(CLOCK_MONOTONIC, &start) == -1) {
        perror("clock_gettime");
        exit(0);
    }

    if (getcwd(cwd, sizeof(cwd)) == NULL) {
        perror("getcwd");
        exit(0);
    }

    if (clock_gettime(CLOCK_MONOTONIC, &end) == -1) {
        perror("clock_gettime");
        exit(0);
    }

    // Calculate the elapsed time in nanoseconds
    long seconds = end.tv_sec - start.tv_sec;
    long nanoseconds = end.tv_nsec - start.tv_nsec;
    long elapsed = seconds * 1000000000L + nanoseconds;

    printf("Current working directory: %s\n", cwd);
    printf("Execution time of getcwd: %ld nanoseconds\n", elapsed);
}
ponnuvel@magicbox:~$ gcc -O3 t.c
ponnuvel@magicbox:~$ ./a.out
ponnuvel@magicbox:~$ pwd 
/home/ponnuvel
ponnuvel@magicbox:~$ ./a.out 
Current working directory: /home/ponnuvel
Execution time of getcwd: 4512 nanoseconds
ponnuvel@magicbox:~$ mkdir -p a/b/c/d/e/f/g/h/i/j/k
ponnuvel@magicbox:~$ cp a.out a/b/c/d/e/f/g/h/i/j/k/
ponnuvel@magicbox:~$ cd -
/home/ponnuvel/a/b/c/d/e/f/g/h/i/j/k
ponnuvel@magicbox:~/a/b/c/d/e/f/g/h/i/j/k$ pwd
/home/ponnuvel/a/b/c/d/e/f/g/h/i/j/k
ponnuvel@magicbox:~/a/b/c/d/e/f/g/h/i/j/k$ ./a.out 
Current working directory: /home/ponnuvel/a/b/c/d/e/f/g/h/i/j/k
Execution time of getcwd: 5415 nanoseconds

(a relatively low-spec system: 4CPU, i7-6500U CPU @ 2.50GHz, 12G memory)

So it takes about 4.5µs to 5.5µs (4512–5415 nanoseconds) per getcwd call on my system.

Let's round it up to 6µs per call. On a large sosreport I have (1.5GB), it contains ~180K files:

$ tar -tf sosreport-mon2-00384261-2024-07-12-czwswhw.tar.xz | wc -l
179703

6µs (per getcwd call) * 180k files ~= 1.08 seconds extra time on my system. I'd think most servers where sosreports are manipulated/extracted are more powerful than my 2016 laptop (where I ran this test). So the extra cost would likely be less than 1 second for a reasonably sized sosreport on a production server.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should add: please don't think I am trying to justify the change. I am simply too curious and woken up early on a Sunday morning ;-)

I am totally fine to revert this change if the conclusion is "we can always trust the source tar files here".

# Checks the extracted files don't stray out of the target directory.
for member in archive.getmembers():
member_path = os.path.join(path, member.name)
abs_directory = os.path.abspath(path)
abs_target = os.path.abspath(member_path)
prefix = os.path.commonprefix([abs_directory, abs_target])
if prefix != abs_directory:
raise Exception(f"Attempted path traversal in tarfle"
f"{prefix} != {abs_directory}")
archive.extract(member, path)
return os.path.join(path, archive.name.split('/')[-1].split('.tar')[0])


class SoSObfuscationArchive():
Expand Down Expand Up @@ -83,6 +93,7 @@ def is_insights(self):

def _load_self(self):
if self.is_tarfile:
# pylint: disable=consider-using-with
self.tarobj = tarfile.open(self.archive_path)

def get_nested_archives(self):
Expand Down Expand Up @@ -255,10 +266,9 @@ def build_tar_file(self, method):
else:
compr_args = {'compresslevel': 6}
self.log_debug(f"Building tar file {tarpath}")
tar = tarfile.open(tarpath, mode=mode, **compr_args)
tar.add(self.extracted_path,
arcname=os.path.split(self.archive_name)[1])
tar.close()
with tarfile.open(tarpath, mode=mode, **compr_args) as tar:
tar.add(self.extracted_path,
arcname=os.path.split(self.archive_name)[1])
return tarpath

def compress(self, method):
Expand Down
12 changes: 6 additions & 6 deletions sos/collector/transports/control_persist.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ def _check_for_control_persist(self):
True if ControlPersist is supported, else raise Exception.
"""
ssh_cmd = ['ssh', '-o', 'ControlPersist']
cmd = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
_, err = cmd.communicate()
err = err.decode('utf-8')
if 'Bad configuration option' in err or 'Usage:' in err:
raise ControlPersistUnsupportedException
with subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE) as cmd:
_, err = cmd.communicate()
err = err.decode('utf-8')
if 'Bad configuration option' in err or 'Usage:' in err:
raise ControlPersistUnsupportedException
return True

def _connect(self, password=''): # pylint: disable=too-many-branches
Expand Down
20 changes: 9 additions & 11 deletions sos/report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1505,24 +1505,22 @@ def _create_checksum(self, archive, hash_name):

try:
hash_size = 1024**2 # Hash 1MiB of content at a time.
archive_fp = open(archive, 'rb')
digest = hashlib.new(hash_name)
while True:
hashdata = archive_fp.read(hash_size)
if not hashdata:
break
digest.update(hashdata)
archive_fp.close()
with open(archive, 'rb') as archive_fp:
while True:
hashdata = archive_fp.read(hash_size)
if not hashdata:
break
digest.update(hashdata)
except Exception:
self.handle_exception()
return digest.hexdigest()

def _write_checksum(self, archive, hash_name, checksum):
# store checksum into file
fp = open(archive + "." + hash_name, "w")
if checksum:
fp.write(checksum + "\n")
fp.close()
with open(archive + "." + hash_name, "w") as fp:
if checksum:
fp.write(checksum + "\n")

def final_work(self):
archive = None # archive path
Expand Down
84 changes: 41 additions & 43 deletions sos/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,63 +280,61 @@ def _check_poller(proc):
else:
expanded_args.append(arg)
if to_file:
_output = open(to_file, 'w')
_output = open(to_file, 'w') # pylint: disable=consider-using-with
else:
_output = PIPE
try:
p = Popen(expanded_args, shell=False, stdout=_output,
stderr=STDOUT if stderr else PIPE,
bufsize=-1, env=cmd_env, close_fds=True,
preexec_fn=_child_prep_fn)
with Popen(expanded_args, shell=False, stdout=_output,
stderr=STDOUT if stderr else PIPE,
bufsize=-1, env=cmd_env, close_fds=True,
preexec_fn=_child_prep_fn) as p:

if not to_file:
reader = AsyncReader(p.stdout, sizelimit, binary)
else:
reader = FakeReader(p, binary)

if poller:
while reader.running:
_check_poller(p)
else:
try:
# override timeout=0 to timeout=None, as Popen will treat the
# former as a literal 0-second timeout
p.wait(timeout if timeout else None)
except Exception:
p.terminate()
if to_file:
_output.close()
# until we separate timeouts from the `timeout` command
# handle per-cmd timeouts via Plugin status checks
reader.running = False
return {'status': 124, 'output': reader.get_contents(),
'truncated': reader.is_full}
if to_file:
_output.close()
if not to_file:
reader = AsyncReader(p.stdout, sizelimit, binary)
else:
reader = FakeReader(p, binary)

# wait for Popen to set the returncode
while p.poll() is None:
pass
if poller:
while reader.running:
_check_poller(p)
else:
try:
# override timeout=0 to timeout=None, as Popen will treat
# the former as a literal 0-second timeout
p.wait(timeout if timeout else None)
except Exception:
p.terminate()
if to_file:
_output.close()
# until we separate timeouts from the `timeout` command
# handle per-cmd timeouts via Plugin status checks
reader.running = False
return {'status': 124, 'output': reader.get_contents(),
'truncated': reader.is_full}
if to_file:
_output.close()

# wait for Popen to set the returncode
while p.poll() is None:
pass

stdout = reader.get_contents()
truncated = reader.is_full
if p.returncode in (126, 127):
stdout = b""
Fixed Show fixed Hide fixed
else:
stdout = reader.get_contents()

return {
'status': p.returncode,
'output': stdout,
'truncated': reader.is_full
}
except OSError as e:
if to_file:
_output.close()
if e.errno == errno.ENOENT:
return {'status': 127, 'output': "", 'truncated': ''}
raise e

if p.returncode in (126, 127):
stdout = b""

return {
'status': p.returncode,
'output': stdout,
'truncated': truncated
}


def import_module(module_fqname, superclasses=None):
"""Imports the module module_fqname and returns a list of defined classes
Expand Down
4 changes: 2 additions & 2 deletions tests/cleaner_tests/basic_function_tests/report_with_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def pre_sos_setup(self):
# obfuscate a random word from /etc/hosts and ensure the updated
# sanitised file has same permissions (a+r)
try:
self.hosts_obfuscated = open(
'/etc/hosts').read().strip('#\n').split()[-1]
with open('/etc/hosts') as fp:
self.obsfuncated = fp.read().strip('#\n').split()[-1]
except (FileNotFoundError, IndexError) as e:
self.warning(f"Unable to process /etc/hosts: {e}")
if self.hosts_obfuscated:
Expand Down
Loading
Loading