Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pylint] re-enable consider-using-{with, f-string} #3729

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 22 additions & 21 deletions plugins_overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
PLUGDIR = 'sos/report/plugins'

plugs_data = {} # the map of all plugins data to collect
plugcontent = '' # content of plugin file just being processed


# method to parse an item of a_s_c/a_c_o/.. methods
Expand All @@ -42,11 +41,11 @@ def add_valid_item(dest, item):
return


# method to find in `plugcontent` all items of given method (a_c_s/a_c_o/..)
# method to find all items of given method (a_c_s/a_c_o/..) in plugin content,
# split by comma; add each valid item to the `dest` list
def add_all_items(method, dest, wrapopen=r'\(', wrapclose=r'\)'):
def add_all_items(method, dest, plugfd, wrapopen=r'\(', wrapclose=r'\)'):
regexp = f"{method}{wrapopen}(.*?){wrapclose}"
for match in re.findall(regexp, plugcontent,
for match in re.findall(regexp, plugfd,
flags=re.MULTILINE | re.DOTALL):
# tuple of distros ended by either (class|from|import)
if isinstance(match, tuple):
Expand Down Expand Up @@ -90,23 +89,25 @@ def add_all_items(method, dest, wrapopen=r'\(', wrapclose=r'\)'):
'journals': [],
'env': [],
}
plugcontent = open(
os.path.join(PLUGDIR, plugfile)).read().replace('\n', '')
add_all_items(
"from sos.report.plugins import ",
plugs_data[plugname]['distros'],
wrapopen='',
wrapclose='(class|from|import)'
)
add_all_items("profiles = ", plugs_data[plugname]['profiles'], wrapopen='')
add_all_items("packages = ", plugs_data[plugname]['packages'], wrapopen='')
add_all_items("add_copy_spec", plugs_data[plugname]['copyspecs'])
add_all_items("add_forbidden_path", plugs_data[plugname]['forbidden'])
add_all_items("add_cmd_output", plugs_data[plugname]['commands'])
add_all_items("collect_cmd_output", plugs_data[plugname]['commands'])
add_all_items("add_service_status", plugs_data[plugname]['service_status'])
add_all_items("add_journal", plugs_data[plugname]['journals'])
add_all_items("add_env_var", plugs_data[plugname]['env'])
with open(os.path.join(PLUGDIR, plugfile)).read().replace('\n', '') as pfd:
add_all_items(
"from sos.report.plugins import ", plugs_data[plugname]['distros'],
pfd, wrapopen='', wrapclose='(class|from|import)'
)
add_all_items("profiles = ", plugs_data[plugname]['profiles'],
pfd, wrapopen='')
add_all_items("packages = ", plugs_data[plugname]['packages'],
pfd, wrapopen='')
add_all_items("add_copy_spec", plugs_data[plugname]['copyspecs'], pfd)
add_all_items("add_forbidden_path",
plugs_data[plugname]['forbidden'], pfd)
add_all_items("add_cmd_output", plugs_data[plugname]['commands'], pfd)
add_all_items("collect_cmd_output",
plugs_data[plugname]['commands'], pfd)
add_all_items("add_service_status",
plugs_data[plugname]['service_status'], pfd)
add_all_items("add_journal", plugs_data[plugname]['journals'], pfd)
add_all_items("add_env_var", plugs_data[plugname]['env'], pfd)

# print output; if "csv" is cmdline argument, print in CSV format, else JSON
if (len(sys.argv) > 1) and (sys.argv[1] == "csv"):
Expand Down
1 change: 0 additions & 1 deletion pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ disable=
W0719, # broad-exception-raised
W1203, # logging-fstring-interpolation
W1406, # redundant-u-string-prefix
R1732, # consider-using-with
W1514, # unspecified-encoding
W0107, # unnecessary-pass
W0718, # broad-exception-caught
Expand Down
31 changes: 15 additions & 16 deletions sos/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,22 +738,21 @@ def _build_archive(self, method):
kwargs = {'compresslevel': 6}
else:
kwargs = {'preset': 3}
tar = tarfile.open(self._archive_name, mode=f"w:{_comp_mode}",
**kwargs)
# add commonly reviewed files first, so that they can be more easily
# read from memory without needing to extract the whole archive
for _content in ['version.txt', 'sos_reports', 'sos_logs']:
if not os.path.exists(os.path.join(self._archive_root, _content)):
continue
tar.add(
os.path.join(self._archive_root, _content),
arcname=f"{self._name}/{_content}"
)
# we need to pass the absolute path to the archive root but we
# want the names used in the archive to be relative.
tar.add(self._archive_root, arcname=self._name,
filter=self.copy_permissions_filter)
tar.close()
with tarfile.open(self._archive_name, mode=f"w:{_comp_mode}",
**kwargs) as tar:
# Add commonly reviewed files first, so that they can be more
# easily read from memory without needing to extract
# the whole archive
for _content in ['version.txt', 'sos_reports', 'sos_logs']:
if os.path.exists(os.path.join(self._archive_root, _content)):
tar.add(
os.path.join(self._archive_root, _content),
arcname=f"{self._name}/{_content}"
)
# we need to pass the absolute path to the archive root but we
# want the names used in the archive to be relative.
tar.add(self._archive_root, arcname=self._name,
filter=self.copy_permissions_filter)
self._suffix += f".{_comp_mode}"
return self.name()

Expand Down
28 changes: 14 additions & 14 deletions sos/cleaner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,20 +782,20 @@ def obfuscate_file(self, filename, short_name=None, arc_name=None):
return 0
self.log_debug(f"Obfuscating {short_name or filename}",
caller=arc_name)
tfile = tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir)
with open(filename, 'r', errors='replace') as fname:
for line in fname:
try:
line, count = self.obfuscate_line(line, _parsers)
subs += count
tfile.write(line)
except Exception as err:
self.log_debug(f"Unable to obfuscate {short_name}: "
f"{err}", caller=arc_name)
tfile.seek(0)
if subs:
shutil.copyfile(tfile.name, filename)
tfile.close()
with tempfile.NamedTemporaryFile(mode='w', dir=self.tmpdir) \
as tfile:
with open(filename, 'r', errors='replace') as fname:
for line in fname:
try:
line, count = self.obfuscate_line(line, _parsers)
subs += count
tfile.write(line)
except Exception as err:
self.log_debug(f"Unable to obfuscate {short_name}:"
f"{err}", caller=arc_name)
tfile.seek(0)
if subs:
shutil.copyfile(tfile.name, filename)

_ob_short_name = self.obfuscate_string(short_name.split('/')[-1])
_ob_filename = short_name.replace(short_name.split('/')[-1],
Expand Down
40 changes: 25 additions & 15 deletions sos/cleaner/archives/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,27 @@
# process for extraction if this method is a part of the SoSObfuscationArchive
# class. So, the simplest solution is to remove it from the class.
def extract_archive(archive_path, tmpdir):
archive = tarfile.open(archive_path)
path = os.path.join(tmpdir, 'cleaner')
# set extract filter since python 3.12 (see PEP-706 for more)
# Because python 3.10 and 3.11 raises false alarms as exceptions
# (see #3330 for examples), we can't use data filter but must
# fully trust the archive (legacy behaviour)
archive.extraction_filter = getattr(tarfile, 'fully_trusted_filter',
(lambda member, path: member))
archive.extractall(path)
archive.close()
return os.path.join(path, archive.name.split('/')[-1].split('.tar')[0])
with tarfile.open(archive_path) as archive:
path = os.path.join(tmpdir, 'cleaner')
# set extract filter since python 3.12 (see PEP-706 for more)
# Because python 3.10 and 3.11 raises false alarms as exceptions
# (see #3330 for examples), we can't use data filter but must
# fully trust the archive (legacy behaviour)
archive.extraction_filter = getattr(tarfile, 'fully_trusted_filter',
(lambda member, path: member))

# Guard against "Arbitrary file write during tarfile extraction"
Copy link
Contributor Author

@pponnuvel pponnuvel Jul 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extracting files one-by-one here. I had to check that the extracted file paths don't contain relative path components and stay within the target directory, to handle "Arbitrary file write during tarfile extraction".

https://codeql.github.com/codeql-query-help/python/py-tarslip/

#3729 (review)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting: I was afraid of a negative runtime impact of this change, so I ran cleaner on some sosreport tarball twice: once without the patch and once with the patch. The difference was statistically insignificant (a fraction of a second out of a 3m cleaner run).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I think that's reasonable.

If we want to fix "Arbitrary file write during tarfile extraction", I think this is the only way (check each file and confirm). The alternative is to ignore it altogether (i.e. keep this code as before, without this change) if we think the source tar file is always trustworthy.

Copy link
Contributor Author

@pponnuvel pponnuvel Aug 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at https://github.com/python/cpython/blob/main/Lib/tarfile.py#L2265, extract() and extractall() pretty much work in the same way.

The major difference between previous and the change in this PR is one additional syscall (getcwd) due to os.path.abspath call. Cost of a single getcwd call varies depending on few factors.

A quick test on my system:

ponnuvel@magicbox:~$ cat t.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>

int main() {
    struct timespec start, end;
    char cwd[1024];

    if (clock_gettime(CLOCK_MONOTONIC, &start) == -1) {
        perror("clock_gettime");
        exit(0);
    }

    if (getcwd(cwd, sizeof(cwd)) == NULL) {
        perror("getcwd");
        exit(0);
    }

    if (clock_gettime(CLOCK_MONOTONIC, &end) == -1) {
        perror("clock_gettime");
        exit(0);
    }

    // Calculate the elapsed time in nanoseconds
    long seconds = end.tv_sec - start.tv_sec;
    long nanoseconds = end.tv_nsec - start.tv_nsec;
    long elapsed = seconds * 1000000000L + nanoseconds;

    printf("Current working directory: %s\n", cwd);
    printf("Execution time of getcwd: %ld nanoseconds\n", elapsed);
}
ponnuvel@magicbox:~$ gcc -O3 t.c
ponnuvel@magicbox:~$ ./a.out
ponnuvel@magicbox:~$ pwd 
/home/ponnuvel
ponnuvel@magicbox:~$ ./a.out 
Current working directory: /home/ponnuvel
Execution time of getcwd: 4512 nanoseconds
ponnuvel@magicbox:~$ mkdir -p a/b/c/d/e/f/g/h/i/j/k
ponnuvel@magicbox:~$ cp a.out a/b/c/d/e/f/g/h/i/j/k/
ponnuvel@magicbox:~$ cd -
/home/ponnuvel/a/b/c/d/e/f/g/h/i/j/k
ponnuvel@magicbox:~/a/b/c/d/e/f/g/h/i/j/k$ pwd
/home/ponnuvel/a/b/c/d/e/f/g/h/i/j/k
ponnuvel@magicbox:~/a/b/c/d/e/f/g/h/i/j/k$ ./a.out 
Current working directory: /home/ponnuvel/a/b/c/d/e/f/g/h/i/j/k
Execution time of getcwd: 5415 nanoseconds

(a relatively low-spec system: 4CPU, i7-6500U CPU @ 2.50GHz, 12G memory)

So it takes about 4.5µs to 5.5µs (4512–5415 nanoseconds) per getcwd call on my system.

Let's round it up to 6µs per call. On a large sosreport I have (1.5GB), it contains ~180K files:

$ tar -tf sosreport-mon2-00384261-2024-07-12-czwswhw.tar.xz | wc -l
179703

6µs (per getcwd call) * 180k files ~= 1.08 seconds extra time on my system. I'd think most servers where sosreports are manipulated/extracted are more powerful than my 2016 laptop (where I ran this test). So the extra cost would likely be less than 1 second for a reasonably sized sosreport on a production server.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I should add: please don't think I am trying to justify the change. I am simply too curious and woken up early on a Sunday morning ;-)

I am totally fine to revert this change if the conclusion is "we can always trust the source tar files here".

# Checks the extracted files don't stray out of the target directory.
for member in archive.getmembers():
member_path = os.path.join(path, member.name)
abs_directory = os.path.abspath(path)
abs_target = os.path.abspath(member_path)
prefix = os.path.commonprefix([abs_directory, abs_target])
if prefix != abs_directory:
raise Exception(f"Attempted path traversal in tarfle"
f"{prefix} != {abs_directory}")
archive.extract(member, path)
return os.path.join(path, archive.name.split('/')[-1].split('.tar')[0])


class SoSObfuscationArchive():
Expand Down Expand Up @@ -83,6 +93,7 @@ def is_insights(self):

def _load_self(self):
if self.is_tarfile:
# pylint: disable=consider-using-with
self.tarobj = tarfile.open(self.archive_path)

def get_nested_archives(self):
Expand Down Expand Up @@ -255,10 +266,9 @@ def build_tar_file(self, method):
else:
compr_args = {'compresslevel': 6}
self.log_debug(f"Building tar file {tarpath}")
tar = tarfile.open(tarpath, mode=mode, **compr_args)
tar.add(self.extracted_path,
arcname=os.path.split(self.archive_name)[1])
tar.close()
with tarfile.open(tarpath, mode=mode, **compr_args) as tar:
tar.add(self.extracted_path,
arcname=os.path.split(self.archive_name)[1])
return tarpath

def compress(self, method):
Expand Down
12 changes: 6 additions & 6 deletions sos/collector/transports/control_persist.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ def _check_for_control_persist(self):
True if ControlPersist is supported, else raise Exception.
"""
ssh_cmd = ['ssh', '-o', 'ControlPersist']
cmd = subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
_, err = cmd.communicate()
err = err.decode('utf-8')
if 'Bad configuration option' in err or 'Usage:' in err:
raise ControlPersistUnsupportedException
with subprocess.Popen(ssh_cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE) as cmd:
_, err = cmd.communicate()
err = err.decode('utf-8')
if 'Bad configuration option' in err or 'Usage:' in err:
raise ControlPersistUnsupportedException
return True

def _connect(self, password=''): # pylint: disable=too-many-branches
Expand Down
20 changes: 9 additions & 11 deletions sos/report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1505,24 +1505,22 @@ def _create_checksum(self, archive, hash_name):

try:
hash_size = 1024**2 # Hash 1MiB of content at a time.
archive_fp = open(archive, 'rb')
digest = hashlib.new(hash_name)
while True:
hashdata = archive_fp.read(hash_size)
if not hashdata:
break
digest.update(hashdata)
archive_fp.close()
with open(archive, 'rb') as archive_fp:
while True:
hashdata = archive_fp.read(hash_size)
if not hashdata:
break
digest.update(hashdata)
except Exception:
self.handle_exception()
return digest.hexdigest()

def _write_checksum(self, archive, hash_name, checksum):
# store checksum into file
fp = open(archive + "." + hash_name, "w")
if checksum:
fp.write(checksum + "\n")
fp.close()
with open(archive + "." + hash_name, "w") as fp:
if checksum:
fp.write(checksum + "\n")

def final_work(self):
archive = None # archive path
Expand Down
84 changes: 41 additions & 43 deletions sos/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,63 +280,61 @@ def _check_poller(proc):
else:
expanded_args.append(arg)
if to_file:
_output = open(to_file, 'w')
_output = open(to_file, 'w') # pylint: disable=consider-using-with
else:
_output = PIPE
try:
p = Popen(expanded_args, shell=False, stdout=_output,
stderr=STDOUT if stderr else PIPE,
bufsize=-1, env=cmd_env, close_fds=True,
preexec_fn=_child_prep_fn)
with Popen(expanded_args, shell=False, stdout=_output,
stderr=STDOUT if stderr else PIPE,
bufsize=-1, env=cmd_env, close_fds=True,
preexec_fn=_child_prep_fn) as p:

if not to_file:
reader = AsyncReader(p.stdout, sizelimit, binary)
else:
reader = FakeReader(p, binary)

if poller:
while reader.running:
_check_poller(p)
else:
try:
# override timeout=0 to timeout=None, as Popen will treat the
# former as a literal 0-second timeout
p.wait(timeout if timeout else None)
except Exception:
p.terminate()
if to_file:
_output.close()
# until we separate timeouts from the `timeout` command
# handle per-cmd timeouts via Plugin status checks
reader.running = False
return {'status': 124, 'output': reader.get_contents(),
'truncated': reader.is_full}
if to_file:
_output.close()
if not to_file:
reader = AsyncReader(p.stdout, sizelimit, binary)
else:
reader = FakeReader(p, binary)

# wait for Popen to set the returncode
while p.poll() is None:
pass
if poller:
while reader.running:
_check_poller(p)
else:
try:
# override timeout=0 to timeout=None, as Popen will treat
# the former as a literal 0-second timeout
p.wait(timeout if timeout else None)
except Exception:
p.terminate()
if to_file:
_output.close()
# until we separate timeouts from the `timeout` command
# handle per-cmd timeouts via Plugin status checks
reader.running = False
return {'status': 124, 'output': reader.get_contents(),
'truncated': reader.is_full}
if to_file:
_output.close()

# wait for Popen to set the returncode
while p.poll() is None:
pass

stdout = reader.get_contents()
truncated = reader.is_full
if p.returncode in (126, 127):
stdout = b""
Fixed Show fixed Hide fixed
else:
stdout = reader.get_contents()

return {
'status': p.returncode,
'output': stdout,
'truncated': reader.is_full
}
except OSError as e:
if to_file:
_output.close()
if e.errno == errno.ENOENT:
return {'status': 127, 'output': "", 'truncated': ''}
raise e

if p.returncode in (126, 127):
stdout = b""

return {
'status': p.returncode,
'output': stdout,
'truncated': truncated
}


def import_module(module_fqname, superclasses=None):
"""Imports the module module_fqname and returns a list of defined classes
Expand Down
4 changes: 2 additions & 2 deletions tests/cleaner_tests/basic_function_tests/report_with_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def pre_sos_setup(self):
# obfuscate a random word from /etc/hosts and ensure the updated
# sanitised file has same permissions (a+r)
try:
self.hosts_obfuscated = open(
'/etc/hosts').read().strip('#\n').split()[-1]
with open('/etc/hosts') as fp:
self.obsfuncated = fp.read().strip('#\n').split()[-1]
except (FileNotFoundError, IndexError) as e:
self.warning(f"Unable to process /etc/hosts: {e}")
if self.hosts_obfuscated:
Expand Down
Loading
Loading