Update flake8 configuration

* Remove language version settings in pre-commit config
* Remove flake8-import-order tox setting
* Update tox configuration
* Add flake8-bugbear
* Add flake8-comprehensions
artefactual · Oct 24, 2023 · e68f7a1 · e68f7a1
1 parent 26115c2
commit e68f7a1
Show file tree

Hide file tree

Showing 66 changed files with 230 additions and 190 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -29,9 +29,10 @@ repos:
   hooks:
   - id: black
     args: [--safe, --quiet]
-    language_version: python3
 - repo: https://github.com/pycqa/flake8
   rev: "6.1.0"
   hooks:
   - id: flake8
-    language_version: python3
+    additional_dependencies:
+    - flake8-bugbear==23.9.16
+    - flake8-comprehensions==3.14.0
diff --git a/src/MCPClient/lib/clientScripts/archivematica_clamscan.py b/src/MCPClient/lib/clientScripts/archivematica_clamscan.py
@@ -122,10 +122,9 @@ def scan(self, path):
             state, details = result[result_key]
         except Exception as err:
             passed = ClamdScanner.clamd_exception_handler(err)
-        finally:
-            if state == "OK":
-                passed = True
-            return passed, state, details
+        if state == "OK":
+            passed = True
+        return passed, state, details
 
     @staticmethod
     def clamd_exception_handler(err):
@@ -297,7 +296,7 @@ def get_size(file_uuid, path):
     # Our fallback.
     try:
         return os.path.getsize(path)
-    except:
+    except Exception:
         return None
 
 
@@ -349,7 +348,7 @@ def scan_file(event_queue, file_uuid, path, date, task_uuid):
         else:
             passed, state, details = None, None, None
 
-    except:
+    except Exception:
         logger.error("Unexpected error scanning file %s", path, exc_info=True)
         return 1
     else:

diff --git a/src/MCPClient/lib/clientScripts/check_for_access_directory.py b/src/MCPClient/lib/clientScripts/check_for_access_directory.py
@@ -166,7 +166,7 @@ def call(jobs):
                         os.mkdir(DIPDirectory)
                     if not os.path.isdir(os.path.join(DIPDirectory, "objects")):
                         os.mkdir(os.path.join(DIPDirectory, "objects"))
-                except:
+                except Exception:
                     job.pyprint("error creating DIP directory")
 
                 exitCode = main(

diff --git a/src/MCPClient/lib/clientScripts/check_for_service_directory.py b/src/MCPClient/lib/clientScripts/check_for_service_directory.py
@@ -32,7 +32,7 @@ def something(job, SIPDirectory, serviceDirectory, objectsDirectory, SIPUUID, da
     exitCode = 0
     job.pyprint(SIPDirectory)
     # For every file, & directory Try to find the matching file & directory in the objects directory
-    for path, dirs, files in os.walk(serviceDirectory):
+    for path, _, files in os.walk(serviceDirectory):
         for file in files:
             servicePreExtension = "_me"
             originalPreExtension = "_m"
@@ -79,7 +79,7 @@ def regular(SIPDirectory, objectsDirectory, SIPUUID, date):
     if not searchForRegularExpressions:
         return
 
-    for path, dirs, files in os.walk(objectsDirectory):
+    for path, _, files in os.walk(objectsDirectory):
         for file in files:
             m = re.search(r"_me\.[a-zA-Z0-9]{2,4}$", file)
             if m is not None:

diff --git a/src/MCPClient/lib/clientScripts/create_transfer_mets.py b/src/MCPClient/lib/clientScripts/create_transfer_mets.py
@@ -256,11 +256,11 @@ def load_rights_data_from_db(self):
         )
 
         for rights in transfer_rights:
-            for path, fsentry in self.file_index.items():
+            for _, fsentry in self.file_index.items():
                 premis_rights = rights_to_premis(rights, fsentry.file_uuid)
                 fsentry.add_premis_rights(premis_rights)
 
-        for path, fsentry in self.file_index.items():
+        for _, fsentry in self.file_index.items():
             file_rights = self.rights_queryset.filter(
                 metadataappliestoidentifier=fsentry.file_uuid,
                 metadataappliestotype_id=self.FILE_RIGHTS_LOOKUP_UUID,

diff --git a/src/MCPClient/lib/clientScripts/email_fail_report.py b/src/MCPClient/lib/clientScripts/email_fail_report.py
@@ -57,7 +57,7 @@ def send_email(subject, to, content):
             recipient_list=to,
             html_message=content,
         )
-    except:
+    except Exception:
         logger.exception("Report email was not delivered")
         raise
     else:
@@ -171,7 +171,7 @@ def get_content_for(unit_type, unit_name, unit_uuid, html=True):
         else:
             root.append(t1)
             etree.SubElement(root, "p")
-    except:
+    except Exception:
         pass
 
     html2code = get_unit_job_log_html(unit_uuid)
@@ -255,7 +255,7 @@ def call(jobs):
 
     # Generate report in plain text and store it in the database
     with transaction.atomic():
-        for arg in reports_to_store:
+        for args in reports_to_store:
             content = get_content_for(
                 args.unit_type, args.unit_name, args.unit_uuid, html=False
             )

diff --git a/src/MCPClient/lib/clientScripts/extract_contents.py b/src/MCPClient/lib/clientScripts/extract_contents.py
@@ -131,7 +131,7 @@ def main(job, transfer_uuid, sip_directory, date, task_uuid, delete=False):
         try:
             format_id = FileFormatVersion.objects.get(file_uuid=file_.uuid)
         # Can't do anything if the file wasn't identified in the previous step
-        except:
+        except Exception:
             job.pyprint(
                 "Not extracting contents from",
                 os.path.basename(file_.currentlocation.decode()),

diff --git a/src/MCPClient/lib/clientScripts/extract_maildir_attachments.py b/src/MCPClient/lib/clientScripts/extract_maildir_attachments.py
@@ -46,7 +46,7 @@ def __init__(self):
 def writeFile(filePath, fileContents):
     try:
         os.makedirs(os.path.dirname(filePath))
-    except:
+    except Exception:
         pass
     FILE = open(filePath, "w")
     FILE.writelines(fileContents)
@@ -59,9 +59,11 @@ def addFile(
     transferUUID,
     date,
     eventDetail="",
-    fileUUID=uuid.uuid4().__str__(),
+    fileUUID=None,
 ):
-    taskUUID = uuid.uuid4().__str__()
+    if fileUUID is None:
+        fileUUID = str(uuid.uuid4())
+    taskUUID = str(uuid.uuid4())
     filePathRelativeToSIP = filePath.replace(transferPath, "%transferDirectory%", 1)
     addFileToTransfer(
         filePathRelativeToSIP,
@@ -72,7 +74,7 @@ def addFile(
         sourceType="unpacking",
         eventDetail=eventDetail,
     )
-    updateSizeAndChecksum(fileUUID, filePath, date, uuid.uuid4.__str__())
+    updateSizeAndChecksum(fileUUID, filePath, date, str(uuid.uuid4()))
 
 
 def getFileUUIDofSourceFile(transferUUID, sourceFilePath):
@@ -93,8 +95,10 @@ def addKeyFileToNormalizeMaildirOffOf(
     transferUUID,
     date,
     eventDetail="",
-    fileUUID=uuid.uuid4().__str__(),
+    fileUUID=None,
 ):
+    if fileUUID is None:
+        fileUUID = str(uuid.uuid4())
     basename = os.path.basename(mirrorDir)
     dirname = os.path.dirname(mirrorDir)
     outFile = os.path.join(dirname, basename + ".archivematicaMaildir")
@@ -248,7 +252,7 @@ def handle_job(job):
         mirrorDir = os.path.join(transferDir, "objects/attachments", maildirsub)
         try:
             os.makedirs(mirrorDir)
-        except:
+        except Exception:
             pass
         eventDetail = "added for normalization purposes"
         fileUUID = uuid.uuid4().__str__()

diff --git a/src/MCPClient/lib/clientScripts/fits.py b/src/MCPClient/lib/clientScripts/fits.py
@@ -88,7 +88,7 @@ def main(target, xml_file, date, event_uuid, file_uuid, file_grpuse):
 
         try:
             tree = etree.parse(temp_file)
-        except:
+        except Exception:
             logger.exception("Failed to read Fits's XML.")
             return 2
 

diff --git a/src/MCPClient/lib/clientScripts/is_maildir_aip.py b/src/MCPClient/lib/clientScripts/is_maildir_aip.py
@@ -31,7 +31,7 @@ def isMaildir(path):
         for maildirsub2 in os.listdir(maildir):
             maildirsub = os.path.join(maildir, maildirsub2)
             mailbox.Maildir(maildirsub, None)
-    except:
+    except Exception:
         return False
     return True
 

diff --git a/src/MCPClient/lib/clientScripts/json_metadata_to_csv.py b/src/MCPClient/lib/clientScripts/json_metadata_to_csv.py
@@ -30,7 +30,7 @@ def fetch_keys(objects):
     # Column order is important so the output is consistent.
     # "filename" and "parts" must be column 0.
     # (They are mutually exclusive.)
-    keys = sorted(list(keys))
+    keys = sorted(keys)
     if "filename" in keys:
         keys.remove("filename")
         keys.insert(0, "filename")

diff --git a/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py b/src/MCPClient/lib/clientScripts/manual_normalization_move_access_files_to_dip.py
@@ -141,7 +141,7 @@ def main(job):
     try:
         if not os.path.isdir(dstDir):
             os.makedirs(dstDir)
-    except:
+    except Exception:
         pass
 
     # Rename the file or directory src to dst. If dst is a directory, OSError will be raised. On Unix, if dst exists and is a file, it will be replaced silently if the user has permission. The operation may fail on some Unix flavors if src and dst are on different filesystems.

diff --git a/src/MCPClient/lib/clientScripts/manual_normalization_remove_mn_directories.py b/src/MCPClient/lib/clientScripts/manual_normalization_remove_mn_directories.py
@@ -32,7 +32,7 @@
 
 def recursivelyRemoveEmptyDirectories(job, dir):
     error_count = 0
-    for root, dirs, files in os.walk(dir, topdown=False):
+    for root, dirs, _ in os.walk(dir, topdown=False):
         for directory in dirs:
             try:
                 os.rmdir(os.path.join(root, directory))

diff --git a/src/MCPClient/lib/clientScripts/normalize_report.py b/src/MCPClient/lib/clientScripts/normalize_report.py
@@ -184,7 +184,7 @@ def report(uuid):
             recipient_list=recipient_list,
             html_message=html_message,
         )
-    except:
+    except Exception:
         logger.exception("Report email was not delivered")
         return 1
     else:

diff --git a/src/MCPClient/lib/clientScripts/restructure_dip_for_content_dm_upload.py b/src/MCPClient/lib/clientScripts/restructure_dip_for_content_dm_upload.py
@@ -82,9 +82,9 @@ def getItemCountType(structMap):
     """
     divs_with_dmdsecs = structMap.findall(".//mets:div[@DMDID]", namespaces=ns.NSMAP)
     # If any are TYPE Directory, then it is compound
-    if any([e.get("TYPE") == "Directory" for e in divs_with_dmdsecs]):
+    if any(e.get("TYPE") == "Directory" for e in divs_with_dmdsecs):
         # If all are TYPE Directory then it is bulk
-        if all([e.get("TYPE") == "Directory" for e in divs_with_dmdsecs]):
+        if all(e.get("TYPE") == "Directory" for e in divs_with_dmdsecs):
             return "compound-dirs"
         else:
             return "compound-files"

diff --git a/src/MCPClient/lib/clientScripts/save_dublin_core.py b/src/MCPClient/lib/clientScripts/save_dublin_core.py
@@ -34,7 +34,9 @@ def main(job, transfer_uuid, target_path):
     jsonified = {}
     try:
         dc = models.DublinCore.objects.get(metadataappliestoidentifier=transfer_uuid)
-    except:  # There may not be any DC metadata for this transfer, and that's fine
+    except (
+        Exception
+    ):  # There may not be any DC metadata for this transfer, and that's fine
         job.pyprint("No DC metadata found; skipping", file=sys.stderr)
         return 0
     for field in FIELDS:

diff --git a/src/MCPClient/lib/clientScripts/set_maildir_file_grp_use_and_file_ids.py b/src/MCPClient/lib/clientScripts/set_maildir_file_grp_use_and_file_ids.py
@@ -64,7 +64,7 @@ def set_maildir_files(sip_uuid, sip_path):
         maildir_path,
         sip_uuid,
     )
-    for root, dirs, files in os.walk(maildir_path):
+    for root, _, files in os.walk(maildir_path):
         for item in files:
             file_relative_path = os.path.join(root, item).replace(
                 sip_path, "%SIPDirectory%", 1
@@ -84,7 +84,7 @@ def set_archivematica_maildir_files(sip_uuid, sip_path):
         attachments_path,
         sip_uuid,
     )
-    for root, dirs, files in os.walk(attachments_path):
+    for root, _, files in os.walk(attachments_path):
         for item in files:
             if not item.endswith(".archivematicaMaildir"):
                 continue

diff --git a/src/MCPClient/lib/clientScripts/trim_create_rights_entries.py b/src/MCPClient/lib/clientScripts/trim_create_rights_entries.py
@@ -109,7 +109,7 @@ def call(jobs):
                     try:
                         tree = etree.parse(xmlFilePath)
                         root = tree.getroot()
-                    except:
+                    except Exception:
                         job.pyprint(
                             "Error parsing: ",
                             xmlFilePath.replace(transferPath, "%transferDirectory%", 1),
@@ -122,7 +122,7 @@ def call(jobs):
                             "Container/RetentionSchedule"
                         ).text
                         DateClosed = root.find("Container/DateClosed").text
-                    except:
+                    except Exception:
                         job.pyprint(
                             "Error retrieving values from: ",
                             xmlFilePath.replace(transferPath, "%transferDirectory%", 1),

diff --git a/src/MCPClient/lib/clientScripts/trim_restructure_for_compliance.py b/src/MCPClient/lib/clientScripts/trim_restructure_for_compliance.py
@@ -107,7 +107,7 @@ def restructureTRIMForComplianceFileUUIDsAssigned(
             files = fileOperations.getFileUUIDLike(
                 dst, unitPath, unitIdentifier, unitIdentifierType, unitPathReplaceWith
             )
-            for key, value in files.items():
+            for value in files.values():
                 fileUUID = value
                 fileOperations.updateFileGrpUse(fileUUID, "TRIM metadata")
 

diff --git a/src/MCPClient/lib/clientScripts/trim_verify_checksums.py b/src/MCPClient/lib/clientScripts/trim_verify_checksums.py
@@ -67,7 +67,7 @@ def call(jobs):
                             root = tree.getroot()
 
                             xmlMD5 = root.find("Document/MD5").text
-                        except:
+                        except Exception:
                             job.pyprint("Error parsing: ", xmlFilePath, file=sys.stderr)
                             exitCode += 1
                             continue
@@ -88,14 +88,14 @@ def call(jobs):
                                 "transfer",
                                 "%transferDirectory%",
                             )
-                            for path, fileUUID in fileID.items():
+                            for fileUUID in fileID.values():
                                 eventDetail = 'program="python"; module="hashlib.md5()"'
                                 eventOutcome = "Pass"
                                 eventOutcomeDetailNote = "{} {}".format(
                                     xmlFile.__str__(),
                                     "verified",
                                 )
-                                eventIdentifierUUID = uuid.uuid4().__str__()
+                                eventIdentifierUUID = str(uuid.uuid4())
 
                                 databaseFunctions.insertIntoEvents(
                                     fileUUID=fileUUID,

diff --git a/src/MCPClient/lib/clientScripts/trim_verify_manifest.py b/src/MCPClient/lib/clientScripts/trim_verify_manifest.py
@@ -118,11 +118,11 @@ def call(jobs):
                                     file=sys.stderr,
                                 )
                                 exitCode += 1
-                            for paths, fileUUID in fileID.items():
+                            for fileUUID in fileID.values():
                                 eventDetail = 'program="archivematica"; module="trimVerifyManifest"'
                                 eventOutcome = "Pass"
                                 eventOutcomeDetailNote = "Verified file exists"
-                                eventIdentifierUUID = uuid.uuid4().__str__()
+                                eventIdentifierUUID = str(uuid.uuid4())
                                 databaseFunctions.insertIntoEvents(
                                     fileUUID=fileUUID,
                                     eventIdentifierUUID=eventIdentifierUUID,
@@ -158,11 +158,11 @@ def call(jobs):
                                         file=sys.stderr,
                                     )
                                     exitCode += 1
-                                for paths, fileUUID in fileID.items():
+                                for fileUUID in fileID.values():
                                     eventDetail = 'program="archivematica"; module="trimVerifyManifest"'
                                     eventOutcome = "Pass"
                                     eventOutcomeDetailNote = "Verified file exists, but with implicit extension case"
-                                    eventIdentifierUUID = uuid.uuid4().__str__()
+                                    eventIdentifierUUID = str(uuid.uuid4())
                                     databaseFunctions.insertIntoEvents(
                                         fileUUID=fileUUID,
                                         eventIdentifierUUID=eventIdentifierUUID,

diff --git a/src/MCPClient/lib/clientScripts/upload_archivesspace.py b/src/MCPClient/lib/clientScripts/upload_archivesspace.py
@@ -26,7 +26,7 @@
 
 
 def recursive_file_gen(mydir):
-    for root, dirs, files in os.walk(mydir):
+    for root, _, files in os.walk(mydir):
         for file in files:
             yield os.path.join(root, file)
 

diff --git a/src/MCPClient/lib/clientScripts/verify_checksum.py b/src/MCPClient/lib/clientScripts/verify_checksum.py
@@ -191,12 +191,9 @@ def get_ext(path):
     @staticmethod
     def _count_lines(path):
         """Count the number of lines in a checksum file."""
-        count = 0
         with open(path) as hashfile:
-            for count, _ in enumerate(hashfile):
-                pass
-        # Negate zero-based count.
-        return count + 1
+            count = sum(1 for _ in hashfile)
+        return count
 
     @staticmethod
     def _count_files(path):

diff --git a/src/MCPClient/lib/clientScripts/verify_sip_compliance.py b/src/MCPClient/lib/clientScripts/verify_sip_compliance.py
@@ -33,7 +33,7 @@
 
 def checkDirectory(job, directory, ret=0):
     try:
-        for directory, subDirectories, files in os.walk(directory):
+        for _, _, files in os.walk(directory):
             for file in files:
                 os.path.join(directory, file)
     except Exception as inst:

diff --git a/src/MCPClient/lib/fork_runner.py b/src/MCPClient/lib/fork_runner.py
@@ -31,11 +31,13 @@
 THIS_SCRIPT = "fork_runner.py"
 
 
-def call(module_name, jobs, task_count=multiprocessing.cpu_count()):
+def call(module_name, jobs, task_count=None):
     """
     Split `jobs` into `task_count` groups and fork a subprocess to run
     `module_name`.call() for each of them.
     """
+    if task_count is None:
+        task_count = multiprocessing.cpu_count()
     jobs_by_uuid = {}
     for job in jobs:
         jobs_by_uuid[job.UUID] = job