Add parameter to set whether to print JSON or not
cadosecurity committed Dec 4, 2023
1 parent 1ea0bda commit 2c8be99
Showing 5 changed files with 54 additions and 34 deletions.
8 changes: 8 additions & 0 deletions cloudgrep/__main__.py
@@ -106,6 +106,13 @@ def main() -> None:
help="Define custom list of properties to traverse to dynamically extract final list of log records. Used if --log_type is not defined. E.g. [""Records""]. ",
required=False
)
parser.add_argument(
"-jo",
"--json_output",
help="Output as JSON.",
action="store_true",
required=False,
default=False
)
args = vars(parser.parse_args())

if len(sys.argv) == 1:
@@ -136,6 +143,7 @@ def main() -> None:
args["log_format"],
args["log_properties"],
args["profile"],
args["json_output"]
)


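For reference, a minimal standalone sketch (not part of the commit) of how a boolean flag such as --json_output parses under argparse's store_true action; without store_true, the option would demand a value, and any non-empty string (including "False") would be truthy:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-jo", "--json_output", help="Output as JSON.", action="store_true", default=False)

print(parser.parse_args([]).json_output)       # False
print(parser.parse_args(["-jo"]).json_output)  # True
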
11 changes: 7 additions & 4 deletions cloudgrep/cloud.py
@@ -22,6 +22,7 @@ def download_from_s3_multithread(
yara_rules: Any,
log_format: Optional[str] = None,
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
) -> int:
"""Use ThreadPoolExecutor and boto3 to download every file in the bucket from s3
Returns number of matched files"""
@@ -37,15 +38,15 @@ def download_file(key: str) -> None:
with tempfile.NamedTemporaryFile() as tmp:
logging.info(f"Downloading {bucket} {key} to {tmp.name}")
s3.download_file(bucket, key, tmp.name)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties, json_output)
if matched:
nonlocal matched_count
matched_count += 1

# Use ThreadPoolExecutor to download the files
with concurrent.futures.ThreadPoolExecutor() as executor: # type: ignore
executor.map(download_file, files)
# For logging, single thread:
# For debugging, run in a single thread for clearer logging:
# for file in files:
# download_file(file)

@@ -61,6 +62,7 @@ def download_from_azure(
yara_rules: Any,
log_format: str,
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
) -> int:
"""Download every file in the container from azure
Returns number of matched files"""
@@ -80,7 +82,7 @@ def download_file(key: str) -> None:
with open(tmp.name, "wb") as my_blob:
blob_data = blob_client.download_blob()
blob_data.readinto(my_blob)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties, json_output)
if matched:
nonlocal matched_count
matched_count += 1
@@ -102,6 +104,7 @@ def download_from_google(
yara_rules: Any,
log_format: str,
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
) -> int:
"""Download every file in the bucket from google
Returns number of matched files"""
@@ -115,7 +118,7 @@ def download_file(key: str) -> None:
logging.info(f"Downloading {bucket} {key} to {tmp.name}")
blob = bucket_gcp.get_blob(key)
blob.download_to_filename(tmp.name)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties, json_output)
if matched:
nonlocal matched_count
matched_count += 1
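
All three download helpers share one shape: a nested download_file closure searches each object and bumps a shared counter, while ThreadPoolExecutor fans the downloads out and the with-block waits for every worker on exit. A simplified, cloud-free sketch of that pattern (hypothetical names, no SDK calls):

import concurrent.futures

def count_matches(keys: list[str]) -> int:
    matched_count = 0

    def download_file(key: str) -> None:
        # Stand-in for the real download + Search().search_file(...) call.
        nonlocal matched_count
        if "match" in key:
            matched_count += 1

    # Same pattern as above: map the worker over every key, then return the tally.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(download_file, keys)
    return matched_count

print(count_matches(["logs/match-1", "logs/miss", "logs/match-2"]))  # 2
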
15 changes: 4 additions & 11 deletions cloudgrep/cloudgrep.py
@@ -32,6 +32,7 @@ def search(
log_format: Optional[str] = None,
log_properties: Optional[list[str]] = None,
profile: Optional[str] = None,
json_output: Optional[bool] = False,
) -> None:
# load in a list of queries from a file
if not query and file:
@@ -44,14 +45,6 @@
case "cloudtrail":
log_format = "json"
log_properties = ["Records"]
# TODO: add and test Azure and other log_type mappings
# case "azure":
# log_format = "json"
# log_properties = []
# TODO: add and test Azure and other log_type mappings
# case "gcp":
# log_format = "json"
# log_properties = []
case _:
logging.error(f"Invalid log_type value ('{log_type}') unhandled in switch statement in 'search' function.")

@@ -86,7 +79,7 @@
else:
print(f"Bucket is in region: {region['LocationConstraint']} : Search from the same region to avoid egress charges.")
print(f"Searching {len(matching_keys)} files in {bucket} for {query}...")
Cloud().download_from_s3_multithread(bucket, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties)
Cloud().download_from_s3_multithread(bucket, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties, json_output)

if account_name and container_name:
matching_keys = list(
@@ -95,7 +88,7 @@
)
)
print(f"Searching {len(matching_keys)} files in {account_name}/{container_name} for {query}...")
Cloud().download_from_azure(account_name, container_name, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties)
Cloud().download_from_azure(account_name, container_name, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties, json_output)

if google_bucket:
matching_keys = list(
@@ -104,4 +97,4 @@

print(f"Searching {len(matching_keys)} files in {google_bucket} for {query}...")

Cloud().download_from_google(google_bucket, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties)
Cloud().download_from_google(google_bucket, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties, json_output)
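
The surviving cloudtrail case illustrates what the log_type shortcut expands to: a log_format plus a log_properties traversal path. A small hypothetical illustration of the mapping:

log_type = "cloudtrail"
match log_type:
    case "cloudtrail":
        # CloudTrail exports JSON with records nested under "Records".
        log_format = "json"
        log_properties = ["Records"]
    case _:
        raise ValueError(f"Invalid log_type value ('{log_type}')")

print(log_format, log_properties)  # json ['Records']
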
47 changes: 31 additions & 16 deletions cloudgrep/search.py
@@ -20,14 +20,26 @@ def get_all_strings_line(self, file_path: str) -> List[str]:
string_list = b.split("\r")
return string_list

def print_match(self, matched_line_dict: dict, hide_filenames: bool) -> None:
def print_match(self, matched_line_dict: dict, hide_filenames: bool, json_output: bool) -> None:
"""Print matched line"""
if hide_filenames:
matched_line_dict.pop("key_name")
try:
print(json.dumps(matched_line_dict))
except TypeError:
print(str(matched_line_dict))
if json_output:
if hide_filenames:
matched_line_dict.pop("key_name")
try:
print(json.dumps(matched_line_dict))
except TypeError:
print(str(matched_line_dict))
else:
line = ""
if "line" in matched_line_dict:
line = matched_line_dict['line']
if "match_rule" in matched_line_dict:
line = f"{matched_line_dict['match_rule']}: {matched_line_dict['match_strings']}"

if not hide_filenames:
print(f"{matched_line_dict['key_name']}: {line}")
else:
print(line)
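
A hedged usage sketch of the two print_match branches above (assuming the cloudgrep package is importable; output shown in comments):

from cloudgrep.search import Search

match = {"key_name": "logs/000000.gz", "line": "Running on machine i-0123"}
Search().print_match(dict(match), hide_filenames=False, json_output=True)
# {"key_name": "logs/000000.gz", "line": "Running on machine i-0123"}
Search().print_match(dict(match), hide_filenames=False, json_output=False)
# logs/000000.gz: Running on machine i-0123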

def search_logs(
self,
@@ -37,6 +49,7 @@
hide_filenames: bool,
log_format: Optional[str] = None,
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
):
"""Regex search of each log record in input line"""
# Parse input line based on defined format.
@@ -72,7 +85,7 @@
"key_name": key_name,
"line" : record
}
self.print_match(matched_line_dict, hide_filenames)
self.print_match(matched_line_dict, hide_filenames, json_output)

def search_line(
self,
@@ -82,21 +95,22 @@
line: str,
log_format: Optional[str],
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
) -> bool:
"""Regex search of the line"""
if re.search(search, line):
if log_format is not None:
self.search_logs(line, key_name, search, hide_filenames, log_format, log_properties)
self.search_logs(line, key_name, search, hide_filenames, log_format, log_properties, json_output)
else:
matched_line_dict = {
"key_name": key_name,
"line" : line
}
self.print_match(matched_line_dict, hide_filenames)
self.print_match(matched_line_dict, hide_filenames, json_output)
return True
return False

def yara_scan_file(self, file_name: str, key_name: str, hide_filenames: bool, yara_rules: Any) -> bool: # type: ignore
def yara_scan_file(self, file_name: str, key_name: str, hide_filenames: bool, yara_rules: Any, json_output: Optional[bool] = False) -> bool: # type: ignore
matched = False
matches = yara_rules.match(file_name)
if matches:
@@ -106,7 +120,7 @@
"match_rule": match.rule,
"match_strings": match.strings
}
self.print_match(matched_line_dict, hide_filenames)
self.print_match(matched_line_dict, hide_filenames, json_output)
matched = True
return matched

@@ -119,18 +133,19 @@ def search_file(
yara_rules: Any,
log_format: Optional[str] = None,
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
) -> bool:
"""Regex search of the file line by line"""
matched = False
logging.info(f"Searching {file_name} for {search}")

if yara_rules:
matched = self.yara_scan_file(file_name, key_name, hide_filenames, yara_rules)
matched = self.yara_scan_file(file_name, key_name, hide_filenames, yara_rules, json_output)
else:
if key_name.endswith(".gz"):
with gzip.open(file_name, "rt") as f:
for line in f:
if self.search_line(key_name, search, hide_filenames, line, log_format, log_properties):
if self.search_line(key_name, search, hide_filenames, line, log_format, log_properties, json_output):
matched = True
elif key_name.endswith(".zip"):
with tempfile.TemporaryDirectory() as tempdir:
@@ -142,11 +157,11 @@
if os.path.isfile(os.path.join(tempdir, filename)):
with open(os.path.join(tempdir, filename)) as f:
for line in f:
if self.search_line("{key_name}/{filename}", search, hide_filenames, line, log_format, log_properties):
if self.search_line("{key_name}/{filename}", search, hide_filenames, line, log_format, log_properties, json_output):
matched = True
else:
for line in self.get_all_strings_line(file_name):
if self.search_line(key_name, search, hide_filenames, line, log_format, log_properties):
if self.search_line(key_name, search, hide_filenames, line, log_format, log_properties, json_output):
matched = True

return matched
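
Putting it together, json_output is now the final parameter of search_file. A hedged end-to-end sketch against a locally created gzip log (hypothetical path):

import gzip

from cloudgrep.search import Search

# Hypothetical fixture: write one matching line into a local .gz file.
with gzip.open("/tmp/000000.gz", "wt") as f:
    f.write("Running on machine i-0123\n")

# Positional order per the signature above:
# file_name, key_name, search, hide_filenames, yara_rules, log_format, log_properties, json_output
found = Search().search_file("/tmp/000000.gz", "000000.gz", "Running on machine", False, None, None, None, True)
print(found)  # True, and the match is printed as a JSON object
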
7 changes: 4 additions & 3 deletions tests/test_unit.py
@@ -130,7 +130,7 @@ def test_yara(self) -> None:

# Act
with patch("sys.stdout", new=StringIO()) as fake_out:
matched = search.yara_scan_file(file_name, key_name, hide_filenames, yara_rules)
matched = search.yara_scan_file(file_name, key_name, hide_filenames, yara_rules, True)
output = fake_out.getvalue().strip()

# Assert
@@ -145,7 +145,7 @@ def test_json_output(self) -> None:

# Act
with patch("sys.stdout", new=StringIO()) as fake_out:
found = Search().search_file(f"{BASE_PATH}/data/000000.gz", "000000.gz", "Running on machine", False, None)
found = Search().search_file(f"{BASE_PATH}/data/000000.gz", "000000.gz", "Running on machine", False, None, None, None, True)
output = fake_out.getvalue().strip()

# Assert we can parse the output
@@ -166,8 +166,9 @@ def test_search_cloudtrail(self) -> None:
found = Search().search_file(f"{BASE_PATH}/data/cloudtrail.json", "cloudtrail.json", "Running on machine", False, None, log_format, log_properties)
# Get the output for a hit
with patch("sys.stdout", new=StringIO()) as fake_out:
found = Search().search_file(f"{BASE_PATH}/data/cloudtrail_singleline.json", "cloudtrail_singleline.json", "SignatureVersion", False, None, log_format, log_properties)
found = Search().search_file(f"{BASE_PATH}/data/cloudtrail_singleline.json", "cloudtrail_singleline.json", "SignatureVersion", False, None, log_format, log_properties, True)
output = fake_out.getvalue().strip()

# Assert we can parse the output
self.assertIn("SignatureVersion", output)
self.assertTrue(json.loads(output))
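
In the same spirit as these tests, a hedged snippet that captures stdout and checks the JSON mode emits parseable output:

import json
from io import StringIO
from unittest.mock import patch

from cloudgrep.search import Search

with patch("sys.stdout", new=StringIO()) as fake_out:
    Search().print_match({"key_name": "k", "line": "hello"}, False, True)

assert json.loads(fake_out.getvalue().strip()) == {"key_name": "k", "line": "hello"}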
