Add parameter to set whether to print JSON or not
cadosecurity committed Dec 4, 2023
1 parent 1ea0bda commit 2c8be99
Showing 5 changed files with 54 additions and 34 deletions.
8 changes: 8 additions & 0 deletions cloudgrep/__main__.py
@@ -106,6 +106,13 @@ def main() -> None:
help="Define custom list of properties to traverse to dynamically extract final list of log records. Used if --log_type is not defined. E.g. [""Records""]. ",
required=False
)
parser.add_argument(
"-jo",
"--json_output",
help="Output as JSON.",
action="store_true",
required=False,
default=False
)
args = vars(parser.parse_args())

if len(sys.argv) == 1:
@@ -136,6 +143,7 @@ def main() -> None:
args["log_format"],
args["log_properties"],
args["profile"],
args["json_output"]
)


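For reference, a minimal standalone sketch (not part of the commit) of how a boolean flag such as --json_output parses under argparse's store_true action; without store_true, the option would demand a value, and any non-empty string (including "False") would be truthy:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-jo", "--json_output", help="Output as JSON.", action="store_true", default=False)

print(parser.parse_args([]).json_output)       # False
print(parser.parse_args(["-jo"]).json_output)  # True
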
11 changes: 7 additions & 4 deletions cloudgrep/cloud.py
@@ -22,6 +22,7 @@ def download_from_s3_multithread(
yara_rules: Any,
log_format: Optional[str] = None,
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
) -> int:
"""Use ThreadPoolExecutor and boto3 to download every file in the bucket from s3
Returns number of matched files"""
@@ -37,15 +38,15 @@ def download_file(key: str) -> None:
with tempfile.NamedTemporaryFile() as tmp:
logging.info(f"Downloading {bucket} {key} to {tmp.name}")
s3.download_file(bucket, key, tmp.name)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties, json_output)
if matched:
nonlocal matched_count
matched_count += 1

# Use ThreadPoolExecutor to download the files
with concurrent.futures.ThreadPoolExecutor() as executor: # type: ignore
executor.map(download_file, files)
# For logging, single thread:
# For debugging, run in a single thread for clearer logging:
# for file in files:
# download_file(file)

@@ -61,6 +62,7 @@ def download_from_azure(
yara_rules: Any,
log_format: str,
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
) -> int:
"""Download every file in the container from azure
Returns number of matched files"""
@@ -80,7 +82,7 @@ def download_file(key: str) -> None:
with open(tmp.name, "wb") as my_blob:
blob_data = blob_client.download_blob()
blob_data.readinto(my_blob)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties, json_output)
if matched:
nonlocal matched_count
matched_count += 1
@@ -102,6 +104,7 @@ def download_from_google(
yara_rules: Any,
log_format: str,
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
) -> int:
"""Download every file in the bucket from google
Returns number of matched files"""
@@ -115,7 +118,7 @@ def download_file(key: str) -> None:
logging.info(f"Downloading {bucket} {key} to {tmp.name}")
blob = bucket_gcp.get_blob(key)
blob.download_to_filename(tmp.name)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties)
matched = Search().search_file(tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties, json_output)
if matched:
nonlocal matched_count
matched_count += 1
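
All three download helpers share one shape: a nested download_file closure searches each object and bumps a shared counter, while ThreadPoolExecutor fans the downloads out and the with-block waits for every worker on exit. A simplified, cloud-free sketch of that pattern (hypothetical names, no SDK calls):

import concurrent.futures

def count_matches(keys: list[str]) -> int:
    matched_count = 0

    def download_file(key: str) -> None:
        # Stand-in for the real download + Search().search_file(...) call.
        nonlocal matched_count
        if "match" in key:
            matched_count += 1

    # Same pattern as above: map the worker over every key, then return the tally.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(download_file, keys)
    return matched_count

print(count_matches(["logs/match-1", "logs/miss", "logs/match-2"]))  # 2
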
15 changes: 4 additions & 11 deletions cloudgrep/cloudgrep.py
@@ -32,6 +32,7 @@ def search(
log_format: Optional[str] = None,
log_properties: Optional[list[str]] = None,
profile: Optional[str] = None,
json_output: Optional[bool] = False,
) -> None:
# load in a list of queries from a file
if not query and file:
@@ -44,14 +45,6 @@
case "cloudtrail":
log_format = "json"
log_properties = ["Records"]
# TODO: add and test Azure and other log_type mappings
# case "azure":
# log_format = "json"
# log_properties = []
# TODO: add and test Azure and other log_type mappings
# case "gcp":
# log_format = "json"
# log_properties = []
case _:
logging.error(f"Invalid log_type value ('{log_type}') unhandled in switch statement in 'search' function.")

@@ -86,7 +79,7 @@
else:
print(f"Bucket is in region: {region['LocationConstraint']} : Search from the same region to avoid egress charges.")
print(f"Searching {len(matching_keys)} files in {bucket} for {query}...")
Cloud().download_from_s3_multithread(bucket, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties)
Cloud().download_from_s3_multithread(bucket, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties, json_output)

if account_name and container_name:
matching_keys = list(
@@ -95,7 +88,7 @@
)
)
print(f"Searching {len(matching_keys)} files in {account_name}/{container_name} for {query}...")
Cloud().download_from_azure(account_name, container_name, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties)
Cloud().download_from_azure(account_name, container_name, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties, json_output)

if google_bucket:
matching_keys = list(
@@ -104,4 +97,4 @@

print(f"Searching {len(matching_keys)} files in {google_bucket} for {query}...")

Cloud().download_from_google(google_bucket, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties)
Cloud().download_from_google(google_bucket, matching_keys, query, hide_filenames, yara_rules, log_format, log_properties, json_output)
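
The surviving cloudtrail case illustrates what the log_type shortcut expands to: a log_format plus a log_properties traversal path. A small hypothetical illustration of the mapping:

log_type = "cloudtrail"
match log_type:
    case "cloudtrail":
        # CloudTrail exports JSON with records nested under "Records".
        log_format = "json"
        log_properties = ["Records"]
    case _:
        raise ValueError(f"Invalid log_type value ('{log_type}')")

print(log_format, log_properties)  # json ['Records']
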
47 changes: 31 additions & 16 deletions cloudgrep/search.py
@@ -20,14 +20,26 @@ def get_all_strings_line(self, file_path: str) -> List[str]:
string_list = b.split("\r")
return string_list

def print_match(self, matched_line_dict: dict, hide_filenames: bool) -> None:
def print_match(self, matched_line_dict: dict, hide_filenames: bool, json_output: bool) -> None:
"""Print matched line"""
if hide_filenames:
matched_line_dict.pop("key_name")
try:
print(json.dumps(matched_line_dict))
except TypeError:
print(str(matched_line_dict))
if json_output:
if hide_filenames:
matched_line_dict.pop("key_name")
try:
print(json.dumps(matched_line_dict))
except TypeError:
print(str(matched_line_dict))
else:
line = ""
if "line" in matched_line_dict:
line = matched_line_dict['line']
if "match_rule" in matched_line_dict:
line = f"{matched_line_dict['match_rule']}: {matched_line_dict['match_strings']}"

if not hide_filenames:
print(f"{matched_line_dict['key_name']}: {line}")
else:
print(line)
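
A hedged usage sketch of the two print_match branches above (assuming the cloudgrep package is importable; output shown in comments):

from cloudgrep.search import Search

match = {"key_name": "logs/000000.gz", "line": "Running on machine i-0123"}
Search().print_match(dict(match), hide_filenames=False, json_output=True)
# {"key_name": "logs/000000.gz", "line": "Running on machine i-0123"}
Search().print_match(dict(match), hide_filenames=False, json_output=False)
# logs/000000.gz: Running on machine i-0123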

def search_logs(
self,
@@ -37,6 +49,7 @@
hide_filenames: bool,
log_format: Optional[str] = None,
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
):
"""Regex search of each log record in input line"""
# Parse input line based on defined format.
@@ -72,7 +85,7 @@
"key_name": key_name,
"line" : record
}
self.print_match(matched_line_dict, hide_filenames)
self.print_match(matched_line_dict, hide_filenames, json_output)

def search_line(
self,
@@ -82,21 +95,22 @@
line: str,
log_format: Optional[str],
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
) -> bool:
"""Regex search of the line"""
if re.search(search, line):
if log_format is not None:
self.search_logs(line, key_name, search, hide_filenames, log_format, log_properties)
self.search_logs(line, key_name, search, hide_filenames, log_format, log_properties, json_output)
else:
matched_line_dict = {
"key_name": key_name,
"line" : line
}
self.print_match(matched_line_dict, hide_filenames)
self.print_match(matched_line_dict, hide_filenames, json_output)
return True
return False

def yara_scan_file(self, file_name: str, key_name: str, hide_filenames: bool, yara_rules: Any) -> bool: # type: ignore
def yara_scan_file(self, file_name: str, key_name: str, hide_filenames: bool, yara_rules: Any, json_output: Optional[bool] = False) -> bool: # type: ignore
matched = False
matches = yara_rules.match(file_name)
if matches:
@@ -106,7 +120,7 @@
"match_rule": match.rule,
"match_strings": match.strings
}
self.print_match(matched_line_dict, hide_filenames)
self.print_match(matched_line_dict, hide_filenames, json_output)
matched = True
return matched

@@ -119,18 +133,19 @@ def search_file(
yara_rules: Any,
log_format: Optional[str] = None,
log_properties: Optional[list[str]] = None,
json_output: Optional[bool] = False,
) -> bool:
"""Regex search of the file line by line"""
matched = False
logging.info(f"Searching {file_name} for {search}")

if yara_rules:
matched = self.yara_scan_file(file_name, key_name, hide_filenames, yara_rules)
matched = self.yara_scan_file(file_name, key_name, hide_filenames, yara_rules, json_output)
else:
if key_name.endswith(".gz"):
with gzip.open(file_name, "rt") as f:
for line in f:
if self.search_line(key_name, search, hide_filenames, line, log_format, log_properties):
if self.search_line(key_name, search, hide_filenames, line, log_format, log_properties, json_output):
matched = True
elif key_name.endswith(".zip"):
with tempfile.TemporaryDirectory() as tempdir:
@@ -142,11 +157,11 @@
if os.path.isfile(os.path.join(tempdir, filename)):
with open(os.path.join(tempdir, filename)) as f:
for line in f:
if self.search_line("{key_name}/{filename}", search, hide_filenames, line, log_format, log_properties):
if self.search_line("{key_name}/{filename}", search, hide_filenames, line, log_format, log_properties, json_output):
matched = True
else:
for line in self.get_all_strings_line(file_name):
if self.search_line(key_name, search, hide_filenames, line, log_format, log_properties):
if self.search_line(key_name, search, hide_filenames, line, log_format, log_properties, json_output):
matched = True

return matched
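
Putting it together, json_output is now the final parameter of search_file. A hedged end-to-end sketch against a locally created gzip log (hypothetical path):

import gzip

from cloudgrep.search import Search

# Hypothetical fixture: write one matching line into a local .gz file.
with gzip.open("/tmp/000000.gz", "wt") as f:
    f.write("Running on machine i-0123\n")

# Positional order per the signature above:
# file_name, key_name, search, hide_filenames, yara_rules, log_format, log_properties, json_output
found = Search().search_file("/tmp/000000.gz", "000000.gz", "Running on machine", False, None, None, None, True)
print(found)  # True, and the match is printed as a JSON object
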
7 changes: 4 additions & 3 deletions tests/test_unit.py
@@ -130,7 +130,7 @@ def test_yara(self) -> None:

# Act
with patch("sys.stdout", new=StringIO()) as fake_out:
matched = search.yara_scan_file(file_name, key_name, hide_filenames, yara_rules)
matched = search.yara_scan_file(file_name, key_name, hide_filenames, yara_rules, True)
output = fake_out.getvalue().strip()

# Assert
@@ -145,7 +145,7 @@ def test_json_output(self) -> None:

# Act
with patch("sys.stdout", new=StringIO()) as fake_out:
found = Search().search_file(f"{BASE_PATH}/data/000000.gz", "000000.gz", "Running on machine", False, None)
found = Search().search_file(f"{BASE_PATH}/data/000000.gz", "000000.gz", "Running on machine", False, None, None, None, True)
output = fake_out.getvalue().strip()

# Assert we can parse the output
@@ -166,8 +166,9 @@ def test_search_cloudtrail(self) -> None:
found = Search().search_file(f"{BASE_PATH}/data/cloudtrail.json", "cloudtrail.json", "Running on machine", False, None, log_format, log_properties)
# Get the output for a hit
with patch("sys.stdout", new=StringIO()) as fake_out:
found = Search().search_file(f"{BASE_PATH}/data/cloudtrail_singleline.json", "cloudtrail_singleline.json", "SignatureVersion", False, None, log_format, log_properties)
found = Search().search_file(f"{BASE_PATH}/data/cloudtrail_singleline.json", "cloudtrail_singleline.json", "SignatureVersion", False, None, log_format, log_properties, True)
output = fake_out.getvalue().strip()

# Assert we can parse the output
self.assertIn("SignatureVersion", output)
self.assertTrue(json.loads(output))
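
In the same spirit as these tests, a hedged snippet that captures stdout and checks the JSON mode emits parseable output:

import json
from io import StringIO
from unittest.mock import patch

from cloudgrep.search import Search

with patch("sys.stdout", new=StringIO()) as fake_out:
    Search().print_match({"key_name": "k", "line": "hello"}, False, True)

assert json.loads(fake_out.getvalue().strip()) == {"key_name": "k", "line": "hello"}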
