diff --git a/.envrc.local.template b/.envrc.local.template index a196e5d3..4007b13c 100644 --- a/.envrc.local.template +++ b/.envrc.local.template @@ -44,5 +44,5 @@ export TEST_KEY="" # export AV_STATUS_SNS_PUBLISH_INFECTED # export AV_TIMESTAMP_METADATA # export CLAMAVLIB_PATH -# export CLAMSCAN_PATH +# export CLAMDSCAN_PATH # export FRESHCLAM_PATH diff --git a/Dockerfile b/Dockerfile index fc17f6dc..429e0c09 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,8 +34,12 @@ RUN yumdownloader -x \*i686 --archlist=x86_64 \ clamav \ clamav-lib \ clamav-update \ + clamav-scanner-systemd \ + elfutils-libs \ json-c \ + lz4 \ pcre2 \ + systemd-libs \ libtool-ltdl \ libxml2 \ bzip2-libs \ @@ -54,11 +58,21 @@ RUN rpm2cpio clamav-0*.rpm | cpio -vimd \ && rpm2cpio xz-libs*.rpm | cpio -vimd \ && rpm2cpio libprelude*.rpm | cpio -vimd \ && rpm2cpio gnutls*.rpm | cpio -vimd \ - && rpm2cpio nettle*.rpm | cpio -vimd + && rpm2cpio nettle*.rpm | cpio -vimd \ + && rpm2cpio clamd-0*.rpm | cpio -idmv \ + && rpm2cpio elfutils-libs*.rpm | cpio -idmv \ + && rpm2cpio lz4*.rpm | cpio -idmv \ + && rpm2cpio systemd-libs*.rpm | cpio -idmv # Copy over the binaries and libraries -RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /usr/lib64/libpcre.so.1 /opt/app/bin/ +RUN cp -r \ + /tmp/usr/bin/clamdscan \ + /tmp/usr/sbin/clamd \ + /tmp/usr/bin/freshclam \ + /tmp/usr/lib64/* \ + /usr/lib64/libpcre.so.1 \ + /opt/app/bin/ # Fix the freshclam.conf settings RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf \ @@ -66,11 +80,16 @@ RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf \ && echo "ScriptedUpdates no" >> /opt/app/bin/freshclam.conf \ && echo "DatabaseDirectory /var/lib/clamav" >> /opt/app/bin/freshclam.conf \ && echo "DetectPUA yes" >> /opt/app/bin/freshclam.conf \ - && echo "ExcludePUA PUA.Win.Packer" >> /opt/app/bin/freshclam.conf \ - && echo "ExcludePUA PUA.Win.Trojan.Packed" >> /opt/app/bin/freshclam.conf \ - && echo 
"ExcludePUA PUA.Win.Trojan.Molebox" >> /opt/app/bin/freshclam.conf \ - && echo "ExcludePUA PUA.Win.Packer.Upx" >> /opt/app/bin/freshclam.conf \ - && echo "ExcludePUA PUA.Doc.Packed" >> /opt/app/bin/freshclam.conf + && echo "DatabaseDirectory /tmp/clamav_defs" > /opt/app/bin/scan.conf \ + && echo "PidFile /tmp/clamd.pid" >> /opt/app/bin/scan.conf \ + && echo "LogFile /tmp/clamd.log" >> /opt/app/bin/scan.conf \ + && echo "LocalSocket /tmp/clamd.sock" >> /opt/app/bin/scan.conf \ + && echo "FixStaleSocket yes" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Packer" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Trojan.Packed" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Trojan.Molebox" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Win.Packer.Upx" >> /opt/app/bin/scan.conf \ + && echo "ExcludePUA PUA.Doc.Packed" >> /opt/app/bin/scan.conf RUN groupadd clamav \ && useradd -g clamav -s /bin/false -c "Clam Antivirus" clamav \ diff --git a/README.md b/README.md index 7da043ec..aba98f84 100644 --- a/README.md +++ b/README.md @@ -81,32 +81,32 @@ can cause a continuous loop of scanning if improperly configured. Runtime configuration is accomplished using environment variables. See the table below for reference. 
-| Variable | Description | Default | Required | -|----------------------------------|-------------------------------------------------------------------------------------------------|------------------------|----------| -| AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | -| AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | -| AV_DEFINITION_PATH | Path containing virus definition files at runtime | /tmp/clamav_defs | No | -| AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | -| AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | -| AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | -| AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | -| AV_STATUS_INFECTED | The value assigned to clean items inside of tags/metadata | INFECTED | No | -| AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | -| AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | -| AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | -| AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | -| AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | -| AV_EXTRA_VIRUS_DEFINITIONS | Uses fangfrisch for extra antivirus definitions | False | No | -| CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | -| CLAMSCAN_PATH | Path to ClamAV clamscan binary | ./bin/clamscan | No | -| FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | -| DATADOG_API_KEY | API Key for pushing metrics to DataDog (optional) | | No | -| AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | 
No | -| AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | -| EVENT_SOURCE | The source of antivirus scan event "S3" or "SNS" (optional) | S3 | No | -| S3_ENDPOINT | The Endpoint to use when interacting wth S3 | None | No | -| SNS_ENDPOINT | The Endpoint to use when interacting wth SNS | None | No | -| LAMBDA_ENDPOINT | The Endpoint to use when interacting wth Lambda | None | No | +| Variable | Description | Default | Required | +|----------------------------------|-------------------------------------------------------------------------------------------------|------------------|----------| +| AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | +| AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | +| AV_DEFINITION_PATH | Path containing virus definition files at runtime | /tmp/clamav_defs | No | +| AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | +| AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | +| AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | +| AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | +| AV_STATUS_INFECTED | The value assigned to clean items inside of tags/metadata | INFECTED | No | +| AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | +| AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | +| AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | +| AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | +| AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | +| AV_EXTRA_VIRUS_DEFINITIONS | Uses fangfrisch for extra antivirus definitions | False | No | +| 
CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | +| CLAMDSCAN_PATH | Path to ClamAV clamdscan binary | ./bin/clamdscan | No | +| FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | +| DATADOG_API_KEY | API Key for pushing metrics to DataDog (optional) | | No | +| AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | No | +| AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | +| EVENT_SOURCE | The source of antivirus scan event "S3" or "SNS" (optional) | S3 | No | +| S3_ENDPOINT | The Endpoint to use when interacting wth S3 | None | No | +| SNS_ENDPOINT | The Endpoint to use when interacting wth SNS | None | No | +| LAMBDA_ENDPOINT | The Endpoint to use when interacting wth Lambda | None | No | ## S3 Bucket Policy Examples diff --git a/clamav.py b/clamav.py index 24a59e67..71fba2d4 100644 --- a/clamav.py +++ b/clamav.py @@ -19,23 +19,29 @@ import pwd import re import subprocess +import socket +import errno import boto3 import botocore from pytz import utc +from common import AV_DEFINITION_S3_BUCKET +from common import AV_DEFINITION_S3_PREFIX +from common import AV_DEFINITION_PATH from common import AV_DEFINITION_FILE_PREFIXES from common import AV_DEFINITION_FILE_SUFFIXES -from common import AV_DEFINITION_PATH from common import AV_SIGNATURE_OK from common import AV_SIGNATURE_UNKNOWN from common import AV_STATUS_CLEAN from common import AV_STATUS_INFECTED from common import CLAMAVLIB_PATH -from common import CLAMSCAN_PATH +from common import CLAMDSCAN_PATH from common import FRESHCLAM_PATH -from common import S3_ENDPOINT +from common import CLAMDSCAN_TIMEOUT from common import create_dir +from common import CLAMD_SOCKET + RE_SEARCH_DIR = r"SEARCH_DIR\(\"=([A-z0-9\/\-_]*)\"\)" @@ -53,7 +59,7 @@ def update_defs_from_s3(s3_client, bucket, prefix): s3_best_time = None for file_suffix in 
AV_DEFINITION_FILE_SUFFIXES: filename = file_prefix + "." + file_suffix - s3_path = os.path.join(prefix, filename) + s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) local_path = os.path.join(AV_DEFINITION_PATH, filename) s3_md5 = md5_from_s3_tags(s3_client, bucket, s3_path) s3_time = time_from_s3(s3_client, bucket, s3_path) @@ -89,7 +95,7 @@ def upload_defs_to_s3(s3_client, bucket, prefix, local_path): "Uploading %s to s3://%s" % (local_file_path, os.path.join(bucket, prefix, filename)) ) - s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) + s3 = boto3.resource("s3") s3_object = s3.Object(bucket, os.path.join(prefix, filename)) s3_object.upload_file(os.path.join(local_path, filename)) s3_client.put_object_tagging( @@ -118,7 +124,7 @@ def update_defs_from_freshclam(path, library_path=""): fc_proc = subprocess.Popen( [ FRESHCLAM_PATH, - "--config-file=./bin/freshclam.conf", + "--config-file=%s/freshclam.conf" % CLAMAVLIB_PATH, "-u %s" % pwd.getpwuid(os.getuid())[0], "--datadir=%s" % path, ], @@ -186,24 +192,102 @@ def scan_output_to_json(output): def scan_file(path): av_env = os.environ.copy() av_env["LD_LIBRARY_PATH"] = CLAMAVLIB_PATH - print("Starting clamscan of %s." % path) + print("Starting clamdscan of %s." 
def is_clamd_running(socket_path=None):
    """Report whether a clamd daemon answers PING on its unix socket.

    Args:
        socket_path: Filesystem path of the clamd unix socket. When omitted
            the module-level ``CLAMD_SOCKET`` is used, so existing
            zero-argument callers keep working.

    Returns:
        True only when the daemon replies with ``PONG`` followed by a
        newline. False when the socket file does not exist, when connecting
        or talking to the socket fails, or when any other reply is received.
    """
    if socket_path is None:
        socket_path = CLAMD_SOCKET

    print("Checking if clamd is running on %s" % socket_path)

    if not os.path.exists(socket_path):
        print("Clamd is not running on %s" % socket_path)
        return False

    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
        s.settimeout(10)

        # A socket file can outlive the daemon that created it. Connecting
        # to such a stale path raises (e.g. ConnectionRefusedError); that
        # means "not running", not a crash of the caller.
        try:
            s.connect(socket_path)
        except OSError as e:
            print("Failed to connect to socket: %s\n" % e)
            return False

        # socket.timeout and socket.error are both OSError in Python 3, so a
        # single OSError clause covers send and receive failures alike.
        try:
            s.sendall(b"PING")
            data = s.recv(32)
        except OSError as e:
            print("Failed to read from socket: %s\n" % e)
            return False

    print("Received %s in response to PING" % repr(data))
    return data == b"PONG\n"
download in to_download.values(): + s3_path = download["s3_path"] + local_path = download["local_path"] + print("Downloading definition file %s from s3://%s" % (local_path, s3_path)) + s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) + print("Downloading definition file %s complete!" % (local_path)) + + av_env = os.environ.copy() + av_env["LD_LIBRARY_PATH"] = CLAMAVLIB_PATH + + print("Starting clamd") + + if os.path.exists(CLAMD_SOCKET): + try: + os.unlink(CLAMD_SOCKET) + except OSError as e: + if e.errno != errno.ENOENT: + print("Could not unlink clamd socket %s" % CLAMD_SOCKET) + raise + + clamd_proc = subprocess.Popen( + ["%s/clamd" % CLAMAVLIB_PATH, "-c", "%s/scan.conf" % CLAMAVLIB_PATH], + env=av_env, + ) + + clamd_proc.wait() + + clamd_log_file = open("/tmp/clamd.log") + print(clamd_log_file.read()) + + return clamd_proc.pid diff --git a/common.py b/common.py index 335c6cce..740a20e3 100644 --- a/common.py +++ b/common.py @@ -55,8 +55,10 @@ def str_to_bool(s): AV_TIMESTAMP_METADATA = os.getenv("AV_TIMESTAMP_METADATA", "av-timestamp") AV_EXTRA_VIRUS_DEFINITIONS = str_to_bool(os.getenv("AV_EXTRA_VIRUS_DEFINITIONS", "False")) CLAMAVLIB_PATH = os.getenv("CLAMAVLIB_PATH", "./bin") -CLAMSCAN_PATH = os.getenv("CLAMSCAN_PATH", "./bin/clamscan") +CLAMDSCAN_PATH = os.getenv("CLAMDSCAN_PATH", "./bin/clamdscan") FRESHCLAM_PATH = os.getenv("FRESHCLAM_PATH", "./bin/freshclam") +CLAMDSCAN_TIMEOUT = os.getenv("CLAMDSCAN_TIMEOUT", 240) +CLAMD_SOCKET = os.getenv("CLAMD_SOCKET", "/tmp/clamd.sock") AV_PROCESS_ORIGINAL_VERSION_ONLY = str_to_bool(os.getenv( "AV_PROCESS_ORIGINAL_VERSION_ONLY", "False" )) diff --git a/scan.py b/scan.py index 92feb81d..c065e89b 100644 --- a/scan.py +++ b/scan.py @@ -16,7 +16,9 @@ import copy import json import os +import signal from urllib.parse import unquote_plus +from distutils.util import strtobool import boto3 @@ -36,13 +38,14 @@ from common import AV_STATUS_SNS_PUBLISH_CLEAN from common import AV_STATUS_SNS_PUBLISH_INFECTED 
def kill_process_by_pid(pid):
    """Terminate the process identified by *pid* if it is still alive.

    Sends SIGTERM first and falls back to SIGKILL when SIGTERM cannot be
    delivered.

    Args:
        pid: Process id to terminate. ``None`` is accepted and ignored.

    Returns:
        None. The function returns silently when there is nothing to kill.

    Raises:
        OSError: If the SIGKILL fallback itself cannot be delivered.
    """
    if pid is None:
        return

    # Signal 0 delivers nothing; it only checks that the pid can be
    # signalled. Any OSError here means there is no process for us to kill.
    try:
        os.kill(pid, 0)
    except OSError:
        return

    print("Killing the process by PID %s" % pid)

    try:
        os.kill(pid, signal.SIGTERM)
    except ProcessLookupError:
        # The process exited between the probe and the signal: already gone.
        return
    except OSError:
        os.kill(pid, signal.SIGKILL)
def str_to_bool(s):
    """Convert a truthy/falsy value or its string spelling to a bool.

    The value is passed through ``str()`` and lower-cased before matching,
    so real booleans and integers are accepted as well as strings. The
    accepted spellings are the same ones ``distutils.util.strtobool``
    recognised; that helper is not used because distutils was removed from
    the standard library in Python 3.12.

    Args:
        s: Value to interpret, e.g. ``"True"``, ``"no"``, ``1`` or ``False``.

    Returns:
        True for y/yes/t/true/on/1, False for n/no/f/false/off/0.

    Raises:
        ValueError: If the value matches none of the accepted spellings.
    """
    text = str(s).lower()
    if text in ("y", "yes", "t", "true", "on", "1"):
        return True
    if text in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError("invalid truth value %r" % (text,))