From 3c63cd821ea5aa7575e7c8d615b5ed5487f0d44b Mon Sep 17 00:00:00 2001 From: Scott Hutton Date: Fri, 3 Nov 2023 12:41:00 -0700 Subject: [PATCH 01/12] Import Talos' async implementation --- Cargo.toml | 44 ++- src/callback.rs | 215 +++++++++++ src/cvd.rs | 125 +++++++ src/cvd/head_libclamav.rs | 80 +++++ src/cvd/head_native.rs | 159 +++++++++ src/db.rs | 29 ++ src/engine.rs | 635 +++++++++++++++++++++++++++++++++ src/error.rs | 77 ++++ src/fmap.rs | 188 ++++++++++ src/layer_attr.rs | 14 + src/lib.rs | 165 +++++++++ src/scan_settings.rs | 491 +++++++++++++++++++++++++ src/version.rs | 43 +++ src/windows_fd.rs | 67 ++++ test_data/build-database.sh | 15 + test_data/database/example.cud | Bin 0 -> 770 bytes test_data/files/good_file | 1 + test_data/files/naughty_file | 1 + tests/common.rs | 1 + tests/scan_eicar_test_virus.rs | 1 + 20 files changed, 2344 insertions(+), 7 deletions(-) create mode 100644 src/callback.rs create mode 100644 src/cvd.rs create mode 100644 src/cvd/head_libclamav.rs create mode 100644 src/cvd/head_native.rs create mode 100644 src/db.rs create mode 100644 src/engine.rs create mode 100644 src/error.rs create mode 100644 src/fmap.rs create mode 100644 src/layer_attr.rs create mode 100644 src/scan_settings.rs create mode 100644 src/version.rs create mode 100644 src/windows_fd.rs create mode 100755 test_data/build-database.sh create mode 100644 test_data/database/example.cud create mode 100644 test_data/files/good_file create mode 100644 test_data/files/naughty_file create mode 100644 tests/common.rs create mode 100644 tests/scan_eicar_test_virus.rs diff --git a/Cargo.toml b/Cargo.toml index 0535cfb..083ea13 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,17 +1,47 @@ [package] authors = [ + "Jonas Zaddach ", "Scott Hutton ", - "Micah Snyder ", - "Jonas Zaddach ", ] categories = ["api-bindings"] -description = "Asynchronous API for clamav-sys" -documentation = "https://docs.rs/clamav-async-rs" +description = "Async ClamAV bindings for Rust" edition = 
"2021" -keywords = ["clamav", "libclamav", "antivirus"] +exclude = ["test_data/*"] +homepage = "https://github.com/zaddach/clamav-rs" license = "GPL-2.0" name = "clamav-async" -repository = "https://github.com/Cisco-Talos/clamav-async-rs" -version = "0.1.0" +repository = "https://github.com/zaddach/clamav-rs" +version = "0.5.5" + +[features] +default = ["native-impl"] +# native-impl enables native implementations where possible (ignoring +# implementations provided by libclamav) +native-impl = [] +# tokio-runtime activates async functionality [dependencies] +bitflags = "2" +derivative = "2" +lazy_static = "1" +libc = "0.2" +log = "0.4" +thiserror = "1" +time = { version = "0.3", features = [ + "parsing", + "macros", + "formatting", + "std", +] } +tokio = { version = "1", features = ["sync", "rt", "macros"] } +tokio-stream = { version = "0.1" } + +[dependencies.clamav-sys] +path = "../clamav-sys" + +[target.'cfg(windows)'.dependencies] +bindings = { version = "0.5.5", package = "clamav-rs-bindings" } + +[dev-dependencies] +tempfile = "3" diff --git a/src/callback.rs b/src/callback.rs new file mode 100644 index 0000000..5f0d906 --- /dev/null +++ b/src/callback.rs @@ -0,0 +1,215 @@ +use crate::{engine::ScanEvent, layer_attr::LayerAttributes, ClamError, ContentHandle}; +use clamav_sys::cl_error_t; +use std::{ + ffi::CStr, + io::Cursor, + os::raw::{c_char, c_int, c_uchar, c_void}, +}; + +/// A type defining a closure or function that, when given a recursion depth, +/// file type, optional file name, and file size, returns whether or not the +/// content should be duplicated into a buffer that can be passed via +/// FileInspect messages. 
+type ShouldCopyFileBuffer = Box Fn(u32, &'a str, Option<&'a str>, usize) -> bool>; + +/// A wrapper structure around the context passed to callbacks that execute with scans +pub(crate) struct ScanCbContext { + pub(crate) sender: tokio::sync::mpsc::Sender, + pub(crate) should_copy_file_buffer: Option, +} + +/// A completion progress report, with a final result +#[derive(Debug)] +pub enum Progress { + Update { + /// How many elements have been handled + now_completed: usize, + /// How many elements are expected to be handled + total_items: usize, + }, + Complete(Result), +} + +/// Wrapper function for callbacks that accept a Progress message +/// +/// This function has libclamav's `clcb_progress` function signature +pub(crate) unsafe extern "C" fn progress( + total_items: usize, + now_completed: usize, + context: *mut c_void, +) -> cl_error_t { + // All errors are handled silently as there is no other means to report errors + if let Some(sender) = + (context as *mut tokio::sync::mpsc::Sender>).as_ref() + { + let _ = sender.blocking_send(Progress::Update { + total_items, + now_completed, + }); + } + + // ClamAV doesn't specify any action on this value, so it's hardcoded into + // the wrapper + cl_error_t::CL_SUCCESS +} + +pub(crate) unsafe extern "C" fn engine_pre_scan( + fd: c_int, + type_: *const c_char, + context: *mut c_void, +) -> cl_error_t { + if let Some(cxt) = (context as *mut ScanCbContext).as_ref() { + let file_type = CStr::from_ptr(type_).to_string_lossy(); + + let _ = cxt.sender.blocking_send(ScanEvent::PreScan { + file: dup_fd_to_file(fd), + file_type: file_type.into(), + }); + } + + cl_error_t::CL_CLEAN +} + +pub(crate) unsafe extern "C" fn engine_post_scan( + fd: c_int, + result: c_int, + virname: *const c_char, + context: *mut c_void, +) -> cl_error_t { + if let Some(cxt) = (context as *mut ScanCbContext).as_ref() { + let result = result as isize; + let match_name = if virname.is_null() { + String::from("") + } else { + 
CStr::from_ptr(virname).to_string_lossy().into() + }; + + let _ = cxt.sender.blocking_send(ScanEvent::PostScan { + file: dup_fd_to_file(fd), + result, + match_name, + }); + } + + cl_error_t::CL_CLEAN +} + +pub(crate) unsafe extern "C" fn engine_virus_found( + fd: c_int, + virname: *const c_char, + context: *mut c_void, +) { + if let Some(cxt) = (context as *mut ScanCbContext).as_ref() { + let name = CStr::from_ptr(virname).to_string_lossy().into(); + + let _ = cxt.sender.blocking_send(ScanEvent::MatchFound { + file: dup_fd_to_file(fd), + name, + }); + } +} + +pub(crate) unsafe extern "C" fn engine_file_inspection( + // NOTE: this file descriptor is unsafe to use after the callback has + // returned, even if dup'd + _fd: c_int, + type_: *const c_char, + c_ancestors: *mut *const c_char, + parent_file_size: usize, + filename: *const c_char, + file_size: usize, + file_buffer: *const c_char, + recursion_level: u32, + layer_attributes: u32, + context: *mut c_void, +) -> cl_error_t { + // NOTE: this function is probably doing too much work generating structures + // that won't be used. TALOSAV-28 offers a solution. + if let Some(cxt) = (context as *mut ScanCbContext).as_ref() { + let file_type: String = CStr::from_ptr(type_).to_string_lossy().into(); + let file_name = filename + .as_ref() + .map(|p| CStr::from_ptr(p)) + .map(CStr::to_string_lossy) + .map(|s| s.to_string()); + + let layer_attrs = LayerAttributes::from_bits(layer_attributes).unwrap_or_default(); + + let mut ancestors = vec![]; + if !c_ancestors.is_null() { + for i in 0..recursion_level { + let ancestor = *(c_ancestors.offset(i as isize)); + if ancestor.is_null() { + ancestors.push(None); + } else { + let ancestor = CStr::from_ptr(ancestor).to_string_lossy(); + ancestors.push(Some(ancestor.into())); + } + } + } + + // Duplicate the content buffer? + let mut content = None; + if let Some(cb) = &cxt.should_copy_file_buffer { + // Never include content for the root document. 
That should be known to the caller already. + if cb( + recursion_level, + file_type.as_str(), + file_name.as_deref(), + file_size, + ) { + let buffer = unsafe { + core::slice::from_raw_parts(file_buffer as *const c_uchar, file_size) + } + .to_vec(); + // NOTE: the content is provided as a trait object that + // implements AsyncRead in order to facilitate future + // functionality where this could be passed as a more + // "lightweight" object, such as a file handle or socket, or + // perhaps a ref-counted buffer that releases its reference once + // completely read. + content = Some(Box::pin(Cursor::new(buffer)) as ContentHandle) + } + } + + let _ = cxt.sender.blocking_send(ScanEvent::FileInspect { + #[cfg(unix)] + file_type, + file_name, + file_size, + parent_file_size, + recursion_level, + layer_attrs, + ancestors, + content, + }); + } + + cl_error_t::CL_CLEAN +} + +#[cfg(unix)] +fn dup_fd_to_file(fd: c_int) -> Option { + use std::os::unix::prelude::FromRawFd; + + if fd != -1 { + // dup the file descriptor first in case this message isn't handled + // before it's closed. The file will be closed when the containing + // message is discarded. + let new_fd = unsafe { libc::dup(fd) }; + if new_fd != -1 { + Some(unsafe { std::fs::File::from_raw_fd(new_fd) }) + } else { + // TODO: log a warning? Or embed error in FileInspect message? 
+ None + } + } else { + None + } +} + +#[cfg(windows)] +fn dup_fd_to_file(fd: c_int) -> Option { + // Not supported + None +} diff --git a/src/cvd.rs b/src/cvd.rs new file mode 100644 index 0000000..ae64ecb --- /dev/null +++ b/src/cvd.rs @@ -0,0 +1,125 @@ +use std::{borrow::Cow, fs::File, num::ParseIntError, path::Path, str::Utf8Error}; +use thiserror::Error; + +#[cfg(not(feature = "native-impl"))] +pub mod head_libclamav; +#[cfg(feature = "native-impl")] +pub mod head_native; + +#[cfg(not(feature = "native-impl"))] +pub use head_libclamav::CvdHdr; + +#[cfg(feature = "native-impl")] +pub use head_native::CvdHdr; + +pub trait CvdMeta { + /// Load from the initial bytes found at the beginning of the CVD/CLD + fn from_header_bytes(bytes: &[u8; 512]) -> Result + where + Self: Sized; + + /// Obtain a CVD/CLD header from an open file + fn from_file(fh: &mut File) -> Result + where + Self: Sized, + { + use std::io::Read; + + let mut buf = [0u8; 512]; + fh.read_exact(buf.as_mut_slice())?; + Self::from_header_bytes(&buf) + } + + /// Obtain a CVD/CLD header from the specified path + fn from_path(path: &Path) -> Result + where + Self: Sized, + { + let mut fh = File::open(path)?; + Self::from_file(&mut fh) + } + + /// Database "feature level" + fn f_level(&self) -> usize; + + /// Number of signatures reported to be within the database + fn n_sigs(&self) -> usize; + + /// Creation time (as a string) + fn time_str(&self) -> Cow<'_, str>; + + /// Database version + fn version(&self) -> usize; + + /// MD5 digest (as a hex string) + fn md5_str(&self) -> Cow<'_, str>; + + /// Digital signature (as a hex string) + fn dsig_str(&self) -> Cow<'_, str>; + + /// Database builder's ID + fn builder(&self) -> Cow<'_, str>; + + /// Creation time as seconds + fn stime(&self) -> usize; +} + +#[derive(Debug, Error)] +pub enum CvdHeadError { + /// Generic error from the libclamav parser.
Unfortunately, it outputs its + /// error via a message + #[error("unable to parse (see log output)")] + Parse, + + /// An IO error occurred + #[error("IO Error: {0}")] + Io(#[from] std::io::Error), + + /// Header was missing an expected leading signature + #[error("bad magic")] + BadMagic, + + /// Header fields ended when expecting creation time + #[error("missing creation time field")] + MissingCreationTime, + + /// Header fields ended when expecting version + #[error("missing version field")] + MissingVersion, + + /// Header fields ended when expecting number of signatures + #[error("missing number of signatures field")] + MissingNumberOfSigs, + + /// Header fields ended when expecting feature level + #[error("missing f_level field")] + MissingFLevel, + + /// Header fields ended when expecting database MD5 + #[error("missing md5 field")] + MissingMd5, + + /// Header fields ended when expecting digital signature + #[error("missing dsig field")] + MissingDSig, + + /// Header fields ended when expecting builder identity + #[error("missing builder field")] + MissingBuilder, + + /// Header field contains non-UTF-8 content + #[error("non-UTF-8 contenti: {0}")] + Utf80(#[from] Utf8Error), + + /// Header field content can't be parsed as number + #[error("unable to parse integer: {0}")] + ParseInt(#[from] ParseIntError), + + /// Header field content can't be parsed as a timestamp + #[error("unable to parse time: {0}")] + ParseTime(#[from] time::error::Parse), + + /// Value of "stime" header field would overflow a SystemTime representation + #[error("value of stime would overflow SystemTime")] + STimeTooLarge, +} diff --git a/src/cvd/head_libclamav.rs b/src/cvd/head_libclamav.rs new file mode 100644 index 0000000..d9a1944 --- /dev/null +++ b/src/cvd/head_libclamav.rs @@ -0,0 +1,80 @@ +use super::{CvdHeadError, CvdMeta}; +use std::{borrow::Cow, ffi::CStr}; + +/// The header of a CVD +pub struct CvdHdr(*mut clamav_sys::cl_cvd); + +impl CvdMeta for CvdHdr { + /// Parse a CVD 
header from a buffer obtained from the beginning of a CVD + /// (or CLD) file + fn from_header_bytes(bytes: &[u8; 512]) -> Result { + unsafe { + let raw = clamav_sys::cl_cvdparse(bytes.as_ptr() as *const i8); + + if raw.is_null() { + Err(CvdHeadError::Parse) + } else { + Ok(CvdHdr(raw)) + } + } + } + + /// Database "feature level" + fn f_level(&self) -> usize { + unsafe { (*self.0).fl as usize } + } + + /// Number of signatures reported to be within the database + fn n_sigs(&self) -> usize { + unsafe { (*self.0).sigs as usize } + } + + /// Creation time (as a string) + fn time_str(&self) -> Cow<'_, str> { + // libclamav guarantees that this pointer is non-NULL + unsafe { CStr::from_ptr((*self.0).time).to_string_lossy() } + } + + /// Database version + fn version(&self) -> usize { + unsafe { (*self.0).version as usize } + } + + /// MD5 digest (as a hex string) + fn md5_str(&self) -> Cow<'_, str> { + // libclamav guarantees that this pointer is non-NULL + unsafe { CStr::from_ptr((*self.0).md5).to_string_lossy() } + } + + /// Digital signature (as a hex string) + fn dsig_str(&self) -> Cow<'_, str> { + // libclamav guarantees that this pointer is non-NULL + unsafe { CStr::from_ptr((*self.0).dsig).to_string_lossy() } + } + + /// Database builder's ID + fn builder(&self) -> Cow<'_, str> { + // libclamav guarantees that this pointer is non-NULL + unsafe { CStr::from_ptr((*self.0).builder).to_string_lossy() } + } + + /// Creation time as seconds + fn stime(&self) -> usize { + unsafe { (*self.0).stime as usize } + } +} + +impl std::fmt::Debug for CvdHdr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CvdHead") + .field("f_level", &self.f_level()) + .field("n_sigs", &self.n_sigs()) + .field("time", &self.time_str()) + .field("version", &self.version()) + .field("md5", &self.md5_str()) + .field("dsig", &self.dsig_str()) + .field("builder", &self.builder()) + .field("stime", &self.stime()) + .finish() + } +} diff --git 
a/src/cvd/head_native.rs b/src/cvd/head_native.rs new file mode 100644 index 0000000..8c2c4ae --- /dev/null +++ b/src/cvd/head_native.rs @@ -0,0 +1,159 @@ +use super::{CvdHeadError, CvdMeta}; +use std::{ + borrow::Cow, + str::{self, FromStr}, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; + +/// CVD files are expected to begin with this string. Note that this constant +/// contains the first colon, as well +const CVD_HEAD_MAGIC: &[u8] = b"ClamAV-VDB:"; + +/// The non-standard timestamp format used in CVD headers to represent the +/// creation time +const CVD_TIMESTAMP_FMT: &[time::format_description::FormatItem] = time::macros::format_description!( + "[day padding:zero] [month repr:short] [year] [hour]:[minute] [offset_hour][offset_minute]" +); + +pub struct CvdHdr { + version: usize, + n_sigs: usize, + f_level: usize, + dsig: String, + builder: String, + ctime: SystemTime, + is_old_db: bool, + ctime_str: String, + md5_str: String, +} + +impl CvdMeta for CvdHdr { + fn from_header_bytes(bytes: &[u8; 512]) -> Result + where + Self: std::marker::Sized, + { + let mut fields = bytes + .strip_prefix(CVD_HEAD_MAGIC) + .ok_or(CvdHeadError::BadMagic)? + .split(|b| *b == b':'); + let creation_time_str = fields + .next() + .map(str::from_utf8) + .transpose()? + .ok_or(CvdHeadError::MissingCreationTime)?; + let version = fields + .next() + .map(str::from_utf8) + .transpose()? + .ok_or(CvdHeadError::MissingVersion)? + .parse()?; + let n_sigs: usize = fields + .next() + .map(str::from_utf8) + .transpose()? + .ok_or(CvdHeadError::MissingNumberOfSigs)? + .parse()?; + let f_level: usize = + str::from_utf8(fields.next().ok_or(CvdHeadError::MissingFLevel)?)?.parse()?; + // Just preserve this verbatim. + let md5_str = fields + .next() + .map(str::from_utf8) + .transpose()? + .ok_or(CvdHeadError::MissingMd5)? 
+ .into(); + let dsig = str::from_utf8(fields.next().ok_or(CvdHeadError::MissingDSig)?)?.into(); + let builder = + std::str::from_utf8(fields.next().ok_or(CvdHeadError::MissingBuilder)?)?.into(); + + // This field is not present in older signature database files. It + // should be the last field (and will be padded out with spaces at the + // end) + let (ctime, is_old_db) = fields + // Is it there? + .next() + // Try to make it a str + .map(str::from_utf8) + // ...and check that that worked (by flipping the Result out of the + // Option) + .transpose()? + // This value is padded to the right with spaces + .map(str::trim_end) + // Try to make it a usize + .map(usize::from_str) + // ...and check that that worked + .transpose()? + .map(|stime| UNIX_EPOCH.checked_add(Duration::from_secs(stime as u64))) + // ...and check that that worked + .ok_or(CvdHeadError::STimeTooLarge)? + // It's there, so this isn't an old DB + .map(|stime| Ok((stime, false))) + // Or it wasn't there, so this *is* an old DB + .unwrap_or_else(|| { + // Parse the string version, e.g.: "16 Sep 2021 08:32 -0400" + // Oddly, there are no seconds. 
So this is very much a custom format + time::OffsetDateTime::parse(creation_time_str, CVD_TIMESTAMP_FMT) + // And mark this as old-format + .map(|odt| (odt.into(), true)) + })?; + + let ctime_str = time::OffsetDateTime::from(ctime) + .format(CVD_TIMESTAMP_FMT) + .expect("format timestamp"); + + Ok(Self { + ctime, + version, + n_sigs, + f_level, + dsig, + builder, + is_old_db, + ctime_str, + md5_str, + }) + } + + fn f_level(&self) -> usize { + self.f_level + } + + fn n_sigs(&self) -> usize { + self.n_sigs + } + + fn time_str(&self) -> std::borrow::Cow<'_, str> { + // This is returned in the same format as normally appears within the header + Cow::from(&self.ctime_str) + } + + fn version(&self) -> usize { + self.version + } + + fn md5_str(&self) -> std::borrow::Cow<'_, str> { + Cow::from(&self.md5_str) + } + + fn dsig_str(&self) -> std::borrow::Cow<'_, str> { + Cow::from(&self.dsig) + } + + fn builder(&self) -> std::borrow::Cow<'_, str> { + std::borrow::Cow::from(&self.builder) + } + + fn stime(&self) -> usize { + self.ctime + .duration_since(UNIX_EPOCH) + .expect("compute seconds since epoch") + .as_secs() as usize + } +} + +impl CvdHdr { + /// Whether or not this is an old-format DB (no stime field in header) + pub fn is_old_db(&self) -> bool { + self.is_old_db + } +} diff --git a/src/db.rs b/src/db.rs new file mode 100644 index 0000000..6ebf097 --- /dev/null +++ b/src/db.rs @@ -0,0 +1,29 @@ +use std::ffi::CStr; +use std::str; + +use clamav_sys::cl_retdbdir; + +/// Gets the default database directory for clamav +pub fn default_directory() -> String { + unsafe { + let ptr = cl_retdbdir(); + let bytes = CStr::from_ptr(ptr).to_bytes(); + str::from_utf8(bytes) + .expect("Invalid UTF8 string") + .to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_directory_success() { + crate::initialize().expect("initialize should succeed"); + assert!( + !default_directory().is_empty(), + "should have a default db dir" + ); + } +} diff --git 
a/src/engine.rs b/src/engine.rs new file mode 100644 index 0000000..bd4903f --- /dev/null +++ b/src/engine.rs @@ -0,0 +1,635 @@ +use crate::error::ClamError; +use clamav_sys::cl_engine_field; +use clamav_sys::{cl_error_t, time_t}; +use derivative::Derivative; +use std::{path::Path, pin::Pin, sync::Arc, time}; + +#[cfg(windows)] +use crate::windows_fd::WindowsFd; + +use {tokio::sync::RwLock, tokio_stream::wrappers::ReceiverStream}; + +/// Stats of a loaded database +#[derive(Debug)] +pub struct DatabaseStats { + /// The total number of loaded signatures + pub signature_count: u32, +} + +#[derive(Debug)] +pub enum ScanResult { + /// Clean result + Clean, + /// Whitelisted result + Whitelisted, + /// Virus result, with detected name + Virus(String), +} + +impl ScanResult { + pub(crate) fn from_ffi( + scan_result: cl_error_t, + c_virname: *const i8, + ) -> Result { + use std::ffi::CStr; + + match scan_result { + cl_error_t::CL_CLEAN => Ok(Self::Clean), + cl_error_t::CL_BREAK => Ok(Self::Whitelisted), + cl_error_t::CL_VIRUS => unsafe { + Ok(ScanResult::Virus( + CStr::from_ptr(c_virname).to_string_lossy().to_string(), + )) + }, + code => Err(ClamError::new(code)), + } + } +} + +#[derive(Derivative)] +#[derivative(Debug)] +pub enum ScanEvent { + PreScan { + file: Option, + file_type: String, + }, + MatchFound { + file: Option, + name: String, + }, + PostScan { + file: Option, + result: isize, + match_name: String, + }, + FileInspect { + #[cfg(unix)] + ancestors: Vec>, + file_name: Option, + file_size: usize, + file_type: String, + #[derivative(Debug = "ignore")] + content: Option>>, + #[derivative(Debug = "ignore")] + layer_attrs: crate::layer_attr::LayerAttributes, + parent_file_size: usize, + recursion_level: u32, + }, + Result(Result), +} + +#[derive(Debug, PartialEq, Eq)] +pub enum EngineValueType { + U32, + U64, + String, + Time, +} + +#[derive(Debug)] +pub struct ClamTime(time_t); + +impl ClamTime { + pub fn as_system_time(&self) -> time::SystemTime { + if self.0 >= 
0 { + time::UNIX_EPOCH + time::Duration::from_secs(self.0 as u64) + } else { + time::UNIX_EPOCH - time::Duration::from_secs(-self.0 as u64) + } + } +} + +#[derive(Debug)] +pub enum EngineValue { + U32(u32), + U64(u64), + String(String), + Time(ClamTime), +} + +#[derive(Clone)] +/// Engine used for scanning files +pub struct Engine { + handle: Arc>, +} + +pub(crate) struct EngineHandle(*mut clamav_sys::cl_engine); + +impl EngineHandle { + pub(crate) fn as_ptr(&self) -> *mut clamav_sys::cl_engine { + self.0 + } +} + +// # Safety +// +// libclamav docs claim that the engine is thread-safe *provided* that its +// options are not changed. These checks are enforced within this crate. +unsafe impl Send for EngineHandle {} +unsafe impl Sync for EngineHandle {} + +impl Engine { + /// Initialises the engine + pub fn new() -> Self { + unsafe { + let handle = clamav_sys::cl_engine_new(); + + // Set up some callbacks + + { + use crate::callback; + + clamav_sys::cl_engine_set_clcb_pre_scan(handle, Some(callback::engine_pre_scan)); + clamav_sys::cl_engine_set_clcb_virus_found( + handle, + Some(callback::engine_virus_found), + ); + clamav_sys::cl_engine_set_clcb_post_scan(handle, Some(callback::engine_post_scan)); + clamav_sys::cl_engine_set_clcb_file_inspection( + handle, + Some(callback::engine_file_inspection), + ); + } + + Engine { + handle: Arc::new(RwLock::new(EngineHandle(handle))), + } + } + } + + /// Obtain a new reference to the wrapped EngineHandle. It must still be + /// locked prior to use. + fn handle(&self) -> Arc> { + self.handle.clone() + } + + pub async fn compile(&self) -> Result<(), ClamError> { + let engine_handle = self.handle(); + tokio::task::spawn_blocking(move || ffi::compile(engine_handle.blocking_write().as_ptr())) + .await + .expect("join thread") + } + + /// An extended version of `compile()` that streams [crate::callback::Progress] events (concluding with a + /// [crate::callback::Progress::Complete] event).
+ + pub async fn compile_with_progress( + &mut self, + ) -> tokio_stream::wrappers::ReceiverStream> { + let (sender, receiver) = tokio::sync::mpsc::channel(128); + let engine_handle = self.handle(); + + tokio::task::spawn_blocking(move || unsafe { + let engine_handle = engine_handle.blocking_write(); + let context = Box::into_raw(Box::new(sender)); + + clamav_sys::cl_engine_set_clcb_engine_compile_progress( + engine_handle.as_ptr(), + Some(crate::callback::progress), + context as *mut libc::c_void, + ); + + let result = ffi::compile(engine_handle.as_ptr()); + + // Clear the pointer from the libclamav engine context + clamav_sys::cl_engine_set_clcb_engine_compile_progress( + engine_handle.as_ptr(), + None, + std::ptr::null_mut(), + ); + + // Reclaim the sender + let sender = Box::from_raw(context); + sender + .blocking_send(crate::callback::Progress::Complete(result)) + .expect("blocking send"); + }); + + receiver.into() + } + + pub async fn load_databases<'a, P>(&self, dbpath: &'a P) -> Result + where + P: 'a + ?Sized + AsRef, + { + let engine_handle = self.handle(); + let dbpath = dbpath.as_ref().to_owned(); + tokio::task::spawn_blocking(move || { + let engine_handle = engine_handle.blocking_write(); + let result = ffi::load_databases(dbpath.as_ref(), engine_handle.as_ptr()); + result + }) + .await + .unwrap() + } + + /// An extended version of `load_databases()` that streams [crate::callback::Progress] events (concluding with + /// a [crate::callback::Progress::Complete] event).
+ pub async fn load_databases_with_progress<'a, P>( + &mut self, + dbpath: &'a P, + ) -> tokio_stream::wrappers::ReceiverStream> + where + P: 'a + ?Sized + AsRef, + { + let dbpath = dbpath.as_ref().to_owned(); + + let (sender, receiver) = tokio::sync::mpsc::channel(128); + let engine_handle = self.handle(); + + tokio::task::spawn_blocking(move || unsafe { + let engine_handle = engine_handle.blocking_write(); + let context = Box::into_raw(Box::new(sender)); + clamav_sys::cl_engine_set_clcb_sigload_progress( + engine_handle.as_ptr(), + Some(crate::callback::progress), + context as *mut libc::c_void, + ); + + let result = ffi::load_databases(dbpath.as_ref(), engine_handle.as_ptr()); + + // Reclaim the sender + let sender = Box::from_raw(context); + sender + .blocking_send(crate::callback::Progress::Complete(result)) + .expect("blocking send"); + + // Clear the pointer from the libclamav engine context + clamav_sys::cl_engine_set_clcb_sigload_progress( + engine_handle.as_ptr(), + None, + std::ptr::null_mut(), + ); + }); + + receiver.into() + } + + pub async fn scan>( + &self, + target: T, + filename: Option<&str>, + mut settings: crate::scan_settings::ScanSettings, + ) -> ReceiverStream { + use crate::callback::ScanCbContext; + use crate::fmap::Fmap; + use std::ffi::CString; + use std::os::raw::c_void; + use std::ptr; + + let fmap: Fmap = target.into(); + + let (sender, receiver) = tokio::sync::mpsc::channel::(128); + let c_filename = filename.map(|n| CString::new(n).expect("CString::new failed")); + let engine_handle = self.handle.clone(); + let fmap_handle = fmap.handle(); + + // A placeholder callback that directs the file inspection callback to + // copy content only for embedded files (and not the root document) + // + // This may be overridden in the future with API extensions. 
+ let should_copy_file_buffer = |recursion_level: u32, + _file_type: &str, + _file_name: Option<&str>, + _file_size: usize| + -> bool { recursion_level > 0 }; + + tokio::task::spawn_blocking(move || { + let mut c_virname = ptr::null(); + let scan_cb_context = ScanCbContext { + sender: sender.clone(), + should_copy_file_buffer: Some(Box::new(should_copy_file_buffer)), + }; + let c_sender = Box::into_raw(Box::new(scan_cb_context)); + let c_filename = c_filename.map_or(ptr::null(), |n| n.as_ptr()); + let fmap_guard = fmap_handle.blocking_lock(); + + let retval = unsafe { + clamav_sys::cl_scanmap_callback( + fmap_guard.fmap, + c_filename, + &mut c_virname, + ptr::null_mut(), + engine_handle.blocking_read().as_ptr(), + &mut settings.settings, + c_sender as *mut c_void, + ) + }; + // Reclaim the sender from C-land and send a final message + let scan_cb_cxt = unsafe { Box::from_raw(c_sender) }; + // Try to send back the final result, silently ignoring any failure + // (as the receiving task may disappear during shutdown) + let _ = scan_cb_cxt + .sender + .blocking_send(ScanEvent::Result(ScanResult::from_ffi(retval, c_virname))); + }); + + receiver.into() + } + + async fn get(&self, field: cl_engine_field) -> Result { + let engine_handle = self.handle(); + let engine_handle = engine_handle.read().await; + ffi::get(engine_handle.as_ptr(), field) + } + + async fn set(&self, field: cl_engine_field, value: EngineValue) -> Result<(), ClamError> { + dbg!(&field, &value); + let engine_handle = self.handle.write().await; + ffi::set(engine_handle.as_ptr(), field, value) + } + + pub async fn database_version(&self) -> Result { + if let EngineValue::U32(value) = self.get(cl_engine_field::CL_ENGINE_DB_VERSION).await? { + Ok(value) + } else { + Err(ClamError::new(cl_error_t::CL_EARG)) + } + } + + pub async fn database_timestamp(&self) -> Result { + if let EngineValue::Time(value) = self.get(cl_engine_field::CL_ENGINE_DB_TIME).await? 
{ + Ok(value.as_system_time()) + } else { + Err(ClamError::new(cl_error_t::CL_EARG)) + } + } + + pub async fn disable_cache(&self, disable_cache: bool) -> Result<(), ClamError> { + self.set( + cl_engine_field::CL_ENGINE_DISABLE_CACHE, + EngineValue::U32(disable_cache.into()), + ) + .await + } + + pub async fn set_max_scansize(&self, max_scansize: u64) -> Result<(), ClamError> { + self.set( + cl_engine_field::CL_ENGINE_MAX_SCANSIZE, + EngineValue::U64(max_scansize), + ) + .await + } + + pub async fn max_scansize(&self) -> Result { + if let EngineValue::U64(value) = self.get(cl_engine_field::CL_ENGINE_MAX_SCANSIZE).await? { + Ok(value) + } else { + Err(ClamError::new(cl_error_t::CL_EARG)) + } + } +} + +impl Default for Engine { + fn default() -> Self { + Self::new() + } +} + +impl Drop for EngineHandle { + fn drop(&mut self) { + unsafe { + clamav_sys::cl_engine_free(self.0); + } + } +} + +mod ffi { + use super::{ClamTime, DatabaseStats, EngineValue, EngineValueType}; + use crate::ClamError; + use clamav_sys::{ + cl_engine_field, cl_engine_get_num, cl_engine_get_str, cl_engine_set_num, + cl_engine_set_str, cl_error_t, cl_load, time_t, CL_DB_STDOPT, + }; + use std::{ + ffi::{CStr, CString}, + mem, + os::{raw::c_int, unix::prelude::OsStrExt}, + path::Path, + }; + + pub(super) fn compile(handle: *mut clamav_sys::cl_engine) -> Result<(), ClamError> { + unsafe { + let result = clamav_sys::cl_engine_compile(handle); + match result { + cl_error_t::CL_SUCCESS => Ok(()), + _ => Err(ClamError::new(result)), + } + } + } + + pub(super) fn load_databases( + dbpath: &Path, + handle: *mut clamav_sys::cl_engine, + ) -> Result { + let raw_path = CString::new(dbpath.as_os_str().as_bytes()).unwrap(); + unsafe { + let mut signature_count: u32 = 0; + let result = cl_load( + raw_path.as_ptr(), + handle, + &mut signature_count, + CL_DB_STDOPT, + ); + match result { + cl_error_t::CL_SUCCESS => Ok(DatabaseStats { signature_count }), + _ => Err(ClamError::new(result)), + } + } + } + + 
pub(super) fn get( + engine_handle: *mut clamav_sys::cl_engine, + field: cl_engine_field, + ) -> Result { + unsafe { + match get_field_type(field) { + EngineValueType::U32 => { + let mut err: c_int = 0; + let value = cl_engine_get_num(engine_handle, field, &mut err) as u32; + if err != 0 { + Err(ClamError::new(mem::transmute(err))) + } else { + Ok(EngineValue::U32(value)) + } + } + EngineValueType::U64 => { + let mut err: c_int = 0; + let value = cl_engine_get_num(engine_handle, field, &mut err) as u64; + if err != 0 { + Err(ClamError::new(mem::transmute(err))) + } else { + Ok(EngineValue::U64(value)) + } + } + EngineValueType::String => { + let mut err = 0; + let value = cl_engine_get_str(engine_handle, field, &mut err); + if err != 0 { + Err(ClamError::new(mem::transmute(err))) + } else { + Ok(EngineValue::String( + CStr::from_ptr(value).to_str().unwrap().to_string(), + )) + } + } + EngineValueType::Time => { + let mut err = 0; + let value = cl_engine_get_num(engine_handle, field, &mut err) as time_t; + if err != 0 { + Err(ClamError::new(mem::transmute(err))) + } else { + Ok(EngineValue::Time(ClamTime(value))) + } + } + } + } + } + + pub(super) fn set( + engine_handle: *mut clamav_sys::cl_engine, + field: cl_engine_field, + value: EngineValue, + ) -> Result<(), ClamError> { + let expected_type = get_field_type(field); + let actual_type = match &value { + EngineValue::U32(_) => EngineValueType::U32, + EngineValue::U64(_) => EngineValueType::U64, + EngineValue::String(_) => EngineValueType::String, + EngineValue::Time(_) => EngineValueType::Time, + }; + + if expected_type != actual_type { + return Err(ClamError::new(cl_error_t::CL_EARG)); + } + + unsafe { + match value { + EngineValue::U32(val) => { + let err = cl_engine_set_num(engine_handle, field, val as i64); + if err != cl_error_t::CL_SUCCESS { + Err(ClamError::new(err)) + } else { + Ok(()) + } + } + EngineValue::U64(val) => { + let err = cl_engine_set_num(engine_handle, field, val as i64); + if err != 
cl_error_t::CL_SUCCESS { + Err(ClamError::new(err)) + } else { + Ok(()) + } + } + EngineValue::String(val) => { + let val = CString::new(val).unwrap(); + let err = cl_engine_set_str(engine_handle, field, val.as_ptr()); + if err != cl_error_t::CL_SUCCESS { + Err(ClamError::new(err)) + } else { + Ok(()) + } + } + EngineValue::Time(ClamTime(val)) => { + let err = cl_engine_set_num(engine_handle, field, val); + if err != cl_error_t::CL_SUCCESS { + Err(ClamError::new(err)) + } else { + Ok(()) + } + } + } + } + } + + fn get_field_type(field: cl_engine_field) -> EngineValueType { + match field { + cl_engine_field::CL_ENGINE_MAX_SCANSIZE => EngineValueType::U64, + cl_engine_field::CL_ENGINE_MAX_FILESIZE => EngineValueType::U64, + cl_engine_field::CL_ENGINE_MAX_RECURSION => EngineValueType::U32, + cl_engine_field::CL_ENGINE_MAX_FILES => EngineValueType::U32, + cl_engine_field::CL_ENGINE_MIN_CC_COUNT => EngineValueType::U32, + cl_engine_field::CL_ENGINE_MIN_SSN_COUNT => EngineValueType::U32, + cl_engine_field::CL_ENGINE_PUA_CATEGORIES => EngineValueType::String, + cl_engine_field::CL_ENGINE_DB_OPTIONS => EngineValueType::U32, + cl_engine_field::CL_ENGINE_DB_VERSION => EngineValueType::U32, + cl_engine_field::CL_ENGINE_DB_TIME => EngineValueType::Time, + cl_engine_field::CL_ENGINE_AC_ONLY => EngineValueType::U32, + cl_engine_field::CL_ENGINE_AC_MINDEPTH => EngineValueType::U32, + cl_engine_field::CL_ENGINE_AC_MAXDEPTH => EngineValueType::U32, + cl_engine_field::CL_ENGINE_TMPDIR => EngineValueType::String, + cl_engine_field::CL_ENGINE_KEEPTMP => EngineValueType::U32, + cl_engine_field::CL_ENGINE_BYTECODE_SECURITY => EngineValueType::U32, + cl_engine_field::CL_ENGINE_BYTECODE_TIMEOUT => EngineValueType::U32, + cl_engine_field::CL_ENGINE_BYTECODE_MODE => EngineValueType::U32, + cl_engine_field::CL_ENGINE_MAX_EMBEDDEDPE => EngineValueType::U64, + cl_engine_field::CL_ENGINE_MAX_HTMLNORMALIZE => EngineValueType::U64, + cl_engine_field::CL_ENGINE_MAX_HTMLNOTAGS => 
EngineValueType::U64, + cl_engine_field::CL_ENGINE_MAX_SCRIPTNORMALIZE => EngineValueType::U64, + cl_engine_field::CL_ENGINE_MAX_ZIPTYPERCG => EngineValueType::U64, + cl_engine_field::CL_ENGINE_FORCETODISK => EngineValueType::U32, + cl_engine_field::CL_ENGINE_DISABLE_CACHE => EngineValueType::U32, + cl_engine_field::CL_ENGINE_DISABLE_PE_STATS => EngineValueType::U32, + cl_engine_field::CL_ENGINE_STATS_TIMEOUT => EngineValueType::U32, + cl_engine_field::CL_ENGINE_MAX_PARTITIONS => EngineValueType::U32, + cl_engine_field::CL_ENGINE_MAX_ICONSPE => EngineValueType::U32, + cl_engine_field::CL_ENGINE_MAX_RECHWP3 => EngineValueType::U32, + cl_engine_field::CL_ENGINE_MAX_SCANTIME => EngineValueType::U32, + cl_engine_field::CL_ENGINE_PCRE_MATCH_LIMIT => EngineValueType::U64, + cl_engine_field::CL_ENGINE_PCRE_RECMATCH_LIMIT => EngineValueType::U64, + cl_engine_field::CL_ENGINE_PCRE_MAX_FILESIZE => EngineValueType::U64, + cl_engine_field::CL_ENGINE_DISABLE_PE_CERTS => EngineValueType::U32, + cl_engine_field::CL_ENGINE_PE_DUMPCERTS => EngineValueType::U32, + field => panic!("{field:?} not yet supported"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const TEST_DATABASES_PATH: &str = "test_data/database/"; + const EXAMPLE_DATABASE_PATH: &str = "test_data/database/example.cud"; + + #[tokio::test] + async fn compile_empty_engine_success() { + crate::initialize().expect("initialize should succeed"); + let scanner = Engine::new(); + assert!(scanner.compile().await.is_ok(), "compile should succeed"); + } + + #[tokio::test] + async fn load_databases_success() { + crate::initialize().expect("initialize should succeed"); + let scanner = Engine::new(); + let result = scanner.load_databases(TEST_DATABASES_PATH).await; + assert!(result.is_ok(), "load should succeed"); + assert!( + result.unwrap().signature_count > 0, + "should load some signatures" + ); + } + + #[tokio::test] + async fn load_databases_with_file_success() { + crate::initialize().expect("initialize should 
succeed"); + let scanner = Engine::new(); + let result = scanner.load_databases(EXAMPLE_DATABASE_PATH).await; + assert!(result.is_ok(), "load should succeed"); + assert!( + result.unwrap().signature_count > 0, + "should load some signatures" + ); + } + + #[tokio::test] + async fn load_databases_fake_path_fails() { + crate::initialize().expect("initialize should succeed"); + let scanner = Engine::new(); + assert!( + scanner.load_databases("/dev/null").await.is_err(), + "should fail to load invalid databases" + ); + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..e588630 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,77 @@ +use std::error; +use std::ffi::CStr; +use std::fmt; +use std::str; + +use clamav_sys::cl_error_t; + +/// An error indicating a clam failure. +#[derive(Clone, PartialEq, Eq)] +pub struct ClamError { + code: cl_error_t, +} + +impl ClamError { + pub fn new(code: cl_error_t) -> Self { + ClamError { code } + } + + pub fn string_error(&self) -> String { + unsafe { + let ptr = clamav_sys::cl_strerror(self.code); + let bytes = CStr::from_ptr(ptr).to_bytes(); + str::from_utf8(bytes) + .expect("Invalid UTF8 string") + .to_string() + } + } + + pub fn code(&self) -> i32 { + self.code.0 as i32 + } +} + +impl From for ClamError { + fn from(code: cl_error_t) -> Self { + Self::new(code) + } +} + +impl fmt::Display for ClamError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "cl_error {}: {}", + self.code(), + self.string_error() + ) + } +} + +impl fmt::Debug for ClamError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self) + } +} + +impl error::Error for ClamError { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn error_as_string_success() { + let err = ClamError::new(cl_error_t::CL_EFORMAT); + let err_string = err.to_string(); + dbg!(&err_string); + assert!( + err_string.contains("Bad 
format or broken data"), + "error description should contain string error" + ); + } +} diff --git a/src/fmap.rs b/src/fmap.rs new file mode 100644 index 0000000..4324a35 --- /dev/null +++ b/src/fmap.rs @@ -0,0 +1,188 @@ +// +// Copyright (C) 2020 Jonas Zaddach. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. +// + +#[cfg(windows)] +use bindings::Windows::{ + Win32::Storage::FileSystem::ReadFile, + Win32::System::Diagnostics::Debug::GetLastError, + Win32::System::Diagnostics::Debug::ERROR_HANDLE_EOF, + Win32::System::SystemServices::{HANDLE, OVERLAPPED}, +}; +use clamav_sys::{cl_fmap_close, cl_fmap_open_handle, cl_fmap_open_memory, cl_fmap_t}; +use std::{ + error, fmt, + fs::File, + os::{self, raw::c_void, unix::prelude::AsRawFd}, + result, + sync::Arc, +}; + +use tokio::sync::Mutex; + +#[derive(Debug, Clone)] +pub struct MapError; + +impl fmt::Display for MapError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Failed to open mapping") + } +} + +impl error::Error for MapError { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + None + } +} + +impl MapError { + pub fn new() -> MapError { + MapError {} + } +} + +impl Default for MapError { + fn default() -> Self { + Self::new() + } +} + +pub type Result = result::Result; + +#[cfg(windows)] +extern "C" fn cl_pread( + handle: *mut os::raw::c_void, + buf: *mut 
os::raw::c_void, + count: os::raw::c_ulonglong, + offset: os::raw::c_long, +) -> os::raw::c_long { + let mut read_bytes = 0; + + unsafe { + let mut overlapped: OVERLAPPED = std::mem::MaybeUninit::zeroed().assume_init(); + overlapped.InternalHigh = (offset as usize) >> 32; + overlapped.Internal = (offset as usize) & 0xffffffff; + + if !ReadFile( + std::mem::transmute::<_, HANDLE>(handle), + buf, + count as u32, + &mut read_bytes, + &mut overlapped, + ) + .as_bool() + { + let err = GetLastError(); + if err != ERROR_HANDLE_EOF { + return -1; + } + } + } + + read_bytes as i32 +} + +#[cfg(unix)] +extern "C" fn cl_pread( + handle: *mut os::raw::c_void, + buf: *mut os::raw::c_void, + count: usize, + offset: os::raw::c_long, +) -> os::raw::c_long { + unsafe { + libc::pread(handle as i32, buf, count, offset) + .try_into() + .unwrap() + } +} + +/// A safer abstraction around ClamAV's cl_fmap_t. +#[derive(Clone)] +pub struct Fmap { + handle: Arc>, +} + +pub(crate) struct FmapHandle { + source: Option, + pub(crate) fmap: *mut cl_fmap_t, +} + +pub enum FmapSource { + Vec(Vec), + File(std::fs::File), +} + +impl From> for Fmap { + fn from(vec: Vec) -> Self { + let fmap = unsafe { cl_fmap_open_memory(vec.as_ptr() as *const c_void, vec.len()) }; + + Self { + handle: Arc::new(Mutex::new(FmapHandle { + source: Some(FmapSource::Vec(vec)), + fmap, + })), + } + } +} + +impl TryFrom for Fmap { + type Error = std::io::Error; + + fn try_from(file: File) -> std::result::Result { + let offset = 0; + let len = file.metadata()?.len(); + let aging = true; + Ok(Self::from_file(file, offset, len as usize, aging)) + } +} + +impl Fmap { + pub fn from_file(file: File, offset: usize, len: usize, aging: bool) -> Self { + #[cfg(unix)] + let fd = file.as_raw_fd(); + #[cfg(windows)] + let fd = file.as_raw_handle(); + let fmap = unsafe { + cl_fmap_open_handle(fd as *mut c_void, offset, len, Some(cl_pread), aging.into()) + }; + Self { + handle: Arc::new(Mutex::new(FmapHandle { + fmap, + source: 
Some(FmapSource::File(file)), + })), + } + } + + pub(crate) fn handle(&self) -> Arc> { + self.handle.clone() + } + + /// Reclaim the underlying structure from which the Fmap was created + + pub async fn into_inner(self) -> FmapSource { + let mut handle = self.handle.lock().await; + handle.source.take().unwrap() + } +} + +impl Drop for FmapHandle { + fn drop(&mut self) { + unsafe { cl_fmap_close(self.fmap) } + } +} + +unsafe impl Send for FmapHandle {} diff --git a/src/layer_attr.rs b/src/layer_attr.rs new file mode 100644 index 0000000..adf5866 --- /dev/null +++ b/src/layer_attr.rs @@ -0,0 +1,14 @@ +use bitflags::bitflags; + +bitflags! { + #[repr(C)] + #[derive(Default)] + /// Bitfield representing attributes of a file layer encountered during file + /// inspection + pub struct LayerAttributes: u32 { + /// Layer has been normalized + const NORMALIZED = clamav_sys::LAYER_ATTRIBUTES_NORMALIZED; + /// Layer was decrypted, or contained within another decrypted layer + const DECRYPTED = clamav_sys::LAYER_ATTRIBUTES_DECRYPTED; + } +} diff --git a/src/lib.rs b/src/lib.rs index e69de29..dd63ab6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -0,0 +1,165 @@ +/// Callback support structures and support functions +pub mod callback; +pub mod db; +pub mod engine; +mod error; +pub mod fmap; +/// File inspection layer attributes +pub mod layer_attr; +pub mod scan_settings; +pub mod version; + +/// Signature database processing +pub mod cvd; + +#[cfg(windows)] +pub mod windows_fd; + +use clamav_sys::{cl_error_t, cl_init, cl_initialize_crypto}; +pub use error::ClamError; +use lazy_static::lazy_static; +use std::{ + ffi::CStr, + pin::Pin, + sync::{Arc, Mutex, Once}, +}; +use tokio::io::AsyncRead; + +lazy_static! { + /// Optional function to call for message callbacks + static ref CLAMAV_MESSAGE_CALLBACK: Arc>> = Arc::new(Mutex::new(None)); +} + +/// Initializes clamav +/// +/// This must be called once per process. This is safe to call multiple times. 
+pub fn initialize() -> Result<(), ClamError> { + // the cl_init implementation isn't thread-safe, which is painful for tests + static ONCE: Once = Once::new(); + static mut RESULT: cl_error_t = cl_error_t::CL_SUCCESS; + unsafe { + ONCE.call_once(|| { + RESULT = cl_init(clamav_sys::CL_INIT_DEFAULT); + // this function always returns OK + if RESULT == cl_error_t::CL_SUCCESS { + cl_initialize_crypto(); + libc::atexit(cleanup); + } + }); + + extern "C" fn cleanup() { + unsafe { + clamav_sys::cl_cleanup_crypto(); + } + } + + match RESULT { + cl_error_t::CL_SUCCESS => Ok(()), + _ => Err(ClamError::new(RESULT)), + } + } +} + +pub fn version() -> String { + let ver = unsafe { clamav_sys::cl_retver() }; + if ver.is_null() { + "".to_string() + } else { + unsafe { std::ffi::CStr::from_ptr(ver).to_string_lossy().to_string() } + } +} + +pub type MsgCallback = Box; + +/// Specify a callback to execute when libclamav would emit a message to the +/// console +/// +/// Note that the libclamav APIs do not permit restoring the default handler. +pub fn set_msg_callback(cb: MsgCallback) { + unsafe { + *(CLAMAV_MESSAGE_CALLBACK.lock().unwrap()) = Some(cb); + clamav_sys::cl_set_clcb_msg(Some(clcb_msg_wrapper)); + } +} + +/// +/// Check whether the libclamav message callback has been overriden (which it +/// should be if this function is being called). If so, safely capture a +/// C-string message emitted by libclamav (converting any non-UTF-8 content to +/// "safe" replacements) and pass to the previously-specified callback. 
+/// +unsafe extern "C" fn clcb_msg_wrapper( + severity: clamav_sys::cl_msg, + fullmsg: *const i8, + msg: *const i8, + _context: *mut libc::c_void, +) { + // Remap the log level to "standard" Rust log levels + let log_level = match severity { + clamav_sys::cl_msg::CL_MSG_WARN => log::Level::Warn, + clamav_sys::cl_msg::CL_MSG_ERROR => log::Level::Error, + _ => log::Level::Info, + }; + if let Ok(cb) = CLAMAV_MESSAGE_CALLBACK.lock() { + if let Some(cb) = &*cb { + // Convert the provided C-strings into safe types + let fullmsg = CStr::from_ptr(fullmsg).to_string_lossy().to_string(); + let msg = CStr::from_ptr(msg).to_string_lossy().to_string(); + cb(log_level, &fullmsg, &msg); + } else { + // This function shouldn't fire when the callback has been set to None + unreachable!() + } + } +} + +/// A type defining the trait object returned in the FileInspect event that +/// allows access to embedded file content. +pub type ContentHandle = Pin>; + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + lazy_static! 
{ + // A global that can be modified in tests + static ref TEST_STORE: Arc>> = Arc::new(Mutex::new(HashMap::new())); + } + + #[test] + fn initialize_success() { + assert!(initialize().is_ok(), "initialize should succeed"); + } + + #[tokio::test] + async fn clcb_msg_override() { + const KEY: &str = module_path!(); + + { + let mut test_store = TEST_STORE.lock().unwrap(); + (*test_store).insert(KEY.into(), "".into()); + } + + fn cb(_severity: log::Level, _fullmsg: &str, msg: &str) { + let mut test_store = TEST_STORE.lock().unwrap(); + (*test_store).insert(KEY.into(), msg.into()); + } + + // Override the message callback + set_msg_callback(Box::new(cb)); + + // Force an error + let clam_engine = crate::engine::Engine::new(); + if clam_engine.load_databases("/no-such-path").await.is_ok() { + panic!("database load should have failed") + } + + // Check that the message callback captured the error + let test_store = TEST_STORE.lock().unwrap(); + let msg = (*test_store) + .get(KEY) + .expect(concat!("value of ", module_path!())); + assert!(msg.contains("/no-such-path")); + } +} diff --git a/src/scan_settings.rs b/src/scan_settings.rs new file mode 100644 index 0000000..1d48da0 --- /dev/null +++ b/src/scan_settings.rs @@ -0,0 +1,491 @@ +#![allow(dead_code)] + +use clamav_sys::{ + cl_scan_options, CL_SCAN_DEV_COLLECT_PERFORMANCE_INFO, CL_SCAN_DEV_COLLECT_SHA, + CL_SCAN_GENERAL_ALLMATCHES, CL_SCAN_GENERAL_COLLECT_METADATA, CL_SCAN_GENERAL_HEURISTICS, + CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE, CL_SCAN_GENERAL_UNPRIVILEGED, CL_SCAN_HEURISTIC_BROKEN, + CL_SCAN_HEURISTIC_ENCRYPTED_ARCHIVE, CL_SCAN_HEURISTIC_ENCRYPTED_DOC, + CL_SCAN_HEURISTIC_EXCEEDS_MAX, CL_SCAN_HEURISTIC_MACROS, CL_SCAN_HEURISTIC_PARTITION_INTXN, + CL_SCAN_HEURISTIC_PHISHING_CLOAK, CL_SCAN_HEURISTIC_PHISHING_SSL_MISMATCH, + CL_SCAN_HEURISTIC_STRUCTURED, CL_SCAN_HEURISTIC_STRUCTURED_CC, + CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL, CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED, + CL_SCAN_MAIL_PARTIAL_MESSAGE, 
CL_SCAN_PARSE_ARCHIVE, CL_SCAN_PARSE_ELF, CL_SCAN_PARSE_HTML, + CL_SCAN_PARSE_HWP3, CL_SCAN_PARSE_MAIL, CL_SCAN_PARSE_OLE2, CL_SCAN_PARSE_PDF, + CL_SCAN_PARSE_PE, CL_SCAN_PARSE_SWF, CL_SCAN_PARSE_XMLDOCS, +}; + +use bitflags::bitflags; + +bitflags! { + #[repr(C)] + pub struct GeneralFlags: u32 { + /// scan in all-match mode + const CL_SCAN_GENERAL_ALLMATCHES = CL_SCAN_GENERAL_ALLMATCHES; + /// collect metadata (--gen-json) + const CL_SCAN_GENERAL_COLLECT_METADATA = CL_SCAN_GENERAL_COLLECT_METADATA; + /// option to enable heuristic alerts + const CL_SCAN_GENERAL_HEURISTICS = CL_SCAN_GENERAL_HEURISTICS; + /// allow heuristic match to take precedence. + const CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE = CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE; + /// scanner will not have read access to files. + const CL_SCAN_GENERAL_UNPRIVILEGED = CL_SCAN_GENERAL_UNPRIVILEGED; + } +} + +bitflags! { + #[repr(C)] + pub struct ParseFlags : u32 { + const CL_SCAN_PARSE_ARCHIVE = CL_SCAN_PARSE_ARCHIVE; + const CL_SCAN_PARSE_ELF = CL_SCAN_PARSE_ELF; + const CL_SCAN_PARSE_PDF = CL_SCAN_PARSE_PDF; + const CL_SCAN_PARSE_SWF = CL_SCAN_PARSE_SWF; + const CL_SCAN_PARSE_HWP3 = CL_SCAN_PARSE_HWP3; + const CL_SCAN_PARSE_XMLDOCS = CL_SCAN_PARSE_XMLDOCS; + const CL_SCAN_PARSE_MAIL = CL_SCAN_PARSE_MAIL; + const CL_SCAN_PARSE_OLE2 = CL_SCAN_PARSE_OLE2; + const CL_SCAN_PARSE_HTML = CL_SCAN_PARSE_HTML; + const CL_SCAN_PARSE_PE = CL_SCAN_PARSE_PE; + } +} + +bitflags! 
{ + #[repr(C)] + pub struct HeuristicFlags : u32 { + /// alert on broken PE and broken ELF files + const CL_SCAN_HEURISTIC_BROKEN = CL_SCAN_HEURISTIC_BROKEN; + /// alert when files exceed scan limits (filesize, max scansize, or max recursion depth) + const CL_SCAN_HEURISTIC_EXCEEDS_MAX = CL_SCAN_HEURISTIC_EXCEEDS_MAX; + /// alert on SSL mismatches + const CL_SCAN_HEURISTIC_PHISHING_SSL_MISMATCH = CL_SCAN_HEURISTIC_PHISHING_SSL_MISMATCH; + /// alert on cloaked URLs in emails + const CL_SCAN_HEURISTIC_PHISHING_CLOAK = CL_SCAN_HEURISTIC_PHISHING_CLOAK; + /// alert on OLE2 files containing macros + const CL_SCAN_HEURISTIC_MACROS = CL_SCAN_HEURISTIC_MACROS; + /// alert if archive is encrypted (rar, zip, etc) + const CL_SCAN_HEURISTIC_ENCRYPTED_ARCHIVE = CL_SCAN_HEURISTIC_ENCRYPTED_ARCHIVE; + /// alert if a document is encrypted (pdf, docx, etc) + const CL_SCAN_HEURISTIC_ENCRYPTED_DOC = CL_SCAN_HEURISTIC_ENCRYPTED_DOC; + /// alert if partition table size doesn't make sense + const CL_SCAN_HEURISTIC_PARTITION_INTXN = CL_SCAN_HEURISTIC_PARTITION_INTXN; + /// data loss prevention options, i.e. alert when detecting personal information + const CL_SCAN_HEURISTIC_STRUCTURED = CL_SCAN_HEURISTIC_STRUCTURED; + /// alert when detecting social security numbers + const CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL = CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL; + /// alert when detecting stripped social security numbers + const CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED = CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED; + /// alert when detecting credit card numbers + const CL_SCAN_HEURISTIC_STRUCTURED_CC = CL_SCAN_HEURISTIC_STRUCTURED_CC; + } +} + +bitflags! { + #[repr(C)] + pub struct MailFlags : u32 { + const CL_SCAN_MAIL_PARTIAL_MESSAGE = CL_SCAN_MAIL_PARTIAL_MESSAGE; + } +} + +bitflags! 
{ + #[repr(C)] + pub struct DevFlags : u32 { + /// Enables hash output in sha-collect builds - for internal use only + const CL_SCAN_DEV_COLLECT_SHA = CL_SCAN_DEV_COLLECT_SHA; + /// collect performance timings + const CL_SCAN_DEV_COLLECT_PERFORMANCE_INFO = CL_SCAN_DEV_COLLECT_PERFORMANCE_INFO; + } +} + +#[derive(Default)] +pub struct ScanSettings { + pub settings: cl_scan_options, +} + +impl ScanSettings { + pub fn general(&self) -> GeneralFlags { + GeneralFlags::from_bits(self.settings.general).unwrap() + } + pub fn set_general(&mut self, flags: GeneralFlags) { + self.settings.general = flags.bits(); + } + pub fn parse(&self) -> ParseFlags { + ParseFlags::from_bits(self.settings.parse).unwrap() + } + pub fn set_parse(&mut self, flags: ParseFlags) { + self.settings.parse = flags.bits(); + } + pub fn heuristic(&self) -> HeuristicFlags { + HeuristicFlags::from_bits(self.settings.heuristic).unwrap() + } + pub fn set_heuristic(&mut self, flags: HeuristicFlags) { + self.settings.heuristic = flags.bits(); + } + pub fn mail(&self) -> MailFlags { + MailFlags::from_bits(self.settings.mail).unwrap() + } + pub fn set_mail(&mut self, flags: MailFlags) { + self.settings.mail = flags.bits(); + } + pub fn dev(&self) -> DevFlags { + DevFlags::from_bits(self.settings.dev).unwrap() + } + pub fn set_dev(&mut self, flags: DevFlags) { + self.settings.dev = flags.bits(); + } +} + +impl ToString for ScanSettings { + fn to_string(&self) -> String { + let mut flag_names = Vec::::new(); + + let general_flags = vec![ + ( + GeneralFlags::CL_SCAN_GENERAL_ALLMATCHES, + "CL_SCAN_GENERAL_ALLMATCHES", + ), + ( + GeneralFlags::CL_SCAN_GENERAL_COLLECT_METADATA, + "CL_SCAN_GENERAL_COLLECT_METADATA", + ), + ( + GeneralFlags::CL_SCAN_GENERAL_HEURISTICS, + "CL_SCAN_GENERAL_HEURISTICS", + ), + ( + GeneralFlags::CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE, + "CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE", + ), + ( + GeneralFlags::CL_SCAN_GENERAL_UNPRIVILEGED, + "CL_SCAN_GENERAL_UNPRIVILEGED", + ), + ]; + let parse_flags 
= vec![ + (ParseFlags::CL_SCAN_PARSE_ARCHIVE, "CL_SCAN_PARSE_ARCHIVE"), + (ParseFlags::CL_SCAN_PARSE_ELF, "CL_SCAN_PARSE_ELF"), + (ParseFlags::CL_SCAN_PARSE_PDF, "CL_SCAN_PARSE_PDF"), + (ParseFlags::CL_SCAN_PARSE_SWF, "CL_SCAN_PARSE_SWF"), + (ParseFlags::CL_SCAN_PARSE_HWP3, "CL_SCAN_PARSE_HWP3"), + (ParseFlags::CL_SCAN_PARSE_XMLDOCS, "CL_SCAN_PARSE_XMLDOCS"), + (ParseFlags::CL_SCAN_PARSE_MAIL, "CL_SCAN_PARSE_MAIL"), + (ParseFlags::CL_SCAN_PARSE_OLE2, "CL_SCAN_PARSE_OLE2"), + (ParseFlags::CL_SCAN_PARSE_HTML, "CL_SCAN_PARSE_HTML"), + (ParseFlags::CL_SCAN_PARSE_PE, "CL_SCAN_PARSE_PE"), + ]; + let heuristic_flags = vec![ + ( + HeuristicFlags::CL_SCAN_HEURISTIC_BROKEN, + "CL_SCAN_HEURISTIC_BROKEN", + ), + ( + HeuristicFlags::CL_SCAN_HEURISTIC_EXCEEDS_MAX, + "CL_SCAN_HEURISTIC_EXCEEDS_MAX", + ), + ( + HeuristicFlags::CL_SCAN_HEURISTIC_PHISHING_SSL_MISMATCH, + "CL_SCAN_HEURISTIC_PHISHING_SSL_MISMATCH", + ), + ( + HeuristicFlags::CL_SCAN_HEURISTIC_PHISHING_CLOAK, + "CL_SCAN_HEURISTIC_PHISHING_CLOAK", + ), + ( + HeuristicFlags::CL_SCAN_HEURISTIC_MACROS, + "CL_SCAN_HEURISTIC_MACROS", + ), + ( + HeuristicFlags::CL_SCAN_HEURISTIC_ENCRYPTED_ARCHIVE, + "CL_SCAN_HEURISTIC_ENCRYPTED_ARCHIVE", + ), + ( + HeuristicFlags::CL_SCAN_HEURISTIC_ENCRYPTED_DOC, + "CL_SCAN_HEURISTIC_ENCRYPTED_DOC", + ), + ( + HeuristicFlags::CL_SCAN_HEURISTIC_PARTITION_INTXN, + "CL_SCAN_HEURISTIC_PARTITION_INTXN", + ), + ( + HeuristicFlags::CL_SCAN_HEURISTIC_STRUCTURED, + "CL_SCAN_HEURISTIC_STRUCTURED", + ), + ( + HeuristicFlags::CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL, + "CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL", + ), + ( + HeuristicFlags::CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED, + "CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED", + ), + ( + HeuristicFlags::CL_SCAN_HEURISTIC_STRUCTURED_CC, + "CL_SCAN_HEURISTIC_STRUCTURED_CC", + ), + ]; + + let mail_flags = vec![( + MailFlags::CL_SCAN_MAIL_PARTIAL_MESSAGE, + "CL_SCAN_MAIL_PARTIAL_MESSAGE", + )]; + + let dev_flags = vec![ + (DevFlags::CL_SCAN_DEV_COLLECT_SHA, 
"CL_SCAN_DEV_COLLECT_SHA"), + ( + DevFlags::CL_SCAN_DEV_COLLECT_PERFORMANCE_INFO, + "CL_SCAN_DEV_COLLECT_PERFORMANCE_INFO", + ), + ]; + + for (flag, name) in general_flags { + if self.general().contains(flag) { + flag_names.push(name.to_string()); + } + } + for (flag, name) in parse_flags { + if self.parse().contains(flag) { + flag_names.push(name.to_string()); + } + } + for (flag, name) in heuristic_flags { + if self.heuristic().contains(flag) { + flag_names.push(name.to_string()); + } + } + for (flag, name) in mail_flags { + if self.mail().contains(flag) { + flag_names.push(name.to_string()); + } + } + for (flag, name) in dev_flags { + if self.dev().contains(flag) { + flag_names.push(name.to_string()); + } + } + + flag_names.join(" ") + } +} + +pub struct ScanSettingsBuilder { + current: cl_scan_options, +} + +impl ScanSettingsBuilder { + pub fn new() -> Self { + ScanSettingsBuilder { + current: cl_scan_options::default(), + } + } + + pub fn build(&self) -> ScanSettings { + ScanSettings { + settings: self.current, + } + } + + /// Disable support for special files. + pub fn clear(&mut self) -> &mut Self { + self.current.parse = 0; + self + } + + /// Enable transparent scanning of various archive formats. + pub fn enable_archive(&mut self) -> &mut Self { + self.current.parse |= CL_SCAN_PARSE_ARCHIVE; + self + } + + /// Enable support for mail files. + pub fn enable_mail(&mut self) -> &mut Self { + self.current.parse |= CL_SCAN_PARSE_MAIL; + self + } + + /// Enable support for OLE2 containers (used by MS Office and .msi files). + pub fn enable_ole2(&mut self) -> &mut Self { + self.current.parse |= CL_SCAN_PARSE_OLE2; + self + } + + /// With this flag the library will mark encrypted archives as viruses (Encrypted.Zip, Encrypted.RAR). + pub fn block_encrypted(&mut self) -> &mut Self { + self.current.heuristic |= CL_SCAN_HEURISTIC_ENCRYPTED_ARCHIVE; + self + } + + /// Enable HTML normalisation (including ScrEnc decryption). 
+ pub fn enable_html(&mut self) -> &mut Self { + self.current.parse |= CL_SCAN_PARSE_HTML; + self + } + + /// Enable deep scanning of Portable Executable files and allows libclamav to unpack executables compressed with run-time unpackers. + pub fn enable_pe(&mut self) -> &mut Self { + self.current.parse |= CL_SCAN_PARSE_PE; + self + } + + /// Try to detect broken executables and mark them as Broken.Executable. + pub fn block_broken_executables(&mut self) -> &mut Self { + self.current.heuristic |= CL_SCAN_HEURISTIC_BROKEN; + self + } + + /// Mark archives as viruses if maxfiles, maxfilesize, or maxreclevel limit is reached. + pub fn block_max_limit(&mut self) -> &mut Self { + self.current.heuristic |= CL_SCAN_HEURISTIC_EXCEEDS_MAX; + self + } + + /// Enable phishing module: always block SSL mismatches in URLs. + pub fn enable_phishing_blockssl(&mut self) -> &mut Self { + self.current.heuristic |= CL_SCAN_HEURISTIC_PHISHING_SSL_MISMATCH; + self + } + + /// Enable phishing module: always block cloaked URLs. + pub fn enable_phishing_blockcloak(&mut self) -> &mut Self { + self.current.heuristic |= CL_SCAN_HEURISTIC_PHISHING_CLOAK; + self + } + + /// Enable support for ELF files. + pub fn enable_elf(&mut self) -> &mut Self { + self.current.parse |= CL_SCAN_PARSE_ELF; + self + } + + /// Enable scanning within PDF files. + pub fn enable_pdf(&mut self) -> &mut Self { + self.current.parse |= CL_SCAN_PARSE_PDF; + self + } + + /// Enable the DLP module which scans for credit card and SSN numbers. + pub fn enable_structured(&mut self) -> &mut Self { + self.current.heuristic |= CL_SCAN_HEURISTIC_STRUCTURED; + self + } + + /// Enable search for SSNs formatted as xx-yy-zzzz. + pub fn enable_structured_ssn_normal(&mut self) -> &mut Self { + self.current.heuristic |= CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL; + self + } + + /// Enable search for SSNs formatted as xxyyzzzz. 
+ pub fn enable_structured_ssn_stripped(&mut self) -> &mut Self { + self.current.heuristic |= CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED; + self + } + + /// Enable scanning of RFC1341 messages split over many emails. + /// + /// You will need to periodically clean up $TemporaryDirectory/clamav-partial directory. + pub fn enable_partial_message(&mut self) -> &mut Self { + self.current.mail |= CL_SCAN_MAIL_PARTIAL_MESSAGE; + self + } + + /// Allow heuristic match to take precedence. When enabled, if a heuristic scan (such + /// as phishingScan) detects a possible virus/phish it will stop scan immediately. + /// + /// Recommended, saves CPU scan-time. When disabled, virus/phish detected by heuristic + /// scans will be reported only at the end of a scan. If an archive contains both a + /// heuristically detected virus/phishing, and a real malware, the real malware will be + /// reported. + pub fn enable_heuristic_precedence(&mut self) -> &mut Self { + self.current.general |= CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE; + self + } + + /// OLE2 containers, which contain VBA macros will be marked infected (Heuris-tics.OLE2.ContainsMacros). + pub fn block_macros(&mut self) -> &mut Self { + self.current.heuristic |= CL_SCAN_HEURISTIC_MACROS; + self + } + + /// Enable scanning within SWF files, notably compressed SWF. + pub fn enable_swf(&mut self) -> &mut Self { + self.current.parse |= CL_SCAN_PARSE_SWF; + self + } + + /// Enable scanning of XML docs. + pub fn enable_xmldocs(&mut self) -> &mut Self { + self.current.parse |= CL_SCAN_PARSE_XMLDOCS; + self + } + + /// Enable scanning of HWP3 files. 
+ pub fn enable_hwp3(&mut self) -> &mut Self { + self.current.parse |= CL_SCAN_PARSE_HWP3; + self + } +} + +impl Default for ScanSettingsBuilder { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn builder_defaults_to_standard_opts() { + let settings = ScanSettingsBuilder::new().build(); + assert_eq!(settings.settings, clamav_sys::cl_scan_options::default()); + } + + #[test] + fn builder_clear_success() { + let settings = ScanSettingsBuilder::new().clear().build(); + assert_eq!(settings.settings.general, 0); + assert_eq!(settings.settings.parse, 0); + assert_eq!(settings.settings.heuristic, 0); + assert_eq!(settings.settings.mail, 0); + assert_eq!(settings.settings.dev, 0); + } + + #[test] + fn builder_just_pdf_success() { + let settings = ScanSettingsBuilder::new().clear().enable_pdf().build(); + assert_eq!(settings.settings.parse, CL_SCAN_PARSE_PDF); + } + + #[test] + fn builder_normal_files_success() { + let settings = ScanSettingsBuilder::new() + .clear() + .enable_pdf() + .enable_html() + .enable_pe() + .build(); + assert_eq!( + settings.settings.parse, + CL_SCAN_PARSE_PDF | CL_SCAN_PARSE_HTML | CL_SCAN_PARSE_PE + ); + } + + #[test] + fn display_settings_standard_options_success() { + let string_settings = ScanSettings::default().to_string(); + assert!(string_settings.contains("CL_SCAN_PARSE_ARCHIVE")); + assert!(string_settings.contains("CL_SCAN_PARSE_MAIL")); + assert!(string_settings.contains("CL_SCAN_PARSE_OLE2")); + assert!(string_settings.contains("CL_SCAN_PARSE_PDF")); + assert!(string_settings.contains("CL_SCAN_PARSE_HTML")); + assert!(string_settings.contains("CL_SCAN_PARSE_PE")); + assert!(string_settings.contains("CL_SCAN_PARSE_ELF")); + assert!(string_settings.contains("CL_SCAN_PARSE_SWF")); + assert!(string_settings.contains("CL_SCAN_PARSE_XMLDOCS")); + } + + #[test] + fn settings_default_to_standard() { + let settings: ScanSettings = Default::default(); + assert_eq!(settings.settings, 
cl_scan_options::default()); + } +} diff --git a/src/version.rs b/src/version.rs new file mode 100644 index 0000000..8592ee9 --- /dev/null +++ b/src/version.rs @@ -0,0 +1,43 @@ +use std::ffi::CStr; +use std::str; + +/// Returns the database version level that the engine supports +pub fn flevel() -> u32 { + unsafe { clamav_sys::cl_retflevel() } +} + +/// Gets the clamav engine version +/// +/// # Example +/// +/// ``` +/// use clamav_async::version; +/// +/// println!("Running version {} flevel {}", version::version(), version::flevel()); +/// ``` +pub fn version() -> String { + unsafe { + let ptr = clamav_sys::cl_retver(); + let bytes = CStr::from_ptr(ptr).to_bytes(); + str::from_utf8(bytes) + .expect("Invalid UTF8 string") + .to_string() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn version_success() { + crate::initialize().expect("initialize should succeed"); + assert!(!version().is_empty(), "expected a version"); + } + + #[test] + fn flevel_success() { + crate::initialize().expect("initialize should succeed"); + assert!(flevel() > 0, "expected an flevel"); + } +} diff --git a/src/windows_fd.rs b/src/windows_fd.rs new file mode 100644 index 0000000..6769345 --- /dev/null +++ b/src/windows_fd.rs @@ -0,0 +1,67 @@ +use std::io; +use std::mem; +use std::os::raw; + +#[cfg(windows)] +use bindings::Windows::{ + Win32::System::SystemServices::{HANDLE, INVALID_HANDLE_VALUE}, + Win32::System::Threading::GetCurrentProcess, + Win32::System::WindowsProgramming::DuplicateHandle, + Win32::System::WindowsProgramming::DUPLICATE_SAME_ACCESS, +}; + +extern "C" { + // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/open-osfhandle?view=msvc-160 + fn _open_osfhandle(osfhandle: isize, flags: raw::c_int) -> raw::c_int; + + // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/close?view=msvc-160 + fn _close(fd: raw::c_int) -> raw::c_int; +} + +pub const _O_RDONLY: raw::c_int = 0; + +pub struct WindowsFd(i32); + +impl WindowsFd { + 
pub fn new(handle: std::os::windows::io::RawHandle) -> io::Result { + let mut owned_handle = INVALID_HANDLE_VALUE; + unsafe { + if DuplicateHandle( + GetCurrentProcess(), + std::mem::transmute::<_, HANDLE>(handle), + GetCurrentProcess(), + &mut owned_handle, + 0, + false, + DUPLICATE_SAME_ACCESS, + ) + .as_bool() + == false + { + return Err(io::Error::last_os_error()); + } + + let fd = _open_osfhandle(mem::transmute(owned_handle), _O_RDONLY); + if fd == -1 { + Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Error converting Windows HANDLE to file descriptor", + )) + } else { + Ok(WindowsFd(fd)) + } + } + } + + pub fn raw(&self) -> i32 { + self.0 + } +} + +impl Drop for WindowsFd { + fn drop(&mut self) { + unsafe { + let _ = _close(self.0); + } + } +} diff --git a/test_data/build-database.sh b/test_data/build-database.sh new file mode 100755 index 0000000..d0ca93b --- /dev/null +++ b/test_data/build-database.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +testDir="/tmp/rust-clam-av-testing" +testDataDir=$(pwd) + +install -d "${testDir}" +pushd "${testDir}" +echo testing > COPYING +sigtool --md5 "${testDataDir}/files/naughty_file" > example.hdb +install -d out + +SIGNDUSER=me sigtool --unsigned --datadir=. 
--build=example.cud --max-bad-sigs 0 --cvd-version 1 + +cp example.cud "${testDataDir}/database" +popd \ No newline at end of file diff --git a/test_data/database/example.cud b/test_data/database/example.cud new file mode 100644 index 0000000000000000000000000000000000000000..c4e356c7550b7da3ac09d7714da9ac1af77942a3 GIT binary patch literal 770 zcmZ?HNz8Q&(+zWRvNAMK@G8wyFfuT-P%tvoH854sHZ(9WurdTf3sb9Pv!v7{OAB*L zGs83sON$f>Q%iH>v=qZsQzOGfOUopy2&>#wD??Kw3o|1Nb5jF_Q7{BTK)#!U0R)(s z-%hpnXEx+HYW+RU!@2%BiJoLVpA;o+3DW)?&liw|Wyescf{UrOtKkS~ouG7l*&K7=U{4;#T zj;hTpyA^wG|4pl}R^~W+I5Ugmf@zENgJ)cf4gmo&ryco>(j3*&PeitTJ^!{OSLAK0 z(6Tv;{LKWGKMF2%Y)#26bLr^uIM&7!VYIx%$~h@0ZXw%jpU$n#dNcm7>+Q~#w)y`5 z0$A9PhebxOqVc;3G%-M Date: Tue, 7 Nov 2023 10:27:29 -0800 Subject: [PATCH 02/12] Address most clippy "pedantic" lints --- Cargo.toml | 2 +- src/callback.rs | 59 +++---- src/cvd.rs | 18 +-- src/cvd/head_libclamav.rs | 18 +-- src/cvd/head_native.rs | 56 +++---- src/db.rs | 5 + src/engine.rs | 320 +++++++++++++++++++++----------------- src/error.rs | 39 ++--- src/fmap.rs | 56 +++---- src/lib.rs | 39 +++-- src/scan_settings.rs | 57 ++++--- src/version.rs | 7 +- 12 files changed, 359 insertions(+), 317 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 083ea13..40e3781 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,7 +38,7 @@ tokio = { version = "1", features = ["sync", "rt", "macros"] } tokio-stream = { version = "0.1" } [dependencies.clamav-sys] -path = "../clamav-sys" +git = "https://github.com/Cisco-Talos/clamav-sys.git" [target.'cfg(windows)'.dependencies] bindings = { version = "0.5.5", package = "clamav-rs-bindings" } diff --git a/src/callback.rs b/src/callback.rs index 5f0d906..0fa86fa 100644 --- a/src/callback.rs +++ b/src/callback.rs @@ -1,4 +1,4 @@ -use crate::{engine::ScanEvent, layer_attr::LayerAttributes, ClamError, ContentHandle}; +use crate::{engine::ScanEvent, layer_attr::LayerAttributes, ContentHandle, EngineError}; use clamav_sys::cl_error_t; use std::{ ffi::CStr, @@ 
-9,7 +9,7 @@ use std::{ /// A type defining a closure or function that, when given a recursion depth, /// file type, optional file name, and file size, returns whether or not the /// content should be duplicated into a buffer that can be passed via -/// FileInspect messages. +/// `FileInspect` messages. type ShouldCopyFileBuffer = Box Fn(u32, &'a str, Option<&'a str>, usize) -> bool>; /// A wrapper structure around the context passed to callbacks that execute with scans @@ -39,8 +39,9 @@ pub(crate) unsafe extern "C" fn progress( context: *mut c_void, ) -> cl_error_t { // All errors are handled silently as there is no other means to report errors - if let Some(sender) = - (context as *mut tokio::sync::mpsc::Sender>).as_ref() + if let Some(sender) = context + .cast::>>() + .as_ref() { let _ = sender.blocking_send(Progress::Update { total_items, @@ -58,7 +59,7 @@ pub(crate) unsafe extern "C" fn engine_pre_scan( type_: *const c_char, context: *mut c_void, ) -> cl_error_t { - if let Some(cxt) = (context as *mut ScanCbContext).as_ref() { + if let Some(cxt) = context.cast::().as_ref() { let file_type = CStr::from_ptr(type_).to_string_lossy(); let _ = cxt.sender.blocking_send(ScanEvent::PreScan { @@ -76,7 +77,7 @@ pub(crate) unsafe extern "C" fn engine_post_scan( virname: *const c_char, context: *mut c_void, ) -> cl_error_t { - if let Some(cxt) = (context as *mut ScanCbContext).as_ref() { + if let Some(cxt) = context.cast::().as_ref() { let result = result as isize; let match_name = if virname.is_null() { String::from("") @@ -99,7 +100,7 @@ pub(crate) unsafe extern "C" fn engine_virus_found( virname: *const c_char, context: *mut c_void, ) { - if let Some(cxt) = (context as *mut ScanCbContext).as_ref() { + if let Some(cxt) = context.cast::().as_ref() { let name = CStr::from_ptr(virname).to_string_lossy().into(); let _ = cxt.sender.blocking_send(ScanEvent::MatchFound { @@ -116,7 +117,7 @@ pub(crate) unsafe extern "C" fn engine_file_inspection( type_: *const c_char, 
c_ancestors: *mut *const c_char, parent_file_size: usize, - filename: *const c_char, + file_name: *const c_char, file_size: usize, file_buffer: *const c_char, recursion_level: u32, @@ -125,9 +126,9 @@ pub(crate) unsafe extern "C" fn engine_file_inspection( ) -> cl_error_t { // NOTE: this function is probably doing too much work generating structures // that won't be used. TALOSAV-28 offers a solution. - if let Some(cxt) = (context as *mut ScanCbContext).as_ref() { + if let Some(cxt) = context.cast::().as_ref() { let file_type: String = CStr::from_ptr(type_).to_string_lossy().into(); - let file_name = filename + let file_name = file_name .as_ref() .map(|p| CStr::from_ptr(p)) .map(CStr::to_string_lossy) @@ -136,20 +137,22 @@ pub(crate) unsafe extern "C" fn engine_file_inspection( let layer_attrs = LayerAttributes::from_bits(layer_attributes).unwrap_or_default(); let mut ancestors = vec![]; - if !c_ancestors.is_null() { - for i in 0..recursion_level { - let ancestor = *(c_ancestors.offset(i as isize)); - if ancestor.is_null() { - ancestors.push(None); - } else { - let ancestor = CStr::from_ptr(ancestor).to_string_lossy(); - ancestors.push(Some(ancestor.into())); + if let Ok(recursion_level) = isize::try_from(recursion_level) { + if !c_ancestors.is_null() { + for i in 0..recursion_level { + let ancestor = *(c_ancestors.offset(i)); + if ancestor.is_null() { + ancestors.push(None); + } else { + let ancestor = CStr::from_ptr(ancestor).to_string_lossy(); + ancestors.push(Some(ancestor.into())); + } } } } // Duplicate the content buffer? - let mut content = None; + let mut scanned_content = None; if let Some(cb) = &cxt.should_copy_file_buffer { // Never include content for the root document. That should be known to the caller already. 
if cb( @@ -159,7 +162,7 @@ pub(crate) unsafe extern "C" fn engine_file_inspection( file_size, ) { let buffer = unsafe { - core::slice::from_raw_parts(file_buffer as *const c_uchar, file_size) + core::slice::from_raw_parts(file_buffer.cast::(), file_size) } .to_vec(); // NOTE: the content is provided as a trait object that @@ -168,7 +171,7 @@ pub(crate) unsafe extern "C" fn engine_file_inspection( // "lightweight" object, such as a file handle or socket, or // perhaps a ref-counted buffer that releases its reference once // completely read. - content = Some(Box::pin(Cursor::new(buffer)) as ContentHandle) + scanned_content = Some(Box::pin(Cursor::new(buffer)) as ContentHandle); } } @@ -181,7 +184,7 @@ pub(crate) unsafe extern "C" fn engine_file_inspection( recursion_level, layer_attrs, ancestors, - content, + content: scanned_content, }); } @@ -192,19 +195,19 @@ pub(crate) unsafe extern "C" fn engine_file_inspection( fn dup_fd_to_file(fd: c_int) -> Option { use std::os::unix::prelude::FromRawFd; - if fd != -1 { + if fd == -1 { + None + } else { // dup the file descriptor first in case this message isn't handled // before it's closed. The file will be closed when the containing // message is discarded. let new_fd = unsafe { libc::dup(fd) }; - if new_fd != -1 { - Some(unsafe { std::fs::File::from_raw_fd(new_fd) }) - } else { + if new_fd == -1 { // TODO: log a warning? Or embed error in FileInspect message? 
None + } else { + Some(unsafe { std::fs::File::from_raw_fd(new_fd) }) } - } else { - None } } diff --git a/src/cvd.rs b/src/cvd.rs index ae64ecb..a168ed9 100644 --- a/src/cvd.rs +++ b/src/cvd.rs @@ -7,19 +7,19 @@ pub mod head_libclamav; pub mod head_native; #[cfg(not(feature = "native-impl"))] -pub use head_libclamav::CvdHdr; +pub use head_libclamav::Header; #[cfg(feature = "native-impl")] -pub use head_native::CvdHdr; +pub use head_native::Header; -pub trait CvdMeta { +pub trait Meta { /// Load fromm the initial bytes found at the beginning of the CVD/CLD - fn from_header_bytes(bytes: &[u8; 512]) -> Result + fn from_header_bytes(bytes: &[u8; 512]) -> Result where Self: Sized; /// Obtain a CVD/CLD header from an open file - fn from_file(fh: &mut File) -> Result + fn from_file(fh: &mut File) -> Result where Self: Sized, { @@ -31,7 +31,7 @@ pub trait CvdMeta { } /// Obtain a CVD/CLD header from the specified path - fn from_path(path: &Path) -> Result + fn from_path(path: &Path) -> Result where Self: Sized, { @@ -60,12 +60,12 @@ pub trait CvdMeta { /// Database builder's ID fn builder(&self) -> Cow<'_, str>; - /// Creation time as seconds - fn stime(&self) -> usize; + /// Creation time as seconds since Unix epoch + fn stime(&self) -> u64; } #[derive(Debug, Error)] -pub enum CvdHeadError { +pub enum HeadError { /// Generic error from the libclamav parser. 
Unfortunately, it outputs its /// error via a message #[error("unable to parse (see log output)")] diff --git a/src/cvd/head_libclamav.rs b/src/cvd/head_libclamav.rs index d9a1944..58f29f7 100644 --- a/src/cvd/head_libclamav.rs +++ b/src/cvd/head_libclamav.rs @@ -1,20 +1,20 @@ -use super::{CvdHeadError, CvdMeta}; +use super::{HeadError, Meta}; use std::{borrow::Cow, ffi::CStr}; /// The header of a CVD -pub struct CvdHdr(*mut clamav_sys::cl_cvd); +pub struct Header(*mut clamav_sys::cl_cvd); -impl CvdMeta for CvdHdr { +impl Meta for Header { /// Parse a CVD header from a buffer obtained from the beginning of a CVD /// (or CLD) file - fn from_header_bytes(bytes: &[u8; 512]) -> Result { + fn from_header_bytes(bytes: &[u8; 512]) -> Result { unsafe { let raw = clamav_sys::cl_cvdparse(bytes.as_ptr() as *const i8); if raw.is_null() { - Err(CvdHeadError::Parse) + Err(HeadError::Parse) } else { - Ok(CvdHdr(raw)) + Ok(Header(raw)) } } } @@ -59,12 +59,12 @@ impl CvdMeta for CvdHdr { } /// Creation time as seconds - fn stime(&self) -> usize { - unsafe { (*self.0).stime as usize } + fn stime(&self) -> u64 { + unsafe { (*self.0).stime } } } -impl std::fmt::Debug for CvdHdr { +impl std::fmt::Debug for Header { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("CvdHead") .field("f_level", &self.f_level()) diff --git a/src/cvd/head_native.rs b/src/cvd/head_native.rs index 8c2c4ae..4eeadd2 100644 --- a/src/cvd/head_native.rs +++ b/src/cvd/head_native.rs @@ -1,4 +1,4 @@ -use super::{CvdHeadError, CvdMeta}; +use super::{HeadError, Meta}; use std::{ borrow::Cow, str::{self, FromStr}, @@ -15,7 +15,7 @@ const CVD_TIMESTAMP_FMT: &[time::format_description::FormatItem] = time::macros: "[day padding:zero] [month repr:short] [year] [hour]:[minute] [offset_hour][offset_minute]" ); -pub struct CvdHdr { +pub struct Header { version: usize, n_sigs: usize, f_level: usize, @@ -27,44 +27,43 @@ pub struct CvdHdr { md5_str: String, } -impl CvdMeta for CvdHdr { - fn 
from_header_bytes(bytes: &[u8; 512]) -> Result +impl Meta for Header { + fn from_header_bytes(bytes: &[u8; 512]) -> Result where Self: std::marker::Sized, { let mut fields = bytes .strip_prefix(CVD_HEAD_MAGIC) - .ok_or(CvdHeadError::BadMagic)? + .ok_or(HeadError::BadMagic)? .split(|b| *b == b':'); let creation_time_str = fields .next() .map(str::from_utf8) .transpose()? - .ok_or(CvdHeadError::MissingCreationTime)?; + .ok_or(HeadError::MissingCreationTime)?; let version = fields .next() .map(str::from_utf8) .transpose()? - .ok_or(CvdHeadError::MissingVersion)? + .ok_or(HeadError::MissingVersion)? .parse()?; let n_sigs: usize = fields .next() .map(str::from_utf8) .transpose()? - .ok_or(CvdHeadError::MissingNumberOfSigs)? + .ok_or(HeadError::MissingNumberOfSigs)? .parse()?; let f_level: usize = - str::from_utf8(fields.next().ok_or(CvdHeadError::MissingFLevel)?)?.parse()?; + str::from_utf8(fields.next().ok_or(HeadError::MissingFLevel)?)?.parse()?; // Just preserve this verbatim. let md5_str = fields .next() .map(str::from_utf8) .transpose()? - .ok_or(CvdHeadError::MissingMd5)? + .ok_or(HeadError::MissingMd5)? .into(); - let dsig = str::from_utf8(fields.next().ok_or(CvdHeadError::MissingDSig)?)?.into(); - let builder = - std::str::from_utf8(fields.next().ok_or(CvdHeadError::MissingBuilder)?)?.into(); + let dsig = str::from_utf8(fields.next().ok_or(HeadError::MissingDSig)?)?.into(); + let builder = std::str::from_utf8(fields.next().ok_or(HeadError::MissingBuilder)?)?.into(); // This field is not present in older signature database files. It // should be the last field (and will be padded out with spaces at the @@ -85,29 +84,31 @@ impl CvdMeta for CvdHdr { .transpose()? .map(|stime| UNIX_EPOCH.checked_add(Duration::from_secs(stime as u64))) // ...and check that that worked - .ok_or(CvdHeadError::STimeTooLarge)? + .ok_or(HeadError::STimeTooLarge)? 
// It's there, so this isn't an old DB - .map(|stime| Ok((stime, false))) - // Or it wasn't there, so this *is* an old DB - .unwrap_or_else(|| { - // Parse the string version, e.g.: "16 Sep 2021 08:32 -0400" - // Oddly, there are no seconds. So this is very much a custom format - time::OffsetDateTime::parse(creation_time_str, CVD_TIMESTAMP_FMT) - // And mark this as old-format - .map(|odt| (odt.into(), true)) - })?; + .map_or_else( + // It wasn't there, so this *is* an old DB + || { + // Parse the string version, e.g.: "16 Sep 2021 08:32 -0400" + // Oddly, there are no seconds. So this is very much a custom format + time::OffsetDateTime::parse(creation_time_str, CVD_TIMESTAMP_FMT) + // And mark this as old-format + .map(|odt| (odt.into(), true)) + }, + |stime| Ok((stime, false)), + )?; let ctime_str = time::OffsetDateTime::from(ctime) .format(CVD_TIMESTAMP_FMT) .expect("format timestamp"); Ok(Self { - ctime, version, n_sigs, f_level, dsig, builder, + ctime, is_old_db, ctime_str, md5_str, @@ -143,16 +144,17 @@ impl CvdMeta for CvdHdr { std::borrow::Cow::from(&self.builder) } - fn stime(&self) -> usize { + fn stime(&self) -> u64 { self.ctime .duration_since(UNIX_EPOCH) .expect("compute seconds since epoch") - .as_secs() as usize + .as_secs() } } -impl CvdHdr { +impl Header { /// Whether or not this is an old-format DB (no stime field in header) + #[must_use] pub fn is_old_db(&self) -> bool { self.is_old_db } diff --git a/src/db.rs b/src/db.rs index 6ebf097..53f9adf 100644 --- a/src/db.rs +++ b/src/db.rs @@ -4,6 +4,11 @@ use std::str; use clamav_sys::cl_retdbdir; /// Gets the default database directory for clamav +/// +/// # Panics +/// +/// Will panic if the default directory name is not valid UTF-8 +#[must_use] pub fn default_directory() -> String { unsafe { let ptr = cl_retdbdir(); diff --git a/src/engine.rs b/src/engine.rs index bd4903f..613b84b 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1,7 +1,9 @@ -use crate::error::ClamError; +use crate::error::Error as 
ClamError; use clamav_sys::cl_engine_field; use clamav_sys::{cl_error_t, time_t}; +use core::num; use derivative::Derivative; +use std::ffi::NulError; use std::{path::Path, pin::Pin, sync::Arc, time}; #[cfg(windows)] @@ -27,10 +29,7 @@ pub enum ScanResult { } impl ScanResult { - pub(crate) fn from_ffi( - scan_result: cl_error_t, - c_virname: *const i8, - ) -> Result { + pub(crate) fn from_ffi(scan_result: cl_error_t, c_virname: *const i8) -> Result { use std::ffi::CStr; match scan_result { @@ -41,7 +40,7 @@ impl ScanResult { CStr::from_ptr(c_virname).to_string_lossy().to_string(), )) }, - code => Err(ClamError::new(code)), + code => Err(ClamError::new(code).into()), } } } @@ -75,11 +74,11 @@ pub enum ScanEvent { parent_file_size: usize, recursion_level: u32, }, - Result(Result), + Result(Result), } #[derive(Debug, PartialEq, Eq)] -pub enum EngineValueType { +pub enum ValueType { U32, U64, String, @@ -90,17 +89,21 @@ pub enum EngineValueType { pub struct ClamTime(time_t); impl ClamTime { + #[must_use] + // This function can't actually panic unless ClamTime (which is a time_t) is + // somehow larger than a u64 + #[allow(clippy::missing_panics_doc)] pub fn as_system_time(&self) -> time::SystemTime { if self.0 >= 0 { - time::UNIX_EPOCH + time::Duration::from_secs(self.0 as u64) + time::UNIX_EPOCH + time::Duration::from_secs(u64::try_from(self.0).unwrap()) } else { - time::UNIX_EPOCH - time::Duration::from_secs(-self.0 as u64) + time::UNIX_EPOCH - time::Duration::from_secs(u64::try_from(-self.0).unwrap()) } } } #[derive(Debug)] -pub enum EngineValue { +pub enum SettingsValue { U32(u32), U64(u64), String(String), @@ -128,6 +131,22 @@ impl EngineHandle { unsafe impl Send for EngineHandle {} unsafe impl Sync for EngineHandle {} +/// All errors that can be reported during engine configuration and execution. 
+#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("libclamav error: {0}")] + Clam(#[from] ClamError), + + #[error("join error: {0}")] + Join(#[from] tokio::task::JoinError), + + #[error("string provided contains embedded NUL")] + Nul(#[from] NulError), + + #[error("unable to cast number: {0}")] + TryFromInt(#[from] num::TryFromIntError), +} + impl Engine { /// Initialises the engine pub fn new() -> Self { @@ -157,25 +176,25 @@ impl Engine { } } - /// Obtain a new reference to the wrapped EngineHandle. It must still be + /// Obtain a new reference to the wrapped `EngineHandle`. It must still be /// locked prior to use. fn handle(&self) -> Arc> { self.handle.clone() } - pub async fn compile(&self) -> Result<(), ClamError> { + /// Compile the loaded signatures + pub async fn compile(&self) -> Result<(), Error> { let engine_handle = self.handle(); tokio::task::spawn_blocking(move || ffi::compile(engine_handle.blocking_write().as_ptr())) - .await - .expect("join thread") + .await? } - /// An extended version of `compile()` that streams [crate::callback::Progress] events (concluding with a - /// [crate::callback::Progress::Result] event). + /// An extended version of `compile()` that streams [`crate::callback::Progress`] events (concluding with a + /// [`crate::callback::Progress::Result`] event). 
- pub async fn compile_with_progress( + pub fn compile_with_progress( &mut self, - ) -> tokio_stream::wrappers::ReceiverStream> { + ) -> tokio_stream::wrappers::ReceiverStream> { let (sender, receiver) = tokio::sync::mpsc::channel(128); let engine_handle = self.handle(); @@ -186,10 +205,10 @@ impl Engine { clamav_sys::cl_engine_set_clcb_engine_compile_progress( engine_handle.as_ptr(), Some(crate::callback::progress), - context as *mut libc::c_void, + context.cast::(), ); - let result = ffi::compile(engine_handle.as_ptr()); + let result = ffi::compile(engine_handle.as_ptr()).map_err(Error::from); // Clear the pointer from the libclamav engine context clamav_sys::cl_engine_set_clcb_engine_compile_progress( @@ -200,15 +219,13 @@ impl Engine { // Reclaim the sender let sender = Box::from_raw(context); - sender - .blocking_send(crate::callback::Progress::Complete(result)) - .expect("blocking send"); + sender.blocking_send(crate::callback::Progress::Complete(result)) }); receiver.into() } - pub async fn load_databases<'a, P>(&self, dbpath: &'a P) -> Result + pub async fn load_databases<'a, P>(&self, dbpath: &'a P) -> Result where P: 'a + ?Sized + AsRef, { @@ -220,15 +237,15 @@ impl Engine { result }) .await - .unwrap() + .map_err(Error::from)? } - /// An extended version of `load_databases()` that streams [crate::callback::Progress] events (concluding with - /// a [crate::callback::Progress::Result] event). - pub async fn load_databases_with_progress<'a, P>( + /// An extended version of `load_databases()` that streams [`crate::callback::Progress`] events (concluding with + /// a [`crate::callback::Progress::Result`] event). 
+ pub fn load_databases_with_progress<'a, P>( &mut self, dbpath: &'a P, - ) -> tokio_stream::wrappers::ReceiverStream> + ) -> tokio_stream::wrappers::ReceiverStream> where P: 'a + ?Sized + AsRef, { @@ -243,16 +260,16 @@ impl Engine { clamav_sys::cl_engine_set_clcb_sigload_progress( engine_handle.as_ptr(), Some(crate::callback::progress), - context as *mut libc::c_void, + context.cast::(), ); - let result = ffi::load_databases(dbpath.as_ref(), engine_handle.as_ptr()); + let load_db_result = + ffi::load_databases(dbpath.as_ref(), engine_handle.as_ptr()).map_err(Error::from); // Reclaim the sender let sender = Box::from_raw(context); - sender - .blocking_send(crate::callback::Progress::Complete(result)) - .expect("blocking send"); + let final_result = + sender.blocking_send(crate::callback::Progress::Complete(load_db_result)); // Clear the pointer from the libclamav engine context clamav_sys::cl_engine_set_clcb_sigload_progress( @@ -260,17 +277,19 @@ impl Engine { None, std::ptr::null_mut(), ); + + final_result }); receiver.into() } - pub async fn scan>( + pub fn scan>( &self, target: T, filename: Option<&str>, mut settings: crate::scan_settings::ScanSettings, - ) -> ReceiverStream { + ) -> Result, Error> { use crate::callback::ScanCbContext; use crate::fmap::Fmap; use std::ffi::CString; @@ -280,7 +299,7 @@ impl Engine { let fmap: Fmap = target.into(); let (sender, receiver) = tokio::sync::mpsc::channel::(128); - let c_filename = filename.map(|n| CString::new(n).expect("CString::new failed")); + let c_filename = filename.map(CString::new).transpose()?; let engine_handle = self.handle.clone(); let fmap_handle = fmap.handle(); @@ -312,7 +331,7 @@ impl Engine { ptr::null_mut(), engine_handle.blocking_read().as_ptr(), &mut settings.settings, - c_sender as *mut c_void, + c_sender.cast::(), ) }; // Reclaim the sender from C-land and send a final message @@ -324,58 +343,59 @@ impl Engine { .blocking_send(ScanEvent::Result(ScanResult::from_ffi(retval, c_virname))); }); - 
receiver.into() + Ok(receiver.into()) } - async fn get(&self, field: cl_engine_field) -> Result { + async fn get(&self, field: cl_engine_field) -> Result { let engine_handle = self.handle(); let engine_handle = engine_handle.read().await; ffi::get(engine_handle.as_ptr(), field) } - async fn set(&self, field: cl_engine_field, value: EngineValue) -> Result<(), ClamError> { + async fn set(&self, field: cl_engine_field, value: SettingsValue) -> Result<(), Error> { dbg!(&field, &value); let engine_handle = self.handle.write().await; - ffi::set(engine_handle.as_ptr(), field, value) + ffi::set(engine_handle.as_ptr(), field, value).map_err(Error::from) } - pub async fn database_version(&self) -> Result { - if let EngineValue::U32(value) = self.get(cl_engine_field::CL_ENGINE_DB_VERSION).await? { + pub async fn database_version(&self) -> Result { + if let SettingsValue::U32(value) = self.get(cl_engine_field::CL_ENGINE_DB_VERSION).await? { Ok(value) } else { - Err(ClamError::new(cl_error_t::CL_EARG)) + Err(ClamError::new(cl_error_t::CL_EARG).into()) } } - pub async fn database_timestamp(&self) -> Result { - if let EngineValue::Time(value) = self.get(cl_engine_field::CL_ENGINE_DB_TIME).await? { + pub async fn database_timestamp(&self) -> Result { + if let SettingsValue::Time(value) = self.get(cl_engine_field::CL_ENGINE_DB_TIME).await? 
{ Ok(value.as_system_time()) } else { - Err(ClamError::new(cl_error_t::CL_EARG)) + Err(ClamError::new(cl_error_t::CL_EARG).into()) } } - pub async fn disable_cache(&self, disable_cache: bool) -> Result<(), ClamError> { + pub async fn disable_cache(&self, disable_cache: bool) -> Result<(), Error> { self.set( cl_engine_field::CL_ENGINE_DISABLE_CACHE, - EngineValue::U32(disable_cache.into()), + SettingsValue::U32(disable_cache.into()), ) .await } - pub async fn set_max_scansize(&self, max_scansize: u64) -> Result<(), ClamError> { + pub async fn set_max_scansize(&self, max_scansize: u64) -> Result<(), Error> { self.set( cl_engine_field::CL_ENGINE_MAX_SCANSIZE, - EngineValue::U64(max_scansize), + SettingsValue::U64(max_scansize), ) .await } - pub async fn max_scansize(&self) -> Result { - if let EngineValue::U64(value) = self.get(cl_engine_field::CL_ENGINE_MAX_SCANSIZE).await? { + pub async fn max_scansize(&self) -> Result { + if let SettingsValue::U64(value) = self.get(cl_engine_field::CL_ENGINE_MAX_SCANSIZE).await? 
+ { Ok(value) } else { - Err(ClamError::new(cl_error_t::CL_EARG)) + Err(ClamError::new(cl_error_t::CL_EARG).into()) } } } @@ -395,8 +415,7 @@ impl Drop for EngineHandle { } mod ffi { - use super::{ClamTime, DatabaseStats, EngineValue, EngineValueType}; - use crate::ClamError; + use super::{ClamError, ClamTime, DatabaseStats, Error, SettingsValue, ValueType}; use clamav_sys::{ cl_engine_field, cl_engine_get_num, cl_engine_get_str, cl_engine_set_num, cl_engine_set_str, cl_error_t, cl_load, time_t, CL_DB_STDOPT, @@ -408,12 +427,12 @@ mod ffi { path::Path, }; - pub(super) fn compile(handle: *mut clamav_sys::cl_engine) -> Result<(), ClamError> { + pub(super) fn compile(handle: *mut clamav_sys::cl_engine) -> Result<(), Error> { unsafe { let result = clamav_sys::cl_engine_compile(handle); match result { cl_error_t::CL_SUCCESS => Ok(()), - _ => Err(ClamError::new(result)), + _ => Err(ClamError::new(result).into()), } } } @@ -421,7 +440,7 @@ mod ffi { pub(super) fn load_databases( dbpath: &Path, handle: *mut clamav_sys::cl_engine, - ) -> Result { + ) -> Result { let raw_path = CString::new(dbpath.as_os_str().as_bytes()).unwrap(); unsafe { let mut signature_count: u32 = 0; @@ -433,7 +452,7 @@ mod ffi { ); match result { cl_error_t::CL_SUCCESS => Ok(DatabaseStats { signature_count }), - _ => Err(ClamError::new(result)), + _ => Err(ClamError::new(result).into()), } } } @@ -441,45 +460,48 @@ mod ffi { pub(super) fn get( engine_handle: *mut clamav_sys::cl_engine, field: cl_engine_field, - ) -> Result { + ) -> Result { unsafe { match get_field_type(field) { - EngineValueType::U32 => { + ValueType::U32 => { let mut err: c_int = 0; - let value = cl_engine_get_num(engine_handle, field, &mut err) as u32; - if err != 0 { - Err(ClamError::new(mem::transmute(err))) + let value: u32 = + cl_engine_get_num(engine_handle, field, &mut err).try_into()?; + if err == 0 { + Ok(SettingsValue::U32(value)) } else { - Ok(EngineValue::U32(value)) + Err(ClamError::new(mem::transmute(err)).into()) } } 
- EngineValueType::U64 => { + ValueType::U64 => { let mut err: c_int = 0; - let value = cl_engine_get_num(engine_handle, field, &mut err) as u64; - if err != 0 { - Err(ClamError::new(mem::transmute(err))) + let value = cl_engine_get_num(engine_handle, field, &mut err) + .try_into() + .expect("cast i64 to u64"); + if err == 0 { + Ok(SettingsValue::U64(value)) } else { - Ok(EngineValue::U64(value)) + Err(ClamError::new(mem::transmute(err)).into()) } } - EngineValueType::String => { + ValueType::String => { let mut err = 0; let value = cl_engine_get_str(engine_handle, field, &mut err); - if err != 0 { - Err(ClamError::new(mem::transmute(err))) - } else { - Ok(EngineValue::String( + if err == 0 { + Ok(SettingsValue::String( CStr::from_ptr(value).to_str().unwrap().to_string(), )) + } else { + Err(ClamError::new(mem::transmute(err)).into()) } } - EngineValueType::Time => { + ValueType::Time => { let mut err = 0; let value = cl_engine_get_num(engine_handle, field, &mut err) as time_t; - if err != 0 { - Err(ClamError::new(mem::transmute(err))) + if err == 0 { + Ok(SettingsValue::Time(ClamTime(value))) } else { - Ok(EngineValue::Time(ClamTime(value))) + Err(ClamError::new(mem::transmute(err)).into()) } } } @@ -489,97 +511,107 @@ mod ffi { pub(super) fn set( engine_handle: *mut clamav_sys::cl_engine, field: cl_engine_field, - value: EngineValue, - ) -> Result<(), ClamError> { + value: SettingsValue, + ) -> Result<(), Error> { let expected_type = get_field_type(field); let actual_type = match &value { - EngineValue::U32(_) => EngineValueType::U32, - EngineValue::U64(_) => EngineValueType::U64, - EngineValue::String(_) => EngineValueType::String, - EngineValue::Time(_) => EngineValueType::Time, + SettingsValue::U32(_) => ValueType::U32, + SettingsValue::U64(_) => ValueType::U64, + SettingsValue::String(_) => ValueType::String, + SettingsValue::Time(_) => ValueType::Time, }; if expected_type != actual_type { - return Err(ClamError::new(cl_error_t::CL_EARG)); + return 
Err(ClamError::new(cl_error_t::CL_EARG).into()); } unsafe { match value { - EngineValue::U32(val) => { - let err = cl_engine_set_num(engine_handle, field, val as i64); - if err != cl_error_t::CL_SUCCESS { - Err(ClamError::new(err)) - } else { + SettingsValue::U32(val) => { + let err = cl_engine_set_num( + engine_handle, + field, + val.try_into().expect("cast u32 to i64"), + ); + if err == cl_error_t::CL_SUCCESS { Ok(()) + } else { + Err(ClamError::new(err).into()) } } - EngineValue::U64(val) => { - let err = cl_engine_set_num(engine_handle, field, val as i64); - if err != cl_error_t::CL_SUCCESS { - Err(ClamError::new(err)) - } else { + SettingsValue::U64(val) => { + let err = cl_engine_set_num( + engine_handle, + field, + val.try_into().expect("cast u64 to i64"), + ); + if err == cl_error_t::CL_SUCCESS { Ok(()) + } else { + Err(ClamError::new(err).into()) } } - EngineValue::String(val) => { + SettingsValue::String(val) => { let val = CString::new(val).unwrap(); let err = cl_engine_set_str(engine_handle, field, val.as_ptr()); - if err != cl_error_t::CL_SUCCESS { - Err(ClamError::new(err)) - } else { + if err == cl_error_t::CL_SUCCESS { Ok(()) + } else { + Err(ClamError::new(err).into()) } } - EngineValue::Time(ClamTime(val)) => { + SettingsValue::Time(ClamTime(val)) => { let err = cl_engine_set_num(engine_handle, field, val); - if err != cl_error_t::CL_SUCCESS { - Err(ClamError::new(err)) - } else { + if err == cl_error_t::CL_SUCCESS { Ok(()) + } else { + Err(ClamError::new(err).into()) } } } } } - fn get_field_type(field: cl_engine_field) -> EngineValueType { + fn get_field_type(field: cl_engine_field) -> ValueType { match field { - cl_engine_field::CL_ENGINE_MAX_SCANSIZE => EngineValueType::U64, - cl_engine_field::CL_ENGINE_MAX_FILESIZE => EngineValueType::U64, - cl_engine_field::CL_ENGINE_MAX_RECURSION => EngineValueType::U32, - cl_engine_field::CL_ENGINE_MAX_FILES => EngineValueType::U32, - cl_engine_field::CL_ENGINE_MIN_CC_COUNT => EngineValueType::U32, - 
cl_engine_field::CL_ENGINE_MIN_SSN_COUNT => EngineValueType::U32, - cl_engine_field::CL_ENGINE_PUA_CATEGORIES => EngineValueType::String, - cl_engine_field::CL_ENGINE_DB_OPTIONS => EngineValueType::U32, - cl_engine_field::CL_ENGINE_DB_VERSION => EngineValueType::U32, - cl_engine_field::CL_ENGINE_DB_TIME => EngineValueType::Time, - cl_engine_field::CL_ENGINE_AC_ONLY => EngineValueType::U32, - cl_engine_field::CL_ENGINE_AC_MINDEPTH => EngineValueType::U32, - cl_engine_field::CL_ENGINE_AC_MAXDEPTH => EngineValueType::U32, - cl_engine_field::CL_ENGINE_TMPDIR => EngineValueType::String, - cl_engine_field::CL_ENGINE_KEEPTMP => EngineValueType::U32, - cl_engine_field::CL_ENGINE_BYTECODE_SECURITY => EngineValueType::U32, - cl_engine_field::CL_ENGINE_BYTECODE_TIMEOUT => EngineValueType::U32, - cl_engine_field::CL_ENGINE_BYTECODE_MODE => EngineValueType::U32, - cl_engine_field::CL_ENGINE_MAX_EMBEDDEDPE => EngineValueType::U64, - cl_engine_field::CL_ENGINE_MAX_HTMLNORMALIZE => EngineValueType::U64, - cl_engine_field::CL_ENGINE_MAX_HTMLNOTAGS => EngineValueType::U64, - cl_engine_field::CL_ENGINE_MAX_SCRIPTNORMALIZE => EngineValueType::U64, - cl_engine_field::CL_ENGINE_MAX_ZIPTYPERCG => EngineValueType::U64, - cl_engine_field::CL_ENGINE_FORCETODISK => EngineValueType::U32, - cl_engine_field::CL_ENGINE_DISABLE_CACHE => EngineValueType::U32, - cl_engine_field::CL_ENGINE_DISABLE_PE_STATS => EngineValueType::U32, - cl_engine_field::CL_ENGINE_STATS_TIMEOUT => EngineValueType::U32, - cl_engine_field::CL_ENGINE_MAX_PARTITIONS => EngineValueType::U32, - cl_engine_field::CL_ENGINE_MAX_ICONSPE => EngineValueType::U32, - cl_engine_field::CL_ENGINE_MAX_RECHWP3 => EngineValueType::U32, - cl_engine_field::CL_ENGINE_MAX_SCANTIME => EngineValueType::U32, - cl_engine_field::CL_ENGINE_PCRE_MATCH_LIMIT => EngineValueType::U64, - cl_engine_field::CL_ENGINE_PCRE_RECMATCH_LIMIT => EngineValueType::U64, - cl_engine_field::CL_ENGINE_PCRE_MAX_FILESIZE => EngineValueType::U64, - 
cl_engine_field::CL_ENGINE_DISABLE_PE_CERTS => EngineValueType::U32, - cl_engine_field::CL_ENGINE_PE_DUMPCERTS => EngineValueType::U32, + cl_engine_field::CL_ENGINE_MAX_SCANSIZE | cl_engine_field::CL_ENGINE_MAX_FILESIZE => { + ValueType::U64 + } + cl_engine_field::CL_ENGINE_PUA_CATEGORIES | cl_engine_field::CL_ENGINE_TMPDIR => { + ValueType::String + } + cl_engine_field::CL_ENGINE_DB_TIME => ValueType::Time, + cl_engine_field::CL_ENGINE_MAX_RECURSION + | cl_engine_field::CL_ENGINE_MAX_FILES + | cl_engine_field::CL_ENGINE_MIN_CC_COUNT + | cl_engine_field::CL_ENGINE_MIN_SSN_COUNT + | cl_engine_field::CL_ENGINE_DB_OPTIONS + | cl_engine_field::CL_ENGINE_DB_VERSION + | cl_engine_field::CL_ENGINE_AC_ONLY + | cl_engine_field::CL_ENGINE_AC_MINDEPTH + | cl_engine_field::CL_ENGINE_AC_MAXDEPTH + | cl_engine_field::CL_ENGINE_KEEPTMP + | cl_engine_field::CL_ENGINE_BYTECODE_SECURITY + | cl_engine_field::CL_ENGINE_BYTECODE_TIMEOUT + | cl_engine_field::CL_ENGINE_BYTECODE_MODE + | cl_engine_field::CL_ENGINE_DISABLE_PE_CERTS + | cl_engine_field::CL_ENGINE_PE_DUMPCERTS + | cl_engine_field::CL_ENGINE_FORCETODISK + | cl_engine_field::CL_ENGINE_DISABLE_CACHE + | cl_engine_field::CL_ENGINE_DISABLE_PE_STATS + | cl_engine_field::CL_ENGINE_STATS_TIMEOUT + | cl_engine_field::CL_ENGINE_MAX_PARTITIONS + | cl_engine_field::CL_ENGINE_MAX_ICONSPE + | cl_engine_field::CL_ENGINE_MAX_RECHWP3 + | cl_engine_field::CL_ENGINE_MAX_SCANTIME => ValueType::U32, + cl_engine_field::CL_ENGINE_MAX_EMBEDDEDPE + | cl_engine_field::CL_ENGINE_MAX_HTMLNORMALIZE + | cl_engine_field::CL_ENGINE_MAX_HTMLNOTAGS + | cl_engine_field::CL_ENGINE_MAX_SCRIPTNORMALIZE + | cl_engine_field::CL_ENGINE_MAX_ZIPTYPERCG + | cl_engine_field::CL_ENGINE_PCRE_MATCH_LIMIT + | cl_engine_field::CL_ENGINE_PCRE_RECMATCH_LIMIT + | cl_engine_field::CL_ENGINE_PCRE_MAX_FILESIZE => ValueType::U64, field => panic!("{field:?} not yet supported"), } } diff --git a/src/error.rs b/src/error.rs index e588630..4a2ad81 100644 --- a/src/error.rs +++ 
b/src/error.rs @@ -1,60 +1,55 @@ use std::error; use std::ffi::CStr; use std::fmt; -use std::str; use clamav_sys::cl_error_t; -/// An error indicating a clam failure. +/// An error reported directly from a libclamav function call #[derive(Clone, PartialEq, Eq)] -pub struct ClamError { +pub struct Error { code: cl_error_t, } -impl ClamError { +impl Error { + #[must_use] pub fn new(code: cl_error_t) -> Self { - ClamError { code } + Error { code } } + #[must_use] pub fn string_error(&self) -> String { unsafe { let ptr = clamav_sys::cl_strerror(self.code); let bytes = CStr::from_ptr(ptr).to_bytes(); - str::from_utf8(bytes) - .expect("Invalid UTF8 string") - .to_string() + String::from_utf8_lossy(bytes).to_string() } } - pub fn code(&self) -> i32 { - self.code.0 as i32 + #[must_use] + pub fn code(&self) -> u32 { + self.code.0 } } -impl From for ClamError { +impl From for Error { fn from(code: cl_error_t) -> Self { Self::new(code) } } -impl fmt::Display for ClamError { +impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "cl_error {}: {}", - self.code(), - self.string_error() - ) + write!(f, "cl_error {}: {}", self.code(), self.string_error()) } } -impl fmt::Debug for ClamError { +impl fmt::Debug for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self) + write!(f, "{self}") } } -impl error::Error for ClamError { +impl error::Error for Error { fn source(&self) -> Option<&(dyn error::Error + 'static)> { None } @@ -66,7 +61,7 @@ mod tests { #[test] fn error_as_string_success() { - let err = ClamError::new(cl_error_t::CL_EFORMAT); + let err = Error::new(cl_error_t::CL_EFORMAT); let err_string = err.to_string(); dbg!(&err_string); assert!( diff --git a/src/fmap.rs b/src/fmap.rs index 4324a35..321afb9 100644 --- a/src/fmap.rs +++ b/src/fmap.rs @@ -25,44 +25,26 @@ use bindings::Windows::{ }; use clamav_sys::{cl_fmap_close, cl_fmap_open_handle, cl_fmap_open_memory, cl_fmap_t}; use std::{ - error, 
fmt, fs::File, + num::TryFromIntError, os::{self, raw::c_void, unix::prelude::AsRawFd}, - result, sync::Arc, }; use tokio::sync::Mutex; -#[derive(Debug, Clone)] -pub struct MapError; +#[derive(Debug, thiserror::Error)] +pub enum MapError { + #[error("IO error: {0}")] + Io(#[from] std::io::Error), -impl fmt::Display for MapError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Failed to open mapping") - } -} - -impl error::Error for MapError { - fn source(&self) -> Option<&(dyn error::Error + 'static)> { - None - } -} + #[error("source consumed")] + Consumed, -impl MapError { - pub fn new() -> MapError { - MapError {} - } + #[error("converting integer: {0}")] + TryFromInt(#[from] TryFromIntError), } -impl Default for MapError { - fn default() -> Self { - Self::new() - } -} - -pub type Result = result::Result; - #[cfg(windows)] extern "C" fn cl_pread( handle: *mut os::raw::c_void, @@ -110,29 +92,29 @@ extern "C" fn cl_pread( } } -/// A safer abstraction around ClamAV's cl_fmap_t. +/// A safer abstraction around `ClamAV`'s `cl_fmap_t`. 
#[derive(Clone)] pub struct Fmap { handle: Arc>, } pub(crate) struct FmapHandle { - source: Option, + source: Option, pub(crate) fmap: *mut cl_fmap_t, } -pub enum FmapSource { +pub enum Source { Vec(Vec), File(std::fs::File), } impl From> for Fmap { fn from(vec: Vec) -> Self { - let fmap = unsafe { cl_fmap_open_memory(vec.as_ptr() as *const c_void, vec.len()) }; + let fmap = unsafe { cl_fmap_open_memory(vec.as_ptr().cast::(), vec.len()) }; Self { handle: Arc::new(Mutex::new(FmapHandle { - source: Some(FmapSource::Vec(vec)), + source: Some(Source::Vec(vec)), fmap, })), } @@ -140,13 +122,13 @@ impl From> for Fmap { } impl TryFrom for Fmap { - type Error = std::io::Error; + type Error = MapError; fn try_from(file: File) -> std::result::Result { let offset = 0; let len = file.metadata()?.len(); let aging = true; - Ok(Self::from_file(file, offset, len as usize, aging)) + Ok(Self::from_file(file, offset, len.try_into()?, aging)) } } @@ -162,7 +144,7 @@ impl Fmap { Self { handle: Arc::new(Mutex::new(FmapHandle { fmap, - source: Some(FmapSource::File(file)), + source: Some(Source::File(file)), })), } } @@ -173,9 +155,9 @@ impl Fmap { /// Reclaim the underlying structure from which the Fmap was created - pub async fn into_inner(self) -> FmapSource { + pub async fn into_inner(self) -> Result { let mut handle = self.handle.lock().await; - handle.source.take().unwrap() + handle.source.take().ok_or(MapError::Consumed) } } diff --git a/src/lib.rs b/src/lib.rs index dd63ab6..586653c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,6 @@ +#![warn(clippy::all, clippy::pedantic)] +#![allow(clippy::missing_errors_doc)] + /// Callback support structures and support functions pub mod callback; pub mod db; @@ -16,7 +19,8 @@ pub mod cvd; pub mod windows_fd; use clamav_sys::{cl_error_t, cl_init, cl_initialize_crypto}; -pub use error::ClamError; +pub use engine::Error as EngineError; +pub use error::Error as ClamError; use lazy_static::lazy_static; use std::{ ffi::CStr, @@ -38,6 +42,12 
@@ pub fn initialize() -> Result<(), ClamError> { static ONCE: Once = Once::new(); static mut RESULT: cl_error_t = cl_error_t::CL_SUCCESS; unsafe { + extern "C" fn cleanup() { + unsafe { + clamav_sys::cl_cleanup_crypto(); + } + } + ONCE.call_once(|| { RESULT = cl_init(clamav_sys::CL_INIT_DEFAULT); // this function always returns OK @@ -47,12 +57,6 @@ pub fn initialize() -> Result<(), ClamError> { } }); - extern "C" fn cleanup() { - unsafe { - clamav_sys::cl_cleanup_crypto(); - } - } - match RESULT { cl_error_t::CL_SUCCESS => Ok(()), _ => Err(ClamError::new(RESULT)), @@ -60,10 +64,11 @@ pub fn initialize() -> Result<(), ClamError> { } } +#[must_use] pub fn version() -> String { let ver = unsafe { clamav_sys::cl_retver() }; if ver.is_null() { - "".to_string() + String::new() } else { unsafe { std::ffi::CStr::from_ptr(ver).to_string_lossy().to_string() } } @@ -75,6 +80,7 @@ pub type MsgCallback = Box; /// console /// /// Note that the libclamav APIs do not permit restoring the default handler. +#[allow(clippy::missing_panics_doc)] pub fn set_msg_callback(cb: MsgCallback) { unsafe { *(CLAMAV_MESSAGE_CALLBACK.lock().unwrap()) = Some(cb); @@ -113,7 +119,7 @@ unsafe extern "C" fn clcb_msg_wrapper( } } -/// A type defining the trait object returned in the FileInspect event that +/// A type defining the trait object returned in the `FileInspect` event that /// allows access to embedded file content. 
pub type ContentHandle = Pin>; @@ -136,14 +142,14 @@ mod tests { async fn clcb_msg_override() { const KEY: &str = module_path!(); - { + fn cb(_severity: log::Level, _fullmsg: &str, msg: &str) { let mut test_store = TEST_STORE.lock().unwrap(); - (*test_store).insert(KEY.into(), "".into()); + (*test_store).insert(KEY.into(), msg.into()); } - fn cb(_severity: log::Level, _fullmsg: &str, msg: &str) { + { let mut test_store = TEST_STORE.lock().unwrap(); - (*test_store).insert(KEY.into(), msg.into()); + (*test_store).insert(KEY.into(), String::default()); } // Override the message callback @@ -151,9 +157,10 @@ mod tests { // Force an error let clam_engine = crate::engine::Engine::new(); - if clam_engine.load_databases("/no-such-path").await.is_ok() { - panic!("database load should have failed") - } + assert!( + clam_engine.load_databases("/no-such-path").await.is_err(), + "database load should have failed" + ); // Check that the message callback captured the error let test_store = TEST_STORE.lock().unwrap(); diff --git a/src/scan_settings.rs b/src/scan_settings.rs index 1d48da0..3003a6a 100644 --- a/src/scan_settings.rs +++ b/src/scan_settings.rs @@ -101,39 +101,54 @@ pub struct ScanSettings { } impl ScanSettings { + #[must_use] pub fn general(&self) -> GeneralFlags { - GeneralFlags::from_bits(self.settings.general).unwrap() + GeneralFlags::from_bits(self.settings.general).unwrap_or(GeneralFlags::empty()) } - pub fn set_general(&mut self, flags: GeneralFlags) { + + pub fn set_general(&mut self, flags: &GeneralFlags) { self.settings.general = flags.bits(); } + + #[must_use] pub fn parse(&self) -> ParseFlags { - ParseFlags::from_bits(self.settings.parse).unwrap() + ParseFlags::from_bits(self.settings.parse).unwrap_or(ParseFlags::empty()) } - pub fn set_parse(&mut self, flags: ParseFlags) { + + pub fn set_parse(&mut self, flags: &ParseFlags) { self.settings.parse = flags.bits(); } + + #[must_use] pub fn heuristic(&self) -> HeuristicFlags { - 
HeuristicFlags::from_bits(self.settings.heuristic).unwrap() + HeuristicFlags::from_bits(self.settings.heuristic).unwrap_or(HeuristicFlags::empty()) } - pub fn set_heuristic(&mut self, flags: HeuristicFlags) { + + pub fn set_heuristic(&mut self, flags: &HeuristicFlags) { self.settings.heuristic = flags.bits(); } + + #[must_use] pub fn mail(&self) -> MailFlags { - MailFlags::from_bits(self.settings.mail).unwrap() + MailFlags::from_bits(self.settings.mail).unwrap_or(MailFlags::empty()) } - pub fn set_mail(&mut self, flags: MailFlags) { + + pub fn set_mail(&mut self, flags: &MailFlags) { self.settings.mail = flags.bits(); } + + #[must_use] pub fn dev(&self) -> DevFlags { - DevFlags::from_bits(self.settings.dev).unwrap() + DevFlags::from_bits(self.settings.dev).unwrap_or(DevFlags::empty()) } - pub fn set_dev(&mut self, flags: DevFlags) { + + pub fn set_dev(&mut self, flags: &DevFlags) { self.settings.dev = flags.bits(); } } impl ToString for ScanSettings { + #[allow(clippy::too_many_lines)] fn to_string(&self) -> String { let mut flag_names = Vec::::new(); @@ -265,17 +280,19 @@ impl ToString for ScanSettings { } } -pub struct ScanSettingsBuilder { +pub struct Builder { current: cl_scan_options, } -impl ScanSettingsBuilder { +impl Builder { + #[must_use] pub fn new() -> Self { - ScanSettingsBuilder { + Builder { current: cl_scan_options::default(), } } + #[must_use] pub fn build(&self) -> ScanSettings { ScanSettings { settings: self.current, @@ -312,7 +329,7 @@ impl ScanSettingsBuilder { self } - /// Enable HTML normalisation (including ScrEnc decryption). + /// Enable HTML normalisation (including `ScrEnc` decryption). 
pub fn enable_html(&mut self) -> &mut Self { self.current.parse |= CL_SCAN_PARSE_HTML; self @@ -423,7 +440,7 @@ impl ScanSettingsBuilder { } } -impl Default for ScanSettingsBuilder { +impl Default for Builder { fn default() -> Self { Self::new() } @@ -435,13 +452,13 @@ mod tests { #[test] fn builder_defaults_to_standard_opts() { - let settings = ScanSettingsBuilder::new().build(); + let settings = Builder::new().build(); assert_eq!(settings.settings, clamav_sys::cl_scan_options::default()); } #[test] fn builder_clear_success() { - let settings = ScanSettingsBuilder::new().clear().build(); + let settings = Builder::new().clear().build(); assert_eq!(settings.settings.general, 0); assert_eq!(settings.settings.parse, 0); assert_eq!(settings.settings.heuristic, 0); @@ -451,13 +468,13 @@ mod tests { #[test] fn builder_just_pdf_success() { - let settings = ScanSettingsBuilder::new().clear().enable_pdf().build(); + let settings = Builder::new().clear().enable_pdf().build(); assert_eq!(settings.settings.parse, CL_SCAN_PARSE_PDF); } #[test] fn builder_normal_files_success() { - let settings = ScanSettingsBuilder::new() + let settings = Builder::new() .clear() .enable_pdf() .enable_html() @@ -485,7 +502,7 @@ mod tests { #[test] fn settings_default_to_standard() { - let settings: ScanSettings = Default::default(); + let settings: ScanSettings = ScanSettings::default(); assert_eq!(settings.settings, cl_scan_options::default()); } } diff --git a/src/version.rs b/src/version.rs index 8592ee9..01e67f4 100644 --- a/src/version.rs +++ b/src/version.rs @@ -1,7 +1,7 @@ use std::ffi::CStr; -use std::str; /// Returns the database version level that the engine supports +#[must_use] pub fn flevel() -> u32 { unsafe { clamav_sys::cl_retflevel() } } @@ -15,13 +15,12 @@ pub fn flevel() -> u32 { /// /// println!("Running version {} flevel {}", version::version(), version::flevel()); /// ``` +#[must_use] pub fn version() -> String { unsafe { let ptr = clamav_sys::cl_retver(); let bytes = 
CStr::from_ptr(ptr).to_bytes(); - str::from_utf8(bytes) - .expect("Invalid UTF8 string") - .to_string() + String::from_utf8_lossy(bytes).to_string() } } From d7e7d6d877e99ae1caeafd2eeff9c2177ea20550 Mon Sep 17 00:00:00 2001 From: Scott Hutton Date: Tue, 14 Nov 2023 11:14:33 -0800 Subject: [PATCH 03/12] Update clamav-sys dependency and repos --- Cargo.toml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 40e3781..93593b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,10 +8,10 @@ categories = ["api-bindings"] description = "Async ClamAV bindings for Rust" edition = "2021" exclude = ["test_data/*"] -homepage = "https://github.com/zaddach/clamav-rs" +homepage = "https://github.com/Cisco-Talos/clamav-async-rs" license = "GPL-2.0" name = "clamav-async" -repository = "https://github.com/zaddach/clamav-rs" +repository = "https://github.com/Cisco-Talos/clamav-async-rs" version = "0.5.5" [features] @@ -36,9 +36,7 @@ time = { version = "0.3", features = [ ] } tokio = { version = "1", features = ["sync", "rt", "macros"] } tokio-stream = { version = "0.1" } - -[dependencies.clamav-sys] -git = "https://github.com/Cisco-Talos/clamav-sys.git" +clamav-sys = "1.0" [target.'cfg(windows)'.dependencies] bindings = { version = "0.5.5", package = "clamav-rs-bindings" } From b0ef6cd6e17bc8c65a3ba87e1d6b97b74ec2d9e6 Mon Sep 17 00:00:00 2001 From: Scott Hutton Date: Tue, 14 Nov 2023 12:00:21 -0800 Subject: [PATCH 04/12] Add/update copyright notices --- src/callback.rs | 16 ++++++++++++++++ src/cvd.rs | 16 ++++++++++++++++ src/cvd/head_libclamav.rs | 16 ++++++++++++++++ src/cvd/head_native.rs | 16 ++++++++++++++++ src/db.rs | 16 ++++++++++++++++ src/engine.rs | 16 ++++++++++++++++ src/error.rs | 16 ++++++++++++++++ src/fmap.rs | 4 +--- src/layer_attr.rs | 16 ++++++++++++++++ src/lib.rs | 16 ++++++++++++++++ src/scan_settings.rs | 16 ++++++++++++++++ src/version.rs | 16 ++++++++++++++++ src/windows_fd.rs | 16 ++++++++++++++++ 13 files 
changed, 193 insertions(+), 3 deletions(-) diff --git a/src/callback.rs b/src/callback.rs index 0fa86fa..d4aeffb 100644 --- a/src/callback.rs +++ b/src/callback.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. + use crate::{engine::ScanEvent, layer_attr::LayerAttributes, ContentHandle, EngineError}; use clamav_sys::cl_error_t; use std::{ diff --git a/src/cvd.rs b/src/cvd.rs index a168ed9..339e302 100644 --- a/src/cvd.rs +++ b/src/cvd.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. 
+ use std::{borrow::Cow, fs::File, num::ParseIntError, path::Path, str::Utf8Error}; use thiserror::Error; diff --git a/src/cvd/head_libclamav.rs b/src/cvd/head_libclamav.rs index 58f29f7..b169e19 100644 --- a/src/cvd/head_libclamav.rs +++ b/src/cvd/head_libclamav.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. + use super::{HeadError, Meta}; use std::{borrow::Cow, ffi::CStr}; diff --git a/src/cvd/head_native.rs b/src/cvd/head_native.rs index 4eeadd2..6c0aece 100644 --- a/src/cvd/head_native.rs +++ b/src/cvd/head_native.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. + use super::{HeadError, Meta}; use std::{ borrow::Cow, diff --git a/src/db.rs b/src/db.rs index 53f9adf..04ea9bc 100644 --- a/src/db.rs +++ b/src/db.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. + use std::ffi::CStr; use std::str; diff --git a/src/engine.rs b/src/engine.rs index 613b84b..fd3c9c7 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. + use crate::error::Error as ClamError; use clamav_sys::cl_engine_field; use clamav_sys::{cl_error_t, time_t}; diff --git a/src/error.rs b/src/error.rs index 4a2ad81..92aa52e 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. + use std::error; use std::ffi::CStr; use std::fmt; diff --git a/src/fmap.rs b/src/fmap.rs index 321afb9..a627cb9 100644 --- a/src/fmap.rs +++ b/src/fmap.rs @@ -1,5 +1,4 @@ -// -// Copyright (C) 2020 Jonas Zaddach. +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License version 2 as @@ -14,7 +13,6 @@ // along with this program; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, // MA 02110-1301, USA. 
-// #[cfg(windows)] use bindings::Windows::{ diff --git a/src/layer_attr.rs b/src/layer_attr.rs index adf5866..32a06c4 100644 --- a/src/layer_attr.rs +++ b/src/layer_attr.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. + use bitflags::bitflags; bitflags! { diff --git a/src/lib.rs b/src/lib.rs index 586653c..848ca28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. 
+ #![warn(clippy::all, clippy::pedantic)] #![allow(clippy::missing_errors_doc)] diff --git a/src/scan_settings.rs b/src/scan_settings.rs index 3003a6a..37dbe7b 100644 --- a/src/scan_settings.rs +++ b/src/scan_settings.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. + #![allow(dead_code)] use clamav_sys::{ diff --git a/src/version.rs b/src/version.rs index 01e67f4..29d02d6 100644 --- a/src/version.rs +++ b/src/version.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. 
+ use std::ffi::CStr; /// Returns the database version level that the engine supports diff --git a/src/windows_fd.rs b/src/windows_fd.rs index 6769345..b443e38 100644 --- a/src/windows_fd.rs +++ b/src/windows_fd.rs @@ -1,3 +1,19 @@ +// Copyright (C) 2020-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +// MA 02110-1301, USA. + use std::io; use std::mem; use std::os::raw; From 51b8fc1ed415538399ae79482d3313a7439354e0 Mon Sep 17 00:00:00 2001 From: Scott Hutton Date: Tue, 14 Nov 2023 13:59:00 -0800 Subject: [PATCH 05/12] Remove no-longer-relevant comment --- src/callback.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/callback.rs b/src/callback.rs index d4aeffb..fa50230 100644 --- a/src/callback.rs +++ b/src/callback.rs @@ -140,8 +140,6 @@ pub(crate) unsafe extern "C" fn engine_file_inspection( layer_attributes: u32, context: *mut c_void, ) -> cl_error_t { - // NOTE: this function is probably doing too much work generating structures - // that won't be used. TALOSAV-28 offers a solution. 
if let Some(cxt) = context.cast::().as_ref() { let file_type: String = CStr::from_ptr(type_).to_string_lossy().into(); let file_name = file_name From 1e3a0cc2ea9bd69276ba799eb951e6ad5c0b14eb Mon Sep 17 00:00:00 2001 From: Scott Hutton Date: Tue, 14 Nov 2023 16:26:02 -0800 Subject: [PATCH 06/12] Remove vestigal #[cfg(unix)] from FileInspect --- src/callback.rs | 1 - src/engine.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/src/callback.rs b/src/callback.rs index fa50230..4c7eb2e 100644 --- a/src/callback.rs +++ b/src/callback.rs @@ -190,7 +190,6 @@ pub(crate) unsafe extern "C" fn engine_file_inspection( } let _ = cxt.sender.blocking_send(ScanEvent::FileInspect { - #[cfg(unix)] file_type, file_name, file_size, diff --git a/src/engine.rs b/src/engine.rs index fd3c9c7..45591c7 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -78,7 +78,6 @@ pub enum ScanEvent { match_name: String, }, FileInspect { - #[cfg(unix)] ancestors: Vec>, file_name: Option, file_size: usize, From ffc27d5716578231d65803b4ffff2717b55e80e9 Mon Sep 17 00:00:00 2001 From: Scott Hutton Date: Wed, 15 Nov 2023 09:09:46 -0800 Subject: [PATCH 07/12] Add crate keywords --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index 93593b0..45780ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ license = "GPL-2.0" name = "clamav-async" repository = "https://github.com/Cisco-Talos/clamav-async-rs" version = "0.5.5" +keywords = ["antivirus", "async", "clamav"] [features] default = ["native-impl"] From 0606972b65aafb8cd009d01263bb5d83f4e4499f Mon Sep 17 00:00:00 2001 From: Scott Hutton Date: Wed, 15 Nov 2023 09:10:02 -0800 Subject: [PATCH 08/12] Add safety, clean up file inspection callback --- src/callback.rs | 160 ++++++++++++++++++++++++++++++------------------ 1 file changed, 101 insertions(+), 59 deletions(-) diff --git a/src/callback.rs b/src/callback.rs index 4c7eb2e..c33e17a 100644 --- a/src/callback.rs +++ b/src/callback.rs @@ -20,7 +20,9 @@ use 
std::{ ffi::CStr, io::Cursor, os::raw::{c_char, c_int, c_uchar, c_void}, + pin::Pin, }; +use tokio::io::AsyncRead; /// A type defining a closure or function that, when given a recursion depth, /// file type, optional file name, and file size, returns whether or not the @@ -34,6 +36,44 @@ pub(crate) struct ScanCbContext { pub(crate) should_copy_file_buffer: Option, } +impl ScanCbContext { + /// Return a copy of a provided buffer if callback criteria are met + unsafe fn scanned_content( + &self, + file_buffer: *const c_char, + recursion_level: u32, + file_type: &str, + file_name: Option<&str>, + file_size: usize, + ) -> Option>> { + let Some(buffer) = file_buffer + .cast::() + .as_ref() + .map(|buf| core::slice::from_raw_parts(buf, file_size)) + else { + // No buffer provided + return None; + }; + + let Some(cb) = &self.should_copy_file_buffer else { + return None; + }; + + // Never include content for the root document. That should be known to the caller already. + if cb(recursion_level, file_type, file_name, file_size) { + // NOTE: the content is provided as a trait object that + // implements AsyncRead in order to facilitate future + // functionality where this could be passed as a more + // "lightweight" object, such as a file handle or socket, or + // perhaps a ref-counted buffer that releases its reference once + // completely read. + Some(Box::pin(Cursor::new(buffer.to_vec())) as ContentHandle) + } else { + None + } + } +} + /// A completion progress report, with a final result #[derive(Debug)] pub enum Progress { @@ -128,9 +168,9 @@ pub(crate) unsafe extern "C" fn engine_virus_found( pub(crate) unsafe extern "C" fn engine_file_inspection( // NOTE: this file descriptor is unsafe to use after the callback has - // returned, even if dup'd + // returned, even if dup'd. Hence, it's just ignored. 
_fd: c_int, - type_: *const c_char, + file_type: *const c_char, c_ancestors: *mut *const c_char, parent_file_size: usize, file_name: *const c_char, @@ -140,68 +180,70 @@ pub(crate) unsafe extern "C" fn engine_file_inspection( layer_attributes: u32, context: *mut c_void, ) -> cl_error_t { - if let Some(cxt) = context.cast::().as_ref() { - let file_type: String = CStr::from_ptr(type_).to_string_lossy().into(); - let file_name = file_name - .as_ref() - .map(|p| CStr::from_ptr(p)) - .map(CStr::to_string_lossy) - .map(|s| s.to_string()); - - let layer_attrs = LayerAttributes::from_bits(layer_attributes).unwrap_or_default(); - - let mut ancestors = vec![]; - if let Ok(recursion_level) = isize::try_from(recursion_level) { - if !c_ancestors.is_null() { - for i in 0..recursion_level { - let ancestor = *(c_ancestors.offset(i)); - if ancestor.is_null() { - ancestors.push(None); - } else { - let ancestor = CStr::from_ptr(ancestor).to_string_lossy(); - ancestors.push(Some(ancestor.into())); - } - } - } - } + let Some(cxt) = context.cast::().as_ref() else { + return cl_error_t::CL_CLEAN; + }; + + let Some(file_type) = file_type + .as_ref() + .map(|p| CStr::from_ptr(p)) + .map(CStr::to_string_lossy) + .map(|s| s.to_string()) + else { + // Quietly ignore NULL file types for safety, even though libclamav + // guarantees us one. 
+ return cl_error_t::CL_CLEAN; + }; + + let file_name = file_name + .as_ref() + .map(|ptr| CStr::from_ptr(ptr)) + .map(CStr::to_string_lossy) + .map(|s| s.to_string()); + + let scanned_content = cxt.scanned_content( + file_buffer, + recursion_level, + &file_type, + file_name.as_deref(), + file_size, + ); + + let _ = cxt.sender.blocking_send(ScanEvent::FileInspect { + content: scanned_content, + ancestors: build_ancestors(recursion_level, c_ancestors), + file_name, + file_size, + file_type, + layer_attrs: LayerAttributes::from_bits(layer_attributes).unwrap_or_default(), + parent_file_size, + recursion_level, + }); + + cl_error_t::CL_CLEAN +} - // Duplicate the content buffer? - let mut scanned_content = None; - if let Some(cb) = &cxt.should_copy_file_buffer { - // Never include content for the root document. That should be known to the caller already. - if cb( - recursion_level, - file_type.as_str(), - file_name.as_deref(), - file_size, - ) { - let buffer = unsafe { - core::slice::from_raw_parts(file_buffer.cast::(), file_size) +/// Helper function for `engine_file_inspection` that builds a vector laying out +/// the filenames of ancestors for a container element +unsafe fn build_ancestors( + recursion_level: u32, + c_ancestors: *mut *const c_char, +) -> Vec> { + let mut ancestors = vec![]; + if let Ok(recursion_level) = isize::try_from(recursion_level) { + if !c_ancestors.is_null() { + for i in 0..recursion_level { + let ancestor = *(c_ancestors.offset(i)); + if ancestor.is_null() { + ancestors.push(None); + } else { + let ancestor = CStr::from_ptr(ancestor).to_string_lossy(); + ancestors.push(Some(ancestor.into())); } - .to_vec(); - // NOTE: the content is provided as a trait object that - // implements AsyncRead in order to facilitate future - // functionality where this could be passed as a more - // "lightweight" object, such as a file handle or socket, or - // perhaps a ref-counted buffer that releases its reference once - // completely read. 
- scanned_content = Some(Box::pin(Cursor::new(buffer)) as ContentHandle); } } - - let _ = cxt.sender.blocking_send(ScanEvent::FileInspect { - file_type, - file_name, - file_size, - parent_file_size, - recursion_level, - layer_attrs, - ancestors, - content: scanned_content, - }); } - - cl_error_t::CL_CLEAN + ancestors } #[cfg(unix)] From 6a4487859698d12f56d9367a9c190cd7aebb118e Mon Sep 17 00:00:00 2001 From: Scott Hutton Date: Thu, 16 Nov 2023 09:56:46 -0800 Subject: [PATCH 09/12] Update authors and version --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 45780ab..af74f03 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,8 @@ [package] authors = [ - "Jonas Zaddach ", "Scott Hutton ", + "Zachary Sims ", + "Jonas Zaddach ", ] categories = ["api-bindings"] description = "Async ClamAV bindings for Rust" @@ -12,7 +12,7 @@ homepage = "https://github.com/Cisco-Talos/clamav-async-rs" license = "GPL-2.0" name = "clamav-async" repository = "https://github.com/Cisco-Talos/clamav-async-rs" -version = "0.5.5" +version = "0.1.0" keywords = ["antivirus", "async", "clamav"] [features] From ed6b491acde2c500450fa0c7cca052f376421c37 Mon Sep 17 00:00:00 2001 From: Scott Hutton Date: Thu, 16 Nov 2023 09:56:58 -0800 Subject: [PATCH 10/12] Fix typos --- src/cvd.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cvd.rs b/src/cvd.rs index 339e302..6c49f90 100644 --- a/src/cvd.rs +++ b/src/cvd.rs @@ -29,7 +29,7 @@ pub use head_libclamav::Header; pub use head_native::Header; pub trait Meta { - /// Load fromm the initial bytes found at the beginning of the CVD/CLD + /// Load from the initial bytes found at the beginning of the CVD/CLD fn from_header_bytes(bytes: &[u8; 512]) -> Result where Self: Sized; @@ -125,7 +125,7 @@ pub enum HeadError { /// Header field contains non-UTF-8 content #[error("non-UTF-8 contenti: {0}")] - Utf80(#[from] Utf8Error), + Utf8(#[from] Utf8Error), /// Header field content 
can't be parsed as number #[error("unable to parse integer: {0}")] From a7c90b43b6d64769e4ab059731d9ea4af6b59103 Mon Sep 17 00:00:00 2001 From: Scott Hutton Date: Thu, 16 Nov 2023 09:57:36 -0800 Subject: [PATCH 11/12] Rename cl_pread to pread_cb --- src/fmap.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fmap.rs b/src/fmap.rs index a627cb9..bf06738 100644 --- a/src/fmap.rs +++ b/src/fmap.rs @@ -44,7 +44,7 @@ pub enum MapError { } #[cfg(windows)] -extern "C" fn cl_pread( +extern "C" fn pread_cb( handle: *mut os::raw::c_void, buf: *mut os::raw::c_void, count: os::raw::c_ulonglong, @@ -77,7 +77,7 @@ extern "C" fn cl_pread( } #[cfg(unix)] -extern "C" fn cl_pread( +extern "C" fn pread_cb( handle: *mut os::raw::c_void, buf: *mut os::raw::c_void, count: usize, @@ -137,7 +137,7 @@ impl Fmap { #[cfg(windows)] let fd = file.as_raw_handle(); let fmap = unsafe { - cl_fmap_open_handle(fd as *mut c_void, offset, len, Some(cl_pread), aging.into()) + cl_fmap_open_handle(fd as *mut c_void, offset, len, Some(pread_cb), aging.into()) }; Self { handle: Arc::new(Mutex::new(FmapHandle { From 9cd42d4c72aea55eb42ccc6c893654e23665d418 Mon Sep 17 00:00:00 2001 From: Scott Hutton Date: Fri, 17 Nov 2023 08:42:38 -0800 Subject: [PATCH 12/12] Update clamav-rs-bindings dependency --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index af74f03..4f6c90b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,7 +40,7 @@ tokio-stream = { version = "0.1" } clamav-sys = "1.0" [target.'cfg(windows)'.dependencies] -bindings = { version = "0.5.5", package = "clamav-rs-bindings" } +bindings = { version = "0.5", package = "clamav-rs-bindings" } [dev-dependencies] tempfile = "3"