Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Datasets locking/v5 #12334

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions rust/cbindgen.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ include = [
"FtpEvent",
"SCSigTableElmt",
"SCTransformTableElmt",
"DataRepType",
]

# A list of items to not include in the generated bindings
Expand Down
112 changes: 112 additions & 0 deletions rust/src/detect/datasets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/* Copyright (C) 2024 Open Information Security Foundation
inashivb marked this conversation as resolved.
Show resolved Hide resolved
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
* Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/

// Author: Shivani Bhardwaj <[email protected]>

//! This module exposes items from the datasets C code to Rust.

use base64::{engine::general_purpose::STANDARD, Engine};
use std::ffi::{c_char, CStr};
use std::fs::{File, OpenOptions};
use std::io::{self, BufRead};
use std::path::Path;

/// Opaque Dataset type defined in C
#[derive(Copy, Clone)]
pub enum Dataset {}

// Simple C type converted to Rust
#[derive(Debug, PartialEq)]
#[repr(C)]
pub struct DataRepType {
pub value: u16,
}

// Extern fns operating on the opaque Dataset type above
/// cbindgen:ignore
extern "C" {
pub fn DatasetAdd(set: &Dataset, data: *const u8, len: u32) -> i32;
pub fn DatasetAddwRep(set: &Dataset, data: *const u8, len: u32, rep: *const DataRepType)
-> i32;
}

#[no_mangle]
pub unsafe extern "C" fn ProcessDatasets(
inashivb marked this conversation as resolved.
Show resolved Hide resolved
set: &Dataset, name: *const c_char, fname: *const c_char, fmode: *const c_char,
) -> i32 {
let file_string = CStr::from_ptr(fname).to_str().unwrap();
inashivb marked this conversation as resolved.
Show resolved Hide resolved
let mode = CStr::from_ptr(fmode).to_str().unwrap();
let set_name = CStr::from_ptr(name).to_str().unwrap();
let filename = Path::new(file_string);
let mut is_dataset = false;
if let Ok(lines) = read_lines(filename, mode) {
for line in lines.map_while(Result::ok) {
let v: Vec<&str> = line.split(',').collect();
// Ignore empty and invalid lines in dataset/rep file
if v.is_empty() || v.len() > 2 {
continue;
}
if v.len() == 1 {
is_dataset = true;
// Dataset
let mut decoded: Vec<u8> = vec![];
if STANDARD.decode_vec(v[0], &mut decoded).is_err() {
inashivb marked this conversation as resolved.
Show resolved Hide resolved
SCLogError!("bad base64 encoding {}", set_name);
return -2;
}
DatasetAdd(set, decoded.as_ptr(), decoded.len() as u32);
} else {
if is_dataset {
SCLogError!("Cannot mix dataset and datarep values");
inashivb marked this conversation as resolved.
Show resolved Hide resolved
inashivb marked this conversation as resolved.
Show resolved Hide resolved
return -2;
}
// Datarep
let mut decoded: Vec<u8> = vec![];
if STANDARD.decode_vec(v[0], &mut decoded).is_err() {
SCLogError!("bad base64 encoding {}", set_name);
return -2;
}
if let Ok(val) = v[1].to_string().parse::<u16>() {
let rep: DataRepType = DataRepType { value: val };
DatasetAddwRep(set, decoded.as_ptr(), decoded.len() as u32, &rep);
} else {
SCLogError!("Invalid datarep value {}", set_name);
return -2;
}
}
}
} else {
return -1;
}
0
}

fn read_lines<P>(filename: P, fmode: &str) -> io::Result<io::Lines<io::BufReader<File>>>
where
P: AsRef<Path>,
{
let file: File = if fmode == "r" {
File::open(filename)?
} else {
OpenOptions::new()
.append(true)
.create(true)
.read(true)
.open(filename)?
};
Ok(io::BufReader::new(file).lines())
}
1 change: 1 addition & 0 deletions rust/src/detect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub mod transforms;
pub mod uint;
pub mod uri;
pub mod tojson;
pub mod datasets;

use crate::core::AppProto;
use std::os::raw::{c_int, c_void};
Expand Down
4 changes: 1 addition & 3 deletions src/datasets-reputation.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@
#ifndef SURICATA_DATASETS_REPUTATION_H
#define SURICATA_DATASETS_REPUTATION_H

typedef struct DataRepType {
uint16_t value;
} DataRepType;
#include "rust-bindings.h"

typedef struct DataRepResultType {
bool found;
Expand Down
75 changes: 7 additions & 68 deletions src/datasets.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ SCMutex sets_lock = SCMUTEX_INITIALIZER;
static Dataset *sets = NULL;
static uint32_t set_ids = 0;

static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
DataRepType *rep);
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep);

static inline void DatasetUnlockData(THashData *d)
{
Expand Down Expand Up @@ -496,80 +495,21 @@ static int DatasetLoadString(Dataset *set)
return 0;

SCLogConfig("dataset: %s loading from '%s'", set->name, set->load);

const char *fopen_mode = "r";
if (strlen(set->save) > 0 && strcmp(set->save, set->load) == 0) {
fopen_mode = "a+";
}

FILE *fp = fopen(set->load, fopen_mode);
if (fp == NULL) {
SCLogError("fopen '%s' failed: %s", set->load, strerror(errno));
int retval = ProcessDatasets(set, set->name, set->load, fopen_mode);
if (retval == -2) {
FatalErrorOnInit("dataset %s could not be processed", set->name);
} else if (retval == -1) {
return -1;
}

uint32_t cnt = 0;
char line[1024];
while (fgets(line, (int)sizeof(line), fp) != NULL) {
if (strlen(line) <= 1)
continue;

char *r = strchr(line, ',');
if (r == NULL) {
line[strlen(line) - 1] = '\0';
SCLogDebug("line: '%s'", line);
uint32_t decoded_size = Base64DecodeBufferSize(strlen(line));
// coverity[alloc_strlen : FALSE]
uint8_t decoded[decoded_size];
uint32_t num_decoded =
Base64Decode((const uint8_t *)line, strlen(line), Base64ModeStrict, decoded);
if (num_decoded == 0 && strlen(line) > 0) {
FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
continue;
}

if (DatasetAdd(set, (const uint8_t *)decoded, num_decoded) < 0) {
FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
continue;
}
cnt++;
} else {
line[strlen(line) - 1] = '\0';
SCLogDebug("line: '%s'", line);

*r = '\0';

uint32_t decoded_size = Base64DecodeBufferSize(strlen(line));
uint8_t decoded[decoded_size];
uint32_t num_decoded =
Base64Decode((const uint8_t *)line, strlen(line), Base64ModeStrict, decoded);
if (num_decoded == 0) {
FatalErrorOnInit("bad base64 encoding %s/%s", set->name, set->load);
continue;
}

r++;
SCLogDebug("r '%s'", r);

DataRepType rep = { .value = 0 };
if (ParseRepLine(r, strlen(r), &rep) < 0) {
FatalErrorOnInit("die: bad rep");
continue;
}
SCLogDebug("rep %u", rep.value);

if (DatasetAddwRep(set, (const uint8_t *)decoded, num_decoded, &rep) < 0) {
FatalErrorOnInit("dataset data add failed %s/%s", set->name, set->load);
continue;
}
cnt++;

SCLogDebug("line with rep %s, %s", line, r);
}
}
THashConsolidateMemcap(set->hash);

fclose(fp);
SCLogConfig("dataset: %s loaded %u records", set->name, cnt);
return 0;
}

Expand Down Expand Up @@ -1572,8 +1512,7 @@ int DatasetAdd(Dataset *set, const uint8_t *data, const uint32_t data_len)
return -1;
}

static int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len,
DataRepType *rep)
int DatasetAddwRep(Dataset *set, const uint8_t *data, const uint32_t data_len, DataRepType *rep)
{
if (set == NULL)
return -1;
Expand Down
1 change: 1 addition & 0 deletions src/datasets.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#define SURICATA_DATASETS_H

#include "util-thash.h"
#include "rust.h"
#include "datasets-reputation.h"

int DatasetsInit(void);
Expand Down
2 changes: 0 additions & 2 deletions src/detect-dataset.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,6 @@
#define DETECT_DATASET_CMD_ISNOTSET 2
#define DETECT_DATASET_CMD_ISSET 3

int DetectDatasetMatch (ThreadVars *, DetectEngineThreadCtx *, Packet *,
const Signature *, const SigMatchCtx *);
static int DetectDatasetSetup (DetectEngineCtx *, Signature *, const char *);
void DetectDatasetFree (DetectEngineCtx *, void *);

Expand Down
Loading