Skip to content

Commit

Permalink
Improved XZ signaturing, added support for malformed XZ streams
Browse files Browse the repository at this point in the history
  • Loading branch information
devttys0 committed Dec 14, 2024
1 parent ff02dc7 commit 23b3aa5
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 83 deletions.
86 changes: 33 additions & 53 deletions src/signatures/xz.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use crate::common::is_offset_safe;
use crate::extractors::lzma::lzma_decompress;
use crate::extractors::sevenzip::sevenzip_extractor;
use crate::signatures::common::{SignatureError, SignatureResult, CONFIDENCE_HIGH};
use crate::structures::xz::{parse_xz_footer, parse_xz_header};
use aho_corasick::AhoCorasick;
use crate::structures::xz::parse_xz_header;

/// Human readable description
pub const DESCRIPTION: &str = "XZ compressed data";
Expand All @@ -23,69 +24,48 @@ pub fn xz_parser(file_data: &[u8], offset: usize) -> Result<SignatureResult, Sig

let mut next_offset = offset;
let mut previous_offset = None;
let mut stream_header_count = 0;
let available_data = file_data.len() - offset;

// XZ streams can be concatenated together, need to process them all to determine the size of an XZ file
while is_offset_safe(available_data, next_offset, previous_offset) {
// Parse the next XZ header to get the header's size
// Parse the next XZ header to validate the header CRC
match parse_xz_header(&file_data[next_offset..]) {
Err(_) => break,
Ok(header_size) => {
match file_data.get(next_offset + header_size..) {
None => break,
Some(xz_stream_data) => {
// Determine the size of the XZ stream data
match xz_stream_size(xz_stream_data) {
Err(_) => break,
Ok(stream_size) => {
previous_offset = Some(next_offset);
next_offset += header_size + stream_size;
}
}
}
Ok(_) => {
// Header is valid
stream_header_count += 1;

// Do an extraction dry-run to make sure the data decompresses correctly
let dry_run = lzma_decompress(file_data, next_offset, None);

// If dry run was a success, update the offset and size fields
if dry_run.success && dry_run.size.is_some() {
previous_offset = Some(next_offset);
next_offset += dry_run.size.unwrap();
result.size += dry_run.size.unwrap();
// Else, report that the data is malformed and stop processing XZ streams
} else {
// 7z may be able to at least partially extract malformed data streams
result.preferred_extractor = Some(sevenzip_extractor());
result.description = format!(
"{}, valid header with malformed data stream",
result.description
);
break;
}
}
}
}

// If at least one valid header and one valid stream were identified,
// next_offset will be greater than the starting offset.
if next_offset > offset {
result.size = next_offset - offset;
result.description = format!("{}, total size: {} bytes", result.description, result.size);
return Ok(result);
}

Err(SignatureError)
}

/// XZ file format has detectable, verifiable, end-of-stream markers.
fn xz_stream_size(xz_data: &[u8]) -> Result<usize, SignatureError> {
// The magic bytes we search for ("YZ") are actually 10 bytes into the footer header
const FOOTER_MAGIC_OFFSET: usize = 10;

/*
* Gotta grep for the end-of-stream magic bytes ("YZ").
* These are prone to false positives, but a valid footer includes a checksum,
* making false positive matches easy to filter out (see: parse_xz_footer).
*/
let eof_pattern = vec![b"YZ"];
let grep = AhoCorasick::new(eof_pattern).unwrap();

// Find all matching patterns in the xz compressed data
for eof_match in grep.find_overlapping_iter(xz_data) {
let match_offset: usize = eof_match.start();
let footer_start: usize = match_offset - FOOTER_MAGIC_OFFSET;

// Footer must be 4-byte aligned
if (footer_start % 4) == 0 {
if let Some(footer_data) = xz_data.get(footer_start..) {
// Parse the stream footer
if let Ok(footer_size) = parse_xz_footer(footer_data) {
return Ok(footer_start + footer_size);
}
}
// Return success if at least one valid XZ stream header was found
if stream_header_count > 0 {
// Only report the total size if we were able to determine the total size
if result.size > 0 {
result.description =
format!("{}, total size: {} bytes", result.description, result.size);
}
return Ok(result);
}

Err(SignatureError)
Expand Down
30 changes: 0 additions & 30 deletions src/structures/xz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,33 +24,3 @@ pub fn parse_xz_header(xz_data: &[u8]) -> Result<usize, StructureError> {

Err(StructureError)
}

/// Parses an XZ stream footer from the start of `xz_data` and verifies its CRC.
///
/// The footer is read as little-endian fields: `footer_crc` (u32),
/// `backward_size` (u32), `flags` (u16) and `magic` (u16).
///
/// Returns `Ok(12)` (the fixed footer size) when the CRC32 of the 6 bytes
/// starting at offset 4 equals the stored `footer_crc`; otherwise returns
/// `Err(StructureError)`. The `magic` field is parsed but not compared here.
pub fn parse_xz_footer(xz_data: &[u8]) -> Result<usize, StructureError> {
    const FOOTER_SIZE: usize = 12;
    const CRC_DATA_SIZE: usize = 6;
    const CRC_START_INDEX: usize = 4;

    let footer_layout = vec![
        ("footer_crc", "u32"),
        ("backward_size", "u32"),
        ("flags", "u16"),
        ("magic", "u16"),
    ];

    // Bail out early if the footer fields cannot be parsed at all
    let footer = match common::parse(xz_data, &footer_layout, "little") {
        Ok(fields) => fields,
        Err(_) => return Err(StructureError),
    };

    // The checksummed region sits right after the 4-byte CRC field
    // (presumably backward_size + flags, given the layout above)
    let crc_region = match xz_data.get(CRC_START_INDEX..CRC_START_INDEX + CRC_DATA_SIZE) {
        Some(bytes) => bytes,
        None => return Err(StructureError),
    };

    // A footer is only accepted when the stored CRC matches the computed one
    if crc32(crc_region) != (footer["footer_crc"] as u32) {
        return Err(StructureError);
    }

    Ok(FOOTER_SIZE)
}

0 comments on commit 23b3aa5

Please sign in to comment.