Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 77 additions & 14 deletions src/uu/date/src/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
use jiff::{Timestamp, Zoned};
use std::borrow::Cow;
use std::collections::HashMap;
use std::ffi::OsString;
use std::fs::File;
use std::io::{BufRead, BufReader, BufWriter, Read, Write, stderr};
use std::path::PathBuf;
Expand Down Expand Up @@ -52,10 +53,15 @@
const OPT_UNIVERSAL: &str = "universal";
const OPT_UNIVERSAL_2: &str = "utc";

/// Character emitted by `String::from_utf8_lossy` for each ill-formed byte subsequence.
const UNICODE_REPLACEMENT: char = '\u{FFFD}';

Check failure on line 57 in src/uu/date/src/date.rs

View workflow job for this annotation

GitHub Actions / Style/spelling (ubuntu-latest, feat_os_unix)

ERROR: `cspell`: Unknown word 'FFFD' (file:'src/uu/date/src/date.rs', line:57)

/// Settings for this program, parsed from the command line
struct Settings {
utc: bool,
format: Format,
/// Raw format bytes for Custom format, to preserve non-UTF-8 bytes in output
format_raw: Option<Vec<u8>>,
date_source: DateSource,
set_to: Option<Zoned>,
debug: bool,
Expand Down Expand Up @@ -285,7 +291,7 @@
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let matches = uucore::clap_localization::handle_clap_result(uu_app(), args)?;

let date_source = if let Some(date_os) = matches.get_one::<std::ffi::OsString>(OPT_DATE) {
let date_source = if let Some(date_os) = matches.get_one::<OsString>(OPT_DATE) {
// Convert OsString to String, handling invalid UTF-8 with GNU-compatible error
let date = date_os.to_str().ok_or_else(|| {
let bytes = date_os.as_encoded_bytes();
Expand All @@ -307,34 +313,41 @@
};

// Check for extra operands (multiple positional arguments)
if let Some(formats) = matches.get_many::<String>(OPT_FORMAT) {
let format_args: Vec<&String> = formats.collect();
if let Some(formats) = matches.get_many::<OsString>(OPT_FORMAT) {
let format_args: Vec<&OsString> = formats.collect();
if format_args.len() > 1 {
return Err(USimpleError::new(
1,
translate!("date-error-extra-operand", "operand" => format_args[1]),
translate!("date-error-extra-operand", "operand" => format_args[1].to_string_lossy()),
));
}
}

let format = if let Some(form) = matches.get_one::<String>(OPT_FORMAT) {
if !form.starts_with('+') {
let mut format_raw: Option<Vec<u8>> = None;
let format = if let Some(form) = matches.get_one::<OsString>(OPT_FORMAT) {
let raw_bytes = form.as_encoded_bytes();
if raw_bytes.first() != Some(&b'+') {
let form_lossy = form.to_string_lossy();
// if an optional Format String was found but the user has not provided an input date
// GNU prints an invalid date Error
if !matches!(date_source, DateSource::Human(_)) {
return Err(USimpleError::new(
1,
translate!("date-error-invalid-date", "date" => form),
translate!("date-error-invalid-date", "date" => form_lossy),
));
}
// If the user did provide an input date with the --date flag and the Format String is
// not starting with '+' GNU prints the missing '+' error message
return Err(USimpleError::new(
1,
translate!("date-error-format-missing-plus", "arg" => form),
translate!("date-error-format-missing-plus", "arg" => form_lossy),
));
}
let form = form[1..].to_string();
let bytes_after_plus = &raw_bytes[1..];
if std::str::from_utf8(bytes_after_plus).is_err() {
format_raw = Some(bytes_after_plus.to_vec());
}
let form = String::from_utf8_lossy(bytes_after_plus).into_owned();
Format::Custom(form)
} else if let Some(fmt) = matches
.get_many::<String>(OPT_ISO_8601)
Expand Down Expand Up @@ -381,6 +394,7 @@
let settings = Settings {
utc,
format,
format_raw,
date_source,
set_to,
debug: debug_mode,
Expand Down Expand Up @@ -544,6 +558,26 @@
let format_string = make_format_string(&settings);
let mut stdout = BufWriter::new(std::io::stdout().lock());

// Pre-extract non-UTF-8 chunks from the raw format bytes (if any).
// from_utf8_lossy emits one U+FFFD per ill-formed subsequence (WTF-8 spec),
// so we can match them 1:1 when restoring original bytes in the output.
let raw_chunks: Option<Vec<&[u8]>> = settings.format_raw.as_ref().map(|raw| {
let mut chunks = Vec::new();
let mut i = 0;
while i < raw.len() {
match std::str::from_utf8(&raw[i..]) {
Ok(_) => break,
Err(e) => {
i += e.valid_up_to();
let len = e.error_len().unwrap_or(raw.len() - i);
chunks.push(&raw[i..i + len]);
i += len;
}
}
}
chunks
});

// Format all the dates
let config = Config::new().custom(PosixCustom::new()).lenient(true);
for date in dates {
Expand All @@ -562,9 +596,34 @@
&config,
skip_localization,
) {
Ok(s) => writeln!(stdout, "{s}").map_err(|e| {
USimpleError::new(1, translate!("date-error-write", "error" => e))
})?,
Ok(s) => {
if let Some(ref chunks) = raw_chunks {
// Restore non-UTF-8 bytes that were replaced with
// U+FFFD by the lossy conversion. strftime passes
// U+FFFD through unchanged. Each FFFD in the output

Check failure on line 603 in src/uu/date/src/date.rs

View workflow job for this annotation

GitHub Actions / Style/spelling (ubuntu-latest, feat_os_unix)

ERROR: `cspell`: Unknown word 'FFFD' (file:'src/uu/date/src/date.rs', line:603)
// corresponds to the next ill-formed byte subsequence
// from the original format string.
let mut chunk_iter = chunks.iter();
let mut out = Vec::with_capacity(s.len());
for ch in s.chars() {
if ch == UNICODE_REPLACEMENT {
if let Some(chunk) = chunk_iter.next() {
out.extend_from_slice(chunk);
}
} else {
let mut buf = [0u8; 4];
out.extend_from_slice(ch.encode_utf8(&mut buf).as_bytes());
}
}
out.push(b'\n');
stdout.write_all(&out)
} else {
writeln!(stdout, "{s}")
}
.map_err(|e| {
USimpleError::new(1, translate!("date-error-write", "error" => e))
})?;
}
Err(e) => {
let _ = stdout.flush();
return Err(USimpleError::new(
Expand Down Expand Up @@ -604,7 +663,7 @@
.value_name("STRING")
.allow_hyphen_values(true)
.overrides_with(OPT_DATE)
.value_parser(clap::value_parser!(std::ffi::OsString))
.value_parser(clap::value_parser!(OsString))
.help(translate!("date-help-date")),
)
.arg(
Expand Down Expand Up @@ -699,7 +758,11 @@
.help(translate!("date-help-universal"))
.action(ArgAction::SetTrue),
)
.arg(Arg::new(OPT_FORMAT).num_args(0..))
.arg(
Arg::new(OPT_FORMAT)
.num_args(0..)
.value_parser(clap::value_parser!(OsString)),
)
}

fn format_date_with_locale_aware_months(
Expand Down
4 changes: 2 additions & 2 deletions src/uu/date/src/locale.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,12 @@ cfg_langinfo! {
return None;
}

let format = CStr::from_ptr(d_t_fmt_ptr).to_str().ok()?;
let format = CStr::from_ptr(d_t_fmt_ptr).to_string_lossy();
if format.is_empty() {
return None;
}

Some(format.to_string())
Some(format.into_owned())
}
}

Expand Down
31 changes: 31 additions & 0 deletions tests/by-util/test_date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2161,6 +2161,37 @@ fn test_locale_day_names() {
}
}

/// Test that non-UTF-8 format bytes are preserved in output (not replaced
/// with U+FFFD), matching GNU behavior.
#[test]
#[cfg(unix)]
fn test_date_non_utf8_format_preserved() {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;

// Simple case: \xFF should pass through as-is
let fmt_bytes: &[u8] = b"+\xff%m";
let result = new_ucmd!()
.arg("-d")
.arg("2025-10-11T13:00")
.arg(OsStr::from_bytes(fmt_bytes))
.succeeds();
let stdout_bytes = result.stdout();
assert_eq!(stdout_bytes[0], 0xFF);
assert_eq!(&stdout_bytes[1..3], b"10");

// GB18030-encoded "年" (0xC4EA) + "%m" + "月" (0xD4C2) + "%d" + "日" (0xC8D5)
let fmt_bytes: &[u8] = b"+\xc4\xea%m\xd4\xc2%d\xc8\xd5";
let result = new_ucmd!()
.arg("-d")
.arg("2025-10-11T13:00")
.arg(OsStr::from_bytes(fmt_bytes))
.succeeds();
let stdout_bytes = result.stdout();
// \xC4\xEA + "10" + \xD4\xC2 + "11" + \xC8\xD5 + "\n"
assert_eq!(stdout_bytes, b"\xc4\xea10\xd4\xc211\xc8\xd5\n");
}

#[test]
fn test_percent_percent_not_replaced() {
let cases = [
Expand Down
Loading