Skip to content

Commit

Permalink
uucore: unify 'os_string_as_result_bytes' implementations
Browse files Browse the repository at this point in the history
  • Loading branch information
BenWiederhake committed Jul 16, 2024
1 parent 68f8619 commit 3c949a7
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 48 deletions.
35 changes: 9 additions & 26 deletions src/uu/cksum/src/cksum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ use std::ffi::{OsStr, OsString};
use std::fs::File;
use std::io::{self, stdin, stdout, BufReader, Read, Write};
use std::iter;
#[cfg(unix)]
use std::os::unix::ffi::OsStrExt;
use std::path::Path;
use uucore::checksum::{
calculate_blake2b_length, detect_algo, digest_reader, perform_checksum_validation,
Expand All @@ -21,7 +19,7 @@ use uucore::checksum::{
use uucore::{
encoding,
error::{FromIo, UResult, USimpleError},
format_usage, help_about, help_section, help_usage, show,
format_usage, help_about, help_section, help_usage, os_string_as_bytes, show,
sum::{div_ceil, Digest},
};

Expand All @@ -46,28 +44,6 @@ struct Options {
asterisk: bool, // if we display an asterisk or not (--binary/--text)
}

// Exposes the raw bytes behind an OS string so that values which may not be
// valid UTF-8 (delimiters, file names) can be handled without lossy conversion.
// Unix targets hand the underlying bytes back directly and cannot fail.
// Every other target (i.e. Windows) only succeeds when the value is valid UTF-8
// and reports a usage error otherwise.
// FIXME: This is stolen from cut.rs, and shouldn't be duplicated!
fn os_string_as_bytes(os_str: &OsStr) -> UResult<&[u8]> {
    #[cfg(unix)]
    {
        Ok(os_str.as_bytes())
    }

    #[cfg(not(unix))]
    {
        let utf8_text = os_str.to_str().ok_or_else(|| {
            uucore::error::UUsageError::new(
                1,
                "invalid UTF-8 was detected in one or more arguments",
            )
        })?;
        Ok(utf8_text.as_bytes())
    }
}

/// Calculate checksum
///
/// # Arguments
Expand Down Expand Up @@ -210,7 +186,14 @@ where
if print_filename {
// The filename might not be valid UTF-8, and filename.display() would mangle the names.
// Therefore, emit the bytes directly to stdout, without any attempt at encoding them.
let _dropped_result = stdout().write_all(os_string_as_bytes(filename.as_os_str())?);
let _dropped_result = stdout().write_all(
os_string_as_bytes(&filename.as_os_str().to_os_string()).ok_or_else(|| {
uucore::error::UUsageError::new(

Check warning on line 191 in src/uu/cksum/src/cksum.rs

View check run for this annotation

Codecov / codecov/patch

src/uu/cksum/src/cksum.rs#L191

Added line #L191 was not covered by tests
1,
"invalid UTF-8 was detected in one or more arguments",
)
})?,

Check warning on line 195 in src/uu/cksum/src/cksum.rs

View check run for this annotation

Codecov / codecov/patch

src/uu/cksum/src/cksum.rs#L195

Added line #L195 was not covered by tests
);
}
println!("{}", after_filename);
}
Expand Down
32 changes: 10 additions & 22 deletions src/uu/cut/src/cut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Comm
use std::ffi::OsString;
use std::fs::File;
use std::io::{stdin, stdout, BufReader, BufWriter, IsTerminal, Read, Write};
#[cfg(unix)]
use std::os::unix::ffi::OsStrExt;
use std::path::Path;
use uucore::display::Quotable;
use uucore::error::{set_exit_code, FromIo, UResult, USimpleError};
Expand All @@ -20,7 +18,9 @@ use uucore::line_ending::LineEnding;
use self::searcher::Searcher;
use matcher::{ExactMatcher, Matcher, WhitespaceMatcher};
use uucore::ranges::Range;
use uucore::{format_usage, help_about, help_section, help_usage, show_error, show_if_err};
use uucore::{
format_usage, help_about, help_section, help_usage, os_string_as_bytes, show_error, show_if_err,
};

mod matcher;
mod searcher;
Expand Down Expand Up @@ -59,7 +59,7 @@ impl Default for Delimiter<'_> {

// Builds a `Delimiter::Slice` that borrows the bytes of the given `OsString`.
impl<'a> From<&'a OsString> for Delimiter<'a> {
// The byte conversion result is unwrapped, so this conversion panics whenever the
// helper reports a failure. `From` has no way to surface an error to the caller.
// NOTE(review): the two `Self::Slice` lines below look like the before/after sides
// of the rename to `os_string_as_result_bytes` — confirm which one the file keeps.
fn from(s: &'a OsString) -> Self {
Self::Slice(os_string_as_bytes(s).unwrap())
Self::Slice(os_string_as_result_bytes(s).unwrap())
}
}

Expand Down Expand Up @@ -350,22 +350,10 @@ fn cut_files(mut filenames: Vec<String>, mode: &Mode) {
// Helper function for processing delimiter values (which could be non UTF-8)
// It converts OsString to &[u8] for unix targets only
// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8
fn os_string_as_bytes(os_string: &OsString) -> UResult<&[u8]> {
#[cfg(unix)]
let bytes = os_string.as_bytes();

#[cfg(not(unix))]
let bytes = os_string
.to_str()
.ok_or_else(|| {
uucore::error::UUsageError::new(
1,
"invalid UTF-8 was detected in one or more arguments",
)
})?
.as_bytes();

Ok(bytes)
fn os_string_as_result_bytes(os_string: &OsString) -> UResult<&[u8]> {
os_string_as_bytes(os_string).ok_or_else(|| {
uucore::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
})

Check warning on line 356 in src/uu/cut/src/cut.rs

View check run for this annotation

Codecov / codecov/patch

src/uu/cut/src/cut.rs#L355-L356

Added lines #L355 - L356 were not covered by tests
}

// Get delimiter and output delimiter from `-d`/`--delimiter` and `--output-delimiter` options respectively
Expand Down Expand Up @@ -395,7 +383,7 @@ fn get_delimiters(
} else {
// For delimiter `-d` option value - allow both UTF-8 (possibly multi-byte) characters
// and Non UTF-8 (and not ASCII) single byte "characters", like `b"\xAD"` to align with GNU behavior
let bytes = os_string_as_bytes(os_string)?;
let bytes = os_string_as_result_bytes(os_string)?;
if os_string.to_str().is_some_and(|s| s.chars().count() > 1)
|| os_string.to_str().is_none() && bytes.len() > 1
{
Expand All @@ -422,7 +410,7 @@ fn get_delimiters(
if os_string.is_empty() || os_string == "''" {
b"\0"
} else {
os_string_as_bytes(os_string).unwrap()
os_string_as_result_bytes(os_string).unwrap()
}
});
Ok((delim, out_delim))
Expand Down
18 changes: 18 additions & 0 deletions src/uucore/src/lib/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ pub use crate::features::fsxattr;
//## core functions

use std::ffi::OsString;
#[cfg(unix)]
use std::os::unix::ffi::OsStrExt;
use std::sync::atomic::Ordering;

use once_cell::sync::Lazy;
Expand Down Expand Up @@ -128,6 +130,22 @@ macro_rules! bin {
};
}

/// Borrows the raw bytes of an OS string, e.g. a delimiter value that may be non-UTF-8.
///
/// On unix targets an OS string is exposed as its underlying byte sequence, so this
/// variant always returns `Some`.
///
/// The parameter is an `&OsStr` so that callers holding a borrowed OS string (for
/// example from `Path::as_os_str`) do not have to allocate an `OsString` first;
/// a `&OsString` argument still works through deref coercion.
#[cfg(unix)]
pub fn os_string_as_bytes(os_string: &std::ffi::OsStr) -> Option<&[u8]> {
    Some(os_string.as_bytes())
}

/// Borrows the raw bytes of an OS string, e.g. a delimiter value that may be non-UTF-8.
///
/// On non-unix targets (i.e. Windows) the bytes are only available when the value is
/// valid UTF-8; otherwise this returns `None` and the caller decides how to report it.
///
/// The parameter is an `&OsStr` so that callers holding a borrowed OS string (for
/// example from `Path::as_os_str`) do not have to allocate an `OsString` first;
/// a `&OsString` argument still works through deref coercion.
#[cfg(not(unix))]
pub fn os_string_as_bytes(os_string: &std::ffi::OsStr) -> Option<&[u8]> {
    os_string.to_str().map(str::as_bytes)
}

/// Generate the usage string for clap.
///
/// This function does two things. It indents all but the first line to align
Expand Down

0 comments on commit 3c949a7

Please sign in to comment.