Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xsv: add transpose command #146

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/cmd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ pub mod sort;
pub mod split;
pub mod stats;
pub mod table;
pub mod transpose;
98 changes: 98 additions & 0 deletions src/cmd/transpose.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
use config::{Config, Delimiter};
use csv::ByteRecord;
use std::str;
use util;
use CliResult;

// Usage/help text for the `transpose` subcommand. `run` hands this string to
// `util::get_args` together with argv to populate `Args`.
//
// Note: `run` only takes the multipass path when `--multipass` is given AND the
// input is not stdin; otherwise it uses the in-memory transpose.
static USAGE: &'static str = "
Transpose the rows/columns of CSV data.

Note that by default this reads all of the CSV data into memory,
unless --multipass is given.

Usage:
xsv transpose [options] [<input>]

transpose options:
-m, --multipass Process the transpose by making multiple
passes over the dataset. Useful for really
big datasets. Consumes memory relative to
the number of rows.
Note that in general it is faster to
process the transpose in memory.

Common options:
-h, --help Display this message
-o, --output <file> Write output to <file> instead of stdout.
-d, --delimiter <arg> The field delimiter for reading CSV data.
Must be a single character. (default: ,)
";

/// Parsed command-line arguments for `xsv transpose`.
///
/// Built in `run` by `util::get_args(USAGE, argv)`. The field names presumably
/// mirror the positional argument and flags declared in `USAGE` -- confirm
/// against `util::get_args` before renaming any of them.
#[derive(Deserialize)]
struct Args {
/// Input path. `run` treats both `None` and `Some("-")` as "read from stdin".
arg_input: Option<String>,
/// Output destination; passed to `Config::new` when building the writer.
flag_output: Option<String>,
/// Field delimiter applied to the reader configuration.
flag_delimiter: Option<Delimiter>,
/// Requests the multipass strategy; only honored when the input is not stdin.
flag_multipass: bool,
}

/// Entry point for `xsv transpose`.
///
/// Parses `argv` against `USAGE` and picks a transpose strategy: the multipass
/// strategy is used only when `--multipass` was given and the input is a named
/// file; in every other case the whole input is transposed in memory.
pub fn run(argv: &[&str]) -> CliResult<()> {
    let args: Args = util::get_args(USAGE, argv)?;

    // A missing input path, or the path "-", means the data arrives on stdin.
    let reads_stdin = args.arg_input.as_ref().map_or(true, |path| path == "-");

    if reads_stdin || !args.flag_multipass {
        return args.in_memory_transpose();
    }
    args.multipass_transpose()
}

impl Args {
fn in_memory_transpose(&self) -> CliResult<()> {
let mut rdr = self.rconfig().reader()?;
let mut wtr = self.wconfig().writer()?;
let nrows = rdr.byte_headers()?.len();

let all = rdr.byte_records().collect::<Result<Vec<_>, _>>()?;
for i in 0..nrows {
let mut record = ByteRecord::new();

for row in all.iter() {
record.push_field(&row[i]);
}
wtr.write_byte_record(&record)?;
}
Ok(wtr.flush()?)
}

fn multipass_transpose(&self) -> CliResult<()> {
let mut wtr = self.wconfig().writer()?;
let nrows = self.rconfig().reader()?.byte_headers()?.len();

for i in 0..nrows {
let mut rdr = self.rconfig().reader()?;

let mut record = ByteRecord::new();
for row in rdr.byte_records() {
record.push_field(&row?[i]);
}
wtr.write_byte_record(&record)?;
}
Ok(wtr.flush()?)
}

fn wconfig(&self) -> Config {
Config::new(&self.flag_output)
}

fn rconfig(&self) -> Config {
Config::new(&self.arg_input)
.delimiter(self.flag_delimiter)
.no_headers(true)
}
}
3 changes: 3 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ macro_rules! command_list {
split Split CSV data into many files
stats Compute basic statistics
table Align CSV data into columns
transpose Transpose rows/columns of CSV data
"
)
}
Expand Down Expand Up @@ -160,6 +161,7 @@ enum Command {
Split,
Stats,
Table,
Transpose,
}

impl Command {
Expand Down Expand Up @@ -189,6 +191,7 @@ impl Command {
Command::Split => cmd::split::run(argv),
Command::Stats => cmd::stats::run(argv),
Command::Table => cmd::table::run(argv),
Command::Transpose => cmd::transpose::run(argv),
}
}
}
Expand Down
44 changes: 44 additions & 0 deletions tests/test_transpose.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
use workdir::Workdir;

use {CsvData, qcheck};

/// Property: running `xsv transpose` on `rows` yields the matrix transpose.
///
/// Writes `rows` to `in.csv` in a work directory called `name`, runs the
/// command (adding `--multipass` when `streaming` is set), and compares stdout
/// with a transpose computed directly from `rows`.
fn prop_transpose(name: &str, rows: CsvData, streaming: bool) -> bool {
    let wrk = Workdir::new(name);
    wrk.create("in.csv", rows.clone());

    let mut cmd = wrk.command("transpose");
    cmd.arg("in.csv");
    if streaming {
        cmd.arg("--multipass");
    }

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    // The column count comes from the first row; empty input has no columns.
    let height = rows.len();
    let width = if rows.is_empty() { 0 } else { rows[0].len() };

    let expected: Vec<Vec<String>> = (0..width)
        .map(|col| (0..height).map(|row| rows[row][col].to_owned()).collect())
        .collect();

    rassert_eq!(got, expected)
}

/// Checks the transpose property with the default (non-multipass) invocation.
#[test]
fn prop_transpose_in_memory() {
    fn in_memory_prop(rows: CsvData) -> bool {
        let streaming = false;
        prop_transpose("prop_transpose_in_memory", rows, streaming)
    }
    qcheck(in_memory_prop as fn(CsvData) -> bool);
}

/// Checks the transpose property when `--multipass` is passed.
#[test]
fn prop_transpose_multipass() {
    fn multipass_prop(rows: CsvData) -> bool {
        let streaming = true;
        prop_transpose("prop_transpose_multipass", rows, streaming)
    }
    qcheck(multipass_prop as fn(CsvData) -> bool);
}
1 change: 1 addition & 0 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ mod test_sort;
mod test_split;
mod test_stats;
mod test_table;
mod test_transpose;

fn qcheck<T: Testable>(p: T) {
QuickCheck::new().gen(StdGen::new(thread_rng(), 5)).quickcheck(p);
Expand Down