Skip to content

Commit 4a03357

Browse files
authored
feat: add diff command (#54)
1 parent c6bc6d6 commit 4a03357

File tree

12 files changed

+351
-4
lines changed

12 files changed

+351
-4
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ uuid = { version = "1.18.1", features = ["v4"] }
4141
memchr = "2"
4242
globset = "0.4"
4343
regex = "1"
44+
similar = "2"
4445

4546
[dev-dependencies]
4647
assert_cmd = "2.0.17"

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,12 @@ storify grep -i "pattern" path/to/file # case-insensitive
140140
storify grep -n "pattern" path/to/file # show line numbers
141141
storify grep -R "pattern" path/ # recursive
142142

143+
# Diff two files (unified diff)
144+
storify diff left/file right/file # unified diff with 3 lines context
145+
storify diff -U 1 left/file right/file # set context lines
146+
storify diff -w left/file right/file # ignore trailing whitespace
147+
storify diff --size-limit 1 -f left right     # set a 1 MB total size limit; -f bypasses the limit
148+
143149
# Find objects by name/regex/type
144150
storify find path/ --name '**/*.log' # glob on full path
145151
storify find path/ --regex '.*\\.(csv|parquet)$' # regex on full path
@@ -186,6 +192,7 @@ storify stat path/to/file --raw # raw key=value format
186192
| `tree` | View directory structure as a tree | `-d <DEPTH>`, `--dirs-only` |
187193
| `du` | Show disk usage | `-s` (summary) |
188194
| `stat` | Show object metadata | `--json`, `--raw` |
195+
| `diff` | Compare two files (unified diff) | `-U <N>` (context), `-w` (ignore-space), `--size-limit <MB>`, `-f` (force) |
189196

190197
### Config Commands
191198

src/cli/entry.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ use super::{
77
context::CliContext,
88
prompts::Prompt,
99
storage::{
10-
self, CatArgs, CpArgs, DuArgs, GetArgs, GrepArgs, HeadArgs, LsArgs, MkdirArgs, MvArgs,
11-
PutArgs, RmArgs, StatArgs, TailArgs, TreeArgs,
10+
self, CatArgs, CpArgs, DiffArgs, DuArgs, GetArgs, GrepArgs, HeadArgs, LsArgs, MkdirArgs,
11+
MvArgs, PutArgs, RmArgs, StatArgs, TailArgs, TreeArgs,
1212
},
1313
};
1414

@@ -87,6 +87,8 @@ pub enum Command {
8787
Find(super::storage::FindArgs),
8888
/// View directory structure as a tree
8989
Tree(TreeArgs),
90+
/// Diff two files and print unified diff
91+
Diff(DiffArgs),
9092
}
9193

9294
#[derive(Subcommand, Debug, Clone)]

src/cli/storage.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,33 @@ pub struct TreeArgs {
250250
pub dirs_only: bool,
251251
}
252252

253+
#[derive(ClapArgs, Debug, Clone)]
254+
pub struct DiffArgs {
255+
/// The left file path
256+
#[arg(value_name = "LEFT", value_parser = parse_validated_path)]
257+
pub left: String,
258+
259+
/// The right file path
260+
#[arg(value_name = "RIGHT", value_parser = parse_validated_path)]
261+
pub right: String,
262+
263+
/// Number of context lines to show around changes
264+
#[arg(short = 'U', long = "context", default_value_t = 3)]
265+
pub context: usize,
266+
267+
/// Ignore whitespace differences
268+
#[arg(short = 'w', long = "ignore-space")]
269+
pub ignore_space: bool,
270+
271+
/// Limit total size of compared files in MB (0 disables)
272+
#[arg(short = 's', long = "size-limit", default_value_t = 10)]
273+
pub size_limit_mb: u64,
274+
275+
/// Bypass size-limit check
276+
#[arg(short = 'f', long)]
277+
pub force: bool,
278+
}
279+
253280
pub async fn execute(command: &Command, ctx: &CliContext) -> Result<()> {
254281
let config = ctx.storage_config()?;
255282
let client = StorageClient::new(config.clone()).await?;
@@ -379,6 +406,18 @@ pub async fn execute(command: &Command, ctx: &CliContext) -> Result<()> {
379406
.print_tree(&tree_args.path, tree_args.depth, tree_args.dirs_only)
380407
.await?;
381408
}
409+
Command::Diff(diff_args) => {
410+
client
411+
.diff_files(
412+
&diff_args.left,
413+
&diff_args.right,
414+
diff_args.context,
415+
diff_args.ignore_space,
416+
diff_args.size_limit_mb,
417+
diff_args.force,
418+
)
419+
.await?;
420+
}
382421
Command::Config(_) => {
383422
unreachable!("Config commands are handled separately")
384423
}

src/error.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,13 @@ pub enum Error {
9393
#[snafu(display("Failed to find under '{path}': {source}"))]
9494
FindFailed { path: String, source: Box<Error> },
9595

96+
#[snafu(display("Failed to diff '{src_path}' and '{dest_path}': {source}"))]
97+
DiffFailed {
98+
src_path: String,
99+
dest_path: String,
100+
source: Box<Error>,
101+
},
102+
96103
#[snafu(display("Invalid argument: {message}"))]
97104
InvalidArgument { message: String },
98105

src/storage.rs

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ pub use self::utils::OutputFormat;
1111
use self::operations::cat::OpenDalFileReader;
1212
use self::operations::copy::OpenDalCopier;
1313
use self::operations::delete::OpenDalDeleter;
14+
use self::operations::diff::OpenDalDiffer;
1415
use self::operations::download::OpenDalDownloader;
1516
use self::operations::find::OpenDalFinder;
1617
use self::operations::grep::OpenDalGreper;
@@ -23,8 +24,8 @@ use self::operations::tree::OpenDalTreer;
2324
use self::operations::upload::OpenDalUploader;
2425
use self::operations::usage::OpenDalUsageCalculator;
2526
use self::operations::{
26-
Cater, Copier, Deleter, Downloader, Greper, Header, Lister, Mkdirer, Mover, Stater, Tailer,
27-
Treer, Uploader, UsageCalculator,
27+
Cater, Copier, Deleter, Differ, Downloader, Greper, Header, Lister, Mkdirer, Mover, Stater,
28+
Tailer, Treer, Uploader, UsageCalculator,
2829
};
2930
use crate::storage::utils::error::IntoStorifyError;
3031
use crate::wrap_err;
@@ -673,4 +674,80 @@ impl StorageClient {
673674
},
674675
})
675676
}
677+
678+
pub async fn diff_files(
679+
&self,
680+
left: &str,
681+
right: &str,
682+
context: usize,
683+
ignore_space: bool,
684+
size_limit_mb: u64,
685+
force: bool,
686+
) -> Result<()> {
687+
// Validate both paths are files
688+
let left_meta = self.operator.stat(left).await.map_err(|e| {
689+
if e.kind() == opendal::ErrorKind::NotFound {
690+
Error::PathNotFound {
691+
path: std::path::PathBuf::from(left),
692+
}
693+
} else {
694+
Error::InvalidArgument {
695+
message: format!("Failed to stat '{}': {}", left, e),
696+
}
697+
}
698+
})?;
699+
let right_meta = self.operator.stat(right).await.map_err(|e| {
700+
if e.kind() == opendal::ErrorKind::NotFound {
701+
Error::PathNotFound {
702+
path: std::path::PathBuf::from(right),
703+
}
704+
} else {
705+
Error::InvalidArgument {
706+
message: format!("Failed to stat '{}': {}", right, e),
707+
}
708+
}
709+
})?;
710+
711+
if !left_meta.mode().is_file() || !right_meta.mode().is_file() {
712+
return Err(Error::InvalidArgument {
713+
message: "diff only supports files; directories are not supported".to_string(),
714+
});
715+
}
716+
717+
// Short-circuit: identical paths (after existence/type validation)
718+
if left == right {
719+
return Ok(());
720+
}
721+
722+
// Short-circuit when ETag and size match (content-identical for many providers)
723+
if left_meta.content_length() == right_meta.content_length() {
724+
let le = left_meta.etag();
725+
let re = right_meta.etag();
726+
match (le, re) {
727+
(Some(le), Some(re)) if le == re => return Ok(()),
728+
_ => {}
729+
}
730+
}
731+
732+
// Size check (sum of both files)
733+
let total_mb =
734+
(left_meta.content_length() + right_meta.content_length()).div_ceil(1024 * 1024);
735+
if size_limit_mb > 0 && total_mb > size_limit_mb && !force {
736+
return Err(Error::InvalidArgument {
737+
message: format!(
738+
"Files too large ({}MB > {}MB). Use --force to override",
739+
total_mb, size_limit_mb
740+
),
741+
});
742+
}
743+
744+
let differ = OpenDalDiffer::new(self.operator.clone());
745+
wrap_err!(
746+
differ.diff(left, right, context, ignore_space).await,
747+
DiffFailed {
748+
src_path: left.to_string(),
749+
dest_path: right.to_string()
750+
}
751+
)
752+
}
676753
}

src/storage/operations/diff.rs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
use crate::error::{Error, Result};
2+
use opendal::Operator;
3+
use similar::TextDiff;
4+
use std::path::PathBuf;
5+
6+
/// Trait for diffing two files and printing a unified diff
7+
pub trait Differ {
8+
/// Diff two files and print the unified diff to stdout
9+
async fn diff(&self, left: &str, right: &str, context: usize, ignore_space: bool)
10+
-> Result<()>;
11+
}
12+
13+
pub struct OpenDalDiffer {
14+
operator: Operator,
15+
}
16+
17+
impl OpenDalDiffer {
18+
pub fn new(operator: Operator) -> Self {
19+
Self { operator }
20+
}
21+
22+
async fn read_text(&self, path: &str, ignore_space: bool) -> Result<String> {
23+
let data = self.operator.read(path).await.map_err(|e| {
24+
if e.kind() == opendal::ErrorKind::NotFound {
25+
Error::PathNotFound {
26+
path: PathBuf::from(path),
27+
}
28+
} else {
29+
Error::DiffFailed {
30+
src_path: path.to_string(),
31+
dest_path: path.to_string(),
32+
source: Box::new(e.into()),
33+
}
34+
}
35+
})?;
36+
37+
let mut s = String::from_utf8(data.to_vec()).map_err(|_| Error::InvalidArgument {
38+
message: format!("Non-UTF8 or binary file not supported: {}", path),
39+
})?;
40+
41+
if ignore_space {
42+
// Trim trailing spaces/tabs for each line with minimal allocations.
43+
// Use an output String with reserved capacity.
44+
let mut out = String::with_capacity(s.len());
45+
let mut lines = s.split_inclusive('\n');
46+
for line in lines.by_ref() {
47+
if let Some(stripped) = line.strip_suffix('\n') {
48+
out.push_str(stripped.trim_end_matches([' ', '\t']));
49+
out.push('\n');
50+
} else {
51+
out.push_str(line.trim_end_matches([' ', '\t']));
52+
}
53+
}
54+
s = out;
55+
}
56+
Ok(s)
57+
}
58+
}
59+
60+
impl Differ for OpenDalDiffer {
61+
async fn diff(
62+
&self,
63+
left: &str,
64+
right: &str,
65+
context: usize,
66+
ignore_space: bool,
67+
) -> Result<()> {
68+
let left_text = self.read_text(left, ignore_space).await?;
69+
let right_text = self.read_text(right, ignore_space).await?;
70+
71+
// Produce unified diff via `similar`
72+
let diff = TextDiff::from_lines(&left_text, &right_text);
73+
let unified = diff
74+
.unified_diff()
75+
.context_radius(context)
76+
.header(left, right)
77+
.to_string();
78+
79+
// If no differences, print nothing
80+
if unified.trim().is_empty() {
81+
return Ok(());
82+
}
83+
84+
println!("{}", unified);
85+
Ok(())
86+
}
87+
}

src/storage/operations/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
pub mod cat;
33
pub mod copy;
44
pub mod delete;
5+
pub mod diff;
56
pub mod download;
67
pub mod find;
78
pub mod grep;
@@ -19,6 +20,7 @@ pub mod usage;
1920
pub use cat::Cater;
2021
pub use copy::Copier;
2122
pub use delete::Deleter;
23+
pub use diff::Differ;
2224
pub use download::Downloader;
2325
pub use grep::Greper;
2426
pub use head::Header;

tests/behavior/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ fn main() -> Result<()> {
2929
operations::usage::tests(&client, &mut tests);
3030
operations::stat::tests(&client, &mut tests);
3131
operations::tree::tests(&client, &mut tests);
32+
operations::diff::tests(&client, &mut tests);
3233

3334
let _ = tracing_subscriber::fmt()
3435
.pretty()

0 commit comments

Comments
 (0)