Skip to content

Commit 3bd3259

Browse files
authored
support named capture groups
1 parent 0dc779e commit 3bd3259

File tree

9 files changed

+147
-60
lines changed

9 files changed

+147
-60
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ license = "MIT OR Apache-2.0"
88
name = "nomino"
99
readme = "README.md"
1010
repository = "https://github.com/yaa110/nomino"
11-
version = "1.5.2"
11+
version = "1.6.0"
1212

1313
[dependencies]
1414
anyhow = "1.0"

README.md

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,37 @@ Options:
5656
-V, --version Print version
5757
-w, --overwrite Overwrites output files, otherwise, a '_' is prepended to filename
5858

59-
OUTPUT pattern accepts placeholders that have the format of '{I:P}' where 'I' is the index of captured group and 'P' is the padding of digits with `0`. Please refer to https://github.com/yaa110/nomino for more information.
59+
OUTPUT pattern accepts placeholders that have the format of '{G:P}' where 'G' is the captured group and 'P' is the padding of digits with `0`. Please refer to https://github.com/yaa110/nomino for more information.
6060
```
6161
62+
### Placeholders
63+
64+
1. Placeholders have the format of `{G:P}` where `G` is the index or name of the captured group and `P` is the padding of digits with `0`. For example, `{2:3}` means the captured group at index `2` with a padding of 3, i.e. `1` is formatted as `001`.
65+
1. Indices start from `0`, and `{0}` means the filename.
66+
1. The capture group `G` can be omitted, i.e. `{}` or `{:3}`. In this case an auto-incrementing index starting from `1` is used. For example, `{} {}` equals `{1} {2}`.
67+
1. The `{` and `}` characters can be escaped using the `\` character, i.e. `\\{` and `\\}` on the command line.
68+
1. Padding is only used for positive numbers, e.g. the formatted result of `{:3}` for `1` is `001`, for `-1` is `-1` and for `a` is `a`.
69+
1. If the `--sort` option is used, the first index `{0}` is the filename and the second index `{1}` or the first occurrence of `{}` is the enumerator index.
70+
71+
### Capture Groups
72+
73+
The accepted regex pattern syntax is that of [Rust Regex](https://docs.rs/regex/latest/regex/).
74+
75+
Consider this example:
76+
77+
```regex
78+
(?<first>\w)(\w)\w(?<last>\w)
79+
```
80+
81+
This regular expression defines 4 capture groups:
82+
83+
- The group at index `0` corresponds to the overall match. It is always present in every match and never has a name: `{0}`.
84+
- The group at index `1` with name `first` corresponds to the first letter: `{1}`, `{first}` or the first occurrence of `{}`.
85+
- The group at index `2` with no name corresponds to the second letter: `{2}` or the second occurrence of `{}`.
86+
- The group at index `3` with name `last` corresponds to the fourth and last letter: `{3}`, `{last}` or the third occurrence of `{}`.
87+
88+
`(?<first>\w)` and `(?<last>\w)` are named capture groups.
89+
6290
### Windows
6391
6492
On Windows, `\\` must be used to separate path components in file paths because `\` is a special character in regular expressions.
@@ -73,23 +101,6 @@ On Windows, `\\` must be used to separate path components in file paths because
73101
}
74102
```
75103
76-
## Output
77-
78-
The output is necessary when using `--sort` or `--regex` options.
79-
80-
### Regex
81-
82-
The accepted syntax of regex pattern is [Rust Regex](https://docs.rs/regex/latest/regex/).
83-
84-
### Placeholders
85-
86-
1. Placeholders have the format of `{I:P}` where `I` is the index of captured group and `P` is the padding of digits with `0`. For example, `{2:3}` means the third captured group with a padding of 3, i.e. `1` is formatted as `001`.
87-
1. Indices start from `0`, and `{0}` means the filename.
88-
1. The index `I` could be dropped, i.e. `{}` or `{:3}`. In this case an auto incremental index is used which starts from `1`. For example, `{} {}` equals `{1} {2}`.
89-
1. `{` and `}` characters could be escaped using `\` character, i.e. `\\{` and `\\}` in cli.
90-
1. Padding is only used for positive numbers, e.g. the formatted result of `{:3}` for `1` is `001`, for `-1` is `-1` and for `a` is `a`.
91-
1. If `--sort` option is used, the first index `{0}` is the filename and the second index `{1}` or first occurrence of `{}` is the enumerator index.
92-
93104
## Wiki
94105
95106
- **[Examples](https://github.com/yaa110/nomino/wiki/Examples)** learn nomino by examples

src/cli.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ use std::path::PathBuf;
66
about,
77
author,
88
version,
9-
after_help = "OUTPUT pattern accepts placeholders that have the format of '{I:P}' where 'I' \
10-
is the index of captured group and 'P' is the padding of digits with `0`. Please refer to \
9+
after_help = "OUTPUT pattern accepts placeholders that have the format of '{G:P}' where 'G' \
10+
is the captured group and 'P' is the padding of digits with `0`. Please refer to \
1111
https://github.com/yaa110/nomino for more information.",
1212
next_display_order = None,
1313
)]

src/input/formatter.rs

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1+
use super::{provider::Capture, Provider};
12
use crate::errors::FormatError;
23

34
#[derive(Debug, PartialEq)]
45
enum Segment {
56
PlaceHolder {
67
padding: Option<usize>,
7-
index: usize,
8+
capture: Capture,
89
},
910
String(String),
1011
}
@@ -16,12 +17,12 @@ impl Formatter {
1617
pub fn new(format: &str) -> Result<Self, FormatError> {
1718
let mut segments = Vec::new();
1819
let mut should_escape = false;
19-
let mut is_parsing_index = false;
20+
let mut is_parsing_capture = false;
2021
let mut is_parsing_padding = false;
2122
let mut current_segment = String::new();
22-
let mut current_index: usize = 0;
23+
let mut current_capture = Capture::Index(0);
2324
let mut current_padding: Option<usize> = None;
24-
let mut incremental_index = 1;
25+
let mut incremental_index: usize = 1;
2526
for (i, ch) in format.chars().enumerate() {
2627
if !should_escape && ch == '\\' {
2728
should_escape = true;
@@ -31,29 +32,26 @@ impl Formatter {
3132
return Err(FormatError::InvalidEscapeCharacter(i, ch));
3233
}
3334
match ch {
34-
'{' if !should_escape && !is_parsing_index && !is_parsing_padding => {
35+
'{' if !should_escape && !is_parsing_capture && !is_parsing_padding => {
3536
if !current_segment.is_empty() {
3637
segments.push(Segment::String(current_segment));
3738
current_segment = String::new();
3839
}
39-
is_parsing_index = true;
40+
is_parsing_capture = true;
4041
}
4142
'}' if !should_escape => {
42-
if !is_parsing_index && !is_parsing_padding {
43+
if !is_parsing_capture && !is_parsing_padding {
4344
return Err(FormatError::UnopenedPlaceholder);
4445
}
4546
if current_segment.is_empty() {
46-
if is_parsing_index {
47-
current_index = incremental_index;
47+
if is_parsing_capture {
48+
current_capture = Capture::Index(incremental_index);
4849
incremental_index += 1;
4950
} else if is_parsing_padding {
5051
current_padding = None;
5152
}
52-
} else if is_parsing_index {
53-
current_index = current_segment
54-
.as_str()
55-
.parse()
56-
.map_err(|_| FormatError::InvalidIndex(current_segment.clone()))?;
53+
} else if is_parsing_capture {
54+
current_capture = current_segment.as_str().into();
5755
current_padding = None;
5856
} else if is_parsing_padding {
5957
current_padding =
@@ -63,25 +61,22 @@ impl Formatter {
6361
}
6462
segments.push(Segment::PlaceHolder {
6563
padding: current_padding,
66-
index: current_index,
64+
capture: current_capture,
6765
});
6866
current_segment.clear();
6967
current_padding = None;
70-
current_index = 0;
71-
is_parsing_index = false;
68+
current_capture = Capture::Index(0);
69+
is_parsing_capture = false;
7270
is_parsing_padding = false;
7371
}
74-
':' if is_parsing_index => {
75-
is_parsing_index = false;
72+
':' if is_parsing_capture => {
73+
is_parsing_capture = false;
7674
is_parsing_padding = true;
7775
if current_segment.is_empty() {
78-
current_index = incremental_index;
76+
current_capture = Capture::Index(incremental_index);
7977
incremental_index += 1;
8078
} else {
81-
current_index = current_segment
82-
.as_str()
83-
.parse()
84-
.map_err(|_| FormatError::InvalidIndex(current_segment.clone()))?;
79+
current_capture = current_segment.as_str().into();
8580
current_segment.clear();
8681
}
8782
}
@@ -91,7 +86,7 @@ impl Formatter {
9186
}
9287
}
9388
}
94-
if is_parsing_index || is_parsing_padding {
89+
if is_parsing_capture || is_parsing_padding {
9590
return Err(FormatError::UnclosedPlaceholder);
9691
}
9792
if !current_segment.is_empty() {
@@ -100,12 +95,12 @@ impl Formatter {
10095
Ok(Self(segments))
10196
}
10297

103-
pub fn format(&self, vars: &[&str]) -> String {
98+
pub fn format(&self, provider: impl Provider) -> String {
10499
let mut formatted = String::new();
105100
for segment in self.0.as_slice() {
106101
match segment {
107-
Segment::PlaceHolder { padding, index } => {
108-
let Some(var) = vars.get(*index) else {
102+
Segment::PlaceHolder { padding, capture } => {
103+
let Some(var) = provider.provide(capture) else {
109104
continue;
110105
};
111106
if let Some((padding, digits)) =
@@ -187,7 +182,7 @@ mod tests {
187182
while let Some((format, vars, expected)) = format_vars_expected.pop() {
188183
let output = Formatter::new(format)
189184
.expect(format!("unable to parse format '{}'", format).as_str());
190-
let actual = output.format(vars.as_slice());
185+
let actual = output.format(vars);
191186
assert_eq!(actual, expected);
192187
}
193188
}
@@ -200,8 +195,6 @@ mod tests {
200195
("2:5}", FormatError::UnopenedPlaceholder),
201196
(r"\{2:5}", FormatError::UnopenedPlaceholder),
202197
(r"{2:5\}", FormatError::UnclosedPlaceholder),
203-
("{{2:5}}", FormatError::InvalidIndex("{2".to_string())),
204-
("{a}", FormatError::InvalidIndex("a".to_string())),
205198
("{2:5a}", FormatError::InvalidPadding("5a".to_string())),
206199
("init {2:5", FormatError::UnclosedPlaceholder),
207200
("init {2:5 end", FormatError::UnclosedPlaceholder),

src/input/iterator.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ impl InputIterator {
4747
});
4848
for (i, input) in inputs.into_iter().enumerate() {
4949
let index = (i + 1).to_string();
50-
let mut output = formatter.format(vec![input.as_str(), index.as_str()].as_slice());
50+
let mut output = formatter.format(vec![input.as_str(), index.as_str()]);
5151
if preserve_extension {
5252
if let Some(extension) = Path::new(input.as_str()).extension() {
5353
output.push('.');
@@ -94,14 +94,10 @@ impl Iterator for InputIterator {
9494
};
9595
let path = entry.path();
9696
let input = path.strip_prefix("./").unwrap_or(path).to_string_lossy();
97-
let Some(cap) = re.captures(input.as_ref()) else {
97+
let Some(captures) = re.captures(input.as_ref()) else {
9898
continue;
9999
};
100-
let vars: Vec<&str> = cap
101-
.iter()
102-
.map(|c| c.map(|c| c.as_str()).unwrap_or_default())
103-
.collect();
104-
let mut output = formatter.format(vars.as_slice());
100+
let mut output = formatter.format(captures);
105101
if *preserve_extension {
106102
if let Some(extension) = Path::new(input.as_ref()).extension() {
107103
output.push('.');

src/input/provider.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
use regex::Captures;
2+
3+
#[derive(Debug, PartialEq)]
4+
pub enum Capture {
5+
Index(usize),
6+
Name(String),
7+
}
8+
9+
pub trait Provider {
10+
fn provide(&self, cap: &Capture) -> Option<&str>;
11+
}
12+
13+
impl Provider for Captures<'_> {
14+
fn provide(&self, cap: &Capture) -> Option<&str> {
15+
match cap {
16+
Capture::Index(index) => self.get(*index),
17+
Capture::Name(name) => self.name(name.as_str()),
18+
}
19+
.map(|m| m.as_str())
20+
}
21+
}
22+
23+
impl Provider for Vec<&'_ str> {
24+
fn provide(&self, cap: &Capture) -> Option<&str> {
25+
match cap {
26+
Capture::Index(index) => self.get(*index).copied(),
27+
_ => None,
28+
}
29+
}
30+
}
31+
32+
impl From<&str> for Capture {
33+
fn from(value: &str) -> Self {
34+
if let Ok(index) = value.parse() {
35+
Capture::Index(index)
36+
} else {
37+
Capture::Name(value.into())
38+
}
39+
}
40+
}

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ pub mod cli;
33
pub mod input {
44
mod formatter;
55
mod iterator;
6+
mod provider;
67
mod separator;
78
mod source;
89
pub use self::formatter::*;
910
pub use self::iterator::*;
11+
pub use self::provider::*;
1012
pub use self::separator::*;
1113
pub use self::source::*;
1214
}

tests/regex_test.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,51 @@ fn test_regex() {
4949
dir.close().unwrap();
5050
}
5151

52+
#[test]
53+
fn test_named_regex() {
54+
let dir = tempfile::tempdir().unwrap();
55+
56+
let inputs = vec![
57+
"Nomino (2020) S1.E1.1080p.mkv",
58+
"Nomino (2020) S1.E2.1080p.mkv",
59+
"Nomino (2020) S1.E3.1080p.mkv",
60+
"Nomino (2020) S1.E4.1080p.mkv",
61+
"Nomino (2020) S1.E5.1080p.mkv",
62+
];
63+
64+
let mut outputs = vec!["01.mkv", "02.mkv", "03.mkv", "04.mkv", "05.mkv"];
65+
66+
for input in inputs {
67+
let _ = File::create(dir.path().join(input)).unwrap();
68+
}
69+
70+
let cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))
71+
.unwrap()
72+
.args(&[
73+
"-E",
74+
"-d",
75+
dir.path().to_str().unwrap(),
76+
"-r",
77+
r".*E(?<episode>\d+).*",
78+
"{episode:2}.mkv",
79+
])
80+
.unwrap();
81+
82+
let mut files: Vec<String> = read_dir(dir.path())
83+
.unwrap()
84+
.map(|entry| entry.unwrap().file_name().to_str().unwrap().to_string())
85+
.collect();
86+
87+
files.sort();
88+
outputs.sort();
89+
90+
assert!(cmd.status.success());
91+
assert_eq!(files.len(), outputs.len());
92+
assert!(outputs.iter().zip(files.iter()).all(|(a, b)| a == b));
93+
94+
dir.close().unwrap();
95+
}
96+
5297
#[test]
5398
fn test_regex_not_overwrite() {
5499
let dir = tempfile::tempdir().unwrap();

0 commit comments

Comments
 (0)