diff --git a/Cargo.lock b/Cargo.lock index 8ece5003..91e7815e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,18 +141,6 @@ version = "9.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59051ec02907378a67b0ba1b8631121f5388c8dbbb3cec8c749d8f93c2c3c211" -[[package]] -name = "ast-grep-core" -version = "0.32.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d203a50bd471340befbf7d0dee18d66699cc11513aafa1fea06b926e74130818" -dependencies = [ - "bit-set", - "regex", - "thiserror 2.0.17", - "tree-sitter-facade-sg", -] - [[package]] name = "autocfg" version = "1.5.0" @@ -220,21 +208,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "bit-set" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" -dependencies = [ - "bit-vec", -] - -[[package]] -name = "bit-vec" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" - [[package]] name = "bitflags" version = "1.3.2" @@ -257,11 +230,36 @@ dependencies = [ ] [[package]] -name = "brush-parser" -version = "0.2.20" +name = "bon" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6924d0efd702b4fe3d65f3a63c03520edfe53b67d0955b9d3f8e66b192a410ff" +checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1" +dependencies = [ + "bon-macros", + "rustversion", +] + +[[package]] +name = "bon-macros" +version = "3.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645" +dependencies = [ + "darling 0.21.3", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.106", +] + +[[package]] +name = "brush-parser" +version = "0.3.0" +source = "git+https://github.com/reubeno/brush?rev=dcb760933b10ee0433d7b740a5709b06f5c67c6b#dcb760933b10ee0433d7b740a5709b06f5c67c6b" dependencies = [ + "bon", "cached", "indenter", "peg", @@ -336,7 +334,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9225bdcf4e4a9a4c08bf16607908eb2fbf746828d5e0b5e019726dbf6571f201" dependencies = [ - "darling", + "darling 0.20.11", "proc-macro2", "quote", "syn 2.0.106", @@ -714,8 +712,18 @@ version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + +[[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core 0.21.3", + "darling_macro 0.21.3", ] [[package]] @@ -732,13 +740,38 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "darling_core" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.106", +] + [[package]] name = "darling_macro" version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ - "darling_core", + "darling_core 0.20.11", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core 0.21.3", "quote", "syn 2.0.106", ] @@ -1411,7 +1444,7 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "435d80800b936787d62688c927b6490e887c7ef5ff9ce922c6c6050fca75eb9a" dependencies = [ - "darling", + "darling 0.20.11", "indoc", "proc-macro2", "quote", @@ -2628,12 +2661,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" - [[package]] name = "strsim" version = "0.11.1" @@ -2887,9 +2914,9 @@ checksum = "d163a63c116ce562a22cda521fcc4d79152e7aba014456fb5eb442f6d6a10109" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -2898,9 +2925,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", @@ -2909,9 +2936,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", "valuable", @@ -2957,49 +2984,6 @@ dependencies = [ "tracing-log", ] -[[package]] -name = "tree-sitter" -version = "0.24.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" -dependencies = [ - "cc", - "regex", - "regex-syntax", - "streaming-iterator", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-bash" -version = "0.23.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "329a4d48623ac337d42b1df84e81a1c9dbb2946907c102ca72db158c1964a52e" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-facade-sg" -version = "0.24.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9195ab85ddd7df7ddac5b2e397ec6264816ae640346013002ceccf0f9b3578f1" -dependencies = [ - "js-sys", - "tree-sitter", - "tree-sitter-language", - "wasm-bindgen", - "web-sys", - "web-tree-sitter-sg", -] - -[[package]] -name = "tree-sitter-language" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" - [[package]] name = "tui-term" version = "0.2.0" @@ -3074,9 +3058,9 @@ checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" [[package]] name = "utf8-chars" -version = "3.0.5" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f598f797138b219a4560b4e9c53c255e872e267c9e3fdcc75aa59a2a90953bcd" +checksum = "ebe49e006d6df172d7f14794568a90fe41e05a1fa9e03dc276fa6da4bb747ec3" dependencies = [ "arrayvec", ] @@ -3157,9 +3141,12 @@ dependencies = [ name = "vite_shell" version = "0.0.0" dependencies = [ - "ast-grep-core", - "thiserror 2.0.17", - "tree-sitter-bash", + "bincode", + "brush-parser", + "diff-struct", + "serde", + "shell-escape", + "vite_str", ] [[package]] @@ -3178,7 +3165,6 @@ version = "0.0.0" dependencies = [ "anyhow", "bincode", - "brush-parser", "bstr", "compact_str 0.9.0", "dashmap", @@ -3205,6 +3191,7 @@ dependencies = [ "uuid", "vite_glob", "vite_path", + "vite_shell", "vite_str", "vite_workspace", "wax", @@ -3372,19 +3359,6 @@ dependencies = [ "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.54" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" -dependencies = [ - "cfg-if", - "js-sys", - "once_cell", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "wasm-bindgen-macro" version = "0.2.104" @@ -3432,16 +3406,6 @@ dependencies = [ "walkdir", ] -[[package]] -name = "web-sys" -version = "0.3.81" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "web-time" version = "1.1.0" @@ -3452,17 +3416,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "web-tree-sitter-sg" -version = "0.24.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cf7d34b16550f076d75b4a5d4673f1a9692f79787d040e3ac7ddb04e5c48a0" -dependencies = [ - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", -] - [[package]] name = "which" version = "8.0.0" diff --git a/Cargo.toml b/Cargo.toml index 824adf82..824c2988 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,12 +36,13 @@ allocator-api2 = { version = "0.2.21", default-features = false, features = ["al anyhow = "1.0.98" assert2 = "0.3.16" assertables = "9.8.1" -ast-grep-core = "0.32.2" base64 = "0.22.1" bincode = "2.0.1" bindgen = "0.72.1" bitflags = "2.10.0" -brush-parser = "0.2.18" +# The newest released version (0.3.0) of brush-parser has a bug that reports incorrect locations for some ast nodes. +# The latest commit has fixed it. TODO: switch back to released version when a new version is published. +brush-parser = { git = "https://github.com/reubeno/brush", rev = "dcb760933b10ee0433d7b740a5709b06f5c67c6b" } bstr = { version = "1.12.0", default-features = false, features = ["alloc", "std"] } bumpalo = { version = "3.17.0", features = ["allocator-api2"] } bytemuck = { version = "1.23.0", features = ["extern_crate_alloc", "must_cast"] } @@ -109,16 +110,16 @@ thiserror = "2" tokio = "1.48.0" tokio-util = "0.7.17" toml = "0.9.5" -tracing = "0.1.41" +tracing = "0.1.43" tracing-error = "0.2.1" tracing-subscriber = { version = "0.3.19", features = ["env-filter", "serde"] } -tree-sitter-bash = "0.23.1" tui-term = "0.2.0" twox-hash = "2.1.1" uuid = "1.18.1" vec1 = "1.12.1" vite_glob = { path = "crates/vite_glob" } vite_path = { path = "crates/vite_path" } +vite_shell = { path = "crates/vite_shell" } vite_str = { path = "crates/vite_str" } vite_workspace = { path = "crates/vite_workspace" } wax = "0.6.0" diff --git a/crates/vite_shell/Cargo.toml b/crates/vite_shell/Cargo.toml index d3af5dc7..aab0afce 100644 --- a/crates/vite_shell/Cargo.toml +++ b/crates/vite_shell/Cargo.toml @@ -8,9 +8,12 @@ publish = false rust-version.workspace = true [dependencies] -ast-grep-core = { workspace = true } -thiserror = { workspace = true } -tree-sitter-bash = { workspace = true } +bincode = { workspace = true } +brush-parser = { workspace = true } +diff-struct = { workspace = true } +serde = { workspace = true, features = ["derive"] } +shell-escape = { workspace = true } +vite_str = { workspace = true } [lints] workspace = true diff --git a/crates/vite_shell/src/lib.rs b/crates/vite_shell/src/lib.rs index 505863db..15c6b93a 100644 --- a/crates/vite_shell/src/lib.rs +++ b/crates/vite_shell/src/lib.rs @@ -1,189 +1,125 @@ -//! Shell script parsing utilities using ast-grep for syntax analysis. -//! -//! This crate provides functionality to parse and split bash scripts by top-level operators. - -use ast_grep_core::{AstGrep, Doc, Language}; -use thiserror::Error; - -/// Errors that can occur during shell script parsing. -#[derive(Debug, Error)] -pub enum ShellParseError { - /// The shell script has invalid syntax. - #[error("Invalid shell syntax: {0}")] - InvalidSyntax(String), - - /// An error occurred during parsing. - #[error("Parse error: {0}")] - ParseError(String), +use std::{collections::BTreeMap, fmt::Display, ops::Range}; + +use bincode::{Decode, Encode}; +use brush_parser::{ + Parser, ParserOptions, + ast::{ + AndOr, Assignment, AssignmentName, AssignmentValue, Command, CommandPrefix, + CommandPrefixOrSuffixItem, CommandSuffix, CompoundListItem, Pipeline, Program, + SeparatorOperator, SimpleCommand, SourceLocation, Word, + }, + unquote_str, +}; +use diff::Diff; +use serde::{Deserialize, Serialize}; +use vite_str::Str; + +/// "FOO=BAR program arg1 arg2" +#[derive(Encode, Decode, Serialize, Deserialize, Debug, PartialEq, Eq, Diff, Clone)] +#[diff(attr(#[derive(Debug)]))] +pub struct TaskParsedCommand { + pub envs: BTreeMap, + pub program: Str, + pub args: Vec, } -/// Bash language implementation for ast-grep. -#[derive(Clone)] -struct BashLanguage; +impl Display for TaskParsedCommand { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // BTreeMap ensures stable iteration order + for (name, value) in &self.envs { + Display::fmt( + &format_args!("{}={} ", name, shell_escape::escape(value.as_str().into())), + f, + )?; + } + Display::fmt(&shell_escape::escape(self.program.as_str().into()), f)?; + for arg in &self.args { + Display::fmt(" ", f)?; + Display::fmt(&shell_escape::escape(arg.as_str().into()), f)?; + } -impl Language for BashLanguage { - fn get_ts_language(&self) -> ast_grep_core::language::TSLanguage { - tree_sitter_bash::LANGUAGE.into() + Ok(()) } } -/// Splits a bash script string into multiple command strings by top-level `&&` operators. -/// -/// This function parses the bash script and identifies command lists separated by `&&` at the -/// top level (not nested within subshells, functions, or other constructs). -/// -/// # Arguments -/// -/// * `script` - The bash script string to split -/// -/// # Returns -/// -/// A `Result` containing a vector of command strings, or a `ShellParseError` if parsing fails. -/// -/// # Examples -/// -/// ``` -/// use vite_shell::split_by_and; -/// -/// let script = "npm run build && npm test"; -/// let commands = split_by_and(script).unwrap(); -/// assert_eq!(commands, vec!["npm run build", "npm test"]); -/// ``` -/// -/// ``` -/// use vite_shell::split_by_and; -/// -/// let script = "echo 'hello' && echo 'world' && echo 'rust'"; -/// let commands = split_by_and(script).unwrap(); -/// assert_eq!(commands, vec!["echo 'hello'", "echo 'world'", "echo 'rust'"]); -/// ``` -pub fn split_by_and(script: &str) -> Result, ShellParseError> { - let grep = AstGrep::new(script, BashLanguage); - let root = grep.root(); - - // Split by top-level && operators - let commands = split_list_by_operator(&root, "&&", script); - - if commands.is_empty() { - // If no && operators found, return the entire script as a single command - Ok(vec![script.trim().to_string()]) - } else { - Ok(commands) - } +fn unquote(word: &Word) -> String { + let Word { value, loc: _ } = word; + unquote_str(value.as_str()) } -/// Splits a node by a specific operator at the top level only. -/// -/// This function walks the AST and splits only at the specified operator, -/// but handles nested lists that ALSO have the same operator (continuing the chain). -fn split_list_by_operator( - node: &ast_grep_core::Node, - operator: &str, - script: &str, -) -> Vec { - let kind = node.kind(); - - // Only process "list" nodes which contain operator sequences - if kind.as_ref() != "list" { - // For program nodes, check children - if kind.as_ref() == "program" { - for child in node.children() { - let results = split_list_by_operator(&child, operator, script); - if !results.is_empty() { - return results; - } - } +fn pipeline_to_command(pipeline: &Pipeline) -> Option<(TaskParsedCommand, Range)> { + let location = pipeline.location()?; + let range = location.start.index..location.end.index; + + let Pipeline { timed: None, bang: false, seq } = pipeline else { + return None; + }; + let [Command::Simple(simple_command)] = seq.as_slice() else { + return None; + }; + let SimpleCommand { prefix, word_or_name: Some(program), suffix } = simple_command else { + return None; + }; + let mut envs = BTreeMap::::new(); + if let Some(prefix) = prefix { + let CommandPrefix(items) = prefix; + for item in items { + let CommandPrefixOrSuffixItem::AssignmentWord( + Assignment { name, value, append: false, loc: _ }, + _, + ) = item + else { + return None; + }; + let AssignmentName::VariableName(name) = name else { + return None; + }; + let AssignmentValue::Scalar(value) = value else { + return None; + }; + envs.insert(name.as_str().into(), unquote(value).into()); } - return Vec::new(); - } - - // We have a list node - check if it contains our target operator AT THIS LEVEL - let children: Vec<_> = node.children().collect(); - let has_target_operator = children.iter().any(|c| c.kind().as_ref() == operator); - - if !has_target_operator { - // No target operator at this level - return Vec::new(); } - - // Found target operator at this level - split by it - // If we encounter a nested list, check if it's ONLY our operator (continue chain) - // or if it has OTHER operators (treat as atomic) - let mut commands = Vec::new(); - let mut current_start: Option = None; - let mut current_end: Option = None; - - for child in &children { - let child_kind = child.kind(); - - if child_kind.as_ref() == operator { - // Hit the operator - save current command if we have one - if let (Some(start), Some(end)) = (current_start, current_end) { - commands.push(script[start..end].trim().to_string()); - } - // Reset for next command - current_start = None; - current_end = None; - } else if child_kind.as_ref() == "list" { - // Nested list - check what operators it contains - let nested_children: Vec<_> = child.children().collect(); - let has_our_operator = nested_children.iter().any(|c| c.kind().as_ref() == operator); - let has_other_operator = nested_children.iter().any(|c| { - let k = c.kind(); - k.as_ref() == "||" || k.as_ref() == ";" || k.as_ref() == "|" || k.as_ref() == "&" - }); - - if has_our_operator && !has_other_operator { - // This nested list ONLY has our operator - it's a continuation of the chain - // Recursively process it and merge - let nested_results = split_list_by_operator(child, operator, script); - if !nested_results.is_empty() { - if let (Some(start), Some(_)) = (current_start, current_end) { - // Merge first result with accumulated parts - let prefix = script[start..child.range().start].trim(); - if !prefix.is_empty() { - commands.push(format!("{} && {}", prefix, nested_results[0])); - commands.extend(nested_results.into_iter().skip(1)); - } else { - commands.extend(nested_results); - } - current_start = None; - current_end = None; - } else { - commands.extend(nested_results); - } - } else { - // Shouldn't happen, but treat as atomic - let range = child.range(); - if current_start.is_none() { - current_start = Some(range.start); - } - current_end = Some(range.end); - } - } else { - // Nested list has other operators or no our operator - treat as atomic - let range = child.range(); - if current_start.is_none() { - current_start = Some(range.start); - } - current_end = Some(range.end); - } - } else { - // Part of a command - let range = child.range(); - if current_start.is_none() { - current_start = Some(range.start); - } - current_end = Some(range.end); + let mut args = Vec::::new(); + if let Some(CommandSuffix(suffix_items)) = suffix { + for suffix_item in suffix_items { + let CommandPrefixOrSuffixItem::Word(word) = suffix_item else { + return None; + }; + args.push(unquote(word).into()); } } + Some((TaskParsedCommand { envs, program: unquote(program).into(), args }, range)) +} - // Don't forget the last command - if let (Some(start), Some(end)) = (current_start, current_end) { - commands.push(script[start..end].trim().to_string()); - } - - commands +pub fn try_parse_as_and_list(cmd: &str) -> Option)>> { + let mut parser = Parser::new( + cmd.as_bytes(), + &ParserOptions { + enable_extended_globbing: false, + posix_mode: true, + sh_mode: true, + tilde_expansion: false, + }, + ); + let Program { complete_commands } = parser.parse_program().ok()?; + let [compound_list] = complete_commands.as_slice() else { + return None; + }; + let [CompoundListItem(and_or_list, SeparatorOperator::Sequence)] = compound_list.0.as_slice() + else { + return None; + }; + + let mut commands = Vec::<(TaskParsedCommand, Range)>::new(); + commands.push(pipeline_to_command(&and_or_list.first)?); + for and_or in &and_or_list.additional { + let AndOr::And(pipeline) = and_or else { + return None; + }; + commands.push(pipeline_to_command(pipeline)?); + } + Some(commands) } #[cfg(test)] @@ -191,97 +127,108 @@ mod tests { use super::*; #[test] - fn test_simple_split() { - let script = "cmd1 && cmd2"; - let commands = split_by_and(script).unwrap(); - assert_eq!(commands, vec!["cmd1", "cmd2"]); - } - - #[test] - fn test_or_then_and() { - // || and && have same precedence, left-associative - // So this parses as: (cmd0 || cmd1) && cmd2 - let script = "cmd0 || cmd1 && cmd2"; - let commands = split_by_and(script).unwrap(); - assert_eq!(commands, vec!["cmd0 || cmd1", "cmd2"]); - } - - #[test] - fn test_and_then_or() { - // This parses as: (a && b) || c - // The && is nested in a list inside an || context - // Since there's no && at the top level (only ||), we don't split - let script = "a && b || c"; - let commands = split_by_and(script).unwrap(); - // No top-level &&, so return the whole thing - assert_eq!(commands, vec!["a && b || c"]); - } - - #[test] - fn test_mixed_operators() { - // Parses as: ((a && b) || c) && d - let script = "a && b || c && d"; - let commands = split_by_and(script).unwrap(); - // There IS a top-level && (between "((a && b) || c)" and "d") - // So we split there, treating the left side as atomic - assert_eq!(commands, vec!["a && b || c", "d"]); - } - - #[test] - fn test_only_or() { - // Only || operators, no splitting - let script = "cmd1 || cmd2 || cmd3"; - let commands = split_by_and(script).unwrap(); - assert_eq!(commands, vec!["cmd1 || cmd2 || cmd3"]); - } - - #[test] - fn test_multiple_and() { - let script = "a && b && c"; - let commands = split_by_and(script).unwrap(); - assert_eq!(commands, vec!["a", "b", "c"]); - } - - #[test] - fn test_no_and() { - let script = "single command"; - let commands = split_by_and(script).unwrap(); - assert_eq!(commands, vec!["single command"]); - } - - #[test] - fn test_with_whitespace() { - let script = " cmd1 && cmd2 "; - let commands = split_by_and(script).unwrap(); - assert_eq!(commands, vec!["cmd1", "cmd2"]); - } - - #[test] - fn test_complex_commands() { - let script = "npm run build && npm test --coverage && echo 'done'"; - let commands = split_by_and(script).unwrap(); - assert_eq!(commands, vec!["npm run build", "npm test --coverage", "echo 'done'"]); + fn test_parse_single_command() { + let source = r#"A=B hello world"#; + let list = try_parse_as_and_list(source).unwrap(); + assert_eq!(list.len(), 1); + let (cmd, range) = &list[0]; + assert_eq!(&source[range.clone()], source); + assert_eq!( + cmd, + &TaskParsedCommand { + envs: [("A".into(), "B".into())].into(), + program: "hello".into(), + args: vec!["world".into()], + } + ); } #[test] - fn test_subshell_with_and() { - let script = "(cmd1 && cmd2) && cmd3"; - let commands = split_by_and(script).unwrap(); - // Should split at the top-level &&, keeping the subshell intact - assert_eq!(commands, vec!["(cmd1 && cmd2)", "cmd3"]); + fn test_parse_command() { + let source = r#"A=B hello world && FOO="BE\"R" program "arg1" "arg\"2" && zzz"#; + let list = try_parse_as_and_list(source).unwrap(); + + let commands = list.iter().map(|(cmd, _)| cmd).collect::>(); + assert_eq!( + commands, + vec![ + &TaskParsedCommand { + envs: [("A".into(), "B".into())].into(), + program: "hello".into(), + args: vec!["world".into()], + }, + &TaskParsedCommand { + envs: [("FOO".into(), "BE\"R".into())].into(), + program: "program".into(), + args: vec!["arg1".into(), "arg\"2".into()], + }, + &TaskParsedCommand { envs: [].into(), program: "zzz".into(), args: vec![] } + ] + ); + + let substrs = list.iter().map(|(_, range)| &source[range.clone()]).collect::>(); + + assert_eq!( + substrs, + vec!["A=B hello world", r#"FOO="BE\"R" program "arg1" "arg\"2""#, "zzz"] + ); } #[test] - fn test_with_pipes() { - let script = "cat file.txt | grep pattern && echo 'found'"; - let commands = split_by_and(script).unwrap(); - assert_eq!(commands, vec!["cat file.txt | grep pattern", "echo 'found'"]); + fn test_task_parsed_command_stable_env_ordering() { + // Test that environment variables maintain stable ordering + let cmd = TaskParsedCommand { + envs: [ + ("ZEBRA".into(), "last".into()), + ("ALPHA".into(), "first".into()), + ("MIDDLE".into(), "middle".into()), + ] + .into(), + program: "test".into(), + args: vec![], + }; + + // Convert to string multiple times and verify it's always the same + let str1 = cmd.to_string(); + let str2 = cmd.to_string(); + let str3 = cmd.to_string(); + + assert_eq!(str1, str2); + assert_eq!(str2, str3); + + // Verify the order is alphabetical (BTreeMap sorts by key) + assert!(str1.starts_with("ALPHA=first MIDDLE=middle ZEBRA=last")); } #[test] - fn test_with_newlines() { - let script = "cmd1 &&\n cmd2 &&\n cmd3"; - let commands = split_by_and(script).unwrap(); - assert_eq!(commands, vec!["cmd1", "cmd2", "cmd3"]); + fn test_task_parsed_command_serialization_stability() { + use bincode::{decode_from_slice, encode_to_vec}; + + // Create a command with multiple environment variables + let cmd = TaskParsedCommand { + envs: [ + ("VAR_C".into(), "value_c".into()), + ("VAR_A".into(), "value_a".into()), + ("VAR_B".into(), "value_b".into()), + ] + .into(), + program: "program".into(), + args: vec!["arg1".into(), "arg2".into()], + }; + + // Serialize multiple times + let config = bincode::config::standard(); + let bytes1 = encode_to_vec(&cmd, config).unwrap(); + let bytes2 = encode_to_vec(&cmd, config).unwrap(); + + // Verify serialization is stable + assert_eq!(bytes1, bytes2); + + // Verify deserialization works and maintains order + let (decoded, _): (TaskParsedCommand, _) = decode_from_slice(&bytes1, config).unwrap(); + assert_eq!(decoded, cmd); + + // Verify the decoded command still has stable string representation + assert_eq!(decoded.to_string(), cmd.to_string()); } } diff --git a/crates/vite_task/Cargo.toml b/crates/vite_task/Cargo.toml index 06d4b0a9..60396af1 100644 --- a/crates/vite_task/Cargo.toml +++ b/crates/vite_task/Cargo.toml @@ -14,7 +14,6 @@ workspace = true [dependencies] anyhow = { workspace = true } bincode = { workspace = true, features = ["derive"] } -brush-parser = { workspace = true } bstr = { workspace = true } compact_str = { workspace = true, features = ["serde"] } dashmap = { workspace = true } @@ -40,6 +39,7 @@ twox-hash = { workspace = true } uuid = { workspace = true, features = ["v4"] } vite_glob = { workspace = true } vite_path = { workspace = true } +vite_shell = { workspace = true } vite_str = { workspace = true } vite_workspace = { workspace = true } wax = { workspace = true } diff --git a/crates/vite_task/src/cmd.rs b/crates/vite_task/src/cmd.rs deleted file mode 100644 index d058547a..00000000 --- a/crates/vite_task/src/cmd.rs +++ /dev/null @@ -1,204 +0,0 @@ -use std::{collections::BTreeMap, fmt::Display}; - -use bincode::{Decode, Encode}; -use brush_parser::{ - Parser, ParserOptions, - ast::{ - AndOr, Assignment, AssignmentName, AssignmentValue, Command, CommandPrefix, - CommandPrefixOrSuffixItem, CommandSuffix, CompoundListItem, Pipeline, Program, - SeparatorOperator, SimpleCommand, Word, - }, - unquote_str, -}; -use diff::Diff; -use serde::{Deserialize, Serialize}; -use vite_str::Str; - -/// "FOO=BAR program arg1 arg2" -#[derive(Encode, Decode, Serialize, Deserialize, Debug, PartialEq, Eq, Diff, Clone)] -#[diff(attr(#[derive(Debug)]))] -pub struct TaskParsedCommand { - pub envs: BTreeMap, - pub program: Str, - pub args: Vec, -} - -impl Display for TaskParsedCommand { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // BTreeMap ensures stable iteration order - for (name, value) in &self.envs { - Display::fmt( - &format_args!("{}={} ", name, shell_escape::escape(value.as_str().into())), - f, - )?; - } - Display::fmt(&shell_escape::escape(self.program.as_str().into()), f)?; - for arg in &self.args { - Display::fmt(" ", f)?; - Display::fmt(&shell_escape::escape(arg.as_str().into()), f)?; - } - - Ok(()) - } -} - -fn unquote(word: &Word) -> String { - let Word { value } = word; - unquote_str(value.as_str()) -} - -fn pipeline_to_command(pipeline: &Pipeline) -> Option { - let Pipeline { timed: None, bang: false, seq } = pipeline else { - return None; - }; - let [Command::Simple(simple_command)] = seq.as_slice() else { - return None; - }; - let SimpleCommand { prefix, word_or_name: Some(program), suffix } = simple_command else { - return None; - }; - let mut envs = BTreeMap::::new(); - if let Some(prefix) = prefix { - let CommandPrefix(items) = prefix; - for item in items { - let CommandPrefixOrSuffixItem::AssignmentWord( - Assignment { name, value, append: false }, - _, - ) = item - else { - return None; - }; - let AssignmentName::VariableName(name) = name else { - return None; - }; - let AssignmentValue::Scalar(value) = value else { - return None; - }; - envs.insert(name.as_str().into(), unquote(value).into()); - } - } - let mut args = Vec::::new(); - if let Some(CommandSuffix(suffix_items)) = suffix { - for suffix_item in suffix_items { - let CommandPrefixOrSuffixItem::Word(word) = suffix_item else { - return None; - }; - args.push(unquote(word).into()); - } - } - Some(TaskParsedCommand { envs, program: unquote(program).into(), args }) -} - -pub fn try_parse_as_and_list(cmd: &str) -> Option> { - let mut parser = Parser::new( - cmd.as_bytes(), - &ParserOptions { - enable_extended_globbing: false, - posix_mode: true, - sh_mode: true, - tilde_expansion: false, - }, - &Default::default(), - ); - let Program { complete_commands } = parser.parse_program().ok()?; - let [compound_list] = complete_commands.as_slice() else { - return None; - }; - let [CompoundListItem(and_or_list, SeparatorOperator::Sequence)] = compound_list.0.as_slice() - else { - return None; - }; - - let mut commands = Vec::::new(); - commands.push(pipeline_to_command(&and_or_list.first)?); - for and_or in &and_or_list.additional { - let AndOr::And(pipeline) = and_or else { - return None; - }; - commands.push(pipeline_to_command(pipeline)?); - } - Some(commands) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_command() { - assert_eq!( - try_parse_as_and_list(r#"hello world && FOO="BE\"R" program "arg1" "arg\"2" && zzz"#), - Some(vec![ - TaskParsedCommand { - envs: [].into(), - program: "hello".into(), - args: vec!["world".into()], - }, - TaskParsedCommand { - envs: [("FOO".into(), "BE\"R".into())].into(), - program: "program".into(), - args: vec!["arg1".into(), "arg\"2".into()], - }, - TaskParsedCommand { envs: [].into(), program: "zzz".into(), args: vec![] } - ]) - ); - } - - #[test] - fn test_task_parsed_command_stable_env_ordering() { - // Test that environment variables maintain stable ordering - let cmd = TaskParsedCommand { - envs: [ - ("ZEBRA".into(), "last".into()), - ("ALPHA".into(), "first".into()), - ("MIDDLE".into(), "middle".into()), - ] - .into(), - program: "test".into(), - args: vec![], - }; - - // Convert to string multiple times and verify it's always the same - let str1 = cmd.to_string(); - let str2 = cmd.to_string(); - let str3 = cmd.to_string(); - - assert_eq!(str1, str2); - assert_eq!(str2, str3); - - // Verify the order is alphabetical (BTreeMap sorts by key) - assert!(str1.starts_with("ALPHA=first MIDDLE=middle ZEBRA=last")); - } - - #[test] - fn test_task_parsed_command_serialization_stability() { - use bincode::{decode_from_slice, encode_to_vec}; - - // Create a command with multiple environment variables - let cmd = TaskParsedCommand { - envs: [ - ("VAR_C".into(), "value_c".into()), - ("VAR_A".into(), "value_a".into()), - ("VAR_B".into(), "value_b".into()), - ] - .into(), - program: "program".into(), - args: vec!["arg1".into(), "arg2".into()], - }; - - // Serialize multiple times - let config = bincode::config::standard(); - let bytes1 = encode_to_vec(&cmd, config).unwrap(); - let bytes2 = encode_to_vec(&cmd, config).unwrap(); - - // Verify serialization is stable - assert_eq!(bytes1, bytes2); - - // Verify deserialization works and maintains order - let (decoded, _): (TaskParsedCommand, _) = decode_from_slice(&bytes1, config).unwrap(); - assert_eq!(decoded, cmd); - - // Verify the decoded command still has stable string representation - assert_eq!(decoded.to_string(), cmd.to_string()); - } -} diff --git a/crates/vite_task/src/config/mod.rs b/crates/vite_task/src/config/mod.rs index e7b16edf..bc07d54c 100644 --- a/crates/vite_task/src/config/mod.rs +++ b/crates/vite_task/src/config/mod.rs @@ -17,12 +17,12 @@ use serde::{Deserialize, Serialize}; pub use task_command::*; pub use task_graph_builder::*; use vite_path::{self, RelativePath, RelativePathBuf}; +use vite_shell::TaskParsedCommand; use vite_str::Str; pub use workspace::*; use crate::{ Error, - cmd::TaskParsedCommand, collections::{HashMap, HashSet}, config::name::TaskName, execute::TaskEnvs, diff --git a/crates/vite_task/src/config/task_command.rs b/crates/vite_task/src/config/task_command.rs index 94633744..fa13df3d 100644 --- a/crates/vite_task/src/config/task_command.rs +++ b/crates/vite_task/src/config/task_command.rs @@ -4,10 +4,11 @@ use bincode::{Decode, Encode}; use diff::Diff; use serde::{Deserialize, Serialize}; use vite_path::{AbsolutePath, RelativePathBuf}; +use vite_shell::TaskParsedCommand; use vite_str::Str; use super::{CommandFingerprint, ResolvedTaskCommand, TaskConfig}; -use crate::{Error, cmd::TaskParsedCommand, execute::TaskEnvs}; +use crate::{Error, execute::TaskEnvs}; #[derive(Encode, Decode, Serialize, Deserialize, Debug, PartialEq, Eq, Diff, Clone)] #[diff(attr(#[derive(Debug)]))] diff --git a/crates/vite_task/src/config/workspace.rs b/crates/vite_task/src/config/workspace.rs index 0f91d82f..6e536a8c 100644 --- a/crates/vite_task/src/config/workspace.rs +++ b/crates/vite_task/src/config/workspace.rs @@ -11,6 +11,7 @@ use petgraph::{ visit::IntoNodeReferences, }; use vite_path::{AbsolutePath, AbsolutePathBuf, RelativePath, RelativePathBuf}; +use vite_shell::try_parse_as_and_list; use vite_str::Str; use vite_workspace::{ DependencyType, PackageInfo, PackageIx, PackageJson, PackageNodeIndex, WorkspaceRoot, @@ -24,7 +25,6 @@ use super::{ use crate::{ Error, cache::TaskCache, - cmd::try_parse_as_and_list, collections::{HashMap, HashSet}, config::{DisplayOptions, TaskGroupId, name::TaskName}, fs::CachedFileSystem, @@ -495,7 +495,7 @@ impl Workspace { if let Some(and_list) = try_parse_as_and_list(script) { let and_list_len = and_list.len(); - for (index, command) in and_list.into_iter().enumerate() { + for (index, (command, _)) in and_list.into_iter().enumerate() { let is_last = index + 1 == and_list_len; let resolved_task = Self::resolve_task( diff --git a/crates/vite_task/src/fingerprint.rs b/crates/vite_task/src/fingerprint.rs index 916055e3..cc4b4bb9 100644 --- a/crates/vite_task/src/fingerprint.rs +++ b/crates/vite_task/src/fingerprint.rs @@ -122,10 +122,10 @@ impl PostRunFingerprint { #[cfg(test)] mod tests { use vite_path::RelativePathBuf; + use vite_shell::TaskParsedCommand; use vite_str::Str; use crate::{ - cmd::TaskParsedCommand, collections::HashSet, config::{CommandFingerprint, ResolvedTaskConfig, TaskCommand, TaskConfig}, }; diff --git a/crates/vite_task/src/lib.rs b/crates/vite_task/src/lib.rs index 5b1b7058..67a0fa25 100644 --- a/crates/vite_task/src/lib.rs +++ b/crates/vite_task/src/lib.rs @@ -1,5 +1,4 @@ mod cache; -mod cmd; mod collections; mod config; mod error;