|
| 1 | +use std::path::Path; |
| 2 | + |
| 3 | +use annotate_snippets::AnnotationKind; |
| 4 | +use annotate_snippets::Group; |
| 5 | +use annotate_snippets::Level; |
| 6 | +use annotate_snippets::Snippet; |
| 7 | +use cargo_util_schemas::manifest::TomlToolLints; |
| 8 | +use toml_parser::Source; |
| 9 | +use toml_parser::Span; |
| 10 | +use toml_parser::parser::EventReceiver; |
| 11 | + |
| 12 | +use crate::CargoResult; |
| 13 | +use crate::GlobalContext; |
| 14 | +use crate::core::MaybePackage; |
| 15 | +use crate::lints::Lint; |
| 16 | +use crate::lints::LintLevel; |
| 17 | +use crate::lints::ManifestFor; |
| 18 | +use crate::lints::rel_cwd_manifest_path; |
| 19 | + |
| 20 | +const UNICODE_BIDI_CODEPOINTS: &[(char, &str)] = &[ |
| 21 | + ('\u{202A}', "LEFT-TO-RIGHT EMBEDDING"), |
| 22 | + ('\u{202B}', "RIGHT-TO-LEFT EMBEDDING"), |
| 23 | + ('\u{202C}', "POP DIRECTIONAL FORMATTING"), |
| 24 | + ('\u{202D}', "LEFT-TO-RIGHT OVERRIDE"), |
| 25 | + ('\u{202E}', "RIGHT-TO-LEFT OVERRIDE"), |
| 26 | + ('\u{2066}', "LEFT-TO-RIGHT ISOLATE"), |
| 27 | + ('\u{2067}', "RIGHT-TO-LEFT ISOLATE"), |
| 28 | + ('\u{2068}', "FIRST STRONG ISOLATE"), |
| 29 | + ('\u{2069}', "POP DIRECTIONAL ISOLATE"), |
| 30 | +]; |
| 31 | + |
| 32 | +pub const LINT: Lint = Lint { |
| 33 | + name: "text_direction_codepoint", |
| 34 | + desc: "unicode codepoint changing visible direction of text present in manifest", |
| 35 | + groups: &[], |
| 36 | + default_level: LintLevel::Deny, |
| 37 | + edition_lint_opts: None, |
| 38 | + feature_gate: None, |
| 39 | + docs: None, |
| 40 | +}; |
| 41 | + |
| 42 | +/// Paths where BiDi codepoints are allowed (legitimate RTL content). |
| 43 | +const ALLOWED_BIDI_PATHS: &[&[&str]] = &[ |
| 44 | + &["package", "description"], |
| 45 | + &["package", "metadata"], |
| 46 | + &["workspace", "metadata"], |
| 47 | +]; |
| 48 | + |
| 49 | +fn is_allowed_bidi_path(path: &[String]) -> bool { |
| 50 | + ALLOWED_BIDI_PATHS.iter().any(|allowed| { |
| 51 | + if path.len() < allowed.len() { |
| 52 | + return false; |
| 53 | + } |
| 54 | + path.iter() |
| 55 | + .zip(allowed.iter()) |
| 56 | + .all(|(a, b)| a.as_str() == *b) |
| 57 | + }) |
| 58 | +} |
| 59 | + |
| 60 | +struct Finding { |
| 61 | + byte_idx: usize, |
| 62 | + ch: char, |
| 63 | + name: &'static str, |
| 64 | + in_allowed_value: bool, |
| 65 | +} |
| 66 | + |
| 67 | +struct BiDiCollector<'a> { |
| 68 | + contents: &'a str, |
| 69 | + findings: &'a mut Vec<Finding>, |
| 70 | + key_path: Vec<String>, |
| 71 | + table_stack: Vec<Vec<String>>, |
| 72 | +} |
| 73 | + |
| 74 | +impl<'a> BiDiCollector<'a> { |
| 75 | + fn new(contents: &'a str, findings: &'a mut Vec<Finding>) -> Self { |
| 76 | + Self { |
| 77 | + contents, |
| 78 | + findings, |
| 79 | + key_path: Vec::new(), |
| 80 | + table_stack: Vec::new(), |
| 81 | + } |
| 82 | + } |
| 83 | + |
| 84 | + fn check_span_for_bidi(&mut self, span: Span, in_value: bool) { |
| 85 | + let text = &self.contents[span.start()..span.end()]; |
| 86 | + for (offset, ch) in text.char_indices() { |
| 87 | + if let Some((_, name)) = UNICODE_BIDI_CODEPOINTS.iter().find(|(c, _)| *c == ch) { |
| 88 | + let in_allowed_value = in_value && is_allowed_bidi_path(&self.key_path); |
| 89 | + self.findings.push(Finding { |
| 90 | + byte_idx: span.start() + offset, |
| 91 | + ch, |
| 92 | + name, |
| 93 | + in_allowed_value, |
| 94 | + }); |
| 95 | + } |
| 96 | + } |
| 97 | + } |
| 98 | + |
| 99 | + fn current_path(&self) -> Vec<String> { |
| 100 | + let mut path = self.table_stack.last().cloned().unwrap_or_default(); |
| 101 | + path.extend(self.key_path.clone()); |
| 102 | + path |
| 103 | + } |
| 104 | +} |
| 105 | + |
| 106 | +impl EventReceiver for BiDiCollector<'_> { |
| 107 | + fn std_table_open(&mut self, _span: Span, _error: &mut dyn toml_parser::ErrorSink) { |
| 108 | + self.table_stack.push(self.key_path.clone()); |
| 109 | + self.key_path.clear(); |
| 110 | + } |
| 111 | + |
| 112 | + fn std_table_close(&mut self, _span: Span, _error: &mut dyn toml_parser::ErrorSink) { |
| 113 | + if let Some(last) = self.table_stack.last_mut() { |
| 114 | + *last = self.key_path.clone(); |
| 115 | + } |
| 116 | + self.key_path.clear(); |
| 117 | + } |
| 118 | + |
| 119 | + fn array_table_open(&mut self, _span: Span, _error: &mut dyn toml_parser::ErrorSink) { |
| 120 | + self.table_stack.push(self.key_path.clone()); |
| 121 | + self.key_path.clear(); |
| 122 | + } |
| 123 | + |
| 124 | + fn array_table_close(&mut self, _span: Span, _error: &mut dyn toml_parser::ErrorSink) { |
| 125 | + if let Some(last) = self.table_stack.last_mut() { |
| 126 | + *last = self.key_path.clone(); |
| 127 | + } |
| 128 | + self.key_path.clear(); |
| 129 | + } |
| 130 | + |
| 131 | + fn simple_key( |
| 132 | + &mut self, |
| 133 | + span: Span, |
| 134 | + _kind: Option<toml_parser::decoder::Encoding>, |
| 135 | + _error: &mut dyn toml_parser::ErrorSink, |
| 136 | + ) { |
| 137 | + self.check_span_for_bidi(span, false); |
| 138 | + |
| 139 | + let key_text = &self.contents[span.start()..span.end()]; |
| 140 | + let key = if (key_text.starts_with('"') && key_text.ends_with('"')) |
| 141 | + || (key_text.starts_with('\'') && key_text.ends_with('\'')) |
| 142 | + { |
| 143 | + key_text[1..key_text.len() - 1].to_string() |
| 144 | + } else { |
| 145 | + key_text.to_string() |
| 146 | + }; |
| 147 | + self.key_path.push(key); |
| 148 | + } |
| 149 | + |
| 150 | + fn key_sep(&mut self, _span: Span, _error: &mut dyn toml_parser::ErrorSink) {} |
| 151 | + |
| 152 | + fn key_val_sep(&mut self, _span: Span, _error: &mut dyn toml_parser::ErrorSink) {} |
| 153 | + |
| 154 | + fn scalar( |
| 155 | + &mut self, |
| 156 | + span: Span, |
| 157 | + _kind: Option<toml_parser::decoder::Encoding>, |
| 158 | + _error: &mut dyn toml_parser::ErrorSink, |
| 159 | + ) { |
| 160 | + let full_path = self.current_path(); |
| 161 | + let saved_key_path = std::mem::replace(&mut self.key_path, full_path); |
| 162 | + self.check_span_for_bidi(span, true); |
| 163 | + self.key_path = saved_key_path; |
| 164 | + self.key_path.clear(); |
| 165 | + } |
| 166 | + |
| 167 | + fn comment(&mut self, span: Span, _error: &mut dyn toml_parser::ErrorSink) { |
| 168 | + self.check_span_for_bidi(span, false); |
| 169 | + } |
| 170 | +} |
| 171 | + |
| 172 | +pub fn text_direction_codepoint( |
| 173 | + manifest: ManifestFor<'_>, |
| 174 | + manifest_path: &Path, |
| 175 | + cargo_lints: &TomlToolLints, |
| 176 | + error_count: &mut usize, |
| 177 | + gctx: &GlobalContext, |
| 178 | +) -> CargoResult<()> { |
| 179 | + let (lint_level, reason) = manifest.lint_level(cargo_lints, LINT); |
| 180 | + |
| 181 | + if lint_level == LintLevel::Allow { |
| 182 | + return Ok(()); |
| 183 | + } |
| 184 | + |
| 185 | + if matches!(&manifest, ManifestFor::Workspace(MaybePackage::Package(_))) { |
| 186 | + return Ok(()); |
| 187 | + } |
| 188 | + |
| 189 | + let contents = manifest.contents(); |
| 190 | + |
| 191 | + let has_bidi = contents.chars().any(|ch| { |
| 192 | + UNICODE_BIDI_CODEPOINTS |
| 193 | + .iter() |
| 194 | + .any(|(bidi_ch, _)| *bidi_ch == ch) |
| 195 | + }); |
| 196 | + |
| 197 | + if !has_bidi { |
| 198 | + return Ok(()); |
| 199 | + } |
| 200 | + |
| 201 | + let mut findings = Vec::new(); |
| 202 | + { |
| 203 | + let source = Source::new(contents); |
| 204 | + let tokens = source.lex().into_vec(); |
| 205 | + let mut collector = BiDiCollector::new(contents, &mut findings); |
| 206 | + let mut errors = Vec::new(); |
| 207 | + toml_parser::parser::parse_document(&tokens, &mut collector, &mut errors); |
| 208 | + } |
| 209 | + |
| 210 | + let disallowed_findings: Vec<_> = findings |
| 211 | + .into_iter() |
| 212 | + .filter(|f| !f.in_allowed_value) |
| 213 | + .collect(); |
| 214 | + |
| 215 | + if disallowed_findings.is_empty() { |
| 216 | + return Ok(()); |
| 217 | + } |
| 218 | + |
| 219 | + let manifest_path_str = rel_cwd_manifest_path(manifest_path, gctx); |
| 220 | + |
| 221 | + let mut line_map = Vec::new(); |
| 222 | + line_map.push(0); |
| 223 | + for (idx, ch) in contents.char_indices() { |
| 224 | + if ch == '\n' { |
| 225 | + line_map.push(idx + 1); |
| 226 | + } |
| 227 | + } |
| 228 | + |
| 229 | + let mut findings_by_line: std::collections::BTreeMap<usize, Vec<_>> = |
| 230 | + std::collections::BTreeMap::new(); |
| 231 | + for finding in disallowed_findings { |
| 232 | + let line_num = line_map |
| 233 | + .iter() |
| 234 | + .rposition(|&line_start| line_start <= finding.byte_idx) |
| 235 | + .map(|i| i + 1) |
| 236 | + .unwrap_or(1); |
| 237 | + findings_by_line |
| 238 | + .entry(line_num) |
| 239 | + .or_insert_with(Vec::new) |
| 240 | + .push(finding); |
| 241 | + } |
| 242 | + |
| 243 | + let level = lint_level.to_diagnostic_level(); |
| 244 | + let emitted_source = LINT.emitted_source(lint_level, reason); |
| 245 | + |
| 246 | + for (line_idx, line_findings) in findings_by_line.values().enumerate() { |
| 247 | + if lint_level.is_error() { |
| 248 | + *error_count += 1; |
| 249 | + } |
| 250 | + |
| 251 | + let title = LINT.desc.to_string(); |
| 252 | + |
| 253 | + let labels: Vec<String> = line_findings |
| 254 | + .iter() |
| 255 | + .map(|f| format!(r#"`\u{{{:04X}}}` ({})"#, f.ch as u32, f.name)) |
| 256 | + .collect(); |
| 257 | + |
| 258 | + let mut snippet = Snippet::source(contents).path(&manifest_path_str); |
| 259 | + for (finding, label) in line_findings.iter().zip(labels.iter()) { |
| 260 | + let span = finding.byte_idx..(finding.byte_idx + finding.ch.len_utf8()); |
| 261 | + snippet = snippet.annotation(AnnotationKind::Primary.span(span).label(label)); |
| 262 | + } |
| 263 | + |
| 264 | + let mut group = Group::with_title(level.clone().primary_title(&title)).element(snippet); |
| 265 | + |
| 266 | + if line_idx == 0 { |
| 267 | + group = group.element(Level::NOTE.message(&emitted_source)); |
| 268 | + group = group.element(Level::NOTE.message( |
| 269 | + "these kinds of unicode codepoints change the way text flows on screen, \ |
| 270 | + but can cause confusion because they change the order of characters", |
| 271 | + )); |
| 272 | + } |
| 273 | + group = group.element(Level::HELP.message( |
| 274 | + "if their presence wasn't intentional, you can remove them, \ |
| 275 | + or use their escape sequence (e.g., \\u{202E}) in double-quoted strings", |
| 276 | + )); |
| 277 | + |
| 278 | + gctx.shell().print_report(&[group], lint_level.force())?; |
| 279 | + } |
| 280 | + |
| 281 | + Ok(()) |
| 282 | +} |
0 commit comments