Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

conflicts: remove use of Regex from conflict marker parsing #4968

Closed
wants to merge 3 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 68 additions & 62 deletions lib/src/conflicts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ use futures::StreamExt;
use futures::TryStreamExt;
use itertools::Itertools;
use pollster::FutureExt;
use regex::bytes::Regex;
use regex::bytes::RegexBuilder;

use crate::backend::BackendError;
use crate::backend::BackendResult;
Expand Down Expand Up @@ -65,18 +63,15 @@ const CONFLICT_MINUS_LINE_CHAR: u8 = CONFLICT_MINUS_LINE.as_bytes()[0];
const CONFLICT_PLUS_LINE_CHAR: u8 = CONFLICT_PLUS_LINE.as_bytes()[0];
const CONFLICT_GIT_ANCESTOR_LINE_CHAR: u8 = CONFLICT_GIT_ANCESTOR_LINE.as_bytes()[0];
const CONFLICT_GIT_SEPARATOR_LINE_CHAR: u8 = CONFLICT_GIT_SEPARATOR_LINE.as_bytes()[0];

/// A conflict marker is one of the separators, optionally followed by a space
/// and some text.
// TODO: All the `{7}` could be replaced with `{7,}` to allow longer
// separators. This could be useful to make it possible to allow conflict
// markers inside the text of the conflicts.
static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
RegexBuilder::new(r"^(<{7}|>{7}|%{7}|\-{7}|\+{7}|\|{7}|={7})( .*)?$")
.multi_line(true)
.build()
.unwrap()
});
const ALL_CONFLICT_MARKER_LINES: &[&str] = &[
CONFLICT_START_LINE,
CONFLICT_END_LINE,
CONFLICT_DIFF_LINE,
CONFLICT_MINUS_LINE,
CONFLICT_PLUS_LINE,
CONFLICT_GIT_ANCESTOR_LINE,
CONFLICT_GIT_SEPARATOR_LINE,
];

fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
for hunk in hunks {
Expand Down Expand Up @@ -481,11 +476,12 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BStrin
let mut conflict_start = None;
let mut conflict_start_len = 0;
for line in input.split_inclusive(|b| *b == b'\n') {
if is_conflict_marker_line(line) {
if line[0] == CONFLICT_START_LINE_CHAR {
match detect_conflict_marker_line(line, &[CONFLICT_START_LINE, CONFLICT_END_LINE]) {
Some(CONFLICT_START_LINE_CHAR) => {
conflict_start = Some(pos);
conflict_start_len = line.len();
} else if conflict_start.is_some() && line[0] == CONFLICT_END_LINE_CHAR {
}
Some(CONFLICT_END_LINE_CHAR) if conflict_start.is_some() => {
let conflict_body = &input[conflict_start.unwrap() + conflict_start_len..pos];
let hunk = parse_conflict_hunk(conflict_body);
if hunk.num_sides() == num_sides {
Expand All @@ -498,6 +494,7 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BStrin
}
conflict_start = None;
}
_ => {}
}
pos += line.len();
}
Expand All @@ -522,8 +519,7 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> {
let initial_conflict_marker_char = input
.lines_with_terminator()
.next()
.filter(|line| is_conflict_marker_line(line))
.map(|line| line[0]);
.and_then(|line| detect_conflict_marker_line(line, ALL_CONFLICT_MARKER_LINES));

match initial_conflict_marker_char {
// JJ-style conflicts must start with one of these 3 conflict marker lines
Expand All @@ -549,26 +545,27 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
let mut removes = vec![];
let mut adds = vec![];
for line in input.lines_with_terminator() {
if is_conflict_marker_line(line) {
match line[0] {
CONFLICT_DIFF_LINE_CHAR => {
state = State::Diff;
removes.push(BString::new(vec![]));
adds.push(BString::new(vec![]));
continue;
}
CONFLICT_MINUS_LINE_CHAR => {
state = State::Minus;
removes.push(BString::new(vec![]));
continue;
}
CONFLICT_PLUS_LINE_CHAR => {
state = State::Plus;
adds.push(BString::new(vec![]));
continue;
}
_ => {}
match detect_conflict_marker_line(
line,
&[CONFLICT_DIFF_LINE, CONFLICT_MINUS_LINE, CONFLICT_PLUS_LINE],
) {
Some(CONFLICT_DIFF_LINE_CHAR) => {
state = State::Diff;
removes.push(BString::new(vec![]));
adds.push(BString::new(vec![]));
continue;
}
Some(CONFLICT_MINUS_LINE_CHAR) => {
state = State::Minus;
removes.push(BString::new(vec![]));
continue;
}
Some(CONFLICT_PLUS_LINE_CHAR) => {
state = State::Plus;
adds.push(BString::new(vec![]));
continue;
}
_ => {}
}
match state {
State::Diff => {
Expand Down Expand Up @@ -623,28 +620,29 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
let mut base = BString::new(vec![]);
let mut right = BString::new(vec![]);
for line in input.lines_with_terminator() {
if is_conflict_marker_line(line) {
match line[0] {
CONFLICT_GIT_ANCESTOR_LINE_CHAR => {
if state == State::Left {
state = State::Base;
continue;
} else {
// Base must come after left
return Merge::resolved(BString::new(vec![]));
}
match detect_conflict_marker_line(
line,
&[CONFLICT_GIT_ANCESTOR_LINE, CONFLICT_GIT_SEPARATOR_LINE],
) {
Some(CONFLICT_GIT_ANCESTOR_LINE_CHAR) => {
if state == State::Left {
state = State::Base;
continue;
} else {
// Base must come after left
return Merge::resolved(BString::new(vec![]));
}
CONFLICT_GIT_SEPARATOR_LINE_CHAR => {
if state == State::Base {
state = State::Right;
continue;
} else {
// Right must come after base
return Merge::resolved(BString::new(vec![]));
}
}
Some(CONFLICT_GIT_SEPARATOR_LINE_CHAR) => {
if state == State::Base {
state = State::Right;
continue;
} else {
// Right must come after base
return Merge::resolved(BString::new(vec![]));
}
_ => {}
}
_ => {}
}
match state {
State::Left => left.extend_from_slice(line),
Expand All @@ -661,11 +659,19 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
}
}

/// Check whether a line is a conflict marker. Removes trailing whitespace
/// before checking against regex to ensure it parses CRLF endings correctly.
fn is_conflict_marker_line(line: &[u8]) -> bool {
let line = line.trim_end_with(|ch| ch.is_ascii_whitespace());
CONFLICT_MARKER_REGEX.is_match_at(line, 0)
/// Check whether a line is a conflict marker, returns a byte denoting the
/// marker type.
///
/// A conflict marker is one of the separators, optionally followed by a
/// whitespace (including CR or LF) and some text.
fn detect_conflict_marker_line(line: &[u8], marker_candidates: &[&str]) -> Option<u8> {
// TODO: Allow longer separators? This could be useful to make it possible
// to allow conflict markers inside the text of the conflicts.
let marker = marker_candidates
.iter()
.find(|marker| line.starts_with(marker.as_bytes()))?;
let separated = line.get(marker.len()).map_or(true, u8::is_ascii_whitespace);
separated.then_some(marker.as_bytes()[0])
}

/// Parses conflict markers in `content` and returns an updated version of
Expand Down
Loading