From 59daef23510e30c70355b7ba25f50e555c97cd22 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Wed, 10 Jul 2024 17:34:52 +0900 Subject: [PATCH] diff: accept diff inputs by generic iterator This helps migrate internal [u8] variables to BStr. b"" literals in tests are changed to &str to get around potential type incompatibility between &[u8; N] values of different lengths N. --- cli/src/diff_util.rs | 6 +++--- cli/src/merge_tools/builtin.rs | 2 +- lib/src/conflicts.rs | 4 ++-- lib/src/diff.rs | 38 ++++++++++++++++++++-------------- lib/src/files.rs | 6 ++---- 5 files changed, 30 insertions(+), 26 deletions(-) diff --git a/cli/src/diff_util.rs b/cli/src/diff_util.rs index 4e46fc0349..ac090d4de8 100644 --- a/cli/src/diff_util.rs +++ b/cli/src/diff_util.rs @@ -852,7 +852,7 @@ fn unified_diff_hunks<'content>( right_line_range: 1..1, lines: vec![], }; - let diff = Diff::by_line(&[left_content, right_content]); + let diff = Diff::by_line([left_content, right_content]); let mut diff_hunks = diff.hunks().peekable(); while let Some(hunk) = diff_hunks.next() { match hunk { @@ -910,7 +910,7 @@ fn inline_diff_hunks<'content>( // Like Diff::default_refinement(), but doesn't try to match up contents by // lines. We know left/right_contents have no matching lines. - let mut diff = Diff::for_tokenizer(&[left_content, right_content], diff::find_word_ranges); + let mut diff = Diff::for_tokenizer([left_content, right_content], diff::find_word_ranges); diff.refine_changed_regions(diff::find_nonword_ranges); for hunk in diff.hunks() { match hunk { @@ -1128,7 +1128,7 @@ fn get_diff_stat( // TODO: this matches git's behavior, which is to count the number of newlines // in the file. but that behavior seems unhelpful; no one really cares how // many `0x0a` characters are in an image. 
- let diff = Diff::by_line(&[&left_content.contents, &right_content.contents]); + let diff = Diff::by_line([&left_content.contents, &right_content.contents]); let mut added = 0; let mut removed = 0; for hunk in diff.hunks() { diff --git a/cli/src/merge_tools/builtin.rs b/cli/src/merge_tools/builtin.rs index 01ac8f6dd8..a124026c54 100644 --- a/cli/src/merge_tools/builtin.rs +++ b/cli/src/merge_tools/builtin.rs @@ -225,7 +225,7 @@ fn make_diff_sections( left_contents: &str, right_contents: &str, ) -> Result>, BuiltinToolError> { - let diff = Diff::by_line(&[left_contents.as_bytes(), right_contents.as_bytes()]); + let diff = Diff::by_line([left_contents.as_bytes(), right_contents.as_bytes()]); let mut sections = Vec::new(); for hunk in diff.hunks() { match hunk { diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index c90aa27714..f8d586aa7e 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -259,12 +259,12 @@ pub fn materialize_merge_result( output.write_all(&left.0)?; continue; }; - let diff1 = Diff::by_line(&[&left.0, &right1.0]).hunks().collect_vec(); + let diff1 = Diff::by_line([&left.0, &right1.0]).hunks().collect_vec(); // Check if the diff against the next positive term is better. Since // we want to preserve the order of the terms, we don't match against // any later positive terms. 
if let Some(right2) = hunk.get_add(add_index + 1) { - let diff2 = Diff::by_line(&[&left.0, &right2.0]).hunks().collect_vec(); + let diff2 = Diff::by_line([&left.0, &right2.0]).hunks().collect_vec(); if diff_size(&diff2) < diff_size(&diff1) { // If the next positive term is a better match, emit // the current positive term as a snapshot and the next diff --git a/lib/src/diff.rs b/lib/src/diff.rs index 05f536838a..cb691e5516 100644 --- a/lib/src/diff.rs +++ b/lib/src/diff.rs @@ -408,13 +408,13 @@ fn intersect_regions( } impl<'input> Diff<'input> { - pub fn for_tokenizer( - inputs: &[&'input [u8]], + pub fn for_tokenizer + ?Sized + 'input>( + inputs: impl IntoIterator, tokenizer: impl Fn(&[u8]) -> Vec>, ) -> Self { - assert!(!inputs.is_empty()); - let base_input = inputs[0]; - let other_inputs = inputs.iter().skip(1).copied().collect_vec(); + let mut inputs = inputs.into_iter().map(AsRef::as_ref); + let base_input = inputs.next().expect("inputs must not be empty"); + let other_inputs = inputs.collect_vec(); // First tokenize each input let base_token_ranges = tokenizer(base_input); let other_token_ranges = other_inputs @@ -471,12 +471,16 @@ impl<'input> Diff<'input> { diff } - pub fn unrefined(inputs: &[&'input [u8]]) -> Self { + pub fn unrefined + ?Sized + 'input>( + inputs: impl IntoIterator, + ) -> Self { Diff::for_tokenizer(inputs, |_| vec![]) } /// Compares `inputs` line by line. - pub fn by_line(inputs: &[&'input [u8]]) -> Self { + pub fn by_line + ?Sized + 'input>( + inputs: impl IntoIterator, + ) -> Self { Diff::for_tokenizer(inputs, find_line_ranges) } @@ -486,7 +490,9 @@ impl<'input> Diff<'input> { // That would let each user decide which hunks to refine. However, it would // probably mean that many callers repeat the same code. Perhaps it // should be possible to refine a whole diff *or* individual hunks. 
- pub fn default_refinement(inputs: &[&'input [u8]]) -> Self { + pub fn default_refinement + ?Sized + 'input>( + inputs: impl IntoIterator, + ) -> Self { let mut diff = Diff::for_tokenizer(inputs, find_line_ranges); diff.refine_changed_regions(find_word_ranges); diff.refine_changed_regions(find_nonword_ranges); @@ -526,7 +532,7 @@ impl<'input> Diff<'input> { slices.push(&self.other_inputs[i][changed_range]); } - let refined_diff = Diff::for_tokenizer(&slices, &tokenizer); + let refined_diff = Diff::for_tokenizer(slices, &tokenizer); for UnchangedRange { base_range, @@ -663,7 +669,7 @@ pub fn diff<'a>(left: &'a [u8], right: &'a [u8]) -> Vec> { return vec![DiffHunk::Different(vec![left, b""])]; } - Diff::default_refinement(&[left, right]) + Diff::default_refinement([left, right]) .hunks() .collect_vec() } @@ -967,19 +973,19 @@ mod tests { #[test] fn test_diff_single_input() { - let diff = Diff::default_refinement(&[b"abc"]); + let diff = Diff::default_refinement(["abc"]); assert_eq!(diff.hunks().collect_vec(), vec![DiffHunk::Matching(b"abc")]); } #[test] fn test_diff_single_empty_input() { - let diff = Diff::default_refinement(&[b""]); + let diff = Diff::default_refinement([""]); assert_eq!(diff.hunks().collect_vec(), vec![]); } #[test] fn test_diff_two_inputs_one_different() { - let diff = Diff::default_refinement(&[b"a b c", b"a X c"]); + let diff = Diff::default_refinement(["a b c", "a X c"]); assert_eq!( diff.hunks().collect_vec(), vec![ @@ -992,7 +998,7 @@ mod tests { #[test] fn test_diff_multiple_inputs_one_different() { - let diff = Diff::default_refinement(&[b"a b c", b"a X c", b"a b c"]); + let diff = Diff::default_refinement(["a b c", "a X c", "a b c"]); assert_eq!( diff.hunks().collect_vec(), vec![ @@ -1005,7 +1011,7 @@ mod tests { #[test] fn test_diff_multiple_inputs_all_different() { - let diff = Diff::default_refinement(&[b"a b c", b"a X c", b"a c X"]); + let diff = Diff::default_refinement(["a b c", "a X c", "a c X"]); assert_eq!( 
diff.hunks().collect_vec(), vec![ @@ -1021,7 +1027,7 @@ mod tests { fn test_diff_for_tokenizer_compacted() { // Tests that unchanged regions are compacted when using for_tokenizer() let diff = Diff::for_tokenizer( - &[b"a\nb\nc\nd\ne\nf\ng", b"a\nb\nc\nX\ne\nf\ng"], + ["a\nb\nc\nd\ne\nf\ng", "a\nb\nc\nX\ne\nf\ng"], find_line_ranges, ); assert_eq!( diff --git a/lib/src/files.rs b/lib/src/files.rs index 8203ae3bc5..780f3e4ded 100644 --- a/lib/src/files.rs +++ b/lib/src/files.rs @@ -17,8 +17,6 @@ use std::collections::VecDeque; use std::fmt::{Debug, Error, Formatter}; -use itertools::Itertools; - use crate::diff; use crate::diff::{Diff, DiffHunk}; use crate::merge::{trivial_merge, Merge}; @@ -163,9 +161,9 @@ pub fn merge(slices: &Merge<&[u8]>) -> MergeResult { // usually done for 3-way conflicts. Are there better heuristics when there are // more than 3 parts? let num_diffs = slices.removes().len(); - let diff_inputs = slices.removes().chain(slices.adds()).copied().collect_vec(); + let diff_inputs = slices.removes().chain(slices.adds()); - let diff = Diff::by_line(&diff_inputs); + let diff = Diff::by_line(diff_inputs); let mut resolved_hunk = ContentHunk(vec![]); let mut merge_hunks: Vec> = vec![]; for diff_hunk in diff.hunks() {