Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Roll our own hex utils #4470

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ gix = { version = "0.66.0", default-features = false, features = [
] }
gix-filter = "0.13.0"
glob = "0.3.1"
hex = "0.4.3"
faster-hex = { version = "0.9.0", default-features = false, features = ["std"]}
Veykril marked this conversation as resolved.
Show resolved Hide resolved
ignore = "0.4.23"
indexmap = "2.5.0"
indoc = "2.0.4"
Expand Down
1 change: 1 addition & 0 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ esl01-renderdag = { workspace = true }
futures = { workspace = true }
git2 = { workspace = true }
gix = { workspace = true }
faster-hex = { workspace = true }
indexmap = { workspace = true }
indoc = { workspace = true }
itertools = { workspace = true }
Expand Down
15 changes: 9 additions & 6 deletions cli/src/cli_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ use jj_lib::git;
use jj_lib::git_backend::GitBackend;
use jj_lib::gitignore::GitIgnoreError;
use jj_lib::gitignore::GitIgnoreFile;
use jj_lib::hex_util::to_reverse_hex;
use jj_lib::id_prefix::IdPrefixContext;
use jj_lib::matchers::Matcher;
use jj_lib::merge::MergedTreeValue;
Expand Down Expand Up @@ -2632,17 +2631,21 @@ pub fn edit_temp_file(
}

/// Returns the first 12 hexadecimal digits of `commit_id` as an owned string.
pub fn short_commit_hash(commit_id: &CommitId) -> String {
commit_id.hex()[0..12].to_string()
let mut hash = commit_id.hex();
hash.truncate(12);
hash
}

/// Returns the first 12 reverse-hex digits of `change_id` as an owned string.
pub fn short_change_hash(change_id: &ChangeId) -> String {
// TODO: We could avoid the unwrap() and make this more efficient by converting
// straight from binary.
to_reverse_hex(&change_id.hex()[0..12]).unwrap()
let mut hash = change_id.reverse_hex();
hash.truncate(12);
hash
}

/// Returns the first 12 hexadecimal digits of `operation_id` as an owned
/// string.
pub fn short_operation_hash(operation_id: &OperationId) -> String {
operation_id.hex()[0..12].to_string()
let mut hash = operation_id.hex();
hash.truncate(12);
hash
}

/// Wrapper around a `DiffEditor` to conditionally start interactive session.
Expand Down
2 changes: 1 addition & 1 deletion cli/src/commands/debug/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub fn cmd_debug_tree(
let workspace_command = command.workspace_helper(ui)?;
let tree = if let Some(tree_id_hex) = &args.id {
let tree_id =
TreeId::try_from_hex(tree_id_hex).map_err(|_| user_error("Invalid tree id"))?;
TreeId::try_from_hex(tree_id_hex).ok_or_else(|| user_error("Invalid tree id"))?;
let dir = if let Some(dir_str) = &args.dir {
workspace_command.parse_file_path(dir_str)?
} else {
Expand Down
7 changes: 1 addition & 6 deletions cli/src/commit_templater.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ use jj_lib::fileset;
use jj_lib::fileset::FilesetDiagnostics;
use jj_lib::fileset::FilesetExpression;
use jj_lib::git;
use jj_lib::hex_util::to_reverse_hex;
use jj_lib::id_prefix::IdPrefixContext;
use jj_lib::matchers::Matcher;
use jj_lib::merged_tree::MergedTree;
Expand Down Expand Up @@ -1248,11 +1247,7 @@ impl CommitOrChangeId {
/// Returns the textual form of the wrapped id: plain hex for a commit id,
/// reverse hex for a change id.
pub fn hex(&self) -> String {
match self {
CommitOrChangeId::Commit(id) => id.hex(),
CommitOrChangeId::Change(id) => {
// TODO: We can avoid the unwrap() and make this more efficient by converting
// straight from bytes.
to_reverse_hex(&id.hex()).unwrap()
}
CommitOrChangeId::Change(id) => id.reverse_hex(),
}
}

Expand Down
1 change: 0 additions & 1 deletion lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ git2 = { workspace = true, optional = true }
gix = { workspace = true, optional = true }
gix-filter = { workspace = true, optional = true }
glob = { workspace = true }
hex = { workspace = true }
ignore = { workspace = true }
indexmap = { workspace = true }
itertools = { workspace = true }
Expand Down
7 changes: 7 additions & 0 deletions lib/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use futures::stream::BoxStream;
use thiserror::Error;

use crate::content_hash::ContentHash;
use crate::hex_util;
use crate::index::Index;
use crate::merge::Merge;
use crate::object_id::id_type;
Expand All @@ -50,6 +51,12 @@ id_type!(pub FileId);
id_type!(pub SymlinkId);
id_type!(pub ConflictId);

impl ChangeId {
    /// Renders the raw bytes of this change id in the "reverse hex"
    /// alphabet produced by `hex_util::encode_hex_string_reverse`.
    pub fn reverse_hex(&self) -> String {
        let raw: &[u8] = &self.0;
        hex_util::encode_hex_string_reverse(raw)
    }
}

#[derive(ContentHash, Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
pub struct MillisSinceEpoch(pub i64);

Expand Down
5 changes: 3 additions & 2 deletions lib/src/content_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ mod tests {
use std::collections::HashMap;

use super::*;
use crate::hex_util;

#[test]
fn test_string_sanity() {
Expand Down Expand Up @@ -215,7 +216,7 @@ mod tests {
x: Vec<Option<i32>>,
y: i64,
}
let foo_hash = hex::encode(hash(&Foo {
let foo_hash = hex_util::encode_hex_string(&hash(&Foo {
x: vec![None, Some(42)],
y: 17,
}));
Expand All @@ -231,7 +232,7 @@ mod tests {
y: Y,
}
assert_eq!(
hex::encode(hash(&GenericFoo {
hex_util::encode_hex_string(&hash(&GenericFoo {
x: vec![None, Some(42)],
y: 17i64
})),
Expand Down
3 changes: 2 additions & 1 deletion lib/src/default_index/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ use crate::backend::ChangeId;
use crate::backend::CommitId;
use crate::commit::Commit;
use crate::file_util::persist_content_addressed_temp_file;
use crate::hex_util;
use crate::index::AllHeadsForGcUnsupported;
use crate::index::ChangeIdIndex;
use crate::index::Index;
Expand Down Expand Up @@ -359,7 +360,7 @@ impl MutableIndexSegment {
self.serialize_local_entries(&mut buf);
let mut hasher = Blake2b512::new();
hasher.update(&buf);
let index_file_id_hex = hex::encode(hasher.finalize());
let index_file_id_hex = hex_util::encode_hex_string(&hasher.finalize());
let index_file_path = dir.join(&index_file_id_hex);

let mut temp_file = NamedTempFile::new_in(dir)?;
Expand Down
8 changes: 4 additions & 4 deletions lib/src/git_backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ fn root_tree_from_header(git_commit: &CommitRef) -> Result<Option<MergedTreeId>,
if *key == JJ_TREES_COMMIT_HEADER {
let mut tree_ids = SmallVec::new();
for hex in str::from_utf8(value.as_ref()).or(Err(()))?.split(' ') {
let tree_id = TreeId::try_from_hex(hex).or(Err(()))?;
let tree_id = TreeId::try_from_hex(hex).ok_or(())?;
if tree_id.as_bytes().len() != HASH_LENGTH {
return Err(());
}
Expand Down Expand Up @@ -1505,19 +1505,19 @@ fn tree_value_from_json(json: &serde_json::Value) -> TreeValue {
}

fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
hex::decode(value.as_str().unwrap()).unwrap()
crate::hex_util::decode_hex_string(value.as_str().unwrap()).unwrap()
}

#[cfg(test)]
mod tests {
use assert_matches::assert_matches;
use git2::Oid;
use hex::ToHex;
use pollster::FutureExt;
use test_case::test_case;

use super::*;
use crate::content_hash::blake2b_hash;
use crate::hex_util;

#[test_case(false; "legacy tree format")]
#[test_case(true; "tree-level conflict format")]
Expand Down Expand Up @@ -2133,7 +2133,7 @@ mod tests {
};

let mut signer = |data: &_| {
let hash: String = blake2b_hash(data).encode_hex();
let hash: String = hex_util::encode_hex_string(&blake2b_hash(data));
Ok(format!("test sig\n\n\nhash={hash}").into_bytes())
};

Expand Down
112 changes: 105 additions & 7 deletions lib/src/hex_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,20 @@

#![allow(missing_docs)]

use std::iter;

/// Returns the numeric value (0..=15) of an ASCII hexadecimal digit.
///
/// Both lowercase and uppercase letters are accepted. Any other byte yields
/// `None`.
fn hex_to_relative(b: u8) -> Option<u8> {
    // `to_digit(16)` only ever yields values below 16, so narrowing to `u8`
    // cannot truncate.
    char::from(b).to_digit(16).map(|value| value as u8)
}

/// Maps an ASCII hexadecimal digit (either case) to its reverse-hex
/// counterpart by subtracting the digit's value from `b'z'`, so `0` becomes
/// `z` and `f` becomes `k`. Returns `None` if `b` is not a hexadecimal digit.
fn to_reverse_hex_digit(b: u8) -> Option<u8> {
let value = match b {
b'0'..=b'9' => b - b'0',
b'A'..=b'F' => b - b'A' + 10,
b'a'..=b'f' => b - b'a' + 10,
_ => return None,
};
Some(b'z' - value)
Some(b'z' - hex_to_relative(b)?)
}

fn to_forward_hex_digit(b: u8) -> Option<u8> {
Expand All @@ -37,20 +43,41 @@ fn to_forward_hex_digit(b: u8) -> Option<u8> {
}
}

/// Converts a reverse hex string into the equivalent forward hex string.
///
/// Returns `None` as soon as a byte that is not a reverse hex digit is
/// encountered.
pub fn to_forward_hex(reverse_hex: &str) -> Option<String> {
    let mut forward = String::with_capacity(reverse_hex.len());
    for digit in reverse_hex.bytes() {
        forward.push(char::from(to_forward_hex_digit(digit)?));
    }
    Some(forward)
}

/// Converts a forward hex string into the equivalent reverse hex string.
///
/// Returns `None` as soon as a byte that is not a forward hex digit is
/// encountered.
pub fn to_reverse_hex(forward_hex: &str) -> Option<String> {
    let mut reversed = String::with_capacity(forward_hex.len());
    for digit in forward_hex.bytes() {
        reversed.push(char::from(to_reverse_hex_digit(digit)?));
    }
    Some(reversed)
}

/// Decodes a hexadecimal string into bytes, two digits per byte with the
/// high nibble first.
///
/// Returns `None` if the input has an odd number of bytes or contains a
/// byte that is not a hexadecimal digit (either case is accepted).
pub fn decode_hex_string(src: &str) -> Option<Vec<u8>> {
    let digits = src.as_bytes();
    if digits.len() % 2 == 1 {
        return None;
    }
    let mut decoded = vec![0u8; digits.len() / 2];
    for (out, pair) in iter::zip(decoded.iter_mut(), digits.chunks_exact(2)) {
        let high = hex_to_relative(pair[0])?;
        let low = hex_to_relative(pair[1])?;
        *out = (high << 4) | low;
    }
    Some(decoded)
}

/// Calculates common prefix length of two byte sequences. The length
/// to be returned is a number of hexadecimal digits.
pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize {
Expand All @@ -64,10 +91,81 @@ pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize {
.unwrap_or_else(|| bytes_a.len().min(bytes_b.len()) * 2)
}

/// Encodes `src` as a reverse hex string: two characters per byte, high
/// nibble first, using the alphabet `z` (for 0) down to `k` (for 15).
pub fn encode_hex_string_reverse(src: &[u8]) -> String {
    let mut encoded = String::with_capacity(src.len() * 2);
    for &byte in src {
        encoded.push(char::from(hex_lower_reverse(byte >> 4)));
        encoded.push(char::from(hex_lower_reverse(byte & 0x0f)));
    }
    encoded
}

/// Looks up the reverse hex ASCII digit for a nibble.
///
/// Panics if `byte` is greater than 15.
fn hex_lower_reverse(byte: u8) -> u8 {
    const REVERSE_DIGITS: &[u8; 16] = b"zyxwvutsrqponmlk";
    REVERSE_DIGITS[usize::from(byte)]
}

/// Encodes `src` as a lowercase hexadecimal string: two characters per
/// byte, high nibble first.
pub fn encode_hex_string(src: &[u8]) -> String {
    let mut encoded = String::with_capacity(src.len() * 2);
    for &byte in src {
        encoded.push(char::from(hex_lower(byte >> 4)));
        encoded.push(char::from(hex_lower(byte & 0x0f)));
    }
    encoded
}

/// Looks up the lowercase hexadecimal ASCII digit for a nibble.
///
/// Panics if `byte` is greater than 15.
fn hex_lower(byte: u8) -> u8 {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";
    DIGITS[usize::from(byte)]
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_common_hex_len() {
assert_eq!(common_hex_len(b"", b""), 0);
assert_eq!(common_hex_len(b"abc", b"abc"), 6);

assert_eq!(common_hex_len(b"aaa", b"bbb"), 1);
assert_eq!(common_hex_len(b"aab", b"aac"), 5);
}

#[test]
fn test_encode_hex_string() {
assert_eq!(&encode_hex_string(b""), "");
assert_eq!(&encode_hex_string(b"012"), "303132");
assert_eq!(&encode_hex_string(b"0123"), "30313233");
assert_eq!(&encode_hex_string(b"abdz"), "6162647a");
}

#[test]
fn test_encode_hex_string_reverse() {
assert_eq!(&encode_hex_string_reverse(b""), "");
assert_eq!(&encode_hex_string_reverse(b"012"), "wzwywx");
assert_eq!(&encode_hex_string_reverse(b"0123"), "wzwywxww");
assert_eq!(&encode_hex_string_reverse(b"abdz"), "tytxtvsp");
}

#[test]
fn test_decode_hex_string() {
// Empty string
assert_eq!(decode_hex_string(""), Some(vec![]));

// Odd number of digits
assert_eq!(decode_hex_string("0"), None);

// Invalid digit
assert_eq!(decode_hex_string("g0"), None);
assert_eq!(decode_hex_string("0g"), None);

assert_eq!(
decode_hex_string("0123456789abcdefABCDEF"),
Some(b"\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF".to_vec())
);
}

#[test]
fn test_reverse_hex() {
// Empty string
Expand Down
Loading
Loading