diff --git a/src/Cids.sol b/src/Cids.sol
index 4288611..72d4dcc 100644
--- a/src/Cids.sol
+++ b/src/Cids.sol
@@ -6,7 +6,7 @@ library Cids {
     // (cidv1) (raw) (fr32-sha2-256-trunc254-padded-binary-tree)
     bytes4 public constant COMMP_V2_PREFIX = hex"01559120";
 
-    // A helper struct for events + getter functions to display digests as CommpV2 CIDs
+    // A helper struct for events + getter functions to display digests as PieceCIDv2 CIDs
     struct Cid {
         bytes data;
     }
@@ -27,7 +27,7 @@ library Cids {
         return uint8(cid.data[cid.data.length - 32 - 1]);
     }
 
-    // Checks that CID is CommPv2 and decomposes it into its components.
+    // Checks that CID is PieceCIDv2 and decomposes it into its components.
     // See: https://github.com/filecoin-project/FIPs/blob/master/FRCs/frc-0069.md
     function validateCommPv2(Cid memory cid)
         internal
@@ -69,6 +69,16 @@ library Cids {
         return (1 << (uint256(height) + 5)) - (128 * padding) / 127;
     }
 
+    // rawPieceSize returns the exact raw (pre-Fr32-expansion) byte size of the data behind a
+    // PieceCIDv2. Per FRC-0069, the CID describes a binary tree of 2^height 32-byte leaves
+    // holding Fr32-expanded data (128 Fr32 bytes per 127 raw), and `padding` is the number of
+    // trailing zero bytes appended to the raw data before that expansion.
+    //   raw size = 2^height * 32 * 127/128 - padding = 2^(height-2) * 127 - padding
+    // Reverts on height < 2 or when padding exceeds the tree's raw capacity, 2^(height-2) * 127.
+    function rawPieceSize(uint256 padding, uint8 height) internal pure returns (uint256) {
+        return (1 << (uint256(height) - 2)) * 127 - padding;
+    }
+
     // leafCount returns the number of 32b leaves that contain any amount of data
     // Utilize isPaddingExcessive to check if the padding size exceeds the size of the tree
     // If isPaddingExcessive is false, leafCount will never return a zero.
@@ -82,7 +92,14 @@ library Cids {
         return (1 << uint256(height)) - paddingLeafs;
     }
 
-    // Creates a CommPv2 CID from a raw size and hash digest according to FRC-0069.
+    // leafCountToRawSize approximates raw bytes from a data-bearing leaf count (sum of Cids.leafCount
+    // across pieces, fully padded leaves excluded) as leaves * 32 * 127 / 128, reduced to leaves * 127 / 4.
+    // Overestimates by up to 31 bytes per piece; use rawPieceSize per piece for an exact result.
+    function leafCountToRawSize(uint256 leaves) internal pure returns (uint256) {
+        return (leaves * 127) / 4;
+    }
+
+    // Creates a PieceCIDv2 CID from a raw size and hash digest according to FRC-0069.
     // The CID uses the Raw codec and fr32-sha2-256-trunc254-padded-binary-tree multihash.
     // The digest format is: uvarint padding | uint8 height | 32 byte root data
     function CommPv2FromDigest(uint256 padding, uint8 height, bytes32 digest) internal pure returns (Cids.Cid memory) {
diff --git a/test/Cids.t.sol b/test/Cids.t.sol
index 25a7419..4dff1e8 100644
--- a/test/Cids.t.sol
+++ b/test/Cids.t.sol
@@ -34,6 +34,53 @@ contract CidsTest is Test {
         assertEq(Cids.leafCount(128, 30), (1 << 30) - 4);
     }
 
+    function testRawPieceSize() public pure {
+        // Smallest representable piece: height=2, 4 leaves = 128 Fr32 bytes = 127 raw bytes.
+        assertEq(Cids.rawPieceSize(0, 2), 127);
+        // Padding subtracts directly from raw capacity.
+        assertEq(Cids.rawPieceSize(1, 2), 126);
+        assertEq(Cids.rawPieceSize(127, 2), 0);
+
+        // Larger heights: capacity = 2^(height-2) * 127.
+        assertEq(Cids.rawPieceSize(0, 3), 254); // 2 * 127
+        assertEq(Cids.rawPieceSize(0, 5), 8 * 127);
+        assertEq(Cids.rawPieceSize(0, 30), uint256(1 << 28) * 127); // 32 GiB Fr32 tree (2^30 leaves * 32 B)
+        assertEq(Cids.rawPieceSize(0, 35), uint256(1 << 33) * 127); // 1 TiB Fr32 tree (2^35 leaves * 32 B)
+
+        // FRC-0069 fixtures: padding=504, height=5 -> capacity 8*127=1016, raw=512.
+        assertEq(Cids.rawPieceSize(504, 5), 512);
+    }
+
+    function testLeafCountToRawSize() public pure {
+        // A single leaf is 32 Fr32 bytes; the raw bound rounds down to 31.
+        assertEq(Cids.leafCountToRawSize(0), 0);
+        assertEq(Cids.leafCountToRawSize(1), 31);
+        assertEq(Cids.leafCountToRawSize(4), 127); // matches rawPieceSize(0, 2)
+        assertEq(Cids.leafCountToRawSize(8), 254); // matches rawPieceSize(0, 3)
+
+        // 1 GiB of leaves (2^25 leaves): 2^25 * 32 * 127 / 128 = 2^23 * 127 raw bytes, stated independently.
+        uint256 leaves1GiB = 1 << 25;
+        assertEq(Cids.leafCountToRawSize(leaves1GiB), uint256(1 << 23) * 127);
+
+        // The bound: leafCountToRawSize(leafCount(p, h)) >= rawPieceSize(p, h) for valid (p, h),
+        // and the gap is at most 31 bytes per piece.
+        uint256 raw = Cids.rawPieceSize(0, 2);
+        uint256 estimate = Cids.leafCountToRawSize(Cids.leafCount(0, 2));
+        assertGe(estimate, raw);
+        assertLe(estimate - raw, 31);
+
+        raw = Cids.rawPieceSize(504, 5);
+        estimate = Cids.leafCountToRawSize(Cids.leafCount(504, 5));
+        assertGe(estimate, raw);
+        assertLe(estimate - raw, 31);
+
+        // Aggregate-data-set scenario: a data set built from N pieces of height 30 (no padding)
+        // has N * 2^30 leaves and N * 2^28 * 127 raw bytes. The estimate is exact when each piece
+        // is fully populated.
+        uint256 leaves = uint256(1 << 30) * 16; // 16 * 32 GiB Fr32 of leaves
+        assertEq(Cids.leafCountToRawSize(leaves), uint256(1 << 28) * 127 * 16);
+    }
+
     function testIsPaddingExcessive() public pure {
         assertEq(Cids.isPaddingExcessive(127, 2), true);
         assertEq(Cids.leafCount(127, 2), 0);