From 8e2dee61dca468d79aaa6dd88466481c5ceb634a Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Tue, 5 May 2026 16:09:43 +1000 Subject: [PATCH 1/2] feat(cid): raw size helpers in Cid library Ref: https://github.com/FilOzone/filecoin-services/issues/451 --- src/Cids.sol | 21 ++++++++++++++++++--- test/Cids.t.sol | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/src/Cids.sol b/src/Cids.sol index 4288611..76533d5 100644 --- a/src/Cids.sol +++ b/src/Cids.sol @@ -6,7 +6,7 @@ library Cids { // (cidv1) (raw) (fr32-sha2-256-trunc254-padded-binary-tree) bytes4 public constant COMMP_V2_PREFIX = hex"01559120"; - // A helper struct for events + getter functions to display digests as CommpV2 CIDs + // A helper struct for events + getter functions to display digests as PieceCIDv2 CIDs struct Cid { bytes data; } @@ -27,7 +27,7 @@ library Cids { return uint8(cid.data[cid.data.length - 32 - 1]); } - // Checks that CID is CommPv2 and decomposes it into its components. + // Checks that CID is PieceCIDv2 and decomposes it into its components. // See: https://github.com/filecoin-project/FIPs/blob/master/FRCs/frc-0069.md function validateCommPv2(Cid memory cid) internal @@ -69,6 +69,14 @@ library Cids { return (1 << (uint256(height) + 5)) - (128 * padding) / 127; } + // rawPieceSize returns the raw (pre-Fr32-expansion) byte size from a PieceCIDv2's padding + // and height. Exact. Tree capacity is 2^(height+5) Fr32 bytes = 2^(height-2) * 127 raw bytes; + // padding is expressed in raw bytes. Smallest representable piece is height=2 (127 raw bytes). + // Reverts on height < 2 or padding exceeding capacity (Solidity 0.8 underflow check). 
+ function rawPieceSize(uint256 padding, uint8 height) internal pure returns (uint256) { + return (1 << (uint256(height) - 2)) * 127 - padding; + } + // leafCount returns the number of 32b leaves that contain any amount of data // Utilize isPaddingExcessive to check if the padding size exceeds the size of the tree // If isPaddingExcessive is false, leafCount will never return a zero. @@ -82,7 +90,14 @@ library Cids { return (1 << uint256(height)) - paddingLeafs; } - // Creates a CommPv2 CID from a raw size and hash digest according to FRC-0069. + // leafCountToRawSize gives an upper bound on raw bytes from a leaf count. Use rawPieceSize + // when padding and height are available; this is for aggregate counts (e.g. data set total). + // Overestimates by up to ~31 bytes per piece because per-piece leaf counts round up to 32. + function leafCountToRawSize(uint256 leaves) internal pure returns (uint256) { + return (leaves * 32 * 127) / 128; + } + + // Creates a PieceCIDv2 CID from a raw size and hash digest according to FRC-0069. // The CID uses the Raw codec and fr32-sha2-256-trunc254-padded-binary-tree multihash. // The digest format is: uvarint padding | uint8 height | 32 byte root data function CommPv2FromDigest(uint256 padding, uint8 height, bytes32 digest) internal pure returns (Cids.Cid memory) { diff --git a/test/Cids.t.sol b/test/Cids.t.sol index 25a7419..4dff1e8 100644 --- a/test/Cids.t.sol +++ b/test/Cids.t.sol @@ -34,6 +34,53 @@ contract CidsTest is Test { assertEq(Cids.leafCount(128, 30), (1 << 30) - 4); } + function testRawPieceSize() public pure { + // Smallest representable piece: height=2, 4 leaves = 128 Fr32 bytes = 127 raw bytes. + assertEq(Cids.rawPieceSize(0, 2), 127); + // Padding subtracts directly from raw capacity. + assertEq(Cids.rawPieceSize(1, 2), 126); + assertEq(Cids.rawPieceSize(127, 2), 0); + + // Larger heights: capacity = 2^(height-2) * 127. 
+ assertEq(Cids.rawPieceSize(0, 3), 254); // 2 * 127 + assertEq(Cids.rawPieceSize(0, 5), 8 * 127); + assertEq(Cids.rawPieceSize(0, 30), uint256(1 << 28) * 127); + assertEq(Cids.rawPieceSize(0, 35), uint256(1 << 33) * 127); // 1 TiB Fr32 tree = 1016 GiB raw + + // FRC-0069 fixtures: padding=504, height=5 -> capacity 8*127=1016, raw=512. + assertEq(Cids.rawPieceSize(504, 5), 512); + } + + function testLeafCountToRawSize() public pure { + // A single leaf is 32 Fr32 bytes; the raw bound rounds down to 31. + assertEq(Cids.leafCountToRawSize(0), 0); + assertEq(Cids.leafCountToRawSize(1), 31); + assertEq(Cids.leafCountToRawSize(4), 127); // matches rawPieceSize(0, 2) + assertEq(Cids.leafCountToRawSize(8), 254); // matches rawPieceSize(0, 3) + + // 1 GiB of leaves (2^25 leaves). + uint256 leaves1GiB = 1 << 25; + assertEq(Cids.leafCountToRawSize(leaves1GiB), (leaves1GiB * 32 * 127) / 128); + + // The bound: leafCountToRawSize(leafCount(p, h)) >= rawPieceSize(p, h) for valid (p, h), + // and the gap is at most 31 bytes per piece. + uint256 raw = Cids.rawPieceSize(0, 2); + uint256 estimate = Cids.leafCountToRawSize(Cids.leafCount(0, 2)); + assertGe(estimate, raw); + assertLe(estimate - raw, 31); + + raw = Cids.rawPieceSize(504, 5); + estimate = Cids.leafCountToRawSize(Cids.leafCount(504, 5)); + assertGe(estimate, raw); + assertLe(estimate - raw, 31); + + // Aggregate-data-set scenario: a data set built from N pieces of height 30 (no padding) + // has N * 2^30 leaves and N * 2^28 * 127 raw bytes. The estimate is exact when each piece + // is fully populated. + uint256 leaves = uint256(1 << 30) * 16; // 16 * 32 GiB Fr32 of leaves + assertEq(Cids.leafCountToRawSize(leaves), uint256(1 << 28) * 127 * 16); + } + function testIsPaddingExcessive() public pure { assertEq(Cids.isPaddingExcessive(127, 2), true); assertEq(Cids.leafCount(127, 2), 0); From 69df750ed515a58c8c2e56f88b27ac44d7bc5d85 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Thu, 7 May 2026 18:01:00 +1000 Subject: [PATCH 2/2] fixup! 
feat(cid): raw size helpers in Cid library --- src/Cids.sol | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Cids.sol b/src/Cids.sol index 76533d5..72d4dcc 100644 --- a/src/Cids.sol +++ b/src/Cids.sol @@ -69,10 +69,12 @@ library Cids { return (1 << (uint256(height) + 5)) - (128 * padding) / 127; } - // rawPieceSize returns the raw (pre-Fr32-expansion) byte size from a PieceCIDv2's padding - // and height. Exact. Tree capacity is 2^(height+5) Fr32 bytes = 2^(height-2) * 127 raw bytes; - // padding is expressed in raw bytes. Smallest representable piece is height=2 (127 raw bytes). - // Reverts on height < 2 or padding exceeding capacity (Solidity 0.8 underflow check). + // rawPieceSize returns the exact raw (pre-Fr32-expansion) byte size of the data behind a + // PieceCIDv2. Per FRC-0069, the CID describes a binary tree of 2^height 32-byte leaves + // holding Fr32-expanded data (128 Fr32 bytes per 127 raw), and `padding` is the trailing + // zero bytes appended to the raw data before that expansion. + // raw size = 2^height * 32 * 127/128 - padding = 2^(height-2) * 127 - padding + // Reverts on height < 2 or padding too large for the tree. function rawPieceSize(uint256 padding, uint8 height) internal pure returns (uint256) { return (1 << (uint256(height) - 2)) * 127 - padding; } @@ -90,9 +92,9 @@ library Cids { return (1 << uint256(height)) - paddingLeafs; } - // leafCountToRawSize gives an upper bound on raw bytes from a leaf count. Use rawPieceSize - // when padding and height are available; this is for aggregate counts (e.g. data set total). - // Overestimates by up to ~31 bytes per piece because per-piece leaf counts round up to 32. + // leafCountToRawSize approximates raw bytes from a data-bearing leaf count (sum of + // Cids.leafCount across pieces, fully padded leaves excluded). Use rawPieceSize per piece + // for an exact result. Overestimates by up to 31 bytes per piece. 
function leafCountToRawSize(uint256 leaves) internal pure returns (uint256) { return (leaves * 32 * 127) / 128; }