Skip to content

Commit

Permalink
feat: getPath with carScope
Browse files Browse the repository at this point in the history
add getPath method as a generator that returns blocks for the targeted dag and all blocks traversed while resolving a cid+path string

supports carScope to specify what blocks to return for the resolved dag
- 'all': return the entire dag starting at path. (default)
- 'block': return the block identified by the path.
- 'file': Mimic gateway semantics: return all blocks for a multi-block file, or just enough blocks to enumerate a dir/map but not the dir contents.

see: storacha/freeway#33
see: storacha/freeway#34

TODO:
- [] find out how to identify the boundaries of a unixfs hamt (unixfs-exporter seems to define it as "not having an empty or null Link.Name after the first 2 chars are stripped", which seems risky... what happens if the actual dir listing has 2-char-long link names?) see: https://github.com/ipfs/js-ipfs-unixfs/blob/e853049bd63d6773442e1540ae49b6a443ca8672/packages/ipfs-unixfs-exporter/src/resolvers/unixfs-v1/content/hamt-sharded-directory.ts#L20-L42

License: MIT
Signed-off-by: Oli Evans <[email protected]>
  • Loading branch information
olizilla committed Apr 17, 2023
1 parent 6c0eed1 commit 6235baf
Show file tree
Hide file tree
Showing 5 changed files with 323 additions and 17 deletions.
7 changes: 4 additions & 3 deletions bin.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ cli.command('get <cid>')
.describe('Fetch a DAG from the peer. Outputs a CAR file.')
.option('-p, --peer', 'Address of peer to fetch data from.')
.option('-t, --timeout', 'Timeout in milliseconds.', TIMEOUT)
.action(async (cid, { peer, timeout }) => {
cid = CID.parse(cid)
.action(async (cidPath, { peer, timeout }) => {
const [cidStr] = cidPath.replace(/^\/ipfs\//, '').split('/')
const cid = CID.parse(cidStr)
const controller = new TimeoutController(timeout)
const libp2p = await getLibp2p()
const dagula = await Dagula.fromNetwork(libp2p, { peer })
Expand All @@ -66,7 +67,7 @@ cli.command('get <cid>')
let error
;(async () => {
try {
for await (const block of dagula.get(cid, { signal: controller.signal })) {
for await (const block of dagula.getPath(cidPath, { signal: controller.signal })) {
controller.reset()
await writer.put(block)
}
Expand Down
18 changes: 16 additions & 2 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,21 @@ export interface Network {
handle: (protocol: string | string[], handler: StreamHandler) => Promise<void>
}

/**
 * Controls how much of the DAG at the end of a resolved cid+path is returned:
 * - 'all': the entire dag starting at the path (used by `getPath` when no scope is given).
 * - 'file': all blocks of a multi-block file, or just enough blocks to enumerate a dir/map but not its contents.
 * - 'block': only the block identified by the path.
 */
export type CarScope = 'all'|'file'|'block'

/**
 * Options selecting which blocks are returned for the dag a path resolves to.
 */
export interface CarScopeOptions {
/** Scope of blocks to return; `getPath` falls back to 'all' when omitted. */
carScope?: CarScope
}

export interface IDagula {
/**
* Get a complete DAG.
*/
get (cid: CID|string, options?: AbortOptions): AsyncIterableIterator<Block>
/**
* Get a DAG for a cid+path
*/
getPath (cidPath: string, options?: AbortOptions & CarScopeOptions): AsyncIterableIterator<Block>
/**
* Get a single block.
*/
Expand All @@ -41,7 +51,7 @@ export interface IDagula {
/**
* Emit nodes for all path segments and get UnixFS files and directories
*/
walkUnixfsPath (path: CID|string, options?: AbortOptions): Promise<UnixFSEntry>
walkUnixfsPath (path: CID|string, options?: AbortOptions): AsyncIterableIterator<UnixFSEntry & Block>
}

export declare class Dagula implements IDagula {
Expand All @@ -50,6 +60,10 @@ export declare class Dagula implements IDagula {
* Get a complete DAG.
*/
get (cid: CID|string, options?: AbortOptions): AsyncIterableIterator<Block>
/**
* Get a DAG for a cid+path
*/
getPath (cidPath: string, options?: AbortOptions & CarScopeOptions): AsyncIterableIterator<Block>
/**
* Get a single block.
*/
Expand All @@ -61,7 +75,7 @@ export declare class Dagula implements IDagula {
/**
* Emit nodes for all path segments and get UnixFS files and directories
*/
walkUnixfsPath (path: CID|string, options?: AbortOptions): Promise<UnixFSEntry>
walkUnixfsPath (path: CID|string, options?: AbortOptions): AsyncIterableIterator<UnixFSEntry & Block>
/**
* Create a new Dagula instance from the passed libp2p Network interface.
*/
Expand Down
56 changes: 48 additions & 8 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@ export class Dagula {
}

/**
* @param {import('multiformats').CID|string} cid
* @param {CID[]|CID|string} cid
* @param {{ signal?: AbortSignal }} [options]
*/
async * get (cid, options = {}) {
cid = typeof cid === 'string' ? CID.parse(cid) : cid
log('getting DAG %s', cid)
let cids = [cid]
while (true) {
let cids = Array.isArray(cid) ? cid : [cid]
while (cids.length > 0) {
log('fetching %d CIDs', cids.length)
const fetchBlocks = transform(cids.length, async cid => {
return this.getBlock(cid, { signal: options.signal })
Expand All @@ -98,6 +98,43 @@ export class Dagula {
}
}

/**
* @param {string} cidPath
* @param {object} [options]
* @param {AbortSignal} [options.signal]
* @param {'all'|'file'|'block'} [options.carScope] control how many layers of the dag are returned
* 'all': return the entire dag starting at path. (default)
* 'block': return the block identified by the path.
* 'file': Mimic gateway semantics: Return All blocks for a multi-block file or just enough blocks to enumerate a dir/map but not the dir contents.
* e.g. Where path points to a single block file, all three selectors would return the same thing.
* e.g. where path points to a sharded hamt: 'file' returns the blocks of the hamt so the dir can be listed. 'block' returns the root block of the hamt.
*/
async * getPath (cidPath, options = {}) {
const carScope = options.carScope ?? 'all'
/** @type {import('ipfs-unixfs-exporter').UnixFSEntry} */
let base
for await (const item of this.walkUnixfsPath(cidPath, { signal: options.signal })) {
base = item
yield item
}
if (carScope === 'all' || (carScope === 'file' && base.type !== 'directory')) {
// fetch the entire dag rooted at the end of the provided path
const links = base.node.Links?.map(l => l.Hash) || []
if (links.length) {
yield * this.get(links, { signal: options.signal })
}
}
// non-files, like directories, and IPLD Maps only return blocks necessary for their enumeration
if (carScope === 'file' && base.type === 'directory') {
// the single block for the root has already been yielded. For a hamt we must fetch all the blocks of the (current) hamt.
if (base.unixfs.type === 'hamt-sharded-directory') {
// TODO: how to determine the boudary of a hamt
throw new Error('hamt-sharded-directory is unsupported')
}
// otherwise a dir is a single block, so we're done.
}
}

/**
* @param {import('multiformats').CID|string} cid
* @param {{ signal?: AbortSignal }} [options]
Expand Down Expand Up @@ -133,11 +170,11 @@ export class Dagula {
}

/**
* @param {string|import('multiformats').CID} path
* @param {string} cidPath
* @param {{ signal?: AbortSignal }} [options]
*/
async * walkUnixfsPath (path, options = {}) {
log('walking unixfs %s', path)
async * walkUnixfsPath (cidPath, options = {}) {
log('walking unixfs %s', cidPath)
const blockstore = {
/**
* @param {CID} cid
Expand All @@ -148,7 +185,10 @@ export class Dagula {
return block.bytes
}
}

yield * walkPath(path, blockstore, { signal: options.signal })
for await (const entry of walkPath(cidPath, blockstore, { signal: options.signal })) {
/** @type {Uint8Array} */
const bytes = entry.node.Links ? dagPb.encode(entry.node) : entry.node
yield { ...entry, bytes }
}
}
}
Loading

0 comments on commit 6235baf

Please sign in to comment.