Skip to content

Commit

Permalink
Port to updated generic-filehandle2 (#65)
Browse files Browse the repository at this point in the history
  • Loading branch information
cmdcolin authored Dec 11, 2024
1 parent 850eaf5 commit 9121a10
Show file tree
Hide file tree
Showing 9 changed files with 539 additions and 1,384 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
- Update to generic-filehandle2

# v1.5.4

- Remove zlib import. It was only used for unzipping entire files on node.js
Expand Down
11 changes: 4 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
"lint": "eslint --report-unused-disable-directives --max-warnings 0",
"clean": "rimraf dist esm",
"prebuild": "yarn clean",
"build:esm": "tsc --target es2018 --outDir esm",
"build:es5": "tsc --target es2015 --module commonjs --outDir dist",
"build:esm": "tsc --outDir esm",
"build:es5": "tsc --module commonjs --outDir dist",
"build": "yarn build:esm && yarn build:es5",
"prepublishOnly": "yarn test --run && yarn build",
"postversion": "git push --follow-tags"
Expand All @@ -35,25 +35,22 @@
"biojs"
],
"dependencies": {
"es6-promisify": "^7.0.0",
"generic-filehandle": "^3.0.0",
"generic-filehandle2": "^0.0.1",
"long": "^4.0.0",
"pako": "^1.0.11"
},
"devDependencies": {
"@types/es6-promisify": "^6.0.0",
"@types/long": "^4.0.1",
"@types/node": "^18.11.16",
"@types/pako": "^2.0.0",
"@typescript-eslint/eslint-plugin": "^8.16.0",
"@typescript-eslint/parser": "^8.16.0",
"@vitest/coverage-v8": "^2.0.5",
"eslint": "^9.9.0",
"eslint-plugin-import": "^2.27.5",
"eslint-plugin-unicorn": "^56.0.1",
"prettier": "^3.4.1",
"rimraf": "^6.0.1",
"typescript": "~5.6.0",
"typescript": "^5.7.0",
"typescript-eslint": "^8.1.0",
"vitest": "^2.1.6"
},
Expand Down
39 changes: 11 additions & 28 deletions src/bgzFilehandle.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { Buffer } from 'buffer'
import { LocalFile, GenericFilehandle } from 'generic-filehandle'
import { LocalFile, GenericFilehandle } from 'generic-filehandle2'

// locals
import { unzip } from './unzip'
import GziIndex from './gziIndex'
import { concatUint8Array } from './util'

export default class BgzFilehandle {
filehandle: GenericFilehandle
Expand Down Expand Up @@ -54,19 +54,15 @@ export default class BgzFilehandle {

const { size } = await this.filehandle.stat()

const buf = Buffer.allocUnsafe(4)
// note: there should be a 28-byte EOF marker (an empty block) at
// the end of the file, so we skip backward past that
const { bytesRead } = await this.filehandle.read(buf, 0, 4, size - 28 - 4)
if (bytesRead !== 4) {
throw new Error('read error')
}
const lastBlockUncompressedSize = buf.readUInt32LE(0)
const buf = await this.filehandle.read(4, size - 28 - 4)
const dataView = new DataView(buf.buffer)
const lastBlockUncompressedSize = dataView.getUint32(0, true)
return uncompressedPosition + lastBlockUncompressedSize
}

async _readAndUncompressBlock(
blockBuffer: Buffer,
[compressedPosition]: [number],
[nextCompressedPosition]: [number],
) {
Expand All @@ -78,38 +74,27 @@ export default class BgzFilehandle {
// read the compressed data into the block buffer
const blockCompressedLength = next - compressedPosition

await this.filehandle.read(
blockBuffer,
0,
const blockBuffer = await this.filehandle.read(
blockCompressedLength,
compressedPosition,
)

// uncompress it
const unzippedBuffer = await unzip(
blockBuffer.slice(0, blockCompressedLength),
)

return unzippedBuffer
return unzip(blockBuffer)
}

async read(buf: Buffer, offset: number, length: number, position: number) {
// get the block positions for this read
async read(length: number, position: number) {
const blockPositions = await this.gzi.getRelevantBlocksForRead(
length,
position,
)
const blockBuffer = Buffer.allocUnsafe(32768 * 2)
// uncompress the blocks and read from them one at a time to keep memory usage down
let destinationOffset = offset
let bytesRead = 0
const blocks = [] as Uint8Array[]
for (
let blockNum = 0;
blockNum < blockPositions.length - 1;
blockNum += 1
) {
const uncompressedBuffer = await this._readAndUncompressBlock(
blockBuffer,
blockPositions[blockNum],
blockPositions[blockNum + 1],
)
Expand All @@ -122,12 +107,10 @@ export default class BgzFilehandle {
uncompressedPosition + uncompressedBuffer.length,
) - uncompressedPosition
if (sourceOffset >= 0 && sourceOffset < uncompressedBuffer.length) {
uncompressedBuffer.copy(buf, destinationOffset, sourceOffset, sourceEnd)
destinationOffset += sourceEnd - sourceOffset
bytesRead += sourceEnd - sourceOffset
blocks.push(uncompressedBuffer.subarray(sourceOffset, sourceEnd))
}
}

return { bytesRead, buffer: buf }
return concatUint8Array(blocks)
}
}
15 changes: 6 additions & 9 deletions src/gziIndex.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import Long from 'long'
import { Buffer } from 'buffer'
import { LocalFile, GenericFilehandle } from 'generic-filehandle'
import { LocalFile, GenericFilehandle } from 'generic-filehandle2'

// const COMPRESSED_POSITION = 0
const UNCOMPRESSED_POSITION = 1
Expand All @@ -26,7 +25,7 @@ export default class GziIndex {
}
}

_readLongWithOverflow(buf: Buffer, offset = 0, unsigned = true) {
_readLongWithOverflow(buf: Uint8Array, offset = 0, unsigned = true) {
//@ts-ignore
const long = Long.fromBytesLE(buf.slice(offset, offset + 8), unsigned)
if (
Expand All @@ -47,8 +46,7 @@ export default class GziIndex {
}

async _readIndex(): Promise<[number, number][]> {
let buf = Buffer.allocUnsafe(8)
await this.filehandle.read(buf, 0, 8, 0)
const buf = await this.filehandle.read(8, 0)
const numEntries = this._readLongWithOverflow(buf, 0, true)
if (!numEntries) {
return [[0, 0]]
Expand All @@ -62,15 +60,14 @@ export default class GziIndex {
if (bufSize > Number.MAX_SAFE_INTEGER) {
throw new TypeError('integer overflow')
}
buf = Buffer.allocUnsafe(bufSize)
await this.filehandle.read(buf, 0, bufSize, 8)
const buf2 = await this.filehandle.read(bufSize, 8)
for (let entryNumber = 0; entryNumber < numEntries; entryNumber += 1) {
const compressedPosition = this._readLongWithOverflow(
buf,
buf2,
entryNumber * 16,
)
const uncompressedPosition = this._readLongWithOverflow(
buf,
buf2,
entryNumber * 16 + 8,
)
entries[entryNumber + 1] = [compressedPosition, uncompressedPosition]
Expand Down
55 changes: 21 additions & 34 deletions src/unzip.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Buffer } from 'buffer'
//@ts-ignore
import { Z_SYNC_FLUSH, Inflate } from 'pako'
import { concatUint8Array } from './util'

interface VirtualOffset {
blockPosition: number
Expand All @@ -15,13 +15,11 @@ interface Chunk {
// does not properly uncompress bgzf chunks that contain more than one bgzf
// block, so export an unzip function that uses pako directly if we are running
// in a browser.
async function unzip(inputData: Buffer) {
async function unzip(inputData: Uint8Array) {
const blocks = [] as Uint8Array[]
try {
let strm
let pos = 0
let i = 0
const chunks = []
let totalSize = 0
let inflator
do {
const remainingInput = inputData.subarray(pos)
Expand All @@ -34,17 +32,10 @@ async function unzip(inputData: Buffer) {
}

pos += strm.next_in
chunks[i] = inflator.result as Uint8Array
totalSize += chunks[i]!.length
i += 1
blocks.push(inflator.result as Uint8Array)
} while (strm.avail_in)

const result = new Uint8Array(totalSize)
for (let i = 0, offset = 0; i < chunks.length; i++) {
result.set(chunks[i]!, offset)
offset += chunks[i]!.length
}
return Buffer.from(result)
return concatUint8Array(blocks)
} catch (e) {
//cleanup error message
if (/incorrect header check/.exec(`${e}`)) {
Expand All @@ -59,14 +50,14 @@ async function unzip(inputData: Buffer) {
// similar to pakounzip, except it does extra counting
// to return the positions of compressed and decompressed
// data offsets
async function unzipChunk(inputData: Buffer) {
async function unzipChunk(inputData: Uint8Array) {
try {
let strm
let cpos = 0
let dpos = 0
const blocks = []
const cpositions = []
const dpositions = []
const blocks = [] as Uint8Array[]
const cpositions = [] as number[]
const dpositions = [] as number[]
do {
const remainingInput = inputData.slice(cpos)
const inflator = new Inflate()
Expand All @@ -77,7 +68,7 @@ async function unzipChunk(inputData: Buffer) {
throw new Error(inflator.msg)
}

const buffer = Buffer.from(inflator.result)
const buffer = inflator.result as Uint8Array
blocks.push(buffer)

cpositions.push(cpos)
Expand All @@ -87,8 +78,12 @@ async function unzipChunk(inputData: Buffer) {
dpos += buffer.length
} while (strm.avail_in)

const buffer = Buffer.concat(blocks)
return { buffer, cpositions, dpositions }
const buffer = concatUint8Array(blocks)
return {
buffer,
cpositions,
dpositions,
}
} catch (e) {
//cleanup error message
if (/incorrect header check/.exec(`${e}`)) {
Expand All @@ -102,17 +97,16 @@ async function unzipChunk(inputData: Buffer) {

// similar to unzipChunk above but slices (0,minv.dataPosition) and
// (maxv.dataPosition,end) off
async function unzipChunkSlice(inputData: Buffer, chunk: Chunk) {
async function unzipChunkSlice(inputData: Uint8Array, chunk: Chunk) {
try {
let strm
const { minv, maxv } = chunk
let cpos = minv.blockPosition
let dpos = minv.dataPosition
const chunks = []
const cpositions = []
const dpositions = []
const chunks = [] as Uint8Array[]
const cpositions = [] as number[]
const dpositions = [] as number[]

let totalSize = 0
let i = 0
do {
const remainingInput = inputData.subarray(cpos - minv.blockPosition)
Expand Down Expand Up @@ -153,19 +147,12 @@ async function unzipChunkSlice(inputData: Buffer, chunk: Chunk) {

cpositions.push(cpos)
dpositions.push(dpos)
totalSize += chunks[i]!.length
break
}
totalSize += chunks[i]!.length
i++
} while (strm.avail_in)

const result = new Uint8Array(totalSize)
for (let i = 0, offset = 0; i < chunks.length; i++) {
result.set(chunks[i]!, offset)
offset += chunks[i]!.length
}
const buffer = Buffer.from(result)
const buffer = concatUint8Array(chunks)

return { buffer, cpositions, dpositions }
} catch (e) {
Expand Down
16 changes: 16 additions & 0 deletions src/util.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
export function sum(array: Uint8Array[]) {
let sum = 0
for (const entry of array) {
sum += entry.length
}
return sum
}
export function concatUint8Array(args: Uint8Array[]) {
const mergedArray = new Uint8Array(sum(args))
let offset = 0
for (const entry of args) {
mergedArray.set(entry, offset)
offset += entry.length
}
return mergedArray
}
Loading

0 comments on commit 9121a10

Please sign in to comment.