Updates

GMOD · Dec 12, 2024 · 43dc21c · 43dc21c
1 parent 53234c2
commit 43dc21c
Show file tree

Hide file tree

Showing 4 changed files with 75 additions and 124 deletions.
diff --git a/.prettierrc b/.prettierrc
@@ -2,5 +2,6 @@
   "semi": false,
   "trailingComma": "all",
   "singleQuote": true,
-  "arrowParens": "avoid"
+  "arrowParens": "avoid",
+  "proseWrap": "always"
 }
diff --git a/README.md b/README.md
@@ -2,13 +2,21 @@
 [![Coverage Status](https://img.shields.io/codecov/c/github/GMOD/bgzf-filehandle/master.svg?style=flat-square)](https://codecov.io/gh/GMOD/bgzf-filehandle/branch/master)
 [![Build Status](https://img.shields.io/github/actions/workflow/status/GMOD/bgzf-filehandle/push.yml?branch=master)](https://github.com/GMOD/bgzf-filehandle/actions)
 
-Transparently read [indexed block-gzipped (BGZF)](http://www.htslib.org/doc/bgzip.html) files, such as those created by bgzip, using coordinates from the uncompressed file. The module is used in @gmod/indexedfasta to read bgzip-indexed fasta files (with gzi index, fai index, and fa).
+Transparently read
+[indexed block-gzipped (BGZF)](http://www.htslib.org/doc/bgzip.html) files, such
+as those created by bgzip, using coordinates from the uncompressed file. The
+module is used in @gmod/indexedfasta to read bgzip-indexed fasta files (with gzi
+index, fai index, and fa).
 
-Users can also use the `unzip` function to unzip bgzip files whole (which pako has trouble with natively)
+Users can also use the `unzip` function to unzip bgzip files whole (which pako
+has trouble with natively).
 
-You can also use the unzipChunk or unzipChunkSlice functions to unzip ranges given by BAI or TBI files for BAM or tabix file formats (which are bgzip based).
+You can also use the unzipChunkSlice function to unzip ranges given by BAI or
+TBI files for BAM or tabix file formats (which are bgzip based).
 
-The `unzip` utility function properly decompresses BGZF chunks in both node and the browser, using `pako` when running in the browser and native `zlib` when running in node.
+The `unzip` utility function properly decompresses BGZF chunks in both node and
+the browser, using `pako` when running in the browser and native `zlib` when
+running in node.
 
 ## Install
 
@@ -54,7 +62,10 @@ const { buffer, dpositions, cpositions } = await unzipChunkSlice(
 
 ## Academic Use
 
-This package was written with funding from the [NHGRI](http://genome.gov) as part of the [JBrowse](http://jbrowse.org) project. If you use it in an academic project that you publish, please cite the most recent JBrowse paper, which will be linked from [jbrowse.org](http://jbrowse.org).
+This package was written with funding from the [NHGRI](http://genome.gov) as
+part of the [JBrowse](http://jbrowse.org) project. If you use it in an academic
+project that you publish, please cite the most recent JBrowse paper, which will
+be linked from [jbrowse.org](http://jbrowse.org).
 
 ## License
 

diff --git a/src/index.ts b/src/index.ts
@@ -1,2 +1,2 @@
 export { default as BgzfFilehandle } from './bgzFilehandle'
-export { unzip, unzipChunk, unzipChunkSlice } from './unzip'
+export * from './unzip'
diff --git a/src/unzip.ts b/src/unzip.ts
@@ -15,12 +15,12 @@ interface Chunk {
 // does not properly uncompress bgzf chunks that contain more than one bgzf
 // block, so export an unzip function that uses pako directly if we are running
 // in a browser.
-async function unzip(inputData: Uint8Array) {
-  const blocks = [] as Uint8Array[]
+export async function unzip(inputData: Uint8Array) {
   try {
     let strm
     let pos = 0
     let inflator
+    const blocks = [] as Uint8Array[]
     do {
       const remainingInput = inputData.subarray(pos)
       inflator = new Inflate()
@@ -37,7 +37,7 @@ async function unzip(inputData: Uint8Array) {
 
     return concatUint8Array(blocks)
   } catch (e) {
-    //cleanup error message
+    // rethrow with a slightly more informative error message
     if (/incorrect header check/.exec(`${e}`)) {
       throw new Error(
         'problem decompressing block: incorrect gzip header check',
@@ -47,127 +47,66 @@ async function unzip(inputData: Uint8Array) {
   }
 }
 
-// similar to pakounzip, except it does extra counting
-// to return the positions of compressed and decompressed
-// data offsets
-async function unzipChunk(inputData: Uint8Array) {
-  try {
-    let strm
-    let cpos = 0
-    let dpos = 0
-    const blocks = [] as Uint8Array[]
-    const cpositions = [] as number[]
-    const dpositions = [] as number[]
-    do {
-      const remainingInput = inputData.slice(cpos)
-      const inflator = new Inflate()
-      // @ts-ignore
-      ;({ strm } = inflator)
-      inflator.push(remainingInput, Z_SYNC_FLUSH)
-      if (inflator.err) {
-        throw new Error(inflator.msg)
-      }
-
-      const buffer = inflator.result as Uint8Array
-      blocks.push(buffer)
-
-      cpositions.push(cpos)
-      dpositions.push(dpos)
+// keeps track of the position of compressed blocks in terms of file offsets,
+// and a decompressed equivalent
+//
+// also slices (0,minv.dataPosition) and (maxv.dataPosition,end) off
+export async function unzipChunkSlice(inputData: Uint8Array, chunk: Chunk) {
+  let strm
+  const { minv, maxv } = chunk
+  let cpos = minv.blockPosition
+  let dpos = minv.dataPosition
+  const chunks = [] as Uint8Array[]
+  const cpositions = [] as number[]
+  const dpositions = [] as number[]
+
+  let i = 0
+  do {
+    const remainingInput = inputData.subarray(cpos - minv.blockPosition)
+    const inflator = new Inflate()
+    // @ts-ignore
+    ;({ strm } = inflator)
+    inflator.push(remainingInput, Z_SYNC_FLUSH)
+    if (inflator.err) {
+      throw new Error(inflator.msg)
+    }
 
-      cpos += strm.next_in
-      dpos += buffer.length
-    } while (strm.avail_in)
+    const buffer = inflator.result
+    chunks.push(buffer as Uint8Array)
+    let len = buffer.length
 
-    const buffer = concatUint8Array(blocks)
-    return {
-      buffer,
-      cpositions,
-      dpositions,
+    cpositions.push(cpos)
+    dpositions.push(dpos)
+    if (chunks.length === 1 && minv.dataPosition) {
+      // this is the first chunk, trim it
+      chunks[0] = chunks[0]!.subarray(minv.dataPosition)
+      len = chunks[0].length
     }
-  } catch (e) {
-    //cleanup error message
-    if (/incorrect header check/.exec(`${e}`)) {
-      throw new Error(
-        'problem decompressing block: incorrect gzip header check',
+    const origCpos = cpos
+    cpos += strm.next_in
+    dpos += len
+
+    if (origCpos >= maxv.blockPosition) {
+      // this is the last chunk, trim it and stop decompressing. note that if
+      // it is the same block as minv, the part already trimmed from the start
+      // is subtracted from the slice length
+      chunks[i] = chunks[i]!.subarray(
+        0,
+        maxv.blockPosition === minv.blockPosition
+          ? maxv.dataPosition - minv.dataPosition + 1
+          : maxv.dataPosition + 1,
       )
-    }
-    throw e
-  }
-}
-
-// similar to unzipChunk above but slices (0,minv.dataPosition) and
-// (maxv.dataPosition,end) off
-async function unzipChunkSlice(inputData: Uint8Array, chunk: Chunk) {
-  try {
-    let strm
-    const { minv, maxv } = chunk
-    let cpos = minv.blockPosition
-    let dpos = minv.dataPosition
-    const chunks = [] as Uint8Array[]
-    const cpositions = [] as number[]
-    const dpositions = [] as number[]
-
-    let i = 0
-    do {
-      const remainingInput = inputData.subarray(cpos - minv.blockPosition)
-      const inflator = new Inflate()
-      // @ts-ignore
-      ;({ strm } = inflator)
-      inflator.push(remainingInput, Z_SYNC_FLUSH)
-      if (inflator.err) {
-        throw new Error(inflator.msg)
-      }
-
-      const buffer = inflator.result
-      chunks.push(buffer as Uint8Array)
-      let len = buffer.length
 
       cpositions.push(cpos)
       dpositions.push(dpos)
-      if (chunks.length === 1 && minv.dataPosition) {
-        // this is the first chunk, trim it
-        chunks[0] = chunks[0]!.subarray(minv.dataPosition)
-        len = chunks[0].length
-      }
-      const origCpos = cpos
-      cpos += strm.next_in
-      dpos += len
-
-      if (origCpos >= maxv.blockPosition) {
-        // this is the last chunk, trim it and stop decompressing
-        // note if it is the same block is minv it subtracts that already
-        // trimmed part of the slice length
-
-        chunks[i] = chunks[i]!.subarray(
-          0,
-          maxv.blockPosition === minv.blockPosition
-            ? maxv.dataPosition - minv.dataPosition + 1
-            : maxv.dataPosition + 1,
-        )
-
-        cpositions.push(cpos)
-        dpositions.push(dpos)
-        break
-      }
-      i++
-    } while (strm.avail_in)
-
-    const buffer = concatUint8Array(chunks)
-
-    return { buffer, cpositions, dpositions }
-  } catch (e) {
-    //cleanup error message
-    if (/incorrect header check/.exec(`${e}`)) {
-      throw new Error(
-        'problem decompressing block: incorrect gzip header check',
-      )
+      break
     }
-    throw e
-  }
-}
+    i++
+  } while (strm.avail_in)
 
-function nodeUnzip() {
-  throw new Error('nodeUnzip not implemented.')
+  return {
+    buffer: concatUint8Array(chunks),
+    cpositions,
+    dpositions,
+  }
 }
-
-export { unzip, unzipChunk, unzipChunkSlice, unzip as pakoUnzip, nodeUnzip }