From c57257fd9ccec6fcde2bd260d880966f84a6c1c5 Mon Sep 17 00:00:00 2001 From: Isaac <91521821+isimisi@users.noreply.github.com> Date: Fri, 22 Sep 2023 14:26:26 +0200 Subject: [PATCH 1/5] rollup rollup to cjs update rollup --- package.json | 158 ++-- pdfparser.cjs | 2203 ++++++++++++++++++++++++++++++++++++++++++++++ pdfparser.mjs | 2196 +++++++++++++++++++++++++++++++++++++++++++++ rollup.config.js | 64 ++ test/index.js | 377 ++++---- yarn-error.log | 145 +++ 6 files changed, 4902 insertions(+), 241 deletions(-) create mode 100644 pdfparser.cjs create mode 100644 pdfparser.mjs create mode 100644 rollup.config.js create mode 100644 yarn-error.log diff --git a/package.json b/package.json index 6dd48658..e3a48104 100644 --- a/package.json +++ b/package.json @@ -1,78 +1,90 @@ { - "name": "pdf2json", - "version": "3.0.4", - "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", - "keywords": [ - "pdf", - "pdf parser", - "pdf2json", - "convert pdf to json", - "server side PDF parser", - "port pdf.js to node.js", - "PDF binary to text", - "commandline utility to parse pdf to json", - "JSON", - "javascript", - "PDF canvas", - "pdf.js fork" - ], - "author": { - "name": "Modesty Zhang", - "email": "modestyz@hotmail.com", - "url": "http://www.codeproject.com/script/Articles/MemberArticles.aspx?amid=62372" - }, - "homepage": "https://github.com/modesty/pdf2json", - "repository": { - "type": "git", - "url": "git://github.com/modesty/pdf2json.git" - }, - "main": "./pdfparser.js", - "typings": "./pdfparser.d.ts", - "scripts": { - "test": "cd ./test && sh p2j.forms.sh", - "test-misc": "cd ./test && sh p2j.one.sh misc . 
\"Expected: 7 success, 3 fail exception with stack trace\" ", - "parse": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form", - "parse-s": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s", - "parse-t": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", - "parse-c": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", - "parse-m": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", - "parse-r": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -t -c -m -r", - "parse-fd": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/ -o ./test/target/fd/form -t -c -m -r", - "parse-tb": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", - "parse-tc": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i293_pdfpac.pdf -o ./test/target/misc", - "parse-rectFix": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/pr298_rect_fix_from_upstream.pdf -o ./test/target/misc", - "parse-e": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc", - "parse-e2": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc", - "parse-e3": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i200_test.pdf -o ./test/target/misc" - }, - "engines": { - "node": ">=18.12.1", - "npm": ">=8.19.2" - }, - "type": "module", - "bin": { - "pdf2json": "./bin/pdf2json.js" - }, - "dependencies": { - "@xmldom/xmldom": "^0.8.8" - }, - 
"devDependencies": { - "@types/node": "^20.3.1" - }, - "bundledDependencies": [ - "@xmldom/xmldom" - ], - "maintainers": [ - { + "name": "pdf2json", + "version": "3.0.4", + "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", + "keywords": [ + "pdf", + "pdf parser", + "pdf2json", + "convert pdf to json", + "server side PDF parser", + "port pdf.js to node.js", + "PDF binary to text", + "commandline utility to parse pdf to json", + "JSON", + "javascript", + "PDF canvas", + "pdf.js fork" + ], + "author": { "name": "Modesty Zhang", "email": "modestyz@hotmail.com", "url": "http://www.codeproject.com/script/Articles/MemberArticles.aspx?amid=62372" - } - ], - "contributors": [], - "bugs": { - "url": "http://github.com/modesty/pdf2json/issues" - }, - "license": "Apache-2.0", - "readme": "https://github.com/modesty/pdf2json/blob/master/readme.md" + }, + "homepage": "https://github.com/modesty/pdf2json", + "repository": { + "type": "git", + "url": "git://github.com/modesty/pdf2json.git" + }, + "main": "./pdfparser.cjs", + "module": "./pdfparser.js", + "typings": "./pdfparser.d.ts", + "scripts": { + "test": "cd ./test && sh p2j.forms.sh", + "test-misc": "cd ./test && sh p2j.one.sh misc . 
\"Expected: 7 success, 3 fail exception with stack trace\" ", + "parse": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form", + "parse-s": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s", + "parse-t": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", + "parse-c": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", + "parse-m": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", + "parse-r": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -t -c -m -r", + "parse-fd": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/ -o ./test/target/fd/form -t -c -m -r", + "parse-tb": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", + "parse-tc": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i293_pdfpac.pdf -o ./test/target/misc", + "parse-rectFix": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/pr298_rect_fix_from_upstream.pdf -o ./test/target/misc", + "parse-e": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc", + "parse-e2": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc", + "parse-e3": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i200_test.pdf -o ./test/target/misc", + "build:rollup": "rollup -c ./rollup.config.js", + "build": "npm run build:rollup" + }, + "engines": { + "node": ">=18.12.1", + "npm": ">=8.19.2" + }, + "type": "module", + "bin": { + 
"pdf2json": "./bin/pdf2json.js" + }, + "dependencies": { + "@xmldom/xmldom": "^0.8.8" + }, + "devDependencies": { + "@rollup/plugin-replace": "^5.0.2", + "@types/node": "^20.3.1", + "rollup": "^3.29.2", + "rollup-plugin-node-builtins": "^2.1.2" + }, + "bundledDependencies": [ + "@xmldom/xmldom" + ], + "maintainers": [ + { + "name": "Modesty Zhang", + "email": "modestyz@hotmail.com", + "url": "http://www.codeproject.com/script/Articles/MemberArticles.aspx?amid=62372" + } + ], + "contributors": [], + "bugs": { + "url": "http://github.com/modesty/pdf2json/issues" + }, + "exports": { + ".": { + "import": "./pdfparser.js", + "require": "./pdfparser.cjs" + } + }, + "license": "Apache-2.0", + "readme": "https://github.com/modesty/pdf2json/blob/master/readme.md" } diff --git a/pdfparser.cjs b/pdfparser.cjs new file mode 100644 index 00000000..38761cac --- /dev/null +++ b/pdfparser.cjs @@ -0,0 +1,2203 @@ +'use strict'; + +var fs = require('fs'); +var nodeUtil = require('util'); +var promises = require('fs/promises'); +var events = require('events'); +var path = require('path'); +var url = require('url'); +require('buffer'); +var xmldom = require('@xmldom/xmldom'); +var stream = require('stream'); + +var _documentCurrentScript = typeof document !== 'undefined' ? 
document.currentScript : null; +const kColors = [ + '#000000', // 0 + '#ffffff', // 1 + '#4c4c4c', // 2 + '#808080', // 3 + '#999999', // 4 + '#c0c0c0', // 5 + '#cccccc', // 6 + '#e5e5e5', // 7 + '#f2f2f2', // 8 + '#008000', // 9 + '#00ff00', // 10 + '#bfffa0', // 11 + '#ffd629', // 12 + '#ff99cc', // 13 + '#004080', // 14 + '#9fc0e1', // 15 + '#5580ff', // 16 + '#a9c9fa', // 17 + '#ff0080', // 18 + '#800080', // 19 + '#ffbfff', // 20 + '#e45b21', // 21 + '#ffbfaa', // 22 + '#008080', // 23 + '#ff0000', // 24 + '#fdc59f', // 25 + '#808000', // 26 + '#bfbf00', // 27 + '#824100', // 28 + '#007256', // 29 + '#008000', // 30 + '#000080', // Last + 1 + '#008080', // Last + 2 + '#800080', // Last + 3 + '#ff0000', // Last + 4 + '#0000ff', // Last + 5 + '#008000' // Last + 6 +]; + +const kFontFaces = [ + "quicktype,arial,helvetica,sans-serif", // 00 - QuickType - sans-serif variable font + "quicktype condensed,arial narrow,arial,helvetica,sans-serif", // 01 - QuickType Condensed - thin sans-serif variable font + "quicktypepi,quicktypeiipi", // 02 - QuickType Pi + "quicktype mono,courier new,courier,monospace", // 03 - QuickType Mono - san-serif fixed font + "ocr-a,courier new,courier,monospace", // 04 - OCR-A - OCR readable san-serif fixed font + "ocr b mt,courier new,courier,monospace" // 05 - OCR-B MT - OCR readable san-serif fixed font + ]; + + const kFontStyles = [ + // Face Size Bold Italic StyleID(Comment) + // ----- ---- ---- ----- ----------------- + [0, 6, 0, 0], //00 + [0, 8, 0, 0], //01 + [0, 10, 0, 0], //02 + [0, 12, 0, 0], //03 + [0, 14, 0, 0], //04 + [0, 18, 0, 0], //05 + [0, 6, 1, 0], //06 + [0, 8, 1, 0], //07 + [0, 10, 1, 0], //08 + [0, 12, 1, 0], //09 + [0, 14, 1, 0], //10 + [0, 18, 1, 0], //11 + [0, 6, 0, 1], //12 + [0, 8, 0, 1], //13 + [0, 10, 0, 1], //14 + [0, 12, 0, 1], //15 + [0, 14, 0, 1], //16 + [0, 18, 0, 1], //17 + [0, 6, 1, 1], //18 + [0, 8, 1, 1], //19 + [0, 10, 1, 1], //20 + [0, 12, 1, 1], //21 + [0, 14, 1, 1], //22 + [0, 18, 1, 1], //23 + [1, 
6, 0, 0], //24 + [1, 8, 0, 0], //25 + [1, 10, 0, 0], //26 + [1, 12, 0, 0], //27 + [1, 14, 0, 0], //28 + [1, 18, 0, 0], //29 + [1, 6, 1, 0], //30 + [1, 8, 1, 0], //31 + [1, 10, 1, 0], //32 + [1, 12, 1, 0], //33 + [1, 14, 1, 0], //34 + [1, 18, 1, 0], //35 + [1, 6, 0, 1], //36 + [1, 8, 0, 1], //37 + [1, 10, 0, 1], //38 + [1, 12, 0, 1], //39 + [1, 14, 0, 1], //40 + [1, 18, 0, 1], //41 + [2, 8, 0, 0], //42 + [2, 10, 0, 0], //43 + [2, 12, 0, 0], //44 + [2, 14, 0, 0], //45 + [2, 18, 0, 0], //46 + [3, 8, 0, 0], //47 + [3, 10, 0, 0], //48 + [3, 12, 0, 0], //49 + [4, 12, 0, 0], //50 + [0, 9, 0, 0], //51 + [0, 9, 1, 0], //52 + [0, 9, 0, 1], //53 + [0, 9, 1, 1], //54 + [1, 9, 0, 0], //55 + [1, 9, 1, 0], //56 + [1, 9, 1, 1], //57 + [4, 10, 0, 0], //58 + [5, 10, 0, 0], //59 + [5, 12, 0, 0] //60 +]; + +const dpi = 96.0; +const gridXPerInch = 4.0; +const gridYPerInch = 4.0; + +const _pixelXPerGrid = dpi/gridXPerInch; +const _pixelYPerGrid = dpi/gridYPerInch; +const _pixelPerPoint = dpi/72; + +class PDFUnit { + static toFixedFloat(fNum) { + return parseFloat(fNum.toFixed(3)); + } + + static colorCount() { + return kColors.length; + } + + static toPixelX(formX) { + return Math.round(formX * _pixelXPerGrid); + } + + static toPixelY(formY) { + return Math.round(formY * _pixelYPerGrid); + } + + static pointToPixel(point) {// Point unit (1/72 an inch) to pixel units + return point * _pixelPerPoint; + } + + static getColorByIndex(clrId) { + return kColors[clrId]; + } + + static toFormPoint(viewportX, viewportY) { + return [(viewportX / _pixelXPerGrid), (viewportY / _pixelYPerGrid)]; + } + + static toFormX(viewportX) { + return PDFUnit.toFixedFloat(viewportX / _pixelXPerGrid); + } + + static toFormY(viewportY) { + return PDFUnit.toFixedFloat(viewportY / _pixelYPerGrid); + } + + static findColorIndex(color) { + if (color.length === 4) + color += "000"; + //MQZ. 07/29/2013: if color is not in dictionary, just return -1. 
The caller (pdffont, pdffill) will set the actual color + return kColors.indexOf(color); + } + + static dateToIso8601(date) { + // PDF spec p.160 + if (date.slice(0, 2) === 'D:') { // D: prefix is optional + date = date.slice(2); + } + let tz = 'Z'; + let idx = date.search(/[Z+-]/); // timezone is optional + if (idx >= 0) { + tz = date.slice(idx); + if (tz !== 'Z') { // timezone format OHH'mm' + tz = tz.slice(0, 3) + ':' + tz.slice(4, 6); + } + date = date.slice(0, idx); + } + let yr = date.slice(0, 4); // everything after year is optional + let mth = date.slice(4, 6) || '01'; + let day = date.slice(6, 8) || '01'; + let hr = date.slice(8, 10) || '00'; + let min = date.slice(10, 12) || '00'; + let sec = date.slice(12, 14) || '00'; + return yr + '-' + mth + '-' + day + 'T' + hr + ':' + min + ':' + sec + tz; + } +} + +class PDFLine { + constructor(x1, y1, x2, y2, lineWidth, color, dashed) { + this.x1 = x1; + this.y1 = y1; + this.x2 = x2; + this.y2 = y2; + this.lineWidth = lineWidth || 1.0; + this.color = color; + this.dashed = dashed; + } + + #setStartPoint(oneLine, x, y) { + oneLine.x = PDFUnit.toFormX(x); + oneLine.y = PDFUnit.toFormY(y); + } + + processLine(targetData) { + const xDelta = Math.abs(this.x2 - this.x1); + const yDelta = Math.abs(this.y2 - this.y1); + const minDelta = this.lineWidth; + + let oneLine = {x:0, y:0, w: PDFUnit.toFixedFloat(this.lineWidth), l:0}; + + //MQZ Aug.28.2013, adding color support, using color dictionary and default to black + const clrId = PDFUnit.findColorIndex(this.color); + const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? 
{clr: clrId} : {oc: this.color}; + oneLine = {...oneLine, ...colorObj}; + + //MQZ Aug.29 dashed line support + if (this.dashed) { + oneLine = oneLine = {...oneLine, dsh: 1}; + } + + if ((yDelta < this.lineWidth) && (xDelta > minDelta)) { //HLine + if (this.lineWidth < 4 && (xDelta / this.lineWidth < 4)) { + nodeUtil.p2jinfo("Skipped: short thick HLine: lineWidth = " + this.lineWidth + ", xDelta = " + xDelta); + return; //skip short thick lines, like PA SPP lines behinds checkbox + } + + oneLine.l = PDFUnit.toFormX(xDelta); + if (this.x1 > this.x2) + this.#setStartPoint(oneLine, this.x2, this.y2); + else + this.#setStartPoint(oneLine, this.x1, this.y1); + targetData.HLines.push(oneLine); + } + else if ((xDelta < this.lineWidth) && (yDelta > minDelta)) {//VLine + if (this.lineWidth < 4 && (yDelta / this.lineWidth < 4)) { + nodeUtil.p2jinfo("Skipped: short thick VLine: lineWidth = " + this.lineWidth + ", yDelta = " + yDelta); + return; //skip short think lines, like PA SPP lines behinds checkbox + } + + oneLine.l = PDFUnit.toFormY(yDelta); + if (this.y1 > this.y2) + this.#setStartPoint(oneLine, this.x2, this.y2); + else + this.#setStartPoint(oneLine, this.x1, this.y1); + targetData.VLines.push(oneLine); + } + } +} + +class PDFFill{ + // constructor + constructor(x, y, width, height, color) { + this.x = x; + this.y = y; + this.width = width; + this.height = height; + this.color = color; + } + + processFill(targetData) { + //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) + const clrId = PDFUnit.findColorIndex(this.color); + const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? 
{clr: clrId} : {oc: this.color}; + + const oneFill = {x:PDFUnit.toFormX(this.x), + y:PDFUnit.toFormY(this.y), + w:PDFUnit.toFormX(this.width), + h:PDFUnit.toFormY(this.height), + ...colorObj}; + + + if (oneFill.w < 2 && oneFill.h < 2) { + nodeUtil.p2jinfo("Skipped: tiny fill: " + oneFill.w + " x " + oneFill.h); + return; //skip short thick lines, like PA SPP lines behinds checkbox + } + + targetData.Fills.push(oneFill); + } +} + +const _boldSubNames = ["bd", "bold", "demi", "black"]; +const _stdFonts = ["arial", "helvetica", "sans-serif ", "courier ","monospace ", "ocr "]; +const DISTANCE_DELTA = 0.1; + +class PDFFont { + #initTypeName() { + let typeName = (this.fontObj.name || this.fontObj.fallbackName); + if (!typeName) { + typeName = kFontFaces[0]; //default font family name + } + typeName = typeName.toLowerCase(); + return typeName; + } + + #initSubType() { + let subType = this.typeName; + let bold = false; + + let nameArray = this.typeName.split('+'); + if (Array.isArray(nameArray) && nameArray.length > 1) { + subType = nameArray[1].split("-"); + if (Array.isArray(subType) && subType.length > 1) { + let subName = subType[1].toLowerCase(); + bold = _boldSubNames.indexOf(subName) >= 0; + subType = subType[0]; + } + } + return {subType, bold}; + } + + #initSymbol() { + let isSymbol = this.typeName.indexOf("symbol") > 0 || kFontFaces[2].indexOf(this.subType) >= 0; + if (this.fontObj.isSymbolicFont) { + let mFonts = _stdFonts.filter( (oneName) => (this.typeName.indexOf(oneName) >= 0) ); + + if (mFonts.length > 0) { + this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) 
reset the flag for now + nodeUtil.p2jinfo("Reset: isSymbolicFont (false) for " + this.fontObj.name); + } + } + else { + if (isSymbol) { + this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c + nodeUtil.p2jinfo("Reset: isSymbolicFont (true) for " + this.fontObj.name); + } + } + return isSymbol; + } + + #initSpaceWidth() { + let spaceWidth = this.fontObj.spaceWidth; + if (!spaceWidth) { + var spaceId = Array.isArray(this.fontObj.toFontChar) ? this.fontObj.toFontChar.indexOf(32) : -1; + spaceWidth = (spaceId >= 0 && Array.isArray(this.fontObj.widths)) ? this.fontObj.widths[spaceId] : 250; + } + spaceWidth = PDFUnit.toFormX(spaceWidth) / 32; + return spaceWidth; + } + + // constructor + constructor(fontObj) { + this.fontObj = fontObj; + + this.typeName = this.#initTypeName(); + + const {subType, bold} = this.#initSubType(); + this.subType = subType; + this.bold = bold; + + this.isSymbol = this.#initSymbol(); + this.spaceWidth = this.#initSpaceWidth(); + + this.fontSize = 1; + this.faceIdx = 0; + this.italic = false; + this.fontStyleId = -1; + } + + /** sort text blocks by y then x */ + static compareBlockPos(t1, t2) { + if (t1.y < t2.y - DISTANCE_DELTA) { + return -1; + } + if (Math.abs(t1.y - t2.y) <= DISTANCE_DELTA) { + if (t1.x < t2.x - DISTANCE_DELTA) { + return -1; + } + if (Math.abs(t1.x - t2.x) <= DISTANCE_DELTA) { + return 0; + } + } + return 1; + } + + static haveSameStyle(t1, t2) { + let retVal = t1.R[0].S === t2.R[0].S; + if (retVal && t1.R[0].S < 0) { + for (let i = 0; i < t1.R[0].TS.length; i++) { + if (t1.R[0].TS[i] !== t2.R[0].TS[i]) { + retVal = false; + break; + } + } + } + if (retVal) { // make sure both block are not rotated + retVal = (typeof t1.R[0].RA === 'undefined') && (typeof t2.R[0].RA === 'undefined'); + } + + return retVal; + } + + static getSpaceThreshHold(t1) { + return (PDFFont.getFontSize(t1)/12) * t1.sw; + } + + static areAdjacentBlocks(t1, t2) { + const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; + const 
isDistanceSmallerThanASpace = ((t2.x - t1.x - t1.w) < PDFFont.getSpaceThreshHold(t1)); + + return isInSameLine && isDistanceSmallerThanASpace; + } + + static getFontSize(textBlock) { + const sId = textBlock.R[0].S; + return (sId < 0) ? textBlock.R[0].TS[1] : kFontStyles[sId][1]; + } + + static areDuplicateBlocks(t1, t2) { + return t1.x == t2.x && t1.y == t2.y && t1.R[0].T == t2.R[0].T && PDFFont.haveSameStyle(t1, t2); + } + + // private + #setFaceIndex() { + const fontObj = this.fontObj; + + this.bold = fontObj.bold; + if (!this.bold) { + this.bold = this.typeName.indexOf("bold") >= 0 || this.typeName.indexOf("black") >= 0; + } + this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42 + // Extended the fix for https://github.com/modesty/pdf2json/issues/42 + if (!this.italic) { + this.italic = this.typeName.indexOf("italic") >= 0 || this.typeName.indexOf("oblique") >= 0; + } + // Added detection of hybrid dual bolditalic fonts + if (((!this.bold) || (!this.italic)) && (this.typeName.indexOf("boldobl") >= 0)) { + this.bold = true; + this.italic = true; + } + + let typeName = this.subType; + if (fontObj.isSerifFont) { + if (kFontFaces[1].indexOf(typeName) >= 0) + this.faceIdx = 1; + } + else if (kFontFaces[2].indexOf(this.subType) >= 0) { + this.faceIdx = 2; + } + else if (fontObj.isMonospace) { + this.faceIdx = 3; + + if (kFontFaces[4].indexOf(typeName) >= 0) + this.faceIdx = 4; + else if (kFontFaces[5].indexOf(typeName) >= 0) + this.faceIdx = 5; + } + else if (fontObj.isSymbolicFont) { + this.faceIdx = 2; + } + + if (this.faceIdx == 0) { + if (this.typeName.indexOf("narrow") > 0) + this.faceIdx = 1; + } + +// nodeUtil.p2jinfo"typeName = " + typeName + " => faceIdx = " + this.faceIdx); + } + + #getFontStyleIndex(fontSize) { + this.#setFaceIndex(); + + //MQZ Feb.28.2013. Adjust bold text fontsize to work around word spacing issue + this.fontSize = (this.bold && (fontSize > 12)) ? 
fontSize + 1 : fontSize; + + let fsa = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; + let retVal = -1; + + kFontStyles.forEach(function(element, index, list){ + if (retVal === -1) { + if (element[0] === fsa[0] && element[1] === fsa[1] && + element[2] === fsa[2] && element[3] === fsa[3]) { + retVal = index; + } + } + }); + + return retVal; + } + + #processSymbolicFont(str) { + let retVal = str; + + if (!str || str.length !== 1) + return retVal; + + if (!this.fontObj.isSymbolicFont || !this.isSymbol) { + if (retVal == "C" || retVal == "G") { //prevent symbolic encoding from the client + retVal = " " + retVal + " "; //sample: va_ind_760c + } + return retVal; + } + + switch(str.charCodeAt(0)) { + case 20: retVal = '\u2713'; break; //check mark + case 70: retVal = (this.fontObj.type === "CIDFontType0") ? '\u26A0' : '\u007D'; break; //exclaimation in triangle OR right curly bracket + case 71: retVal = '\u25b6'; break; //right triangle + case 97: retVal = '\u25b6'; break; //right triangle + case 99: retVal = this.isSymbol ? '\u2022' : '\u25b2'; break; //up triangle. set to Bullet Dot for VA SchSCR + case 100: retVal = '\u25bc'; break; //down triangle + case 103: retVal = '\u27A8'; break; //right arrow. 
sample: va_ind_760pff and pmt + case 106: retVal = ''; break; //VA 301: string j character by the checkbox, hide it for now + case 114: retVal = '\u2022'; break; //Bullet dot + case 115: retVal = '\u25b2'; break; //up triangle + case 116: retVal = '\u2022'; break; //Bullet dot + case 118: retVal = '\u2022'; break; //Bullet dot + default: + nodeUtil.p2jinfo(this.fontObj.type + " - SymbolicFont - (" + this.fontObj.name + ") : " + + str.charCodeAt(0) + "::" + str.charCodeAt(1) + " => " + retVal); + } + + return retVal; + } + + #textRotationAngle(matrix2D) { + let retVal = 0; + if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) { + if (matrix2D[0][1] != 0 && matrix2D[1][0] != 0) { + if ((matrix2D[0][1] / matrix2D[1][0]) + 1 < 0.0001) + retVal = 90; + } + } + else if (matrix2D[0][0] !== 0 && matrix2D[1][1] !== 0) { + let r1 = Math.atan(-matrix2D[0][1] / matrix2D[0][0]); + let r2 = Math.atan(matrix2D[1][0] / matrix2D[1][1]); + if (Math.abs(r1) > 0.0001 && (r1 - r2 < 0.0001)) { + retVal = r1 * 180 / Math.PI; + } + } + return retVal; + } + + // public instance methods + processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) { + const text = this.#processSymbolicFont(str); + if (!text) { + return; + } + this.fontStyleId = this.#getFontStyleIndex(fontSize); + + // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary + // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; + const TS = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; + + const clrId = PDFUnit.findColorIndex(color); + const colorObj = (clrId >= 0 && clrId < PDFUnit.colorCount()) ? 
{clr: clrId} : {oc: color}; + + let textRun = { + T: this.flash_encode(text), + S: this.fontStyleId, + TS: TS + }; + const rAngle = this.#textRotationAngle(matrix2D); + if (rAngle != 0) { + nodeUtil.p2jinfo(str + ": rotated " + rAngle + " degree."); + textRun = {...textRun, RA: rAngle}; + } + + const oneText = {x: PDFUnit.toFormX(p.x) - 0.25, + y: PDFUnit.toFormY(p.y) - 0.75, + w: PDFUnit.toFixedFloat(maxWidth), + ...colorObj, //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) + sw: this.spaceWidth, //font space width, use to merge adjacent text blocks + A: "left", + R: [textRun] + }; + + targetData.Texts.push(oneText); + } + + flash_encode(str) { + let retVal = encodeURIComponent(str); + retVal = retVal.replace("%C2%96", "-"); + retVal = retVal.replace("%C2%91", "%27"); + retVal = retVal.replace("%C2%92", "%27"); + retVal = retVal.replace("%C2%82", "%27"); + retVal = retVal.replace("%C2%93", "%22"); + retVal = retVal.replace("%C2%94", "%22"); + retVal = retVal.replace("%C2%84", "%22"); + retVal = retVal.replace("%C2%8B", "%C2%AB"); + retVal = retVal.replace("%C2%9B", "%C2%BB"); + + return retVal; + } + + clean() { + this.fontObj = null; + delete this.fontObj; + } +} + +// alias some functions to make (compiled) code shorter +const {round: mr, sin: ms, cos: mc, abs, sqrt} = Math; + +// precompute "00" to "FF" +const dec2hex = []; +for (let i = 0; i < 16; i++) { + for (let j = 0; j < 16; j++) { + dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); + } +} + +function createMatrixIdentity() { + return [ + [1, 0, 0], + [0, 1, 0], + [0, 0, 1] + ]; +} + +function matrixMultiply(m1, m2) { + let result = createMatrixIdentity(); + + for (let x = 0; x < 3; x++) { + for (let y = 0; y < 3; y++) { + let sum = 0; + + for (let z = 0; z < 3; z++) { + sum += m1[x][z] * m2[z][y]; + } + + result[x][y] = sum; + } + } + return result; +} + +function copyState(o1, o2) { + o2.fillStyle = o1.fillStyle; + o2.lineCap = o1.lineCap; + o2.lineJoin = 
o1.lineJoin; + o2.lineWidth = o1.lineWidth; + o2.miterLimit = o1.miterLimit; + o2.shadowBlur = o1.shadowBlur; + o2.shadowColor = o1.shadowColor; + o2.shadowOffsetX = o1.shadowOffsetX; + o2.shadowOffsetY = o1.shadowOffsetY; + o2.strokeStyle = o1.strokeStyle; + o2.globalAlpha = o1.globalAlpha; + o2.arcScaleX_ = o1.arcScaleX_; + o2.arcScaleY_ = o1.arcScaleY_; + o2.lineScale_ = o1.lineScale_; + o2.dashArray = o1.dashArray; +} + +function processStyle(styleString) { + let str, alpha = 1; + + styleString = String(styleString); + if (styleString.substring(0, 3) == 'rgb') { + let start = styleString.indexOf('(', 3); + let end = styleString.indexOf(')', start + 1); + let guts = styleString.substring(start + 1, end).split(','); + + str = '#'; + for (let i = 0; i < 3; i++) { + str += dec2hex[Number(guts[i])]; + } + + if (guts.length == 4 && styleString.substring(3, 4) == 'a') { + alpha = guts[3]; + } + } else { + str = styleString; + } + + return {color:str, alpha:alpha}; +} + +// Helper function that takes the already fixed cordinates. +function bezierCurveToHelper(self, cp1, cp2, p) { + self.currentPath_.push({ + type:'bezierCurveTo', + cp1x:cp1.x, + cp1y:cp1.y, + cp2x:cp2.x, + cp2y:cp2.y, + x:p.x, + y:p.y + }); + self.currentX_ = p.x; + self.currentY_ = p.y; +} + +function matrixIsFinite(m) { + for (let j = 0; j < 3; j++) { + for (let k = 0; k < 2; k++) { + if (!isFinite(m[j][k]) || isNaN(m[j][k])) { + return false; + } + } + } + return true; +} + +function setM(ctx, m, updateLineScale) { + if (!matrixIsFinite(m)) { + return; + } + ctx.m_ = m; + + if (updateLineScale) { + // Get the line scale. + // Determinant of this.m_ means how much the area is enlarged by the + // transformation. So its square root can be used as a scale factor + // for width. 
+ let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; + ctx.lineScale_ = sqrt(abs(det)); + } +} + +class CanvasPattern_ { + constructor() { + } +} + +// Gradient / Pattern Stubs +class CanvasGradient_ { + constructor(aType) { + this.type_ = aType; + this.x0_ = 0; + this.y0_ = 0; + this.r0_ = 0; + this.x1_ = 0; + this.y1_ = 0; + this.r1_ = 0; + this.colors_ = []; + } + addColorStop(aOffset, aColor) { + aColor = processStyle(aColor); + this.colors_.push({offset:aOffset, + color:aColor.color, + alpha:aColor.alpha}); + } +} + + +/** + * This class implements CanvasRenderingContext2D interface as described by + * the WHATWG. + * @param {HTMLElement} surfaceElement The element that the 2D context should + * be associated with + */ +class CanvasRenderingContext2D_ { + constructor(canvasTarget, scaledWidth, scaledHeight) { + this.m_ = createMatrixIdentity(); + + this.mStack_ = []; + this.aStack_ = []; + this.currentPath_ = []; + + // Canvas context properties + this.strokeStyle = '#000'; + this.fillStyle = '#000'; + + this.lineWidth = 1; + this.lineJoin = 'miter'; + this.lineCap = 'butt'; + this.dashArray = []; + this.miterLimit = 1; + this.globalAlpha = 1; + + if (!("HLines" in canvasTarget) || !Array.isArray(canvasTarget.HLines)) + canvasTarget.HLines = []; + if (!("VLines" in canvasTarget) || !Array.isArray(canvasTarget.VLines)) + canvasTarget.VLines = []; + if (!("Fills" in canvasTarget) || !Array.isArray(canvasTarget.Fills)) + canvasTarget.Fills = []; + if (!("Texts" in canvasTarget) || !Array.isArray(canvasTarget.Texts)) + canvasTarget.Texts = []; + + this.canvas = canvasTarget; + + this.width = scaledWidth; + this.height = scaledHeight; + + this.arcScaleX_ = 1; + this.arcScaleY_ = 1; + this.lineScale_ = 1; + + this.currentFont = null; + } + + //private helper methods + #drawPDFLine(p1, p2, lineWidth, color) { + let dashedLine = Array.isArray(this.dashArray) && (this.dashArray.length > 1); + let pL = new PDFLine(p1.x, p1.y, p2.x, p2.y, lineWidth, color, dashedLine); + 
pL.processLine(this.canvas); + } + + #drawPDFFill(cp, min, max, color) { + let width = max.x - min.x; + let height = max.y - min.y; + let pF = new PDFFill(cp.x, cp.y, width, height, color); + pF.processFill(this.canvas); + } + + #needRemoveRect(x, y, w, h) { + let retVal = (Math.abs(w - Math.abs(h)) < 1 && w < 13); + if (retVal) { + nodeUtil.p2jinfo("Skipped: tiny rect: w=" + w + ", h=" + h); + } + return retVal; + } + + getContext(ctxType) { + return (ctxType === "2d") ? this : null; + } + + setLineDash(lineDash) { + this.dashArray = lineDash; + } + + getLineDash() { + return this.dashArray; + } + + fillText(text, x, y, maxWidth, fontSize) { + if (!text || text.trim().length < 1) + return; + let p = this.getCoords_(x, y); + + let a = processStyle(this.fillStyle || this.strokeStyle); + let color = (!!a) ? a.color : '#000000'; + + this.currentFont.processText(p, text, maxWidth, color, fontSize, this.canvas, this.m_); + }; + + strokeText(text, x, y, maxWidth) { + //MQZ. 10/23/2012, yeah, no hollow text for now + this.fillText(text, x, y, maxWidth); + } + + measureText(text) { + console.warn("to be implemented: contextPrototype.measureText - ", text); + let chars = text.length || 1; + return {width: chars * (this.currentFont.spaceWidth || 5)}; + } + + setFont(fontObj) { + if ((!!this.currentFont) && typeof(this.currentFont.clean) === "function") { + this.currentFont.clean(); + this.currentFont = null; + } + + this.currentFont = new PDFFont(fontObj); + } + + clearRect() { + console.warn("to be implemented: contextPrototype.clearRect"); + } + + beginPath() { + // TODO: Branch current matrix so that save/restore has no effect + // as per safari docs. 
+ this.currentPath_ = []; + } + + moveTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({type:'moveTo', x:p.x, y:p.y}); + this.currentX_ = p.x; + this.currentY_ = p.y; + } + + lineTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({type:'lineTo', x:p.x, y:p.y}); + + this.currentX_ = p.x; + this.currentY_ = p.y; + } + + bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { + let p = this.getCoords_(aX, aY); + let cp1 = this.getCoords_(aCP1x, aCP1y); + let cp2 = this.getCoords_(aCP2x, aCP2y); + bezierCurveToHelper(this, cp1, cp2, p); + } + + quadraticCurveTo(aCPx, aCPy, aX, aY) { + // the following is lifted almost directly from + // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes + + let cp = this.getCoords_(aCPx, aCPy); + let p = this.getCoords_(aX, aY); + + let cp1 = { + x:this.currentX_ + 2.0 / 3.0 * (cp.x - this.currentX_), + y:this.currentY_ + 2.0 / 3.0 * (cp.y - this.currentY_) + }; + let cp2 = { + x:cp1.x + (p.x - this.currentX_) / 3.0, + y:cp1.y + (p.y - this.currentY_) / 3.0 + }; + + bezierCurveToHelper(this, cp1, cp2, p); + } + + arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { + let arcType = aClockwise ? 'at' : 'wa'; + + let xStart = aX + mc(aStartAngle) * aRadius; + let yStart = aY + ms(aStartAngle) * aRadius; + + let xEnd = aX + mc(aEndAngle) * aRadius; + let yEnd = aY + ms(aEndAngle) * aRadius; + + // IE won't render arches drawn counter clockwise if xStart == xEnd. + if (xStart == xEnd && !aClockwise) { + xStart += 0.125; // Offset xStart by 1/80 of a pixel. 
Use something + // that can be represented in binary + } + + let p = this.getCoords_(aX, aY); + let pStart = this.getCoords_(xStart, yStart); + let pEnd = this.getCoords_(xEnd, yEnd); + + this.currentPath_.push({type:arcType, + x:p.x, + y:p.y, + radius:aRadius, + xStart:pStart.x, + yStart:pStart.y, + xEnd:pEnd.x, + yEnd:pEnd.y}); + } + + rect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return;//try to remove the rectangle behind radio buttons and checkboxes + } + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + } + + strokeRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return;//try to remove the rectangle behind radio buttons and checkboxes + } + + let oldPath = this.currentPath_; + this.beginPath(); + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.stroke(); + + this.currentPath_ = oldPath; + } + + fillRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return;//try to remove the rectangle behind radio buttons and checkboxes + } + + let oldPath = this.currentPath_; + this.beginPath(); + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.fill(); + + this.currentPath_ = oldPath; + } + + createLinearGradient(aX0, aY0, aX1, aY1) { + let gradient = new CanvasGradient_('gradient'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + return gradient; + } + + createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { + let gradient = new CanvasGradient_('gradientradial'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.r0_ = aR0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + gradient.r1_ = aR1; + return 
gradient; + } + + drawImage(image, var_args) { + //MQZ. no image drawing support for now + } + + getImageData(x, y, w, h) { + //MQZ. returns empty data buffer for now + return { + width:w, + height:h, + data:new Uint8Array(w * h * 4) + }; + } + + stroke(aFill) { + if (this.currentPath_.length < 2) { + return; + } + + let a = processStyle(aFill ? this.fillStyle : this.strokeStyle); + let color = a.color; +// let opacity = a.alpha * this.globalAlpha; + let lineWidth = this.lineScale_ * this.lineWidth; + + let min = {x:null, y:null}; + let max = {x:null, y:null}; + + for (let i = 0; i < this.currentPath_.length; i++) { + let p = this.currentPath_[i]; + + switch (p.type) { + case 'moveTo': + break; + case 'lineTo': + if (!aFill) { //lines + if (i > 0) { + this.#drawPDFLine(this.currentPath_[i-1], p, lineWidth, color); + } + } + break; + case 'close': + if (!aFill) { //lines + if (i > 0) { + this.#drawPDFLine(this.currentPath_[i-1], this.currentPath_[0], lineWidth, color); + } + } + p = null; + break; + } + + // Figure out dimensions so we can set fills' coordinates correctly + if (aFill && p) { + if (min.x == null || p.x < min.x) { + min.x = p.x; + } + if (max.x == null || p.x > max.x) { + max.x = p.x; + } + if (min.y == null || p.y < min.y) { + min.y = p.y; + } + if (max.y == null || p.y > max.y) { + max.y = p.y; + } + } + } + + if (aFill) { //fill + this.#drawPDFFill(min, min, max, color); + } + } + + fill() { + this.stroke(true); + } + + closePath() { + this.currentPath_.push({type:'close'}); + } + + /** + * @private + */ + getCoords_ (aX, aY) { + let m = this.m_; + return { + x: (aX * m[0][0] + aY * m[1][0] + m[2][0]), + y: (aX * m[0][1] + aY * m[1][1] + m[2][1]) + }; + } + + save() { + let o = {}; + copyState(this, o); + this.aStack_.push(o); + this.mStack_.push(this.m_); + this.m_ = matrixMultiply(createMatrixIdentity(), this.m_); + } + + restore() { + copyState(this.aStack_.pop(), this); + this.m_ = this.mStack_.pop(); + } + + translate(aX, aY) { + let m1 = [ + 
[1, 0, 0], + [0, 1, 0], + [aX, aY, 1] + ]; + + setM(this, matrixMultiply(m1, this.m_), false); + } + + rotate(aRot) { + let c = mc(aRot); + let s = ms(aRot); + + let m1 = [ + [c, s, 0], + [-s, c, 0], + [0, 0, 1] + ]; + + setM(this, matrixMultiply(m1, this.m_), false); + } + + scale(aX, aY) { + this.arcScaleX_ *= aX; + this.arcScaleY_ *= aY; + let m1 = [ + [aX, 0, 0], + [0, aY, 0], + [0, 0, 1] + ]; + + setM(this, matrixMultiply(m1, this.m_), true); + } + + transform(m11, m12, m21, m22, dx, dy) { + let m1 = [ + [m11, m12, 0], + [m21, m22, 0], + [dx, dy, 1] + ]; + + setM(this, matrixMultiply(m1, this.m_), true); + } + + setTransform(m11, m12, m21, m22, dx, dy) { + let m = [ + [m11, m12, 0], + [m21, m22, 0], + [dx, dy, 1] + ]; + + setM(this, m, true); + } + + /******** STUBS ********/ + clip() { + // TODO: Implement + } + + arcTo() { + // TODO: Implement + } + + createPattern() { + return new CanvasPattern_(); + } +} + +const kFBANotOverridable = 0x00000400; // indicates the field is read only by the user +const kFBARequired = 0x00000010; // indicates the field is required +const kMinHeight = 20; + +class PDFField { + static tabIndex = 0; + + static isWidgetSupported(field) { + let retVal = false; + + switch(field.fieldType) { + case 'Tx': retVal = true; break; //text input + case 'Btn': + if (field.fieldFlags & 32768) { + field.fieldType = 'Rd'; //radio button + } + else if (field.fieldFlags & 65536) { + field.fieldType = 'Btn'; //push button + } + else { + field.fieldType = 'Cb'; //checkbox + } + retVal = true; + break; + case 'Ch': retVal = true; break; //drop down + case 'Sig': retVal = true; break; //signature + default: + nodeUtil.p2jwarn("Unsupported: field.fieldType of " + field.fieldType); + break; + } + + return retVal; + } + + static isFormElement(field) { + let retVal = false; + switch(field.subtype) { + case 'Widget': retVal = PDFField.isWidgetSupported(field); break; + default: + nodeUtil.p2jwarn("Unsupported: field.type of " + field.subtype); + break; + 
} + return retVal; + } + + // constructor + constructor(field, viewport, Fields, Boxsets) { + this.field = field; + this.viewport = viewport; + this.Fields = Fields; + this.Boxsets = Boxsets; + } + + // Normalize rectangle rect=[x1, y1, x2, y2] so that (x1,y1) < (x2,y2) + // For coordinate systems whose origin lies in the bottom-left, this + // means normalization to (BL,TR) ordering. For systems with origin in the + // top-left, this means (TL,BR) ordering. + static #normalizeRect(rect) { + const r = rect.slice(0); // clone rect + if (rect[0] > rect[2]) { + r[0] = rect[2]; + r[2] = rect[0]; + } + if (rect[1] > rect[3]) { + r[1] = rect[3]; + r[3] = rect[1]; + } + return r; + } + + #getFieldPosition(field) { + let viewPort = this.viewport; + let fieldRect = viewPort.convertToViewportRectangle(field.rect); + let rect = PDFField.#normalizeRect(fieldRect); + + let height = rect[3] - rect[1]; + if (field.fieldType === 'Tx') { + if (height > kMinHeight + 2) { + rect[1] += 2; + height -= 2; + } + } + else if (field.fieldType !== 'Ch') { //checkbox, radio button, and link button + rect[1] -= 3; + } + + height = (height >= kMinHeight) ? 
height : kMinHeight; + + return { + x: PDFUnit.toFormX(rect[0]), + y: PDFUnit.toFormY(rect[1]), + w: PDFUnit.toFormX(rect[2] - rect[0]), + h: PDFUnit.toFormY(height) + }; + } + + #getFieldBaseData(field) { + let attributeMask = 0; + //PDF Spec p.676 TABLE 8.70 Field flags common to all field types + if (field.fieldFlags & 0x00000001) { + attributeMask |= kFBANotOverridable; + } + if (field.fieldFlags & 0x00000002) { + attributeMask |= kFBARequired; + } + + let anData = { + id: { Id: field.fullName, EN: 0}, + TI: field.TI, + AM: attributeMask + }; + //PDF Spec p.675: add TU (AlternativeText) fields to provide accessibility info + if (field.alternativeText && field.alternativeText.length > 1) { + anData.TU = field.alternativeText; + } + + if (field.alternativeID && field.alternativeID.length > 1) { + anData.TM = field.alternativeID; + } + + return Object.assign(anData, this.#getFieldPosition(field)); + } + + #addAlpha(field) { + const anData = Object.assign({ + style: 48, + T: { + Name: field.TName || "alpha", + TypeInfo: {} + } + }, this.#getFieldBaseData(field)); + + if (field.MV) { //field attributes: arbitrary mask value + anData.MV = field.MV; + } + if (field.fieldValue) { + anData.V = field.fieldValue; //read-only field value, like "self-prepared" + } + + this.Fields.push(anData); + } + + #addCheckBox(box) { + const anData = Object.assign({ + style: 48, + T: { + Name: "box", + TypeInfo: {} + } + }, this.#getFieldBaseData(box)); + if(box.fieldValue) { + anData.checked = box.fieldValue !== 'Off'; + } + + this.Boxsets.push({boxes:[anData]}); + } + + #addRadioButton(box) { + const anData = Object.assign({ + style: 48, + T: { + Name: "box", + TypeInfo: {} + } + }, this.#getFieldBaseData(box)); + + anData.id.Id = box.value; + if ('checked' in box) { + anData.checked = box.checked; + } + + const rdGroup = this.Boxsets.filter(boxset => ('id' in boxset) && ('Id' in boxset.id) && (boxset.id.Id === box.fullName))[0]; + if ((!!rdGroup) && ('boxes' in rdGroup)) { + 
rdGroup.boxes.push(anData); + } + else { + this.Boxsets.push({boxes:[anData], id: { Id: box.fullName, EN: 0}}); + } + } + + #addLinkButton(field) { + const anData = Object.assign({ + style: 48, + T: { + Name: "link" + }, + FL: { + form: {Id: field.FL} + } + }, this.#getFieldBaseData(field)); + + this.Fields.push(anData); + } + + #addSelect(field) { + const anData = Object.assign({ + style: 48, + T: { + Name: "alpha", + TypeInfo: {} + } + }, this.#getFieldBaseData(field)); + + anData.w -= 0.5; //adjust combobox width + anData.PL = {V: [], D: []}; + field.value.forEach( (ele, idx) => { + if (Array.isArray(ele)) { + anData.PL.D.push(ele[0]); + anData.PL.V.push(ele[1]); + } else { + anData.PL.D.push(ele); + anData.PL.V.push(ele); + } + }); + + // add field value to the object + if (field.fieldValue) { + anData.V = field.fieldValue; + } + this.Fields.push(anData); + }; + + #addSignature(field) { + const anData = Object.assign({ + style: 48, + T: { + Name: "signature", + TypeInfo: {} + } + }, this.#getFieldBaseData(field)); + + if (field.Sig) { + anData.Sig = {}; + if (field.Sig.Name) anData.Sig.Name = field.Sig.Name; + if (field.Sig.M) anData.Sig.M = PDFUnit.dateToIso8601(field.Sig.M); + if (field.Sig.Location) anData.Sig.Location = field.Sig.Location; + if (field.Sig.Reason) anData.Sig.Reason = field.Sig.Reason; + if (field.Sig.ContactInfo) anData.Sig.ContactInfo = field.Sig.ContactInfo; + } + + this.Fields.push(anData); + } + + // public instance methods + processField() { + this.field.TI = PDFField.tabIndex++; + + switch(this.field.fieldType) { + case 'Tx': this.#addAlpha(this.field); break; + case 'Cb': this.#addCheckBox(this.field); break; + case 'Rd': this.#addRadioButton(this.field);break; + case 'Btn':this.#addLinkButton(this.field); break; + case 'Ch': this.#addSelect(this.field); break; + case 'Sig': this.#addSignature(this.field); break; + } + + this.clean(); + } + + clean() { + delete this.field; + delete this.viewport; + delete this.Fields; + delete 
this.Boxsets; + } + + //static public method to generate fieldsType object based on parser result + static getAllFieldsTypes(data) { + const isFieldReadOnly = field => { + return (field.AM & kFBANotOverridable) ? true : false; + }; + + const getFieldBase = field => { + return {id: field.id.Id, type: field.T.Name, calc: isFieldReadOnly(field), value: field.V || ""}; + }; + + let retVal = []; + data.Pages.forEach( page => { + page.Boxsets.forEach( boxsets => { + if (boxsets.boxes.length > 1) { //radio button + boxsets.boxes.forEach( box => { + retVal.push({id: boxsets.id.Id, type: "radio", calc: isFieldReadOnly(box), value: box.id.Id}); + }); + } + else { //checkbox + retVal.push(getFieldBase(boxsets.boxes[0])); + } + }); + + page.Fields.forEach(field => retVal.push(getFieldBase(field))); + + }); + return retVal; + } +} + +class PTIXmlParser { + xmlData = null; + ptiPageArray = []; + + // constructor + constructor() { + this.xmlData = null; + this.ptiPageArray = []; + } + + parseXml(filePath, callback) { + fs.readFile(filePath, 'utf8', (err, data) => { + if (err) { + callback(err); + } + else { + this.xmlData = data; + + var parser = new xmldom.DOMParser(); + var dom = parser.parseFromString(this.xmlData); + var root = dom.documentElement; + + var xmlFields = root.getElementsByTagName("field"); + var fields = []; + + for(var i=0;i preContent += fs.readFileSync(baseDir + fileName, 'utf8'), ""); +(function (globalScope = {}) { + (function (globalScope = {}) { + eval(_baseCode); + })(); + })(); + +////////////////////////////////start of helper classes +class PDFPageParser { + //static + static RenderingStates = { + INITIAL: 0, + RUNNING: 1, + PAUSED: 2, + FINISHED: 3 + }; + + //public + id = -1; + pdfPage = null; + ptiParser = null; + scale = 0; + viewport = null; + renderingState = -1; + + Fields = null; + Boxsets = null; + ctxCanvas = null; + + #_addField (field) { + if (!PDFField.isFormElement(field)) { + nodeUtil.p2jwarn("NOT valid form element", field); + return; 
+ } + + const oneField = new PDFField(field, this.viewport, this.Fields, this.Boxsets); + oneField.processField(); + } + + // constructor + constructor(pdfPage, id, scale, ptiParser) { + // public, this instance copies + this.id = id; + this.pdfPage = pdfPage; + this.ptiParser = ptiParser; + + this.scale = scale || 1.0; + + //leave out the 2nd parameter in order to use page's default rotation (for both portrait and landscape form) + this.viewport = this.pdfPage.getViewport(this.scale); + + this.renderingState = PDFPageParser.RenderingStates.INITIAL; + + //form elements other than radio buttons and check boxes + this.Fields = []; + //form elements: radio buttons and check boxes + this.Boxsets = []; + this.ctxCanvas = {}; + } + + get width() { return PDFUnit.toFormX(this.viewport.width); } + get height() { return PDFUnit.toFormY(this.viewport.height); } + get HLines() { return this.ctxCanvas.HLines; } + get VLines() { return this.ctxCanvas.VLines; } + get Fills() { return this.ctxCanvas.Fills; } + get Texts() { return this.ctxCanvas.Texts; } + + destroy() { + this.pdfPage.destroy(); + this.pdfPage = null; + + this.ptiParser = null; + this.Fields = null; + this.Boxsets = null; + this.ctxCanvas = null; + } + + getPagePoint(x, y) { + return this.viewport.convertToPdfPoint(x, y); + } + + parsePage(callback, errorCallBack) { + if (this.renderingState !== PDFPageParser.RenderingStates.INITIAL) { + errorCallBack('Must be in new state before drawing'); + return; + } + + this.renderingState = PDFPageParser.RenderingStates.RUNNING; + + const canvas = createScratchCanvas(1, 1); + const ctx = canvas.getContext('2d'); + + function pageViewDrawCallback(error) { + this.renderingState = PDFPageParser.RenderingStates.FINISHED; + + if (error) { + console.error(error); + errorCallBack(`Error: Page ${this.id + 1}: ${error.message}`); + } + else { + if (this.ptiParser) { + const extraFields = this.ptiParser.getFields(parseInt(this.id) + 1); + extraFields.forEach( field => 
this.#_addField(field) ); + } + + this.ctxCanvas = ctx.canvas; + this.stats = this.pdfPage.stats; + + nodeUtil.p2jinfo(`Success: Page ${this.id + 1}`); + callback(); + } + } + + const renderContext = { + canvasContext:ctx, + viewport:this.viewport + }; + + this.pdfPage.render(renderContext).then( + data => { + this.pdfPage.getAnnotations().then( + fields => { + fields.forEach(field => this.#_addField(field)); + pageViewDrawCallback.call(this, null); + }, + err => errorCallBack("pdfPage.getAnnotations error:" + err)); + }, + err => pageViewDrawCallback.call(this, err) + ); + } +} + +////////////////////////////////Start of Node.js Module +class PDFJSClass extends events.EventEmitter { + pdfDocument = null; + pages = null; + rawTextContents = null; + + needRawText = null; + + // constructor + constructor(needRawText) { + super(); + + // public, this instance copies + this.pdfDocument = null; + this.pages = []; + this.rawTextContents = []; + + this.needRawText = needRawText; + } + + raiseErrorEvent(errMsg) { + console.error(errMsg); + process.nextTick( () => this.emit("pdfjs_parseDataError", errMsg)); + // this.emit("error", errMsg); + return errMsg; + } + + raiseReadyEvent(data) { + process.nextTick( () => this.emit("pdfjs_parseDataReady", data) ); + return data; + } + + + parsePDFData(arrayBuffer, password) { + this.pdfDocument = null; + + const parameters = {password: password, data: arrayBuffer}; + PDFJS.getDocument(parameters).then( + pdfDocument => this.load(pdfDocument, 1), + error => this.raiseErrorEvent(error) + ); + }; + + tryLoadFieldInfoXML(pdfFilePath) { + const _sufInfo = "_fieldInfo.xml"; + const fieldInfoXMLPath = pdfFilePath.replace(".pdf", _sufInfo); + if ((fieldInfoXMLPath.indexOf(_sufInfo) < 1) || (!fs.existsSync(fieldInfoXMLPath))) { + return; + } + nodeUtil.p2jinfo("About to load fieldInfo XML : " + fieldInfoXMLPath); + + this.ptiParser = new PTIXmlParser(); + this.ptiParser.parseXml(fieldInfoXMLPath, err => { + if (err) { + 
nodeUtil.p2jwarn("fieldInfo XML Error: " + JSON.stringify(err)); + this.ptiParser = null; + } + else { + nodeUtil.p2jinfo("fieldInfo XML loaded."); + } + }); + } + + load(pdfDocument, scale) { + this.pdfDocument = pdfDocument; + + return this.loadMetaData().then( + () => this.loadPages(), + error => this.raiseErrorEvent("loadMetaData error: " + error) + ); + } + + loadMetaData() { + return this.pdfDocument.getMetadata().then( + data => { + this.documentInfo = data.info; + this.metadata = data.metadata?.metadata ?? {}; + this.parseMetaData(); + }, + error => this.raiseErrorEvent("pdfDocument.getMetadata error: " + error) + ); + } + + parseMetaData() { + const meta = {Transcoder: _PARSER_SIG, Meta: {...this.documentInfo, Metadata: this.metadata}}; + this.raiseReadyEvent(meta); + this.emit("readable", meta); + } + + loadPages() { + const pagesCount = this.pdfDocument.numPages; + const pagePromises = []; + for (let i = 1; i <= pagesCount; i++) + pagePromises.push(this.pdfDocument.getPage(i)); + + const pagesPromise = PDFJS.Promise.all(pagePromises); + + nodeUtil.p2jinfo("PDF loaded. 
pagesCount = " + pagesCount); + + return pagesPromise.then( + promisedPages => this.parsePage(promisedPages, 0, 1.5), + error => this.raiseErrorEvent("pagesPromise error: " + error) + ); + } + + parsePage(promisedPages, id, scale) { + nodeUtil.p2jinfo("start to parse page:" + (id+1)); + + const pdfPage = promisedPages[id]; + const pageParser = new PDFPageParser(pdfPage, id, scale, this.ptiParser); + + function continueOnNextPage() { + nodeUtil.p2jinfo("complete parsing page:" + (id+1)); + if (id === (this.pdfDocument.numPages - 1) ) { + this.raiseReadyEvent({Pages:this.pages}); + //v1.1.2: signal end of parsed data with null + process.nextTick(() => this.raiseReadyEvent(null)); + this.emit("data", null); + } + else { + process.nextTick(() => this.parsePage(promisedPages, ++id, scale)); + } + } + + pageParser.parsePage( + data => { + const page = { + Width: pageParser.width, + Height: pageParser.height, + HLines: pageParser.HLines, + VLines: pageParser.VLines, + Fills: pageParser.Fills, + //needs to keep current default output format, text content will output to a separate file if '-c' command line argument is set + // Content:pdfPage.getTextContent(), + Texts: pageParser.Texts, + Fields: pageParser.Fields, + Boxsets: pageParser.Boxsets + }; + + this.pages.push(page); + this.emit("data", page); + + if (this.needRawText) { + pdfPage.getTextContent().then( + textContent => { + this.rawTextContents.push(textContent); + nodeUtil.p2jinfo("complete parsing raw text content:" + (id+1)); + continueOnNextPage.call(this); + }, + error => this.raiseErrorEvent("pdfPage.getTextContent error: " + error) + ); + } + else { + continueOnNextPage.call(this); + } + }, + errMsg => this.raiseErrorEvent(errMsg) + ); + } + + getRawTextContent() { + let retVal = ""; + if (!this.needRawText) + return retVal; + + this.rawTextContents.forEach( (textContent, index) => { + let prevText = null; + textContent.bidiTexts.forEach( (textObj, idx) => { + if (prevText) { + if (Math.abs(textObj.y - 
prevText.y) <= 9) { + prevText.str += textObj.str; + } + else { + retVal += prevText.str + "\r\n"; + prevText = textObj; + } + } + else { + prevText = textObj; + } + + }); + if (prevText) { + retVal += prevText.str; + } + retVal += "\r\n----------------Page (" + index + ") Break----------------\r\n"; + }); + + return retVal; + } + + getAllFieldsTypes() { + return PDFField.getAllFieldsTypes({Pages:this.pages || []}); + } + + getMergedTextBlocksIfNeeded() { + for (let p = 0; p < this.pages.length; p++) { + let prevText = null; + let page = this.pages[p]; + + page.Texts.sort(PDFFont.compareBlockPos); + page.Texts = page.Texts.filter( (t, j) => { + let isDup = (j > 0) && PDFFont.areDuplicateBlocks(page.Texts[j-1], t); + if (isDup) { + nodeUtil.p2jinfo("skipped: dup text block: " + decodeURIComponent(t.R[0].T)); + } + return !isDup; + }); + + for (let i = 0; i < page.Texts.length; i++) { + let text = page.Texts[i]; + + if (prevText) { + if (PDFFont.areAdjacentBlocks(prevText, text) && PDFFont.haveSameStyle(prevText, text)) { + let preT = decodeURIComponent(prevText.R[0].T); + let curT = decodeURIComponent(text.R[0].T); + + prevText.R[0].T += text.R[0].T; + prevText.w += text.w; + text.merged = true; + + let mergedText = decodeURIComponent(prevText.R[0].T); + nodeUtil.p2jinfo(`merged text block: ${preT} + ${curT} => ${mergedText}`); + prevText = null; //yeah, only merge two blocks for now + } + else { + prevText = text; + } + } + else { + prevText = text; + } + } + + page.Texts = page.Texts.filter( t => !t.merged); + } + + return {Pages:this.pages}; + } + + destroy() { + this.removeAllListeners(); + + if (this.pdfDocument) + this.pdfDocument.destroy(); + this.pdfDocument = null; + + this.pages = null; + this.rawTextContents = null; + } + +} + +class ParserStream extends stream.Transform { + static createContentStream(jsonObj) { + const rStream = new stream.Readable({objectMode: true}); + rStream.push(jsonObj); + rStream.push(null); + return rStream; + } + + static 
createOutputStream(outputPath, resolve, reject) { + const outputStream = fs.createWriteStream(outputPath); + outputStream.on('finish', () => resolve(outputPath)); + outputStream.on('error', err => reject(err) ); + return outputStream; + } + + #pdfParser = null; + #chunks = []; + #parsedData = {Pages:[]}; + #_flush_callback = null; + + constructor(pdfParser, options) { + super(options); + this.#pdfParser = pdfParser; + + this.#chunks = []; + + // this.#pdfParser.on("pdfParser_dataReady", evtData => { + // this.push(evtData); + // this.#_flush_callback(); + // this.emit('end', null); + // }); + this.#pdfParser.on("readable", meta => this.#parsedData = {...meta, Pages:[]}); + this.#pdfParser.on("data", page => { + if (!page) { + this.push(this.#parsedData); + this.#_flush_callback(); + } + else + this.#parsedData.Pages.push(page); + }); + } + + //implements transform stream + _transform(chunk, enc, callback) { + this.#chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, enc)); + callback(); + } + + _flush(callback) { + this.#_flush_callback = callback; + this.#pdfParser.parseBuffer(Buffer.concat(this.#chunks)); + } + + _destroy() { + super.removeAllListeners(); + this.#pdfParser = null; + this.#chunks = []; + } +} + +class PDFParser extends events.EventEmitter { // inherit from event emitter + //public static + static get colorDict() {return kColors; } + static get fontFaceDict() { return kFontFaces; } + static get fontStyleDict() { return kFontStyles; } + + //private static + static #maxBinBufferCount = 10; + static #binBuffer = {}; + + //private + #password = ""; + + #context = null; // service context object, only used in Web Service project; null in command line + + #pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + #pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache + #data = null; //if file read success, data is PDF content; if failed, data is 
"err" object + #PDFJS = null; //will be initialized in constructor + #processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + // constructor + constructor(context, needRawText, password) { + //call constructor for super class + super(); + + // private + // service context object, only used in Web Service project; null in command line + this.#context = context; + + this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache + this.#data = null; //if file read success, data is PDF content; if failed, data is "err" object + this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + this.#PDFJS = new PDFJSClass(needRawText); + this.#password = password; + } + + //private methods, needs to invoked by [funcName].call(this, ...) + #onPDFJSParseDataReady(data) { + if (!data) { //v1.1.2: data===null means end of parsed data + nodeUtil.p2jinfo("PDF parsing completed."); + this.emit("pdfParser_dataReady", this.#data); + } + else { + this.#data = {...this.#data, ...data}; + } + } + + #onPDFJSParserDataError(err) { + this.#data = null; + this.emit("pdfParser_dataError", {"parserError": err}); + // this.emit("error", err); + } + + #startParsingPDF(buffer) { + this.#data = {}; + + this.#PDFJS.on("pdfjs_parseDataReady", data => this.#onPDFJSParseDataReady(data)); + this.#PDFJS.on("pdfjs_parseDataError", err => this.#onPDFJSParserDataError(err)); + + //v1.3.0 the following Readable Stream-like events are replacement for the top two custom events + this.#PDFJS.on("readable", meta => this.emit("readable", meta)); + this.#PDFJS.on("data", data => this.emit("data", data)); + this.#PDFJS.on("error", err => this.#onPDFJSParserDataError(err)); + + this.#PDFJS.parsePDFData(buffer || PDFParser.#binBuffer[this.binBufferKey], 
this.#password); + } + + #processBinaryCache() { + if (this.binBufferKey in PDFParser.#binBuffer) { + this.#startParsingPDF(); + return true; + } + + const allKeys = Object.keys(PDFParser.#binBuffer); + if (allKeys.length > PDFParser.#maxBinBufferCount) { + const idx = this.id % PDFParser.#maxBinBufferCount; + const key = allKeys[idx]; + PDFParser.#binBuffer[key] = null; + delete PDFParser.#binBuffer[key]; + + nodeUtil.p2jinfo("re-cycled cache for " + key); + } + + return false; + } + + //public getter + get data() { return this.#data; } + get binBufferKey() { return this.#pdfFilePath + this.#pdfFileMTime; } + + //public APIs + createParserStream() { + return new ParserStream(this, {objectMode: true, bufferSize: 64 * 1024}); + } + + async loadPDF(pdfFilePath, verbosity) { + nodeUtil.verbosity(verbosity || 0); + nodeUtil.p2jinfo("about to load PDF file " + pdfFilePath); + + this.#pdfFilePath = pdfFilePath; + + try { + this.#pdfFileMTime = fs.statSync(pdfFilePath).mtimeMs; + if (this.#processFieldInfoXML) { + this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath); + } + + if (this.#processBinaryCache()) + return; + + PDFParser.#binBuffer[this.binBufferKey] = await promises.readFile(pdfFilePath); + nodeUtil.p2jinfo(`Load OK: ${pdfFilePath}`); + this.#startParsingPDF(); + } + catch(err) { + nodeUtil.p2jerror(`Load Failed: ${pdfFilePath} - ${err}`); + this.emit("pdfParser_dataError", err); + } + } + + // Introduce a way to directly process buffers without the need to write it to a temporary file + parseBuffer(pdfBuffer) { + this.#startParsingPDF(pdfBuffer); + } + + getRawTextContent() { return this.#PDFJS.getRawTextContent(); } + getRawTextContentStream() { return ParserStream.createContentStream(this.getRawTextContent()); } + + getAllFieldsTypes() { return this.#PDFJS.getAllFieldsTypes(); }; + getAllFieldsTypesStream() { return ParserStream.createContentStream(this.getAllFieldsTypes()); } + + getMergedTextBlocksIfNeeded() { return this.#PDFJS.getMergedTextBlocksIfNeeded(); } + 
getMergedTextBlocksStream() { return ParserStream.createContentStream(this.getMergedTextBlocksIfNeeded()) } + + destroy() { // invoked with stream transform process + super.removeAllListeners(); + + //context object will be set in Web Service project, but not in command line utility + if (this.#context) { + this.#context.destroy(); + this.#context = null; + } + + this.#pdfFilePath = null; + this.#pdfFileMTime = null; + this.#data = null; + this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + this.#PDFJS.destroy(); + this.#PDFJS = null; + } +} + +module.exports = PDFParser; diff --git a/pdfparser.mjs b/pdfparser.mjs new file mode 100644 index 00000000..d80caaf1 --- /dev/null +++ b/pdfparser.mjs @@ -0,0 +1,2196 @@ +import fs from 'fs'; +import nodeUtil from 'util'; +import { readFile } from 'fs/promises'; +import { EventEmitter } from 'events'; +import path from 'path'; +import { fileURLToPath } from 'url'; +import 'buffer'; +import { DOMParser } from '@xmldom/xmldom'; +import { Transform, Readable } from 'stream'; + +const kColors = [ + '#000000', // 0 + '#ffffff', // 1 + '#4c4c4c', // 2 + '#808080', // 3 + '#999999', // 4 + '#c0c0c0', // 5 + '#cccccc', // 6 + '#e5e5e5', // 7 + '#f2f2f2', // 8 + '#008000', // 9 + '#00ff00', // 10 + '#bfffa0', // 11 + '#ffd629', // 12 + '#ff99cc', // 13 + '#004080', // 14 + '#9fc0e1', // 15 + '#5580ff', // 16 + '#a9c9fa', // 17 + '#ff0080', // 18 + '#800080', // 19 + '#ffbfff', // 20 + '#e45b21', // 21 + '#ffbfaa', // 22 + '#008080', // 23 + '#ff0000', // 24 + '#fdc59f', // 25 + '#808000', // 26 + '#bfbf00', // 27 + '#824100', // 28 + '#007256', // 29 + '#008000', // 30 + '#000080', // Last + 1 + '#008080', // Last + 2 + '#800080', // Last + 3 + '#ff0000', // Last + 4 + '#0000ff', // Last + 5 + '#008000' // Last + 6 +]; + +const kFontFaces = [ + "quicktype,arial,helvetica,sans-serif", // 00 - QuickType - sans-serif variable font + "quicktype condensed,arial 
narrow,arial,helvetica,sans-serif", // 01 - QuickType Condensed - thin sans-serif variable font + "quicktypepi,quicktypeiipi", // 02 - QuickType Pi + "quicktype mono,courier new,courier,monospace", // 03 - QuickType Mono - san-serif fixed font + "ocr-a,courier new,courier,monospace", // 04 - OCR-A - OCR readable san-serif fixed font + "ocr b mt,courier new,courier,monospace" // 05 - OCR-B MT - OCR readable san-serif fixed font + ]; + + const kFontStyles = [ + // Face Size Bold Italic StyleID(Comment) + // ----- ---- ---- ----- ----------------- + [0, 6, 0, 0], //00 + [0, 8, 0, 0], //01 + [0, 10, 0, 0], //02 + [0, 12, 0, 0], //03 + [0, 14, 0, 0], //04 + [0, 18, 0, 0], //05 + [0, 6, 1, 0], //06 + [0, 8, 1, 0], //07 + [0, 10, 1, 0], //08 + [0, 12, 1, 0], //09 + [0, 14, 1, 0], //10 + [0, 18, 1, 0], //11 + [0, 6, 0, 1], //12 + [0, 8, 0, 1], //13 + [0, 10, 0, 1], //14 + [0, 12, 0, 1], //15 + [0, 14, 0, 1], //16 + [0, 18, 0, 1], //17 + [0, 6, 1, 1], //18 + [0, 8, 1, 1], //19 + [0, 10, 1, 1], //20 + [0, 12, 1, 1], //21 + [0, 14, 1, 1], //22 + [0, 18, 1, 1], //23 + [1, 6, 0, 0], //24 + [1, 8, 0, 0], //25 + [1, 10, 0, 0], //26 + [1, 12, 0, 0], //27 + [1, 14, 0, 0], //28 + [1, 18, 0, 0], //29 + [1, 6, 1, 0], //30 + [1, 8, 1, 0], //31 + [1, 10, 1, 0], //32 + [1, 12, 1, 0], //33 + [1, 14, 1, 0], //34 + [1, 18, 1, 0], //35 + [1, 6, 0, 1], //36 + [1, 8, 0, 1], //37 + [1, 10, 0, 1], //38 + [1, 12, 0, 1], //39 + [1, 14, 0, 1], //40 + [1, 18, 0, 1], //41 + [2, 8, 0, 0], //42 + [2, 10, 0, 0], //43 + [2, 12, 0, 0], //44 + [2, 14, 0, 0], //45 + [2, 18, 0, 0], //46 + [3, 8, 0, 0], //47 + [3, 10, 0, 0], //48 + [3, 12, 0, 0], //49 + [4, 12, 0, 0], //50 + [0, 9, 0, 0], //51 + [0, 9, 1, 0], //52 + [0, 9, 0, 1], //53 + [0, 9, 1, 1], //54 + [1, 9, 0, 0], //55 + [1, 9, 1, 0], //56 + [1, 9, 1, 1], //57 + [4, 10, 0, 0], //58 + [5, 10, 0, 0], //59 + [5, 12, 0, 0] //60 +]; + +const dpi = 96.0; +const gridXPerInch = 4.0; +const gridYPerInch = 4.0; + +const _pixelXPerGrid = dpi/gridXPerInch; +const 
/**
 * Static helpers for converting between viewport pixels, PDF points and
 * form grid units, for looking colors up in the module-level `kColors`
 * dictionary, and for normalizing PDF date strings.
 *
 * The conversion factors (`_pixelXPerGrid`, `_pixelYPerGrid`,
 * `_pixelPerPoint`) are module-level constants derived from `dpi` and the
 * grid-per-inch settings declared just above this class.
 */
class PDFUnit {
    /**
     * Rounds a number to at most three decimal places.
     * @param {number} fNum
     * @returns {number}
     */
    static toFixedFloat(fNum) {
        return parseFloat(fNum.toFixed(3));
    }

    /** @returns {number} number of entries in the color dictionary */
    static colorCount() {
        return kColors.length;
    }

    /** Converts a horizontal grid coordinate to whole pixels. */
    static toPixelX(formX) {
        return Math.round(formX * _pixelXPerGrid);
    }

    /** Converts a vertical grid coordinate to whole pixels. */
    static toPixelY(formY) {
        return Math.round(formY * _pixelYPerGrid);
    }

    /** Converts point units (1/72 of an inch) to pixel units. */
    static pointToPixel(point) {
        return point * _pixelPerPoint;
    }

    /** @returns {string|undefined} the dictionary color stored at `clrId` */
    static getColorByIndex(clrId) {
        return kColors[clrId];
    }

    /**
     * Converts a viewport point to grid units without rounding.
     * @returns {number[]} `[x, y]`
     */
    static toFormPoint(viewportX, viewportY) {
        return [(viewportX / _pixelXPerGrid), (viewportY / _pixelYPerGrid)];
    }

    /** Converts a horizontal viewport distance to grid units (3 decimals). */
    static toFormX(viewportX) {
        return PDFUnit.toFixedFloat(viewportX / _pixelXPerGrid);
    }

    /** Converts a vertical viewport distance to grid units (3 decimals). */
    static toFormY(viewportY) {
        return PDFUnit.toFixedFloat(viewportY / _pixelYPerGrid);
    }

    /**
     * Looks a color up in the color dictionary.
     *
     * A CSS shorthand hex color (`#rgb`) is expanded to `#rrggbb` before the
     * lookup. The previous code appended "000", which is only correct for
     * `#000` and turned e.g. `#fff` into `#fff000`.
     *
     * @param {string} color
     * @returns {number} dictionary index, or -1 when the color is not in the
     *   dictionary (the caller then stores the original color itself)
     */
    static findColorIndex(color) {
        if (/^#[0-9a-f]{3}$/i.test(color)) {
            color = '#' + [...color.slice(1)].map((digit) => digit + digit).join('');
        }
        //MQZ. 07/29/2013: if color is not in dictionary, just return -1. The caller (pdffont, pdffill) will set the actual color
        return kColors.indexOf(color);
    }

    /**
     * Converts a PDF date string (`D:YYYYMMDDHHmmSSOHH'mm'`) to ISO 8601.
     *
     * Everything after the year is optional and defaults to the first
     * month/day and to midnight. A missing time zone is emitted as `Z`.
     *
     * @param {string} date PDF date string; the `D:` prefix is optional
     * @returns {string} `YYYY-MM-DDTHH:mm:SS` followed by `Z` or `±HH:mm`
     */
    static dateToIso8601(date) {
        // PDF spec p.160
        if (date.slice(0, 2) === 'D:') { // D: prefix is optional
            date = date.slice(2);
        }
        let tz = 'Z';
        const idx = date.search(/[Z+-]/); // timezone is optional
        if (idx >= 0) {
            const rawTz = date.slice(idx);
            // "Z" may be followed by a redundant 00'00' offset: it is still UTC.
            if (rawTz[0] !== 'Z') { // timezone format OHH'mm'
                // The minutes part is optional; never emit a dangling ':'.
                tz = rawTz.slice(0, 3) + ':' + (rawTz.slice(4, 6) || '00');
            }
            date = date.slice(0, idx);
        }
        const yr = date.slice(0, 4); // everything after year is optional
        const mth = date.slice(4, 6) || '01';
        const day = date.slice(6, 8) || '01';
        const hr = date.slice(8, 10) || '00';
        const min = date.slice(10, 12) || '00';
        const sec = date.slice(12, 14) || '00';
        return yr + '-' + mth + '-' + day + 'T' + hr + ':' + min + ':' + sec + tz;
    }
}
{clr: clrId} : {oc: this.color}; + oneLine = {...oneLine, ...colorObj}; + + //MQZ Aug.29 dashed line support + if (this.dashed) { + oneLine = oneLine = {...oneLine, dsh: 1}; + } + + if ((yDelta < this.lineWidth) && (xDelta > minDelta)) { //HLine + if (this.lineWidth < 4 && (xDelta / this.lineWidth < 4)) { + nodeUtil.p2jinfo("Skipped: short thick HLine: lineWidth = " + this.lineWidth + ", xDelta = " + xDelta); + return; //skip short thick lines, like PA SPP lines behinds checkbox + } + + oneLine.l = PDFUnit.toFormX(xDelta); + if (this.x1 > this.x2) + this.#setStartPoint(oneLine, this.x2, this.y2); + else + this.#setStartPoint(oneLine, this.x1, this.y1); + targetData.HLines.push(oneLine); + } + else if ((xDelta < this.lineWidth) && (yDelta > minDelta)) {//VLine + if (this.lineWidth < 4 && (yDelta / this.lineWidth < 4)) { + nodeUtil.p2jinfo("Skipped: short thick VLine: lineWidth = " + this.lineWidth + ", yDelta = " + yDelta); + return; //skip short think lines, like PA SPP lines behinds checkbox + } + + oneLine.l = PDFUnit.toFormY(yDelta); + if (this.y1 > this.y2) + this.#setStartPoint(oneLine, this.x2, this.y2); + else + this.#setStartPoint(oneLine, this.x1, this.y1); + targetData.VLines.push(oneLine); + } + } +} + +class PDFFill{ + // constructor + constructor(x, y, width, height, color) { + this.x = x; + this.y = y; + this.width = width; + this.height = height; + this.color = color; + } + + processFill(targetData) { + //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) + const clrId = PDFUnit.findColorIndex(this.color); + const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? 
/**
 * Wraps a font object and turns drawn strings into text blocks that are
 * pushed onto `targetData.Texts`.
 *
 * Relies on module-level names defined outside this class: `kFontFaces`,
 * `kFontStyles`, `_boldSubNames`, `_stdFonts`, `DISTANCE_DELTA`, `PDFUnit`
 * and `nodeUtil`.
 */
class PDFFont {
    // Percent-encoded sequences rewritten by flash_encode(), applied in order.
    static #flashReplacements = [
        ["%C2%96", "-"],
        ["%C2%91", "%27"],
        ["%C2%92", "%27"],
        ["%C2%82", "%27"],
        ["%C2%93", "%22"],
        ["%C2%94", "%22"],
        ["%C2%84", "%22"],
        ["%C2%8B", "%C2%AB"],
        ["%C2%9B", "%C2%BB"]
    ];

    /** Lower-cased font name, falling back to the default font family. */
    #initTypeName() {
        const typeName = this.fontObj.name || this.fontObj.fallbackName || kFontFaces[0]; //default font family name
        return typeName.toLowerCase();
    }

    /**
     * Splits a name of the form "prefix+family-variant".
     * `subType` is always a string: the family part when a "+" is present,
     * otherwise the whole type name. (It used to be left as an array when the
     * name had a "+" but no "-".)
     * @returns {{subType: string, bold: boolean}}
     */
    #initSubType() {
        let subType = this.typeName;
        let bold = false;

        const [, suffix] = this.typeName.split('+');
        if (suffix !== undefined) {
            const [family, variant] = suffix.split("-");
            subType = family;
            if (variant !== undefined) {
                bold = _boldSubNames.includes(variant.toLowerCase());
            }
        }
        return {subType, bold};
    }

    /**
     * Decides whether the font is treated as a symbol font and reconciles
     * `fontObj.isSymbolicFont` with that decision.
     * NOTE(review): `indexOf("symbol") > 0` does not match a name that
     * *starts* with "symbol" - kept as is, confirm whether that is intended.
     */
    #initSymbol() {
        const isSymbol = this.typeName.indexOf("symbol") > 0 || kFontFaces[2].indexOf(this.subType) >= 0;
        if (this.fontObj.isSymbolicFont) {
            const isStdFont = _stdFonts.some((oneName) => this.typeName.indexOf(oneName) >= 0);
            if (isStdFont) {
                this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) reset the flag for now
                nodeUtil.p2jinfo("Reset: isSymbolicFont (false) for " + this.fontObj.name);
            }
        }
        else if (isSymbol) {
            this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c
            nodeUtil.p2jinfo("Reset: isSymbolicFont (true) for " + this.fontObj.name);
        }
        return isSymbol;
    }

    /**
     * Width of a space, used later to merge adjacent text blocks.
     * Uses `fontObj.spaceWidth`, else the width of char code 32, else 250.
     */
    #initSpaceWidth() {
        let spaceWidth = this.fontObj.spaceWidth;
        if (!spaceWidth) {
            const {toFontChar, widths} = this.fontObj;
            const spaceId = Array.isArray(toFontChar) ? toFontChar.indexOf(32) : -1;
            spaceWidth = (spaceId >= 0 && Array.isArray(widths)) ? widths[spaceId] : 250;
            // A missing entry in the widths table would otherwise propagate NaN.
            if (!Number.isFinite(spaceWidth)) {
                spaceWidth = 250;
            }
        }
        return PDFUnit.toFormX(spaceWidth) / 32;
    }

    /**
     * @param {object} fontObj font description; reads `name`, `fallbackName`,
     *   `spaceWidth`, `toFontChar`, `widths`, `bold`, `italic`, `type` and the
     *   `isSerifFont` / `isMonospace` / `isSymbolicFont` flags.
     *   `isSymbolicFont` may be overwritten.
     */
    constructor(fontObj) {
        this.fontObj = fontObj;

        this.typeName = this.#initTypeName();

        const {subType, bold} = this.#initSubType();
        this.subType = subType;
        this.bold = bold;

        this.isSymbol = this.#initSymbol();
        this.spaceWidth = this.#initSpaceWidth();

        this.fontSize = 1;
        this.faceIdx = 0;
        this.italic = false;
        this.fontStyleId = -1;
    }

    /** sort text blocks by y then x, treating values within DISTANCE_DELTA as equal */
    static compareBlockPos(t1, t2) {
        if (t1.y < t2.y - DISTANCE_DELTA) {
            return -1;
        }
        if (Math.abs(t1.y - t2.y) <= DISTANCE_DELTA) {
            if (t1.x < t2.x - DISTANCE_DELTA) {
                return -1;
            }
            if (Math.abs(t1.x - t2.x) <= DISTANCE_DELTA) {
                return 0;
            }
        }
        return 1;
    }

    /**
     * True when the first runs of both blocks share a style and neither is
     * rotated. For styles outside the dictionary (S < 0) the TS arrays are
     * compared element by element.
     */
    static haveSameStyle(t1, t2) {
        const r1 = t1.R[0];
        const r2 = t2.R[0];
        if (r1.S !== r2.S) {
            return false;
        }
        if (r1.S < 0 && !r1.TS.every((value, i) => value === r2.TS[i])) {
            return false;
        }
        // make sure both block are not rotated
        return (typeof r1.RA === 'undefined') && (typeof r2.RA === 'undefined');
    }

    /** Largest gap after `t1` that still counts as "no space in between". */
    static getSpaceThreshHold(t1) {
        return (PDFFont.getFontSize(t1) / 12) * t1.sw;
    }

    /** True when `t2` sits on the same line as `t1`, less than a space away. */
    static areAdjacentBlocks(t1, t2) {
        const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA;
        const isDistanceSmallerThanASpace = ((t2.x - t1.x - t1.w) < PDFFont.getSpaceThreshHold(t1));

        return isInSameLine && isDistanceSmallerThanASpace;
    }

    /** Font size of a text block, from its TS array or the style dictionary. */
    static getFontSize(textBlock) {
        const sId = textBlock.R[0].S;
        return (sId < 0) ? textBlock.R[0].TS[1] : kFontStyles[sId][1];
    }

    /** True when both blocks have the same position, text and style. */
    static areDuplicateBlocks(t1, t2) {
        return t1.x === t2.x && t1.y === t2.y && t1.R[0].T === t2.R[0].T && PDFFont.haveSameStyle(t1, t2);
    }

    /**
     * Derives bold / italic / faceIdx from the font object and its name.
     * NOTE(review): `this.bold` is re-derived from `fontObj.bold` here, which
     * discards the value the constructor computed from the name's variant
     * part ("bd", "demi", ...) - confirm that this is intended.
     */
    #setFaceIndex() {
        const fontObj = this.fontObj;

        this.bold = fontObj.bold;
        if (!this.bold) {
            this.bold = this.typeName.indexOf("bold") >= 0 || this.typeName.indexOf("black") >= 0;
        }
        this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42
        // Extended the fix for https://github.com/modesty/pdf2json/issues/42
        if (!this.italic) {
            this.italic = this.typeName.indexOf("italic") >= 0 || this.typeName.indexOf("oblique") >= 0;
        }
        // Added detection of hybrid dual bolditalic fonts
        if (((!this.bold) || (!this.italic)) && (this.typeName.indexOf("boldobl") >= 0)) {
            this.bold = true;
            this.italic = true;
        }

        const typeName = this.subType;
        if (fontObj.isSerifFont) {
            if (kFontFaces[1].indexOf(typeName) >= 0) {
                this.faceIdx = 1;
            }
        }
        else if (kFontFaces[2].indexOf(typeName) >= 0) {
            this.faceIdx = 2;
        }
        else if (fontObj.isMonospace) {
            this.faceIdx = 3;

            if (kFontFaces[4].indexOf(typeName) >= 0) {
                this.faceIdx = 4;
            }
            else if (kFontFaces[5].indexOf(typeName) >= 0) {
                this.faceIdx = 5;
            }
        }
        else if (fontObj.isSymbolicFont) {
            this.faceIdx = 2;
        }

        if (this.faceIdx === 0 && this.typeName.indexOf("narrow") > 0) {
            this.faceIdx = 1;
        }
    }

    /**
     * Updates face / size / bold / italic and looks the combination up in
     * `kFontStyles`.
     * @returns {number} index of the first matching style, or -1
     */
    #getFontStyleIndex(fontSize) {
        this.#setFaceIndex();

        //MQZ Feb.28.2013. Adjust bold text fontsize to work around word spacing issue
        this.fontSize = (this.bold && (fontSize > 12)) ? fontSize + 1 : fontSize;

        const fsa = [this.faceIdx, this.fontSize, this.bold ? 1 : 0, this.italic ? 1 : 0];
        return kFontStyles.findIndex((style) => fsa.every((value, i) => style[i] === value));
    }

    /**
     * Maps single characters drawn with a symbol font to Unicode symbols.
     * Strings that are empty or longer than one character are returned as is.
     */
    #processSymbolicFont(str) {
        if (!str || str.length !== 1) {
            return str;
        }

        if (!this.fontObj.isSymbolicFont || !this.isSymbol) {
            if (str === "C" || str === "G") { //prevent symbolic encoding from the client
                return " " + str + " "; //sample: va_ind_760c
            }
            return str;
        }

        let retVal = str;
        switch (str.charCodeAt(0)) {
            case 20: retVal = '\u2713'; break; //check mark
            case 70: retVal = (this.fontObj.type === "CIDFontType0") ? '\u26A0' : '\u007D'; break; //exclamation in triangle OR right curly bracket
            case 71: retVal = '\u25b6'; break; //right triangle
            case 97: retVal = '\u25b6'; break; //right triangle
            case 99: retVal = '\u2022'; break; //Bullet dot for VA SchSCR (isSymbol is always true on this path)
            case 100: retVal = '\u25bc'; break; //down triangle
            case 103: retVal = '\u27A8'; break; //right arrow. sample: va_ind_760pff and pmt
            case 106: retVal = ''; break; //VA 301: string j character by the checkbox, hide it for now
            case 114: retVal = '\u2022'; break; //Bullet dot
            case 115: retVal = '\u25b2'; break; //up triangle
            case 116: retVal = '\u2022'; break; //Bullet dot
            case 118: retVal = '\u2022'; break; //Bullet dot
            default:
                nodeUtil.p2jinfo(this.fontObj.type + " - SymbolicFont - (" + this.fontObj.name + ") : "
                    + str.charCodeAt(0) + "::" + str.charCodeAt(1) + " => " + retVal);
        }

        return retVal;
    }

    /**
     * Rotation angle in degrees derived from the upper-left 2x2 part of the
     * transform matrix; 0 when no rotation is detected.
     */
    #textRotationAngle(matrix2D) {
        let retVal = 0;
        if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) {
            if (matrix2D[0][1] !== 0 && matrix2D[1][0] !== 0) {
                if ((matrix2D[0][1] / matrix2D[1][0]) + 1 < 0.0001) {
                    retVal = 90;
                }
            }
        }
        else if (matrix2D[0][0] !== 0 && matrix2D[1][1] !== 0) {
            const r1 = Math.atan(-matrix2D[0][1] / matrix2D[0][0]);
            const r2 = Math.atan(matrix2D[1][0] / matrix2D[1][1]);
            if (Math.abs(r1) > 0.0001 && (r1 - r2 < 0.0001)) {
                retVal = r1 * 180 / Math.PI;
            }
        }
        return retVal;
    }

    // public instance methods
    /**
     * Builds one text block for `str` and appends it to `targetData.Texts`.
     * Nothing is appended when the (symbol-mapped) text is empty.
     *
     * @param {{x: number, y: number}} p position in viewport coordinates
     * @param {string} str text to add
     * @param {number} maxWidth stored (rounded) as the block width `w`
     * @param {string} color looked up in the color dictionary; stored as
     *   `clr` (index) when found, otherwise as `oc` (original color)
     * @param {number} fontSize
     * @param {{Texts: object[]}} targetData
     * @param {number[][]} matrix2D current transform, used to detect rotation
     */
    processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) {
        const text = this.#processSymbolicFont(str);
        if (!text) {
            return;
        }
        this.fontStyleId = this.#getFontStyleIndex(fontSize);

        // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary
        // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic];
        const TS = [this.faceIdx, this.fontSize, this.bold ? 1 : 0, this.italic ? 1 : 0];

        const clrId = PDFUnit.findColorIndex(color);
        const colorObj = (clrId >= 0 && clrId < PDFUnit.colorCount()) ? {clr: clrId} : {oc: color};

        let textRun = {
            T: this.flash_encode(text),
            S: this.fontStyleId,
            TS: TS
        };
        const rAngle = this.#textRotationAngle(matrix2D);
        if (rAngle !== 0) {
            nodeUtil.p2jinfo(str + ": rotated " + rAngle + " degree.");
            textRun = {...textRun, RA: rAngle};
        }

        const oneText = {
            x: PDFUnit.toFormX(p.x) - 0.25,
            y: PDFUnit.toFormY(p.y) - 0.75,
            w: PDFUnit.toFixedFloat(maxWidth),
            ...colorObj, //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc)
            sw: this.spaceWidth, //font space width, use to merge adjacent text blocks
            A: "left",
            R: [textRun]
        };

        targetData.Texts.push(oneText);
    }

    /**
     * URI-encodes `str` and rewrites a fixed set of encoded C1 control
     * characters (U+0082..U+009B) to their encoded punctuation equivalents.
     *
     * Every occurrence is rewritten. The previous implementation used
     * `String.prototype.replace` with string patterns, which only touches the
     * first match of each sequence.
     *
     * @param {string} str
     * @returns {string} percent-encoded text
     */
    flash_encode(str) {
        return PDFFont.#flashReplacements.reduce(
            (encoded, [from, to]) => encoded.replaceAll(from, to),
            encodeURIComponent(str)
        );
    }

    /** Drops the reference to the wrapped font object. */
    clean() {
        this.fontObj = null;
        delete this.fontObj;
    }
}
/**
 * Normalizes a canvas style string into a color plus an alpha value.
 *
 * `rgb(...)` / `rgba(...)` strings are converted to `#rrggbb`. Any other
 * value is returned unchanged (after conversion to a string).
 *
 * Each channel is rounded and clamped to 0..255, and percentages are scaled,
 * so that out-of-range or fractional input can no longer produce
 * "#undefined..." colors. Alpha is always returned as a number.
 *
 * @param {*} styleString style value; coerced with `String()`
 * @returns {{color: string, alpha: number}}
 */
function processStyle(styleString) {
    const style = String(styleString);
    if (!style.startsWith('rgb')) {
        return {color: style, alpha: 1};
    }

    // Converts one textual channel ("255", " 12.6", "50%") to two hex digits.
    const channelToHex = (raw) => {
        const text = String(raw).trim();
        const value = text.endsWith('%') ? (parseFloat(text) * 255) / 100 : Number(text);
        const clamped = Number.isFinite(value) ? Math.min(255, Math.max(0, Math.round(value))) : 0;
        return clamped.toString(16).padStart(2, '0');
    };

    const open = style.indexOf('(', 3);
    const close = style.indexOf(')', open + 1);
    const parts = style.substring(open + 1, close < 0 ? style.length : close).split(',');

    let color = '#';
    for (let i = 0; i < 3; i++) {
        color += channelToHex(parts[i]);
    }

    let alpha = 1;
    if (parts.length === 4 && style[3] === 'a') {
        const parsedAlpha = Number(parts[3]);
        if (Number.isFinite(parsedAlpha)) {
            alpha = parsedAlpha;
        }
    }

    return {color, alpha};
}
// Gradient / Pattern Stubs
/**
 * Plain data holder for a gradient: its type tag, the two end points /
 * radii (all starting at 0) and the list of color stops added so far.
 */
class CanvasGradient_ {
    constructor(aType) {
        Object.assign(this, {
            type_: aType,
            x0_: 0,
            y0_: 0,
            r0_: 0,
            x1_: 0,
            y1_: 0,
            r1_: 0,
            colors_: []
        });
    }

    /**
     * Records a color stop; the color is normalized through processStyle().
     */
    addColorStop(aOffset, aColor) {
        const {color, alpha} = processStyle(aColor);
        this.colors_.push({offset: aOffset, color, alpha});
    }
}
pL.processLine(this.canvas); + } + + #drawPDFFill(cp, min, max, color) { + let width = max.x - min.x; + let height = max.y - min.y; + let pF = new PDFFill(cp.x, cp.y, width, height, color); + pF.processFill(this.canvas); + } + + #needRemoveRect(x, y, w, h) { + let retVal = (Math.abs(w - Math.abs(h)) < 1 && w < 13); + if (retVal) { + nodeUtil.p2jinfo("Skipped: tiny rect: w=" + w + ", h=" + h); + } + return retVal; + } + + getContext(ctxType) { + return (ctxType === "2d") ? this : null; + } + + setLineDash(lineDash) { + this.dashArray = lineDash; + } + + getLineDash() { + return this.dashArray; + } + + fillText(text, x, y, maxWidth, fontSize) { + if (!text || text.trim().length < 1) + return; + let p = this.getCoords_(x, y); + + let a = processStyle(this.fillStyle || this.strokeStyle); + let color = (!!a) ? a.color : '#000000'; + + this.currentFont.processText(p, text, maxWidth, color, fontSize, this.canvas, this.m_); + }; + + strokeText(text, x, y, maxWidth) { + //MQZ. 10/23/2012, yeah, no hollow text for now + this.fillText(text, x, y, maxWidth); + } + + measureText(text) { + console.warn("to be implemented: contextPrototype.measureText - ", text); + let chars = text.length || 1; + return {width: chars * (this.currentFont.spaceWidth || 5)}; + } + + setFont(fontObj) { + if ((!!this.currentFont) && typeof(this.currentFont.clean) === "function") { + this.currentFont.clean(); + this.currentFont = null; + } + + this.currentFont = new PDFFont(fontObj); + } + + clearRect() { + console.warn("to be implemented: contextPrototype.clearRect"); + } + + beginPath() { + // TODO: Branch current matrix so that save/restore has no effect + // as per safari docs. 
+ this.currentPath_ = []; + } + + moveTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({type:'moveTo', x:p.x, y:p.y}); + this.currentX_ = p.x; + this.currentY_ = p.y; + } + + lineTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({type:'lineTo', x:p.x, y:p.y}); + + this.currentX_ = p.x; + this.currentY_ = p.y; + } + + bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { + let p = this.getCoords_(aX, aY); + let cp1 = this.getCoords_(aCP1x, aCP1y); + let cp2 = this.getCoords_(aCP2x, aCP2y); + bezierCurveToHelper(this, cp1, cp2, p); + } + + quadraticCurveTo(aCPx, aCPy, aX, aY) { + // the following is lifted almost directly from + // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes + + let cp = this.getCoords_(aCPx, aCPy); + let p = this.getCoords_(aX, aY); + + let cp1 = { + x:this.currentX_ + 2.0 / 3.0 * (cp.x - this.currentX_), + y:this.currentY_ + 2.0 / 3.0 * (cp.y - this.currentY_) + }; + let cp2 = { + x:cp1.x + (p.x - this.currentX_) / 3.0, + y:cp1.y + (p.y - this.currentY_) / 3.0 + }; + + bezierCurveToHelper(this, cp1, cp2, p); + } + + arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { + let arcType = aClockwise ? 'at' : 'wa'; + + let xStart = aX + mc(aStartAngle) * aRadius; + let yStart = aY + ms(aStartAngle) * aRadius; + + let xEnd = aX + mc(aEndAngle) * aRadius; + let yEnd = aY + ms(aEndAngle) * aRadius; + + // IE won't render arches drawn counter clockwise if xStart == xEnd. + if (xStart == xEnd && !aClockwise) { + xStart += 0.125; // Offset xStart by 1/80 of a pixel. 
Use something + // that can be represented in binary + } + + let p = this.getCoords_(aX, aY); + let pStart = this.getCoords_(xStart, yStart); + let pEnd = this.getCoords_(xEnd, yEnd); + + this.currentPath_.push({type:arcType, + x:p.x, + y:p.y, + radius:aRadius, + xStart:pStart.x, + yStart:pStart.y, + xEnd:pEnd.x, + yEnd:pEnd.y}); + } + + rect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return;//try to remove the rectangle behind radio buttons and checkboxes + } + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + } + + strokeRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return;//try to remove the rectangle behind radio buttons and checkboxes + } + + let oldPath = this.currentPath_; + this.beginPath(); + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.stroke(); + + this.currentPath_ = oldPath; + } + + fillRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return;//try to remove the rectangle behind radio buttons and checkboxes + } + + let oldPath = this.currentPath_; + this.beginPath(); + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.fill(); + + this.currentPath_ = oldPath; + } + + createLinearGradient(aX0, aY0, aX1, aY1) { + let gradient = new CanvasGradient_('gradient'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + return gradient; + } + + createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { + let gradient = new CanvasGradient_('gradientradial'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.r0_ = aR0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + gradient.r1_ = aR1; + return 
gradient; + } + + drawImage(image, var_args) { + //MQZ. no image drawing support for now + } + + getImageData(x, y, w, h) { + //MQZ. returns empty data buffer for now + return { + width:w, + height:h, + data:new Uint8Array(w * h * 4) + }; + } + + stroke(aFill) { + if (this.currentPath_.length < 2) { + return; + } + + let a = processStyle(aFill ? this.fillStyle : this.strokeStyle); + let color = a.color; +// let opacity = a.alpha * this.globalAlpha; + let lineWidth = this.lineScale_ * this.lineWidth; + + let min = {x:null, y:null}; + let max = {x:null, y:null}; + + for (let i = 0; i < this.currentPath_.length; i++) { + let p = this.currentPath_[i]; + + switch (p.type) { + case 'moveTo': + break; + case 'lineTo': + if (!aFill) { //lines + if (i > 0) { + this.#drawPDFLine(this.currentPath_[i-1], p, lineWidth, color); + } + } + break; + case 'close': + if (!aFill) { //lines + if (i > 0) { + this.#drawPDFLine(this.currentPath_[i-1], this.currentPath_[0], lineWidth, color); + } + } + p = null; + break; + } + + // Figure out dimensions so we can set fills' coordinates correctly + if (aFill && p) { + if (min.x == null || p.x < min.x) { + min.x = p.x; + } + if (max.x == null || p.x > max.x) { + max.x = p.x; + } + if (min.y == null || p.y < min.y) { + min.y = p.y; + } + if (max.y == null || p.y > max.y) { + max.y = p.y; + } + } + } + + if (aFill) { //fill + this.#drawPDFFill(min, min, max, color); + } + } + + fill() { + this.stroke(true); + } + + closePath() { + this.currentPath_.push({type:'close'}); + } + + /** + * @private + */ + getCoords_ (aX, aY) { + let m = this.m_; + return { + x: (aX * m[0][0] + aY * m[1][0] + m[2][0]), + y: (aX * m[0][1] + aY * m[1][1] + m[2][1]) + }; + } + + save() { + let o = {}; + copyState(this, o); + this.aStack_.push(o); + this.mStack_.push(this.m_); + this.m_ = matrixMultiply(createMatrixIdentity(), this.m_); + } + + restore() { + copyState(this.aStack_.pop(), this); + this.m_ = this.mStack_.pop(); + } + + translate(aX, aY) { + let m1 = [ + 
/**
 * Converts one form-field annotation into an output record and appends it to
 * the `Fields` or `Boxsets` array it was constructed with.
 *
 * Relies on module-level names defined outside this class:
 * `kFBANotOverridable`, `kFBARequired`, `kMinHeight`, `PDFUnit`, `nodeUtil`.
 */
class PDFField {
    // Running counter assigned to each processed field as its TI value.
    static tabIndex = 0;

    /**
     * Checks whether the widget's field type is supported.
     * Side effect: a 'Btn' field is re-tagged as 'Rd' (flag 32768),
     * 'Btn' (flag 65536) or 'Cb' (neither flag).
     * @returns {boolean}
     */
    static isWidgetSupported(field) {
        let retVal = false;

        switch (field.fieldType) {
            case 'Tx': retVal = true; break; //text input
            case 'Btn':
                if (field.fieldFlags & 32768) {
                    field.fieldType = 'Rd'; //radio button
                }
                else if (field.fieldFlags & 65536) {
                    field.fieldType = 'Btn'; //push button
                }
                else {
                    field.fieldType = 'Cb'; //checkbox
                }
                retVal = true;
                break;
            case 'Ch': retVal = true; break; //drop down
            case 'Sig': retVal = true; break; //signature
            default:
                nodeUtil.p2jwarn("Unsupported: field.fieldType of " + field.fieldType);
                break;
        }

        return retVal;
    }

    /** True for 'Widget' annotations whose field type is supported. */
    static isFormElement(field) {
        let retVal = false;
        switch (field.subtype) {
            case 'Widget': retVal = PDFField.isWidgetSupported(field); break;
            default:
                nodeUtil.p2jwarn("Unsupported: field.type of " + field.subtype);
                break;
        }
        return retVal;
    }

    /**
     * @param {object} field annotation to convert
     * @param {object} viewport must provide convertToViewportRectangle()
     * @param {object[]} Fields receives text / link / select / signature records
     * @param {object[]} Boxsets receives checkbox and radio-button records
     */
    constructor(field, viewport, Fields, Boxsets) {
        this.field = field;
        this.viewport = viewport;
        this.Fields = Fields;
        this.Boxsets = Boxsets;
    }

    // Normalize rectangle rect=[x1, y1, x2, y2] so that (x1,y1) < (x2,y2)
    // For coordinate systems whose origin lies in the bottom-left, this
    // means normalization to (BL,TR) ordering. For systems with origin in the
    // top-left, this means (TL,BR) ordering.
    static #normalizeRect(rect) {
        const r = rect.slice(0); // clone rect
        if (rect[0] > rect[2]) {
            r[0] = rect[2];
            r[2] = rect[0];
        }
        if (rect[1] > rect[3]) {
            r[1] = rect[3];
            r[3] = rect[1];
        }
        return r;
    }

    /**
     * Position and size of the field in form units. Heights below
     * `kMinHeight` are raised to `kMinHeight`.
     * @returns {{x: number, y: number, w: number, h: number}}
     */
    #getFieldPosition(field) {
        const fieldRect = this.viewport.convertToViewportRectangle(field.rect);
        const rect = PDFField.#normalizeRect(fieldRect);

        let height = rect[3] - rect[1];
        if (field.fieldType === 'Tx') {
            if (height > kMinHeight + 2) {
                rect[1] += 2;
                height -= 2;
            }
        }
        else if (field.fieldType !== 'Ch') { //checkbox, radio button, and link button
            rect[1] -= 3;
        }

        height = (height >= kMinHeight) ? height : kMinHeight;

        return {
            x: PDFUnit.toFormX(rect[0]),
            y: PDFUnit.toFormY(rect[1]),
            w: PDFUnit.toFormX(rect[2] - rect[0]),
            h: PDFUnit.toFormY(height)
        };
    }

    /** Properties shared by every record: id, TI, AM, optional TU / TM, position. */
    #getFieldBaseData(field) {
        let attributeMask = 0;
        //PDF Spec p.676 TABLE 8.70 Field flags common to all field types
        if (field.fieldFlags & 0x00000001) {
            attributeMask |= kFBANotOverridable;
        }
        if (field.fieldFlags & 0x00000002) {
            attributeMask |= kFBARequired;
        }

        const anData = {
            id: { Id: field.fullName, EN: 0},
            TI: field.TI,
            AM: attributeMask
        };
        //PDF Spec p.675: add TU (AlternativeText) fields to provide accessibility info
        if (field.alternativeText && field.alternativeText.length > 1) {
            anData.TU = field.alternativeText;
        }

        if (field.alternativeID && field.alternativeID.length > 1) {
            anData.TM = field.alternativeID;
        }

        return Object.assign(anData, this.#getFieldPosition(field));
    }

    /** Text input ('Tx'). */
    #addAlpha(field) {
        const anData = Object.assign({
            style: 48,
            T: {
                Name: field.TName || "alpha",
                TypeInfo: {}
            }
        }, this.#getFieldBaseData(field));

        if (field.MV) { //field attributes: arbitrary mask value
            anData.MV = field.MV;
        }
        if (field.fieldValue) {
            anData.V = field.fieldValue; //read-only field value, like "self-prepared"
        }

        this.Fields.push(anData);
    }

    /** Checkbox ('Cb'): pushed as its own single-box boxset. */
    #addCheckBox(box) {
        const anData = Object.assign({
            style: 48,
            T: {
                Name: "box",
                TypeInfo: {}
            }
        }, this.#getFieldBaseData(box));
        if (box.fieldValue) {
            anData.checked = box.fieldValue !== 'Off';
        }

        this.Boxsets.push({boxes: [anData]});
    }

    /** Radio button ('Rd'): appended to the boxset whose id is the field's full name. */
    #addRadioButton(box) {
        const anData = Object.assign({
            style: 48,
            T: {
                Name: "box",
                TypeInfo: {}
            }
        }, this.#getFieldBaseData(box));

        anData.id.Id = box.value;
        if ('checked' in box) {
            anData.checked = box.checked;
        }

        const rdGroup = this.Boxsets.find(boxset => ('id' in boxset) && ('Id' in boxset.id) && (boxset.id.Id === box.fullName));
        if ((!!rdGroup) && ('boxes' in rdGroup)) {
            rdGroup.boxes.push(anData);
        }
        else {
            this.Boxsets.push({boxes: [anData], id: { Id: box.fullName, EN: 0}});
        }
    }

    /** Push button ('Btn'). */
    #addLinkButton(field) {
        const anData = Object.assign({
            style: 48,
            T: {
                Name: "link"
            },
            FL: {
                form: {Id: field.FL}
            }
        }, this.#getFieldBaseData(field));

        this.Fields.push(anData);
    }

    /**
     * Choice field ('Ch'). Each option is either a plain value or a pair
     * whose first element goes to PL.D and second to PL.V.
     */
    #addSelect(field) {
        const anData = Object.assign({
            style: 48,
            T: {
                Name: "alpha",
                TypeInfo: {}
            }
        }, this.#getFieldBaseData(field));

        anData.w -= 0.5; //adjust combobox width
        anData.PL = {V: [], D: []};
        // A choice field without an options array used to throw here; it now
        // yields an empty pick list instead.
        const options = Array.isArray(field.value) ? field.value : [];
        options.forEach((ele) => {
            if (Array.isArray(ele)) {
                anData.PL.D.push(ele[0]);
                anData.PL.V.push(ele[1]);
            } else {
                anData.PL.D.push(ele);
                anData.PL.V.push(ele);
            }
        });

        // add field value to the object
        if (field.fieldValue) {
            anData.V = field.fieldValue;
        }
        this.Fields.push(anData);
    }

    /** Signature field ('Sig'); the signing date M is converted to ISO 8601. */
    #addSignature(field) {
        const anData = Object.assign({
            style: 48,
            T: {
                Name: "signature",
                TypeInfo: {}
            }
        }, this.#getFieldBaseData(field));

        if (field.Sig) {
            anData.Sig = {};
            if (field.Sig.Name) anData.Sig.Name = field.Sig.Name;
            if (field.Sig.M) anData.Sig.M = PDFUnit.dateToIso8601(field.Sig.M);
            if (field.Sig.Location) anData.Sig.Location = field.Sig.Location;
            if (field.Sig.Reason) anData.Sig.Reason = field.Sig.Reason;
            if (field.Sig.ContactInfo) anData.Sig.ContactInfo = field.Sig.ContactInfo;
        }

        this.Fields.push(anData);
    }

    // public instance methods
    /**
     * Assigns the next tab index to the field, emits the record that matches
     * its field type, then releases all references held by this instance.
     */
    processField() {
        this.field.TI = PDFField.tabIndex++;

        switch (this.field.fieldType) {
            case 'Tx': this.#addAlpha(this.field); break;
            case 'Cb': this.#addCheckBox(this.field); break;
            case 'Rd': this.#addRadioButton(this.field); break;
            case 'Btn': this.#addLinkButton(this.field); break;
            case 'Ch': this.#addSelect(this.field); break;
            case 'Sig': this.#addSignature(this.field); break;
        }

        this.clean();
    }

    /** Removes the references passed to the constructor. */
    clean() {
        delete this.field;
        delete this.viewport;
        delete this.Fields;
        delete this.Boxsets;
    }

    //static public method to generate fieldsType object based on parser result
    /**
     * Flattens every page's Boxsets and Fields into `{id, type, calc, value}`
     * entries.
     * NOTE(review): a boxset counts as a radio group only when it holds more
     * than one box, so a radio group with a single option is reported through
     * the checkbox branch - confirm whether that is intended.
     * @param {{Pages: Array<{Boxsets: object[], Fields: object[]}>}} data
     * @returns {Array<{id: *, type: string, calc: boolean, value: *}>}
     */
    static getAllFieldsTypes(data) {
        const isFieldReadOnly = field => {
            return (field.AM & kFBANotOverridable) ? true : false;
        };

        const getFieldBase = field => {
            return {id: field.id.Id, type: field.T.Name, calc: isFieldReadOnly(field), value: field.V || ""};
        };

        const retVal = [];
        data.Pages.forEach(page => {
            page.Boxsets.forEach(boxsets => {
                if (boxsets.boxes.length > 1) { //radio button
                    boxsets.boxes.forEach(box => {
                        retVal.push({id: boxsets.id.Id, type: "radio", calc: isFieldReadOnly(box), value: box.id.Id});
                    });
                }
                else { //checkbox
                    retVal.push(getFieldBase(boxsets.boxes[0]));
                }
            });

            page.Fields.forEach(field => retVal.push(getFieldBase(field)));
        });
        return retVal;
    }
}
oneField.processField(); + } + + // constructor + constructor(pdfPage, id, scale, ptiParser) { + // public, this instance copies + this.id = id; + this.pdfPage = pdfPage; + this.ptiParser = ptiParser; + + this.scale = scale || 1.0; + + //leave out the 2nd parameter in order to use page's default rotation (for both portrait and landscape form) + this.viewport = this.pdfPage.getViewport(this.scale); + + this.renderingState = PDFPageParser.RenderingStates.INITIAL; + + //form elements other than radio buttons and check boxes + this.Fields = []; + //form elements: radio buttons and check boxes + this.Boxsets = []; + this.ctxCanvas = {}; + } + + get width() { return PDFUnit.toFormX(this.viewport.width); } + get height() { return PDFUnit.toFormY(this.viewport.height); } + get HLines() { return this.ctxCanvas.HLines; } + get VLines() { return this.ctxCanvas.VLines; } + get Fills() { return this.ctxCanvas.Fills; } + get Texts() { return this.ctxCanvas.Texts; } + + destroy() { + this.pdfPage.destroy(); + this.pdfPage = null; + + this.ptiParser = null; + this.Fields = null; + this.Boxsets = null; + this.ctxCanvas = null; + } + + getPagePoint(x, y) { + return this.viewport.convertToPdfPoint(x, y); + } + + parsePage(callback, errorCallBack) { + if (this.renderingState !== PDFPageParser.RenderingStates.INITIAL) { + errorCallBack('Must be in new state before drawing'); + return; + } + + this.renderingState = PDFPageParser.RenderingStates.RUNNING; + + const canvas = createScratchCanvas(1, 1); + const ctx = canvas.getContext('2d'); + + function pageViewDrawCallback(error) { + this.renderingState = PDFPageParser.RenderingStates.FINISHED; + + if (error) { + console.error(error); + errorCallBack(`Error: Page ${this.id + 1}: ${error.message}`); + } + else { + if (this.ptiParser) { + const extraFields = this.ptiParser.getFields(parseInt(this.id) + 1); + extraFields.forEach( field => this.#_addField(field) ); + } + + this.ctxCanvas = ctx.canvas; + this.stats = this.pdfPage.stats; + + 
nodeUtil.p2jinfo(`Success: Page ${this.id + 1}`); + callback(); + } + } + + const renderContext = { + canvasContext:ctx, + viewport:this.viewport + }; + + this.pdfPage.render(renderContext).then( + data => { + this.pdfPage.getAnnotations().then( + fields => { + fields.forEach(field => this.#_addField(field)); + pageViewDrawCallback.call(this, null); + }, + err => errorCallBack("pdfPage.getAnnotations error:" + err)); + }, + err => pageViewDrawCallback.call(this, err) + ); + } +} + +////////////////////////////////Start of Node.js Module +class PDFJSClass extends EventEmitter { + pdfDocument = null; + pages = null; + rawTextContents = null; + + needRawText = null; + + // constructor + constructor(needRawText) { + super(); + + // public, this instance copies + this.pdfDocument = null; + this.pages = []; + this.rawTextContents = []; + + this.needRawText = needRawText; + } + + raiseErrorEvent(errMsg) { + console.error(errMsg); + process.nextTick( () => this.emit("pdfjs_parseDataError", errMsg)); + // this.emit("error", errMsg); + return errMsg; + } + + raiseReadyEvent(data) { + process.nextTick( () => this.emit("pdfjs_parseDataReady", data) ); + return data; + } + + + parsePDFData(arrayBuffer, password) { + this.pdfDocument = null; + + const parameters = {password: password, data: arrayBuffer}; + PDFJS.getDocument(parameters).then( + pdfDocument => this.load(pdfDocument, 1), + error => this.raiseErrorEvent(error) + ); + }; + + tryLoadFieldInfoXML(pdfFilePath) { + const _sufInfo = "_fieldInfo.xml"; + const fieldInfoXMLPath = pdfFilePath.replace(".pdf", _sufInfo); + if ((fieldInfoXMLPath.indexOf(_sufInfo) < 1) || (!fs.existsSync(fieldInfoXMLPath))) { + return; + } + nodeUtil.p2jinfo("About to load fieldInfo XML : " + fieldInfoXMLPath); + + this.ptiParser = new PTIXmlParser(); + this.ptiParser.parseXml(fieldInfoXMLPath, err => { + if (err) { + nodeUtil.p2jwarn("fieldInfo XML Error: " + JSON.stringify(err)); + this.ptiParser = null; + } + else { + 
nodeUtil.p2jinfo("fieldInfo XML loaded."); + } + }); + } + + load(pdfDocument, scale) { + this.pdfDocument = pdfDocument; + + return this.loadMetaData().then( + () => this.loadPages(), + error => this.raiseErrorEvent("loadMetaData error: " + error) + ); + } + + loadMetaData() { + return this.pdfDocument.getMetadata().then( + data => { + this.documentInfo = data.info; + this.metadata = data.metadata?.metadata ?? {}; + this.parseMetaData(); + }, + error => this.raiseErrorEvent("pdfDocument.getMetadata error: " + error) + ); + } + + parseMetaData() { + const meta = {Transcoder: _PARSER_SIG, Meta: {...this.documentInfo, Metadata: this.metadata}}; + this.raiseReadyEvent(meta); + this.emit("readable", meta); + } + + loadPages() { + const pagesCount = this.pdfDocument.numPages; + const pagePromises = []; + for (let i = 1; i <= pagesCount; i++) + pagePromises.push(this.pdfDocument.getPage(i)); + + const pagesPromise = PDFJS.Promise.all(pagePromises); + + nodeUtil.p2jinfo("PDF loaded. pagesCount = " + pagesCount); + + return pagesPromise.then( + promisedPages => this.parsePage(promisedPages, 0, 1.5), + error => this.raiseErrorEvent("pagesPromise error: " + error) + ); + } + + parsePage(promisedPages, id, scale) { + nodeUtil.p2jinfo("start to parse page:" + (id+1)); + + const pdfPage = promisedPages[id]; + const pageParser = new PDFPageParser(pdfPage, id, scale, this.ptiParser); + + function continueOnNextPage() { + nodeUtil.p2jinfo("complete parsing page:" + (id+1)); + if (id === (this.pdfDocument.numPages - 1) ) { + this.raiseReadyEvent({Pages:this.pages}); + //v1.1.2: signal end of parsed data with null + process.nextTick(() => this.raiseReadyEvent(null)); + this.emit("data", null); + } + else { + process.nextTick(() => this.parsePage(promisedPages, ++id, scale)); + } + } + + pageParser.parsePage( + data => { + const page = { + Width: pageParser.width, + Height: pageParser.height, + HLines: pageParser.HLines, + VLines: pageParser.VLines, + Fills: pageParser.Fills, + 
//needs to keep current default output format, text content will output to a separate file if '-c' command line argument is set + // Content:pdfPage.getTextContent(), + Texts: pageParser.Texts, + Fields: pageParser.Fields, + Boxsets: pageParser.Boxsets + }; + + this.pages.push(page); + this.emit("data", page); + + if (this.needRawText) { + pdfPage.getTextContent().then( + textContent => { + this.rawTextContents.push(textContent); + nodeUtil.p2jinfo("complete parsing raw text content:" + (id+1)); + continueOnNextPage.call(this); + }, + error => this.raiseErrorEvent("pdfPage.getTextContent error: " + error) + ); + } + else { + continueOnNextPage.call(this); + } + }, + errMsg => this.raiseErrorEvent(errMsg) + ); + } + + getRawTextContent() { + let retVal = ""; + if (!this.needRawText) + return retVal; + + this.rawTextContents.forEach( (textContent, index) => { + let prevText = null; + textContent.bidiTexts.forEach( (textObj, idx) => { + if (prevText) { + if (Math.abs(textObj.y - prevText.y) <= 9) { + prevText.str += textObj.str; + } + else { + retVal += prevText.str + "\r\n"; + prevText = textObj; + } + } + else { + prevText = textObj; + } + + }); + if (prevText) { + retVal += prevText.str; + } + retVal += "\r\n----------------Page (" + index + ") Break----------------\r\n"; + }); + + return retVal; + } + + getAllFieldsTypes() { + return PDFField.getAllFieldsTypes({Pages:this.pages || []}); + } + + getMergedTextBlocksIfNeeded() { + for (let p = 0; p < this.pages.length; p++) { + let prevText = null; + let page = this.pages[p]; + + page.Texts.sort(PDFFont.compareBlockPos); + page.Texts = page.Texts.filter( (t, j) => { + let isDup = (j > 0) && PDFFont.areDuplicateBlocks(page.Texts[j-1], t); + if (isDup) { + nodeUtil.p2jinfo("skipped: dup text block: " + decodeURIComponent(t.R[0].T)); + } + return !isDup; + }); + + for (let i = 0; i < page.Texts.length; i++) { + let text = page.Texts[i]; + + if (prevText) { + if (PDFFont.areAdjacentBlocks(prevText, text) && 
PDFFont.haveSameStyle(prevText, text)) { + let preT = decodeURIComponent(prevText.R[0].T); + let curT = decodeURIComponent(text.R[0].T); + + prevText.R[0].T += text.R[0].T; + prevText.w += text.w; + text.merged = true; + + let mergedText = decodeURIComponent(prevText.R[0].T); + nodeUtil.p2jinfo(`merged text block: ${preT} + ${curT} => ${mergedText}`); + prevText = null; //yeah, only merge two blocks for now + } + else { + prevText = text; + } + } + else { + prevText = text; + } + } + + page.Texts = page.Texts.filter( t => !t.merged); + } + + return {Pages:this.pages}; + } + + destroy() { + this.removeAllListeners(); + + if (this.pdfDocument) + this.pdfDocument.destroy(); + this.pdfDocument = null; + + this.pages = null; + this.rawTextContents = null; + } + +} + +class ParserStream extends Transform { + static createContentStream(jsonObj) { + const rStream = new Readable({objectMode: true}); + rStream.push(jsonObj); + rStream.push(null); + return rStream; + } + + static createOutputStream(outputPath, resolve, reject) { + const outputStream = fs.createWriteStream(outputPath); + outputStream.on('finish', () => resolve(outputPath)); + outputStream.on('error', err => reject(err) ); + return outputStream; + } + + #pdfParser = null; + #chunks = []; + #parsedData = {Pages:[]}; + #_flush_callback = null; + + constructor(pdfParser, options) { + super(options); + this.#pdfParser = pdfParser; + + this.#chunks = []; + + // this.#pdfParser.on("pdfParser_dataReady", evtData => { + // this.push(evtData); + // this.#_flush_callback(); + // this.emit('end', null); + // }); + this.#pdfParser.on("readable", meta => this.#parsedData = {...meta, Pages:[]}); + this.#pdfParser.on("data", page => { + if (!page) { + this.push(this.#parsedData); + this.#_flush_callback(); + } + else + this.#parsedData.Pages.push(page); + }); + } + + //implements transform stream + _transform(chunk, enc, callback) { + this.#chunks.push(Buffer.isBuffer(chunk) ? 
chunk : Buffer.from(chunk, enc)); + callback(); + } + + _flush(callback) { + this.#_flush_callback = callback; + this.#pdfParser.parseBuffer(Buffer.concat(this.#chunks)); + } + + _destroy() { + super.removeAllListeners(); + this.#pdfParser = null; + this.#chunks = []; + } +} + +class PDFParser extends EventEmitter { // inherit from event emitter + //public static + static get colorDict() {return kColors; } + static get fontFaceDict() { return kFontFaces; } + static get fontStyleDict() { return kFontStyles; } + + //private static + static #maxBinBufferCount = 10; + static #binBuffer = {}; + + //private + #password = ""; + + #context = null; // service context object, only used in Web Service project; null in command line + + #pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + #pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache + #data = null; //if file read success, data is PDF content; if failed, data is "err" object + #PDFJS = null; //will be initialized in constructor + #processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + // constructor + constructor(context, needRawText, password) { + //call constructor for super class + super(); + + // private + // service context object, only used in Web Service project; null in command line + this.#context = context; + + this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache + this.#data = null; //if file read success, data is PDF content; if failed, data is "err" object + this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + this.#PDFJS = new PDFJSClass(needRawText); + this.#password = password; + } + + //private methods, needs to invoked by 
[funcName].call(this, ...) + #onPDFJSParseDataReady(data) { + if (!data) { //v1.1.2: data===null means end of parsed data + nodeUtil.p2jinfo("PDF parsing completed."); + this.emit("pdfParser_dataReady", this.#data); + } + else { + this.#data = {...this.#data, ...data}; + } + } + + #onPDFJSParserDataError(err) { + this.#data = null; + this.emit("pdfParser_dataError", {"parserError": err}); + // this.emit("error", err); + } + + #startParsingPDF(buffer) { + this.#data = {}; + + this.#PDFJS.on("pdfjs_parseDataReady", data => this.#onPDFJSParseDataReady(data)); + this.#PDFJS.on("pdfjs_parseDataError", err => this.#onPDFJSParserDataError(err)); + + //v1.3.0 the following Readable Stream-like events are replacement for the top two custom events + this.#PDFJS.on("readable", meta => this.emit("readable", meta)); + this.#PDFJS.on("data", data => this.emit("data", data)); + this.#PDFJS.on("error", err => this.#onPDFJSParserDataError(err)); + + this.#PDFJS.parsePDFData(buffer || PDFParser.#binBuffer[this.binBufferKey], this.#password); + } + + #processBinaryCache() { + if (this.binBufferKey in PDFParser.#binBuffer) { + this.#startParsingPDF(); + return true; + } + + const allKeys = Object.keys(PDFParser.#binBuffer); + if (allKeys.length > PDFParser.#maxBinBufferCount) { + const idx = this.id % PDFParser.#maxBinBufferCount; + const key = allKeys[idx]; + PDFParser.#binBuffer[key] = null; + delete PDFParser.#binBuffer[key]; + + nodeUtil.p2jinfo("re-cycled cache for " + key); + } + + return false; + } + + //public getter + get data() { return this.#data; } + get binBufferKey() { return this.#pdfFilePath + this.#pdfFileMTime; } + + //public APIs + createParserStream() { + return new ParserStream(this, {objectMode: true, bufferSize: 64 * 1024}); + } + + async loadPDF(pdfFilePath, verbosity) { + nodeUtil.verbosity(verbosity || 0); + nodeUtil.p2jinfo("about to load PDF file " + pdfFilePath); + + this.#pdfFilePath = pdfFilePath; + + try { + this.#pdfFileMTime = 
fs.statSync(pdfFilePath).mtimeMs; + if (this.#processFieldInfoXML) { + this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath); + } + + if (this.#processBinaryCache()) + return; + + PDFParser.#binBuffer[this.binBufferKey] = await readFile(pdfFilePath); + nodeUtil.p2jinfo(`Load OK: ${pdfFilePath}`); + this.#startParsingPDF(); + } + catch(err) { + nodeUtil.p2jerror(`Load Failed: ${pdfFilePath} - ${err}`); + this.emit("pdfParser_dataError", err); + } + } + + // Introduce a way to directly process buffers without the need to write it to a temporary file + parseBuffer(pdfBuffer) { + this.#startParsingPDF(pdfBuffer); + } + + getRawTextContent() { return this.#PDFJS.getRawTextContent(); } + getRawTextContentStream() { return ParserStream.createContentStream(this.getRawTextContent()); } + + getAllFieldsTypes() { return this.#PDFJS.getAllFieldsTypes(); }; + getAllFieldsTypesStream() { return ParserStream.createContentStream(this.getAllFieldsTypes()); } + + getMergedTextBlocksIfNeeded() { return this.#PDFJS.getMergedTextBlocksIfNeeded(); } + getMergedTextBlocksStream() { return ParserStream.createContentStream(this.getMergedTextBlocksIfNeeded()) } + + destroy() { // invoked with stream transform process + super.removeAllListeners(); + + //context object will be set in Web Service project, but not in command line utility + if (this.#context) { + this.#context.destroy(); + this.#context = null; + } + + this.#pdfFilePath = null; + this.#pdfFileMTime = null; + this.#data = null; + this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + this.#PDFJS.destroy(); + this.#PDFJS = null; + } +} + +export { PDFParser as default }; diff --git a/rollup.config.js b/rollup.config.js new file mode 100644 index 00000000..7ed65e6e --- /dev/null +++ b/rollup.config.js @@ -0,0 +1,64 @@ +import replace from '@rollup/plugin-replace'; +import builtins from 'rollup-plugin-node-builtins'; + +export default [ + { + input: './pdfparser.js', + external: [ + 
'fs', + 'util', + 'fs/promises', + 'events', + 'path', + 'url', + 'buffer', + '@xmldom/xmldom', + 'stream', + ], + output: { + file: 'pdfparser.cjs', + format: 'cjs', + name: 'pdfparser', + exports: 'default', + }, + plugins: [ + replace({ + '../base': '/base/', + delimiters: ['/', '/'], + }), + replace({ + 'eval(_baseCode);': `(function (globalScope = {}) { + eval(_baseCode); + })();`, + delimiters: ['', ''], + preventAssignment: false, + }), + builtins(), + ], + }, + { + input: './pdfparser.js', + external: [ + 'fs', + 'util', + 'fs/promises', + 'events', + 'path', + 'url', + 'buffer', + '@xmldom/xmldom', + 'stream', + ], + output: { + file: 'pdfparser.mjs', + format: 'es', + }, + plugins: [ + replace({ + '../base': '/base/', + delimiters: ['/', '/'], + }), + builtins(), + ], + }, +]; diff --git a/test/index.js b/test/index.js index 5ea65cde..384f6fc0 100644 --- a/test/index.js +++ b/test/index.js @@ -2,175 +2,216 @@ //12 test cases should be honored var vows = require('vows'), - assert = require('assert'), - fs = require('fs') - nodeEvents = require("events"), - _ = require('lodash'), - PFParser = require("../pdfparser"); - -var suite = vows.describe('PDF Node Parser'); - -function pdfParserPromise(fileName, fromBuffer) { - var promise = new(nodeEvents.EventEmitter); - - var pdfParser = new PFParser(); - - pdfParser.on("pdfParser_dataReady", function(evtData) { - if ((!!evtData) && (!!evtData.data)) { - promise.emit('success', evtData.data); - } - else { - promise.emit('error', new Error()); + assert = require('assert'), + fs = require('fs'); +(nodeEvents = require('events')), + (_ = require('lodash')), + (PdfParser_js = require('../pdfparser.js')); +PdfParser_cjs = require('../dist/pdfparser.cjs'); +PdfParser_mjs = require('../dist/pdfparser.mjs'); + +testSuite(PdfParser_js); +testSuite(PdfParser_cjs); +testSuite(PdfParser_mjs); + +function testSuite(PFParser) { + var suite = vows.describe('PDF Node Parser'); + + function pdfParserPromise(fileName, fromBuffer) 
{ + var promise = new nodeEvents.EventEmitter(); + + var pdfParser = new PFParser(); + + pdfParser.on('pdfParser_dataReady', function (evtData) { + if (!!evtData && !!evtData.data) { + promise.emit('success', evtData.data); + } else { + promise.emit('error', new Error()); } - }); + }); - pdfParser.on("pdfParser_dataError", function(evtData) { + pdfParser.on('pdfParser_dataError', function (evtData) { promise.emit('error', evtData.data); - }); - - var pdfFilePath = __dirname + "/data/" + fileName + ".pdf"; - if (fromBuffer) { - pdf = fs.readFileSync(pdfFilePath); - pdfParser.parseBuffer(pdf); - } else { - pdfParser.loadPDF(pdfFilePath); - } - - return promise; + }); + + var pdfFilePath = __dirname + '/data/' + fileName + '.pdf'; + if (fromBuffer) { + pdf = fs.readFileSync(pdfFilePath); + pdfParser.parseBuffer(pdf); + } else { + pdfParser.loadPDF(pdfFilePath); + } + + return promise; + } + + function checkResult_parseStatus(err, stat, fileName) { + assert(_.isNull(err) || _.isUndefined(err), fileName + ' has errors!'); // We have no error + assert( + _.isObject(stat), + fileName + ' parsing result should be JS object' + ); // We have a stat object + } + + function checkResult_mainFields(parsedData, fileName) { + assert( + _.has(parsedData, 'Agency'), + fileName + " parsing error: doesn't have Agency object" + ); + assert( + _.has(parsedData, 'Id'), + fileName + " parsing error: doesn't have Id object" + ); + assert( + _.has(parsedData, 'Pages'), + fileName + " parsing error: doesn't have Pages object" + ); + assert( + _.has(parsedData, 'Width'), + fileName + " parsing error: doesn't have Width object" + ); + } + + function checkResult_pageCount(Pages, count, fileName) { + assert( + _.isArray(Pages), + fileName + " parsing error: doesn't have Pages array" + ); + assert( + Pages.length === count, + fileName + ' parsing error: Pages array is empty' + ); + } + + function checkResult_pageContent(Pages, fileName) { + _.each(Pages, function (page, index, list) { + assert( + 
_.has(page, 'Height'), + fileName + ' page ' + index + " : doesn't have Height field" + ); + assert( + _.has(page, 'HLines'), + fileName + ' page ' + index + " : doesn't have HLines object" + ); + assert( + _.has(page, 'VLines'), + fileName + ' page ' + index + " : doesn't have VLines object" + ); + assert( + _.has(page, 'Fills'), + fileName + ' page ' + index + " : doesn't have Fills object" + ); + assert( + _.has(page, 'Texts'), + fileName + ' page ' + index + " : doesn't have Texts object" + ); + }); + } + + suite.addBatch({ + 'Parse 1040ez first (from file)': { + topic: function () { + return pdfParserPromise('f1040ez', false); + }, + 'has parsing data': function (err, stat) { + checkResult_parseStatus(err, stat, 'f1040ez'); + }, + 'has four main objects': function (err, stat) { + checkResult_mainFields(stat, 'f1040ez'); + }, + 'has pages': function (err, stat) { + checkResult_pageCount(stat.Pages, 1, 'f1040ez'); + }, + 'has page elements': function (err, stat) { + checkResult_pageContent(stat.Pages, 'f1040ez'); + }, + }, + 'Parse 1040ez first (from buffer)': { + topic: function () { + return pdfParserPromise('f1040ez', true); + }, + 'has parsing data': function (err, stat) { + checkResult_parseStatus(err, stat, 'f1040ez'); + }, + 'has four main objects': function (err, stat) { + checkResult_mainFields(stat, 'f1040ez'); + }, + 'has pages': function (err, stat) { + checkResult_pageCount(stat.Pages, 1, 'f1040ez'); + }, + 'has page elements': function (err, stat) { + checkResult_pageContent(stat.Pages, 'f1040ez'); + }, + }, + 'Parse 1040a (from file)': { + topic: function () { + return pdfParserPromise('f1040a', false); + }, + 'has parsing data': function (err, stat) { + checkResult_parseStatus(err, stat, 'f1040a'); + }, + 'has four main objects': function (err, stat) { + checkResult_mainFields(stat, 'f1040a'); + }, + 'has pages': function (err, stat) { + checkResult_pageCount(stat.Pages, 2, 'f1040a'); + }, + 'has page elements': function (err, stat) { + 
checkResult_pageContent(stat.Pages, 'f1040a'); + }, + }, + 'Parse 1040a (from buffer)': { + topic: function () { + return pdfParserPromise('f1040a', true); + }, + 'has parsing data': function (err, stat) { + checkResult_parseStatus(err, stat, 'f1040a'); + }, + 'has four main objects': function (err, stat) { + checkResult_mainFields(stat, 'f1040a'); + }, + 'has pages': function (err, stat) { + checkResult_pageCount(stat.Pages, 2, 'f1040a'); + }, + 'has page elements': function (err, stat) { + checkResult_pageContent(stat.Pages, 'f1040a'); + }, + }, + 'Parse 1040 (from file)': { + topic: function () { + return pdfParserPromise('f1040', false); + }, + 'has parsing data': function (err, stat) { + checkResult_parseStatus(err, stat, 'f1040'); + }, + 'has four main objects': function (err, stat) { + checkResult_mainFields(stat, 'f1040'); + }, + 'has pages': function (err, stat) { + checkResult_pageCount(stat.Pages, 2, 'f1040'); + }, + 'has page elements': function (err, stat) { + checkResult_pageContent(stat.Pages, 'f1040'); + }, + }, + 'Parse 1040 (from buffer)': { + topic: function () { + return pdfParserPromise('f1040', true); + }, + 'has parsing data': function (err, stat) { + checkResult_parseStatus(err, stat, 'f1040'); + }, + 'has four main objects': function (err, stat) { + checkResult_mainFields(stat, 'f1040'); + }, + 'has pages': function (err, stat) { + checkResult_pageCount(stat.Pages, 2, 'f1040'); + }, + 'has page elements': function (err, stat) { + checkResult_pageContent(stat.Pages, 'f1040'); + }, + }, + }); + + suite.run(); } - -function checkResult_parseStatus(err, stat, fileName) { - assert(_.isNull(err) || _.isUndefined(err), fileName + " has errors!"); // We have no error - assert(_.isObject(stat), fileName + " parsing result should be JS object");// We have a stat object -} - -function checkResult_mainFields(parsedData, fileName) { - assert(_.has(parsedData, "Agency"), fileName + " parsing error: doesn't have Agency object"); - assert(_.has(parsedData, 
"Id"), fileName + " parsing error: doesn't have Id object"); - assert(_.has(parsedData, "Pages"), fileName + " parsing error: doesn't have Pages object"); - assert(_.has(parsedData, "Width"), fileName + " parsing error: doesn't have Width object"); -} - -function checkResult_pageCount(Pages, count, fileName) { - assert(_.isArray(Pages), fileName + " parsing error: doesn't have Pages array"); - assert(Pages.length === count, fileName + " parsing error: Pages array is empty"); -} - -function checkResult_pageContent(Pages, fileName) { - _.each(Pages, function(page, index, list) { - assert(_.has(page, "Height"), fileName + " page " + index + " : doesn't have Height field"); - assert(_.has(page, "HLines"), fileName + " page " + index + " : doesn't have HLines object"); - assert(_.has(page, "VLines"), fileName + " page " + index + " : doesn't have VLines object"); - assert(_.has(page, "Fills"), fileName + " page " + index + " : doesn't have Fills object"); - assert(_.has(page, "Texts"), fileName + " page " + index + " : doesn't have Texts object"); - }); -} - - -suite.addBatch({ - 'Parse 1040ez first (from file)':{ - topic:function () { - return pdfParserPromise("f1040ez", false); - }, - 'has parsing data':function (err, stat) { - checkResult_parseStatus(err, stat, "f1040ez"); - }, - 'has four main objects': function(err, stat) { - checkResult_mainFields(stat, "f1040ez"); - }, - 'has pages': function(err, stat) { - checkResult_pageCount(stat.Pages, 1, "f1040ez"); - }, - 'has page elements': function(err, stat) { - checkResult_pageContent(stat.Pages, "f1040ez"); - } - }, - 'Parse 1040ez first (from buffer)':{ - topic:function () { - return pdfParserPromise("f1040ez", true); - }, - 'has parsing data':function (err, stat) { - checkResult_parseStatus(err, stat, "f1040ez"); - }, - 'has four main objects': function(err, stat) { - checkResult_mainFields(stat, "f1040ez"); - }, - 'has pages': function(err, stat) { - checkResult_pageCount(stat.Pages, 1, "f1040ez"); - }, - 'has 
page elements': function(err, stat) { - checkResult_pageContent(stat.Pages, "f1040ez"); - } - }, - 'Parse 1040a (from file)':{ - topic:function () { - return pdfParserPromise("f1040a", false); - }, - 'has parsing data':function (err, stat) { - checkResult_parseStatus(err, stat, "f1040a"); - }, - 'has four main objects': function(err, stat) { - checkResult_mainFields(stat, "f1040a"); - }, - 'has pages': function(err, stat) { - checkResult_pageCount(stat.Pages, 2, "f1040a"); - }, - 'has page elements': function(err, stat) { - checkResult_pageContent(stat.Pages, "f1040a"); - } - }, - 'Parse 1040a (from buffer)':{ - topic:function () { - return pdfParserPromise("f1040a", true); - }, - 'has parsing data':function (err, stat) { - checkResult_parseStatus(err, stat, "f1040a"); - }, - 'has four main objects': function(err, stat) { - checkResult_mainFields(stat, "f1040a"); - }, - 'has pages': function(err, stat) { - checkResult_pageCount(stat.Pages, 2, "f1040a"); - }, - 'has page elements': function(err, stat) { - checkResult_pageContent(stat.Pages, "f1040a"); - } - }, - 'Parse 1040 (from file)':{ - topic:function () { - return pdfParserPromise("f1040", false); - }, - 'has parsing data':function (err, stat) { - checkResult_parseStatus(err, stat, "f1040"); - }, - 'has four main objects': function(err, stat) { - checkResult_mainFields(stat, "f1040"); - }, - 'has pages': function(err, stat) { - checkResult_pageCount(stat.Pages, 2, "f1040"); - }, - 'has page elements': function(err, stat) { - checkResult_pageContent(stat.Pages, "f1040"); - } - }, - 'Parse 1040 (from buffer)':{ - topic:function () { - return pdfParserPromise("f1040", true); - }, - 'has parsing data':function (err, stat) { - checkResult_parseStatus(err, stat, "f1040"); - }, - 'has four main objects': function(err, stat) { - checkResult_mainFields(stat, "f1040"); - }, - 'has pages': function(err, stat) { - checkResult_pageCount(stat.Pages, 2, "f1040"); - }, - 'has page elements': function(err, stat) { - 
checkResult_pageContent(stat.Pages, "f1040"); - } - } -}); - -suite.run(); - diff --git a/yarn-error.log b/yarn-error.log new file mode 100644 index 00000000..3faf9a64 --- /dev/null +++ b/yarn-error.log @@ -0,0 +1,145 @@ +Arguments: + /usr/local/bin/node /usr/local/bin/yarn publish + +PATH: + /opt/homebrew/opt/imagemagick@6/bin:/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/System/Cryptexes/App/usr/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Library/Apple/usr/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/local/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/appleinternal/bin + +Yarn version: + 1.22.19 + +Node version: + 18.13.0 + +Platform: + darwin arm64 + +Trace: + Error: Cannot find module "." + at webpackMissingModule (/usr/local/lib/node_modules/yarn/lib/cli.js:118255:64) + at pkg (/usr/local/lib/node_modules/yarn/lib/cli.js:118255:142) + at again (/usr/local/lib/node_modules/yarn/lib/cli.js:118210:22) + at depsFor (/usr/local/lib/node_modules/yarn/lib/cli.js:118224:4) + at /usr/local/lib/node_modules/yarn/lib/cli.js:42622:30 + at Generator.next () + at step (/usr/local/lib/node_modules/yarn/lib/cli.js:310:30) + at /usr/local/lib/node_modules/yarn/lib/cli.js:321:13 + +npm manifest: + { + "name": "@juristic/pdf2json", + "version": "3.0.5", + "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", + "keywords": [ + "pdf", + "pdf parser", + "pdf2json", + "convert pdf to json", + "server side PDF parser", + "port pdf.js to node.js", + "PDF binary to text", + "commandline utility to parse pdf to json", + "JSON", + "javascript", + "PDF canvas", + "pdf.js fork" + ], + "author": { + "name": "Modesty Zhang", + "email": "modestyz@hotmail.com", + "url": "http://www.codeproject.com/script/Articles/MemberArticles.aspx?amid=62372" + }, + "homepage": "https://github.com/modesty/pdf2json", + 
"repository": { + "type": "git", + "url": "git://github.com/modesty/pdf2json.git" + }, + "main": "./dist/pdfparser.cjs", + "module": "./pdfparser.js", + "typings": "./pdfparser.d.ts", + "scripts": { + "test": "cd ./test && sh p2j.forms.sh", + "test-misc": "cd ./test && sh p2j.one.sh misc . \"Expected: 7 success, 3 fail exception with stack trace\" ", + "parse": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form", + "parse-s": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s", + "parse-t": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", + "parse-c": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", + "parse-m": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", + "parse-r": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -t -c -m -r", + "parse-fd": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/ -o ./test/target/fd/form -t -c -m -r", + "parse-tb": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", + "parse-tc": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i293_pdfpac.pdf -o ./test/target/misc", + "parse-rectFix": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/pr298_rect_fix_from_upstream.pdf -o ./test/target/misc", + "parse-e": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc", + "parse-e2": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc", + "parse-e3": "node 
--trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i200_test.pdf -o ./test/target/misc", + "build:dist": "rollup -c ./rollup.config.js", + "build": "npm run build:dist" + }, + "engines": { + "node": ">=18.12.1", + "npm": ">=8.19.2" + }, + "type": "module", + "bin": { + "pdf2json": "./bin/pdf2json.js" + }, + "dependencies": { + "@xmldom/xmldom": "^0.8.8" + }, + "devDependencies": { + "@types/node": "^20.3.1", + "rollup": "^3.29.2" + }, + "bundledDependencies": [ + "@xmldom/xmldom" + ], + "maintainers": [ + { + "name": "Modesty Zhang", + "email": "modestyz@hotmail.com", + "url": "http://www.codeproject.com/script/Articles/MemberArticles.aspx?amid=62372" + } + ], + "contributors": [], + "bugs": { + "url": "http://github.com/modesty/pdf2json/issues" + }, + "exports": { + ".": { + "import": "./pdfparser.js", + "require": "./dist/pdfparser.cjs" + } + }, + "license": "Apache-2.0", + "readme": "https://github.com/modesty/pdf2json/blob/master/readme.md" + } + +yarn manifest: + No manifest + +Lockfile: + # THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. 
+ # yarn lockfile v1 + + + "@types/node@^20.3.1": + version "20.6.3" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.6.3.tgz#5b763b321cd3b80f6b8dde7a37e1a77ff9358dd9" + integrity sha512-HksnYH4Ljr4VQgEy2lTStbCKv/P590tmPe5HqOnv9Gprffgv5WXAY+Y5Gqniu0GGqeTCUdBnzC3QSrzPkBkAMA== + + "@xmldom/xmldom@^0.8.8": + version "0.8.10" + resolved "https://registry.yarnpkg.com/@xmldom/xmldom/-/xmldom-0.8.10.tgz#a1337ca426aa61cef9fe15b5b28e340a72f6fa99" + integrity sha512-2WALfTl4xo2SkGCYRt6rDTFfk9R1czmBvUQy12gK2KuRKIpWEhcbbzy8EZXtz/jkRqHX8bFEc6FC1HjX4TUWYw== + + fsevents@~2.3.2: + version "2.3.3" + resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6" + integrity sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw== + + rollup@^3.29.2: + version "3.29.2" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-3.29.2.tgz#cbc76cd5b03b9f9e93be991d23a1dff9c6d5b740" + integrity sha512-CJouHoZ27v6siztc21eEQGo0kIcE5D1gVPA571ez0mMYb25LGYGKnVNXpEj5MGlepmDWGXNjDB5q7uNiPHC11A== + optionalDependencies: + fsevents "~2.3.2" From 20516846abf578ede2cb152a1063805383ce113f Mon Sep 17 00:00:00 2001 From: Isaac <91521821+isimisi@users.noreply.github.com> Date: Mon, 25 Sep 2023 11:34:52 +0200 Subject: [PATCH 2/5] removed error --- yarn-error.log | 145 ------------------------------------------------- 1 file changed, 145 deletions(-) delete mode 100644 yarn-error.log diff --git a/yarn-error.log b/yarn-error.log deleted file mode 100644 index 3faf9a64..00000000 --- a/yarn-error.log +++ /dev/null @@ -1,145 +0,0 @@ -Arguments: - /usr/local/bin/node /usr/local/bin/yarn publish - -PATH: - 
/opt/homebrew/opt/imagemagick@6/bin:/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/System/Cryptexes/App/usr/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Library/Apple/usr/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/local/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/appleinternal/bin - -Yarn version: - 1.22.19 - -Node version: - 18.13.0 - -Platform: - darwin arm64 - -Trace: - Error: Cannot find module "." - at webpackMissingModule (/usr/local/lib/node_modules/yarn/lib/cli.js:118255:64) - at pkg (/usr/local/lib/node_modules/yarn/lib/cli.js:118255:142) - at again (/usr/local/lib/node_modules/yarn/lib/cli.js:118210:22) - at depsFor (/usr/local/lib/node_modules/yarn/lib/cli.js:118224:4) - at /usr/local/lib/node_modules/yarn/lib/cli.js:42622:30 - at Generator.next () - at step (/usr/local/lib/node_modules/yarn/lib/cli.js:310:30) - at /usr/local/lib/node_modules/yarn/lib/cli.js:321:13 - -npm manifest: - { - "name": "@juristic/pdf2json", - "version": "3.0.5", - "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", - "keywords": [ - "pdf", - "pdf parser", - "pdf2json", - "convert pdf to json", - "server side PDF parser", - "port pdf.js to node.js", - "PDF binary to text", - "commandline utility to parse pdf to json", - "JSON", - "javascript", - "PDF canvas", - "pdf.js fork" - ], - "author": { - "name": "Modesty Zhang", - "email": "modestyz@hotmail.com", - "url": "http://www.codeproject.com/script/Articles/MemberArticles.aspx?amid=62372" - }, - "homepage": "https://github.com/modesty/pdf2json", - "repository": { - "type": "git", - "url": "git://github.com/modesty/pdf2json.git" - }, - "main": "./dist/pdfparser.cjs", - "module": "./pdfparser.js", - "typings": "./pdfparser.d.ts", - "scripts": { - "test": "cd ./test && sh p2j.forms.sh", - "test-misc": "cd ./test && sh p2j.one.sh misc . 
\"Expected: 7 success, 3 fail exception with stack trace\" ", - "parse": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form", - "parse-s": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s", - "parse-t": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", - "parse-c": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", - "parse-m": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", - "parse-r": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -t -c -m -r", - "parse-fd": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/ -o ./test/target/fd/form -t -c -m -r", - "parse-tb": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", - "parse-tc": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i293_pdfpac.pdf -o ./test/target/misc", - "parse-rectFix": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/pr298_rect_fix_from_upstream.pdf -o ./test/target/misc", - "parse-e": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc", - "parse-e2": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc", - "parse-e3": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i200_test.pdf -o ./test/target/misc", - "build:dist": "rollup -c ./rollup.config.js", - "build": "npm run build:dist" - }, - "engines": { - "node": ">=18.12.1", - "npm": ">=8.19.2" - }, - "type": "module", - "bin": { - "pdf2json": 
"./bin/pdf2json.js" - }, - "dependencies": { - "@xmldom/xmldom": "^0.8.8" - }, - "devDependencies": { - "@types/node": "^20.3.1", - "rollup": "^3.29.2" - }, - "bundledDependencies": [ - "@xmldom/xmldom" - ], - "maintainers": [ - { - "name": "Modesty Zhang", - "email": "modestyz@hotmail.com", - "url": "http://www.codeproject.com/script/Articles/MemberArticles.aspx?amid=62372" - } - ], - "contributors": [], - "bugs": { - "url": "http://github.com/modesty/pdf2json/issues" - }, - "exports": { - ".": { - "import": "./pdfparser.js", - "require": "./dist/pdfparser.cjs" - } - }, - "license": "Apache-2.0", - "readme": "https://github.com/modesty/pdf2json/blob/master/readme.md" - } - -yarn manifest: - No manifest - -Lockfile: - # THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. - # yarn lockfile v1 - - - "@types/node@^20.3.1": - version "20.6.3" - resolved "https://registry.yarnpkg.com/@types/node/-/node-20.6.3.tgz#5b763b321cd3b80f6b8dde7a37e1a77ff9358dd9" - integrity sha512-HksnYH4Ljr4VQgEy2lTStbCKv/P590tmPe5HqOnv9Gprffgv5WXAY+Y5Gqniu0GGqeTCUdBnzC3QSrzPkBkAMA== - - "@xmldom/xmldom@^0.8.8": - version "0.8.10" - resolved "https://registry.yarnpkg.com/@xmldom/xmldom/-/xmldom-0.8.10.tgz#a1337ca426aa61cef9fe15b5b28e340a72f6fa99" - integrity sha512-2WALfTl4xo2SkGCYRt6rDTFfk9R1czmBvUQy12gK2KuRKIpWEhcbbzy8EZXtz/jkRqHX8bFEc6FC1HjX4TUWYw== - - fsevents@~2.3.2: - version "2.3.3" - resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6" - integrity sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw== - - rollup@^3.29.2: - version "3.29.2" - resolved "https://registry.yarnpkg.com/rollup/-/rollup-3.29.2.tgz#cbc76cd5b03b9f9e93be991d23a1dff9c6d5b740" - integrity sha512-CJouHoZ27v6siztc21eEQGo0kIcE5D1gVPA571ez0mMYb25LGYGKnVNXpEj5MGlepmDWGXNjDB5q7uNiPHC11A== - optionalDependencies: - fsevents "~2.3.2" From 9f45d87e20390598b27e1e83ac0e5811fd369151 Mon Sep 17 00:00:00 2001 From: 
Isaac <91521821+isimisi@users.noreply.github.com> Date: Mon, 25 Sep 2023 14:56:27 +0200 Subject: [PATCH 3/5] disabled treeshake and adjusted for DOMParser DOMParser was not available globally in the cjs version, so I made a fix for that in base/display/metadata.js However I don't understand how DOMParser was available globally in ESM in the first place - feel free to look at this. --- base/display/metadata.js | 166 +- lib/pdf.js | 866 +++---- package.json | 5 +- pdfparser.cjs | 4620 +++++++++++++++++++++----------------- pdfparser.mjs | 2196 ------------------ rollup.config.js | 33 +- test/index.js | 4 +- 7 files changed, 3125 insertions(+), 4765 deletions(-) delete mode 100644 pdfparser.mjs diff --git a/base/display/metadata.js b/base/display/metadata.js index 6f62537d..b27c8d54 100755 --- a/base/display/metadata.js +++ b/base/display/metadata.js @@ -18,80 +18,110 @@ 'use strict'; -var Metadata = PDFJS.Metadata = (function MetadataClosure() { - function fixMetadata(meta) { - return meta.replace(/>\\376\\377([^<]+)/g, function(all, codes) { - var bytes = codes.replace(/\\([0-3])([0-7])([0-7])/g, - function(code, d1, d2, d3) { - return String.fromCharCode(d1 * 64 + d2 * 8 + d3 * 1); +// since this gets evaluated in pdfparser and DOMParser somehow does not exist in a CJS environment +// we catch the error early and use @xmldom/xmldom DOMParser + +// Not exactly sure how DOMParser is available globally in a node environment in the first place +// feel free to change this if a better method is to be found +try { + var DomParser = DOMParser; +} catch (error) { + if (typeof module === 'object' && typeof module.exports === 'object') { + // This is a CommonJS (CJS) environment + DomParser = require('@xmldom/xmldom').DOMParser; + } else { + throw error; + } +} + +var Metadata = (PDFJS.Metadata = (function MetadataClosure() { + function fixMetadata(meta) { + return meta.replace(/>\\376\\377([^<]+)/g, function (all, codes) { + var bytes = codes.replace( + 
/\\([0-3])([0-7])([0-7])/g, + function (code, d1, d2, d3) { + return String.fromCharCode(d1 * 64 + d2 * 8 + d3 * 1); + } + ); + var chars = ''; + for (var i = 0; i < bytes.length; i += 2) { + var code = bytes.charCodeAt(i) * 256 + bytes.charCodeAt(i + 1); + chars += + code >= 32 && + code < 127 && + code != 60 && + code != 62 && + code != 38 && + false + ? String.fromCharCode(code) + : '&#x' + (0x10000 + code).toString(16).substring(1) + ';'; + } + return '>' + chars; }); - var chars = ''; - for (var i = 0; i < bytes.length; i += 2) { - var code = bytes.charCodeAt(i) * 256 + bytes.charCodeAt(i + 1); - chars += code >= 32 && code < 127 && code != 60 && code != 62 && - code != 38 && false ? String.fromCharCode(code) : - '&#x' + (0x10000 + code).toString(16).substring(1) + ';'; - } - return '>' + chars; - }); - } - - function Metadata(meta) { - if (typeof meta === 'string') { - // Ghostscript produces invalid metadata - meta = fixMetadata(meta); - - var parser = new DOMParser(); - meta = parser.parseFromString(meta, 'application/xml'); - } else if (!(meta instanceof Document)) { - error('Metadata: Invalid metadata object'); - } - - this.metaDocument = meta; - this.metadata = {}; - this.parse(); - } - - Metadata.prototype = { - parse: function Metadata_parse() { - var doc = this.metaDocument; - var rdf = doc.documentElement; - - if (rdf && rdf.nodeName.toLowerCase() !== 'rdf:rdf') { // Wrapped in - rdf = rdf.firstChild; - while (rdf && rdf.nodeName.toLowerCase() !== 'rdf:rdf') - rdf = rdf.nextSibling; + } + + function Metadata(meta) { + if (typeof meta === 'string') { + // Ghostscript produces invalid metadata + meta = fixMetadata(meta); + + var parser = new DomParser(); + meta = parser.parseFromString(meta, 'application/xml'); + } else if (!(meta instanceof Document)) { + error('Metadata: Invalid metadata object'); } - var nodeName = (rdf) ? 
rdf.nodeName.toLowerCase() : null; - if (!rdf || nodeName !== 'rdf:rdf' || !rdf.hasChildNodes()) - return; + this.metaDocument = meta; + this.metadata = {}; + this.parse(); + } - var children = rdf.childNodes, desc, entry, name, i, ii, length, iLength; + Metadata.prototype = { + parse: function Metadata_parse() { + var doc = this.metaDocument; + var rdf = doc.documentElement; - for (i = 0, length = children.length; i < length; i++) { - desc = children[i]; - if (desc.nodeName.toLowerCase() !== 'rdf:description') - continue; + if (rdf && rdf.nodeName.toLowerCase() !== 'rdf:rdf') { + // Wrapped in + rdf = rdf.firstChild; + while (rdf && rdf.nodeName.toLowerCase() !== 'rdf:rdf') + rdf = rdf.nextSibling; + } - for (ii = 0, iLength = desc.childNodes.length; ii < iLength; ii++) { - if (desc.childNodes[ii].nodeName.toLowerCase() !== '#text') { - entry = desc.childNodes[ii]; - name = entry.nodeName.toLowerCase(); - this.metadata[name] = entry.textContent.trim(); - } - } - } - }, + var nodeName = rdf ? 
rdf.nodeName.toLowerCase() : null; + if (!rdf || nodeName !== 'rdf:rdf' || !rdf.hasChildNodes()) return; + + var children = rdf.childNodes, + desc, + entry, + name, + i, + ii, + length, + iLength; + + for (i = 0, length = children.length; i < length; i++) { + desc = children[i]; + if (desc.nodeName.toLowerCase() !== 'rdf:description') continue; + + for (ii = 0, iLength = desc.childNodes.length; ii < iLength; ii++) { + if (desc.childNodes[ii].nodeName.toLowerCase() !== '#text') { + entry = desc.childNodes[ii]; + name = entry.nodeName.toLowerCase(); + this.metadata[name] = entry.textContent.trim(); + } + } + } + }, - get: function Metadata_get(name) { - return this.metadata[name] || null; - }, + get: function Metadata_get(name) { + return this.metadata[name] || null; + }, - has: function Metadata_has(name) { - return typeof this.metadata[name] !== 'undefined'; - } - }; + has: function Metadata_has(name) { + return typeof this.metadata[name] !== 'undefined'; + }, + }; - return Metadata; -})(); + return Metadata; +})()); diff --git a/lib/pdf.js b/lib/pdf.js index 6930fa58..d29e73f6 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -1,444 +1,480 @@ -import nodeUtil from "util"; -import fs from "fs"; +import nodeUtil from 'util'; +import fs from 'fs'; import path from 'path'; -import {fileURLToPath} from 'url'; +import { fileURLToPath } from 'url'; -import {EventEmitter} from "events"; -import {Blob} from "buffer"; -import {DOMParser} from "@xmldom/xmldom"; +import { EventEmitter } from 'events'; +import { Blob } from 'buffer'; +import { DOMParser } from '@xmldom/xmldom'; -import PDFCanvas from "./pdfcanvas.js"; -import PDFUnit from "./pdfunit.js"; -import PDFField from "./pdffield.js"; -import PDFAnno from "./pdfanno.js"; -import Image from "./pdfimage.js"; -import PDFFont from "./pdffont.js"; -import PTIXmlParser from "./ptixmlinject.js"; +import PDFCanvas from './pdfcanvas.js'; +import PDFUnit from './pdfunit.js'; +import PDFField from './pdffield.js'; +import PDFAnno from 
'./pdfanno.js'; +import Image from './pdfimage.js'; +import PDFFont from './pdffont.js'; +import PTIXmlParser from './ptixmlinject.js'; -import { pkInfo, _PARSER_SIG } from "../pkinfo.js"; +import { pkInfo, _PARSER_SIG } from '../pkinfo.js'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); const _pdfjsFiles = [ - 'shared/util.js', - 'shared/colorspace.js', - 'shared/pattern.js', - 'shared/function.js', - 'shared/annotation.js', - - 'core/core.js', - 'core/obj.js', - 'core/charsets.js', - 'core/crypto.js', - 'core/evaluator.js', - 'core/fonts.js', - 'core/font_renderer.js', - 'core/glyphlist.js', - 'core/image.js', - 'core/metrics.js', - 'core/parser.js', - 'core/stream.js', - 'core/worker.js', - 'core/jpx.js', - 'core/jbig2.js', - 'core/bidi.js', - 'core/jpg.js', - 'core/chunked_stream.js', - 'core/pdf_manager.js', - 'core/cmap.js', - 'core/cidmaps.js', - - 'display/canvas.js', - 'display/font_loader.js', - 'display/metadata.js', - 'display/api.js' + 'shared/util.js', + 'shared/colorspace.js', + 'shared/pattern.js', + 'shared/function.js', + 'shared/annotation.js', + + 'core/core.js', + 'core/obj.js', + 'core/charsets.js', + 'core/crypto.js', + 'core/evaluator.js', + 'core/fonts.js', + 'core/font_renderer.js', + 'core/glyphlist.js', + 'core/image.js', + 'core/metrics.js', + 'core/parser.js', + 'core/stream.js', + 'core/worker.js', + 'core/jpx.js', + 'core/jbig2.js', + 'core/bidi.js', + 'core/jpg.js', + 'core/chunked_stream.js', + 'core/pdf_manager.js', + 'core/cmap.js', + 'core/cidmaps.js', + + 'display/canvas.js', + 'display/font_loader.js', + 'display/metadata.js', + 'display/api.js', ]; //////replacing HTML5 canvas with PDFCanvas (in-memory canvas) -function createScratchCanvas(width, height) { return new PDFCanvas({}, width, height); } +function createScratchCanvas(width, height) { + return new PDFCanvas({}, width, height); +} const PDFJS = {}; -const globalScope = {console}; +const globalScope = { console }; const 
baseDir = `${__dirname}/../base/`; -const _baseCode = _pdfjsFiles.reduce( (preContent, fileName, idx, arr) => preContent += fs.readFileSync(baseDir + fileName, 'utf8'), ""); +const _baseCode = _pdfjsFiles.reduce( + (preContent, fileName, idx, arr) => + (preContent += fs.readFileSync(baseDir + fileName, 'utf8')), + '' +); eval(_baseCode); ////////////////////////////////start of helper classes class PDFPageParser { - //static - static RenderingStates = { - INITIAL: 0, - RUNNING: 1, - PAUSED: 2, - FINISHED: 3 - }; - - //public - id = -1; - pdfPage = null; - ptiParser = null; - scale = 0; - viewport = null; - renderingState = -1; - - Fields = null; - Boxsets = null; - ctxCanvas = null; - - #_addField (field) { - if (!PDFField.isFormElement(field)) { - nodeUtil.p2jwarn("NOT valid form element", field); - return; - } - - const oneField = new PDFField(field, this.viewport, this.Fields, this.Boxsets); - oneField.processField(); - } - - // constructor - constructor(pdfPage, id, scale, ptiParser) { - // public, this instance copies - this.id = id; - this.pdfPage = pdfPage; - this.ptiParser = ptiParser; - - this.scale = scale || 1.0; - - //leave out the 2nd parameter in order to use page's default rotation (for both portrait and landscape form) - this.viewport = this.pdfPage.getViewport(this.scale); - - this.renderingState = PDFPageParser.RenderingStates.INITIAL; - - //form elements other than radio buttons and check boxes - this.Fields = []; - //form elements: radio buttons and check boxes - this.Boxsets = []; - this.ctxCanvas = {}; - } - - get width() { return PDFUnit.toFormX(this.viewport.width); } - get height() { return PDFUnit.toFormY(this.viewport.height); } - get HLines() { return this.ctxCanvas.HLines; } - get VLines() { return this.ctxCanvas.VLines; } - get Fills() { return this.ctxCanvas.Fills; } - get Texts() { return this.ctxCanvas.Texts; } - - destroy() { - this.pdfPage.destroy(); - this.pdfPage = null; - - this.ptiParser = null; - this.Fields = null; - 
this.Boxsets = null; - this.ctxCanvas = null; - } - - getPagePoint(x, y) { - return this.viewport.convertToPdfPoint(x, y); - } - - parsePage(callback, errorCallBack) { - if (this.renderingState !== PDFPageParser.RenderingStates.INITIAL) { - errorCallBack('Must be in new state before drawing'); - return; - } - - this.renderingState = PDFPageParser.RenderingStates.RUNNING; - - const canvas = createScratchCanvas(1, 1); - const ctx = canvas.getContext('2d'); - - function pageViewDrawCallback(error) { - this.renderingState = PDFPageParser.RenderingStates.FINISHED; - - if (error) { - console.error(error); - errorCallBack(`Error: Page ${this.id + 1}: ${error.message}`); - } - else { - if (this.ptiParser) { - const extraFields = this.ptiParser.getFields(parseInt(this.id) + 1); - extraFields.forEach( field => this.#_addField(field) ); - } - - this.ctxCanvas = ctx.canvas; - this.stats = this.pdfPage.stats; - - nodeUtil.p2jinfo(`Success: Page ${this.id + 1}`); - callback(); + //static + static RenderingStates = { + INITIAL: 0, + RUNNING: 1, + PAUSED: 2, + FINISHED: 3, + }; + + //public + id = -1; + pdfPage = null; + ptiParser = null; + scale = 0; + viewport = null; + renderingState = -1; + + Fields = null; + Boxsets = null; + ctxCanvas = null; + + #_addField(field) { + if (!PDFField.isFormElement(field)) { + nodeUtil.p2jwarn('NOT valid form element', field); + return; + } + + const oneField = new PDFField( + field, + this.viewport, + this.Fields, + this.Boxsets + ); + oneField.processField(); + } + + // constructor + constructor(pdfPage, id, scale, ptiParser) { + // public, this instance copies + this.id = id; + this.pdfPage = pdfPage; + this.ptiParser = ptiParser; + + this.scale = scale || 1.0; + + //leave out the 2nd parameter in order to use page's default rotation (for both portrait and landscape form) + this.viewport = this.pdfPage.getViewport(this.scale); + + this.renderingState = PDFPageParser.RenderingStates.INITIAL; + + //form elements other than radio buttons and 
check boxes + this.Fields = []; + //form elements: radio buttons and check boxes + this.Boxsets = []; + this.ctxCanvas = {}; + } + + get width() { + return PDFUnit.toFormX(this.viewport.width); + } + get height() { + return PDFUnit.toFormY(this.viewport.height); + } + get HLines() { + return this.ctxCanvas.HLines; + } + get VLines() { + return this.ctxCanvas.VLines; + } + get Fills() { + return this.ctxCanvas.Fills; + } + get Texts() { + return this.ctxCanvas.Texts; + } + + destroy() { + this.pdfPage.destroy(); + this.pdfPage = null; + + this.ptiParser = null; + this.Fields = null; + this.Boxsets = null; + this.ctxCanvas = null; + } + + getPagePoint(x, y) { + return this.viewport.convertToPdfPoint(x, y); + } + + parsePage(callback, errorCallBack) { + if (this.renderingState !== PDFPageParser.RenderingStates.INITIAL) { + errorCallBack('Must be in new state before drawing'); + return; + } + + this.renderingState = PDFPageParser.RenderingStates.RUNNING; + + const canvas = createScratchCanvas(1, 1); + const ctx = canvas.getContext('2d'); + + function pageViewDrawCallback(error) { + this.renderingState = PDFPageParser.RenderingStates.FINISHED; + + if (error) { + console.error(error); + errorCallBack(`Error: Page ${this.id + 1}: ${error.message}`); + } else { + if (this.ptiParser) { + const extraFields = this.ptiParser.getFields( + parseInt(this.id) + 1 + ); + extraFields.forEach((field) => this.#_addField(field)); } - } - - const renderContext = { - canvasContext:ctx, - viewport:this.viewport - }; - - this.pdfPage.render(renderContext).then( - data => { - this.pdfPage.getAnnotations().then( - fields => { - fields.forEach(field => this.#_addField(field)); - pageViewDrawCallback.call(this, null); - }, - err => errorCallBack("pdfPage.getAnnotations error:" + err)); - }, - err => pageViewDrawCallback.call(this, err) - ); - } + + this.ctxCanvas = ctx.canvas; + this.stats = this.pdfPage.stats; + + nodeUtil.p2jinfo(`Success: Page ${this.id + 1}`); + callback(); + } + } + + 
const renderContext = { + canvasContext: ctx, + viewport: this.viewport, + }; + + this.pdfPage.render(renderContext).then( + (data) => { + this.pdfPage.getAnnotations().then( + (fields) => { + fields.forEach((field) => this.#_addField(field)); + pageViewDrawCallback.call(this, null); + }, + (err) => errorCallBack('pdfPage.getAnnotations error:' + err) + ); + }, + (err) => pageViewDrawCallback.call(this, err) + ); + } } ////////////////////////////////Start of Node.js Module export default class PDFJSClass extends EventEmitter { - pdfDocument = null; - pages = null; - rawTextContents = null; - - needRawText = null; - - // constructor - constructor(needRawText) { - super(); - - // public, this instance copies - this.pdfDocument = null; - this.pages = []; - this.rawTextContents = []; - - this.needRawText = needRawText; - } - - raiseErrorEvent(errMsg) { - console.error(errMsg); - process.nextTick( () => this.emit("pdfjs_parseDataError", errMsg)); - // this.emit("error", errMsg); - return errMsg; - } - - raiseReadyEvent(data) { - process.nextTick( () => this.emit("pdfjs_parseDataReady", data) ); - return data; - } - - - parsePDFData(arrayBuffer, password) { - this.pdfDocument = null; - - const parameters = {password: password, data: arrayBuffer}; - PDFJS.getDocument(parameters).then( - pdfDocument => this.load(pdfDocument, 1), - error => this.raiseErrorEvent(error) - ); - }; - - tryLoadFieldInfoXML(pdfFilePath) { - const _sufInfo = "_fieldInfo.xml"; - const fieldInfoXMLPath = pdfFilePath.replace(".pdf", _sufInfo); - if ((fieldInfoXMLPath.indexOf(_sufInfo) < 1) || (!fs.existsSync(fieldInfoXMLPath))) { - return; - } - nodeUtil.p2jinfo("About to load fieldInfo XML : " + fieldInfoXMLPath); - - this.ptiParser = new PTIXmlParser(); - this.ptiParser.parseXml(fieldInfoXMLPath, err => { - if (err) { - nodeUtil.p2jwarn("fieldInfo XML Error: " + JSON.stringify(err)); - this.ptiParser = null; + pdfDocument = null; + pages = null; + rawTextContents = null; + + needRawText = null; + 
+ // constructor + constructor(needRawText) { + super(); + + // public, this instance copies + this.pdfDocument = null; + this.pages = []; + this.rawTextContents = []; + + this.needRawText = needRawText; + } + + raiseErrorEvent(errMsg) { + console.error(errMsg); + process.nextTick(() => this.emit('pdfjs_parseDataError', errMsg)); + // this.emit("error", errMsg); + return errMsg; + } + + raiseReadyEvent(data) { + process.nextTick(() => this.emit('pdfjs_parseDataReady', data)); + return data; + } + + parsePDFData(arrayBuffer, password) { + this.pdfDocument = null; + + const parameters = { password: password, data: arrayBuffer }; + PDFJS.getDocument(parameters).then( + (pdfDocument) => this.load(pdfDocument, 1), + (error) => this.raiseErrorEvent(error) + ); + } + + tryLoadFieldInfoXML(pdfFilePath) { + const _sufInfo = '_fieldInfo.xml'; + const fieldInfoXMLPath = pdfFilePath.replace('.pdf', _sufInfo); + if ( + fieldInfoXMLPath.indexOf(_sufInfo) < 1 || + !fs.existsSync(fieldInfoXMLPath) + ) { + return; + } + nodeUtil.p2jinfo('About to load fieldInfo XML : ' + fieldInfoXMLPath); + + this.ptiParser = new PTIXmlParser(); + this.ptiParser.parseXml(fieldInfoXMLPath, (err) => { + if (err) { + nodeUtil.p2jwarn('fieldInfo XML Error: ' + JSON.stringify(err)); + this.ptiParser = null; + } else { + nodeUtil.p2jinfo('fieldInfo XML loaded.'); + } + }); + } + + load(pdfDocument, scale) { + this.pdfDocument = pdfDocument; + + return this.loadMetaData().then( + () => this.loadPages(), + (error) => this.raiseErrorEvent('loadMetaData error: ' + error) + ); + } + + loadMetaData() { + return this.pdfDocument.getMetadata().then( + (data) => { + this.documentInfo = data.info; + this.metadata = data.metadata?.metadata ?? 
{}; + this.parseMetaData(); + }, + (error) => + this.raiseErrorEvent('pdfDocument.getMetadata error: ' + error) + ); + } + + parseMetaData() { + const meta = { + Transcoder: _PARSER_SIG, + Meta: { ...this.documentInfo, Metadata: this.metadata }, + }; + this.raiseReadyEvent(meta); + this.emit('readable', meta); + } + + loadPages() { + const pagesCount = this.pdfDocument.numPages; + const pagePromises = []; + for (let i = 1; i <= pagesCount; i++) + pagePromises.push(this.pdfDocument.getPage(i)); + + const pagesPromise = PDFJS.Promise.all(pagePromises); + + nodeUtil.p2jinfo('PDF loaded. pagesCount = ' + pagesCount); + + return pagesPromise.then( + (promisedPages) => this.parsePage(promisedPages, 0, 1.5), + (error) => this.raiseErrorEvent('pagesPromise error: ' + error) + ); + } + + parsePage(promisedPages, id, scale) { + nodeUtil.p2jinfo('start to parse page:' + (id + 1)); + + const pdfPage = promisedPages[id]; + const pageParser = new PDFPageParser(pdfPage, id, scale, this.ptiParser); + + function continueOnNextPage() { + nodeUtil.p2jinfo('complete parsing page:' + (id + 1)); + if (id === this.pdfDocument.numPages - 1) { + this.raiseReadyEvent({ Pages: this.pages }); + //v1.1.2: signal end of parsed data with null + process.nextTick(() => this.raiseReadyEvent(null)); + this.emit('data', null); + } else { + process.nextTick(() => this.parsePage(promisedPages, ++id, scale)); + } + } + + pageParser.parsePage( + (data) => { + const page = { + Width: pageParser.width, + Height: pageParser.height, + HLines: pageParser.HLines, + VLines: pageParser.VLines, + Fills: pageParser.Fills, + //needs to keep current default output format, text content will output to a separate file if '-c' command line argument is set + // Content:pdfPage.getTextContent(), + Texts: pageParser.Texts, + Fields: pageParser.Fields, + Boxsets: pageParser.Boxsets, + }; + + this.pages.push(page); + this.emit('data', page); + + if (this.needRawText) { + pdfPage.getTextContent().then( + (textContent) => { + 
this.rawTextContents.push(textContent); + nodeUtil.p2jinfo( + 'complete parsing raw text content:' + (id + 1) + ); + continueOnNextPage.call(this); + }, + (error) => + this.raiseErrorEvent( + 'pdfPage.getTextContent error: ' + error + ) + ); + } else { + continueOnNextPage.call(this); } - else { - nodeUtil.p2jinfo("fieldInfo XML loaded."); + }, + (errMsg) => this.raiseErrorEvent(errMsg) + ); + } + + getRawTextContent() { + let retVal = ''; + if (!this.needRawText) return retVal; + + this.rawTextContents.forEach((textContent, index) => { + let prevText = null; + textContent.bidiTexts.forEach((textObj, idx) => { + if (prevText) { + if (Math.abs(textObj.y - prevText.y) <= 9) { + prevText.str += textObj.str; + } else { + retVal += prevText.str + '\r\n'; + prevText = textObj; + } + } else { + prevText = textObj; } - }); - } - - load(pdfDocument, scale) { - this.pdfDocument = pdfDocument; - - return this.loadMetaData().then( - () => this.loadPages(), - error => this.raiseErrorEvent("loadMetaData error: " + error) - ); - } - - loadMetaData() { - return this.pdfDocument.getMetadata().then( - data => { - this.documentInfo = data.info; - this.metadata = data.metadata?.metadata ?? {}; - this.parseMetaData(); - }, - error => this.raiseErrorEvent("pdfDocument.getMetadata error: " + error) - ); - } - - parseMetaData() { - const meta = {Transcoder: _PARSER_SIG, Meta: {...this.documentInfo, Metadata: this.metadata}}; - this.raiseReadyEvent(meta); - this.emit("readable", meta); - } - - loadPages() { - const pagesCount = this.pdfDocument.numPages; - const pagePromises = []; - for (let i = 1; i <= pagesCount; i++) - pagePromises.push(this.pdfDocument.getPage(i)); - - const pagesPromise = PDFJS.Promise.all(pagePromises); - - nodeUtil.p2jinfo("PDF loaded. 
pagesCount = " + pagesCount); - - return pagesPromise.then( - promisedPages => this.parsePage(promisedPages, 0, 1.5), - error => this.raiseErrorEvent("pagesPromise error: " + error) - ); - } - - parsePage(promisedPages, id, scale) { - nodeUtil.p2jinfo("start to parse page:" + (id+1)); - - const pdfPage = promisedPages[id]; - const pageParser = new PDFPageParser(pdfPage, id, scale, this.ptiParser); - - function continueOnNextPage() { - nodeUtil.p2jinfo("complete parsing page:" + (id+1)); - if (id === (this.pdfDocument.numPages - 1) ) { - this.raiseReadyEvent({Pages:this.pages}); - //v1.1.2: signal end of parsed data with null - process.nextTick(() => this.raiseReadyEvent(null)); - this.emit("data", null); + }); + if (prevText) { + retVal += prevText.str; + } + retVal += + '\r\n----------------Page (' + + index + + ') Break----------------\r\n'; + }); + + return retVal; + } + + getAllFieldsTypes() { + return PDFField.getAllFieldsTypes({ Pages: this.pages || [] }); + } + + getMergedTextBlocksIfNeeded() { + for (let p = 0; p < this.pages.length; p++) { + let prevText = null; + let page = this.pages[p]; + + page.Texts.sort(PDFFont.compareBlockPos); + page.Texts = page.Texts.filter((t, j) => { + let isDup = + j > 0 && PDFFont.areDuplicateBlocks(page.Texts[j - 1], t); + if (isDup) { + nodeUtil.p2jinfo( + 'skipped: dup text block: ' + decodeURIComponent(t.R[0].T) + ); } - else { - process.nextTick(() => this.parsePage(promisedPages, ++id, scale)); + return !isDup; + }); + + for (let i = 0; i < page.Texts.length; i++) { + let text = page.Texts[i]; + + if (prevText) { + if ( + PDFFont.areAdjacentBlocks(prevText, text) && + PDFFont.haveSameStyle(prevText, text) + ) { + let preT = decodeURIComponent(prevText.R[0].T); + let curT = decodeURIComponent(text.R[0].T); + + prevText.R[0].T += text.R[0].T; + prevText.w += text.w; + text.merged = true; + + let mergedText = decodeURIComponent(prevText.R[0].T); + nodeUtil.p2jinfo( + `merged text block: ${preT} + ${curT} => ${mergedText}` 
+ ); + prevText = null; //yeah, only merge two blocks for now + } else { + prevText = text; + } + } else { + prevText = text; } - } - - pageParser.parsePage( - data => { - const page = { - Width: pageParser.width, - Height: pageParser.height, - HLines: pageParser.HLines, - VLines: pageParser.VLines, - Fills: pageParser.Fills, - //needs to keep current default output format, text content will output to a separate file if '-c' command line argument is set - // Content:pdfPage.getTextContent(), - Texts: pageParser.Texts, - Fields: pageParser.Fields, - Boxsets: pageParser.Boxsets - }; - - this.pages.push(page); - this.emit("data", page); - - if (this.needRawText) { - pdfPage.getTextContent().then( - textContent => { - this.rawTextContents.push(textContent); - nodeUtil.p2jinfo("complete parsing raw text content:" + (id+1)); - continueOnNextPage.call(this); - }, - error => this.raiseErrorEvent("pdfPage.getTextContent error: " + error) - ); - } - else { - continueOnNextPage.call(this); - } - }, - errMsg => this.raiseErrorEvent(errMsg) - ); - } - - getRawTextContent() { - let retVal = ""; - if (!this.needRawText) - return retVal; - - this.rawTextContents.forEach( (textContent, index) => { - let prevText = null; - textContent.bidiTexts.forEach( (textObj, idx) => { - if (prevText) { - if (Math.abs(textObj.y - prevText.y) <= 9) { - prevText.str += textObj.str; - } - else { - retVal += prevText.str + "\r\n"; - prevText = textObj; - } - } - else { - prevText = textObj; - } - - }); - if (prevText) { - retVal += prevText.str; - } - retVal += "\r\n----------------Page (" + index + ") Break----------------\r\n"; - }); - - return retVal; - } - - getAllFieldsTypes() { - return PDFField.getAllFieldsTypes({Pages:this.pages || []}); - } - - getMergedTextBlocksIfNeeded() { - for (let p = 0; p < this.pages.length; p++) { - let prevText = null; - let page = this.pages[p]; - - page.Texts.sort(PDFFont.compareBlockPos); - page.Texts = page.Texts.filter( (t, j) => { - let isDup = (j > 0) && 
PDFFont.areDuplicateBlocks(page.Texts[j-1], t); - if (isDup) { - nodeUtil.p2jinfo("skipped: dup text block: " + decodeURIComponent(t.R[0].T)); - } - return !isDup; - }); - - for (let i = 0; i < page.Texts.length; i++) { - let text = page.Texts[i]; - - if (prevText) { - if (PDFFont.areAdjacentBlocks(prevText, text) && PDFFont.haveSameStyle(prevText, text)) { - let preT = decodeURIComponent(prevText.R[0].T); - let curT = decodeURIComponent(text.R[0].T); - - prevText.R[0].T += text.R[0].T; - prevText.w += text.w; - text.merged = true; - - let mergedText = decodeURIComponent(prevText.R[0].T); - nodeUtil.p2jinfo(`merged text block: ${preT} + ${curT} => ${mergedText}`); - prevText = null; //yeah, only merge two blocks for now - } - else { - prevText = text; - } - } - else { - prevText = text; - } - } - - page.Texts = page.Texts.filter( t => !t.merged); - } - - return {Pages:this.pages}; - } - - destroy() { - this.removeAllListeners(); - - if (this.pdfDocument) - this.pdfDocument.destroy(); - this.pdfDocument = null; - - this.pages = null; - this.rawTextContents = null; - } - -} \ No newline at end of file + } + + page.Texts = page.Texts.filter((t) => !t.merged); + } + + return { Pages: this.pages }; + } + + destroy() { + this.removeAllListeners(); + + if (this.pdfDocument) this.pdfDocument.destroy(); + this.pdfDocument = null; + + this.pages = null; + this.rawTextContents = null; + } +} diff --git a/package.json b/package.json index e3a48104..f5f88a26 100644 --- a/package.json +++ b/package.json @@ -31,6 +31,7 @@ "typings": "./pdfparser.d.ts", "scripts": { "test": "cd ./test && sh p2j.forms.sh", + "test:folder": "node ./test/index.cjs", "test-misc": "cd ./test && sh p2j.one.sh misc . 
\"Expected: 7 success, 3 fail exception with stack trace\" ", "parse": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form", "parse-s": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s", @@ -62,8 +63,10 @@ "devDependencies": { "@rollup/plugin-replace": "^5.0.2", "@types/node": "^20.3.1", + "lodash": "^4.17.21", "rollup": "^3.29.2", - "rollup-plugin-node-builtins": "^2.1.2" + "rollup-plugin-node-builtins": "^2.1.2", + "vows": "^0.8.3" }, "bundledDependencies": [ "@xmldom/xmldom" diff --git a/pdfparser.cjs b/pdfparser.cjs index 38761cac..606b1385 100644 --- a/pdfparser.cjs +++ b/pdfparser.cjs @@ -10,747 +10,861 @@ require('buffer'); var xmldom = require('@xmldom/xmldom'); var stream = require('stream'); -var _documentCurrentScript = typeof document !== 'undefined' ? document.currentScript : null; +var _documentCurrentScript = + typeof document !== 'undefined' ? document.currentScript : null; const kColors = [ - '#000000', // 0 - '#ffffff', // 1 - '#4c4c4c', // 2 - '#808080', // 3 - '#999999', // 4 - '#c0c0c0', // 5 - '#cccccc', // 6 - '#e5e5e5', // 7 - '#f2f2f2', // 8 - '#008000', // 9 - '#00ff00', // 10 - '#bfffa0', // 11 - '#ffd629', // 12 - '#ff99cc', // 13 - '#004080', // 14 - '#9fc0e1', // 15 - '#5580ff', // 16 - '#a9c9fa', // 17 - '#ff0080', // 18 - '#800080', // 19 - '#ffbfff', // 20 - '#e45b21', // 21 - '#ffbfaa', // 22 - '#008080', // 23 - '#ff0000', // 24 - '#fdc59f', // 25 - '#808000', // 26 - '#bfbf00', // 27 - '#824100', // 28 - '#007256', // 29 - '#008000', // 30 - '#000080', // Last + 1 - '#008080', // Last + 2 - '#800080', // Last + 3 - '#ff0000', // Last + 4 - '#0000ff', // Last + 5 - '#008000' // Last + 6 + '#000000', // 0 + '#ffffff', // 1 + '#4c4c4c', // 2 + '#808080', // 3 + '#999999', // 4 + '#c0c0c0', // 5 + '#cccccc', // 6 + '#e5e5e5', // 7 + '#f2f2f2', // 8 + '#008000', // 9 + '#00ff00', // 10 + '#bfffa0', // 11 + 
'#ffd629', // 12 + '#ff99cc', // 13 + '#004080', // 14 + '#9fc0e1', // 15 + '#5580ff', // 16 + '#a9c9fa', // 17 + '#ff0080', // 18 + '#800080', // 19 + '#ffbfff', // 20 + '#e45b21', // 21 + '#ffbfaa', // 22 + '#008080', // 23 + '#ff0000', // 24 + '#fdc59f', // 25 + '#808000', // 26 + '#bfbf00', // 27 + '#824100', // 28 + '#007256', // 29 + '#008000', // 30 + '#000080', // Last + 1 + '#008080', // Last + 2 + '#800080', // Last + 3 + '#ff0000', // Last + 4 + '#0000ff', // Last + 5 + '#008000', // Last + 6 ]; const kFontFaces = [ - "quicktype,arial,helvetica,sans-serif", // 00 - QuickType - sans-serif variable font - "quicktype condensed,arial narrow,arial,helvetica,sans-serif", // 01 - QuickType Condensed - thin sans-serif variable font - "quicktypepi,quicktypeiipi", // 02 - QuickType Pi - "quicktype mono,courier new,courier,monospace", // 03 - QuickType Mono - san-serif fixed font - "ocr-a,courier new,courier,monospace", // 04 - OCR-A - OCR readable san-serif fixed font - "ocr b mt,courier new,courier,monospace" // 05 - OCR-B MT - OCR readable san-serif fixed font - ]; - - const kFontStyles = [ - // Face Size Bold Italic StyleID(Comment) - // ----- ---- ---- ----- ----------------- - [0, 6, 0, 0], //00 - [0, 8, 0, 0], //01 - [0, 10, 0, 0], //02 - [0, 12, 0, 0], //03 - [0, 14, 0, 0], //04 - [0, 18, 0, 0], //05 - [0, 6, 1, 0], //06 - [0, 8, 1, 0], //07 - [0, 10, 1, 0], //08 - [0, 12, 1, 0], //09 - [0, 14, 1, 0], //10 - [0, 18, 1, 0], //11 - [0, 6, 0, 1], //12 - [0, 8, 0, 1], //13 - [0, 10, 0, 1], //14 - [0, 12, 0, 1], //15 - [0, 14, 0, 1], //16 - [0, 18, 0, 1], //17 - [0, 6, 1, 1], //18 - [0, 8, 1, 1], //19 - [0, 10, 1, 1], //20 - [0, 12, 1, 1], //21 - [0, 14, 1, 1], //22 - [0, 18, 1, 1], //23 - [1, 6, 0, 0], //24 - [1, 8, 0, 0], //25 - [1, 10, 0, 0], //26 - [1, 12, 0, 0], //27 - [1, 14, 0, 0], //28 - [1, 18, 0, 0], //29 - [1, 6, 1, 0], //30 - [1, 8, 1, 0], //31 - [1, 10, 1, 0], //32 - [1, 12, 1, 0], //33 - [1, 14, 1, 0], //34 - [1, 18, 1, 0], //35 - [1, 6, 0, 1], 
//36 - [1, 8, 0, 1], //37 - [1, 10, 0, 1], //38 - [1, 12, 0, 1], //39 - [1, 14, 0, 1], //40 - [1, 18, 0, 1], //41 - [2, 8, 0, 0], //42 - [2, 10, 0, 0], //43 - [2, 12, 0, 0], //44 - [2, 14, 0, 0], //45 - [2, 18, 0, 0], //46 - [3, 8, 0, 0], //47 - [3, 10, 0, 0], //48 - [3, 12, 0, 0], //49 - [4, 12, 0, 0], //50 - [0, 9, 0, 0], //51 - [0, 9, 1, 0], //52 - [0, 9, 0, 1], //53 - [0, 9, 1, 1], //54 - [1, 9, 0, 0], //55 - [1, 9, 1, 0], //56 - [1, 9, 1, 1], //57 - [4, 10, 0, 0], //58 - [5, 10, 0, 0], //59 - [5, 12, 0, 0] //60 + 'quicktype,arial,helvetica,sans-serif', // 00 - QuickType - sans-serif variable font + 'quicktype condensed,arial narrow,arial,helvetica,sans-serif', // 01 - QuickType Condensed - thin sans-serif variable font + 'quicktypepi,quicktypeiipi', // 02 - QuickType Pi + 'quicktype mono,courier new,courier,monospace', // 03 - QuickType Mono - san-serif fixed font + 'ocr-a,courier new,courier,monospace', // 04 - OCR-A - OCR readable san-serif fixed font + 'ocr b mt,courier new,courier,monospace', // 05 - OCR-B MT - OCR readable san-serif fixed font +]; + +const kFontStyles = [ + // Face Size Bold Italic StyleID(Comment) + // ----- ---- ---- ----- ----------------- + [0, 6, 0, 0], //00 + [0, 8, 0, 0], //01 + [0, 10, 0, 0], //02 + [0, 12, 0, 0], //03 + [0, 14, 0, 0], //04 + [0, 18, 0, 0], //05 + [0, 6, 1, 0], //06 + [0, 8, 1, 0], //07 + [0, 10, 1, 0], //08 + [0, 12, 1, 0], //09 + [0, 14, 1, 0], //10 + [0, 18, 1, 0], //11 + [0, 6, 0, 1], //12 + [0, 8, 0, 1], //13 + [0, 10, 0, 1], //14 + [0, 12, 0, 1], //15 + [0, 14, 0, 1], //16 + [0, 18, 0, 1], //17 + [0, 6, 1, 1], //18 + [0, 8, 1, 1], //19 + [0, 10, 1, 1], //20 + [0, 12, 1, 1], //21 + [0, 14, 1, 1], //22 + [0, 18, 1, 1], //23 + [1, 6, 0, 0], //24 + [1, 8, 0, 0], //25 + [1, 10, 0, 0], //26 + [1, 12, 0, 0], //27 + [1, 14, 0, 0], //28 + [1, 18, 0, 0], //29 + [1, 6, 1, 0], //30 + [1, 8, 1, 0], //31 + [1, 10, 1, 0], //32 + [1, 12, 1, 0], //33 + [1, 14, 1, 0], //34 + [1, 18, 1, 0], //35 + [1, 6, 0, 1], //36 + [1, 8, 
0, 1], //37 + [1, 10, 0, 1], //38 + [1, 12, 0, 1], //39 + [1, 14, 0, 1], //40 + [1, 18, 0, 1], //41 + [2, 8, 0, 0], //42 + [2, 10, 0, 0], //43 + [2, 12, 0, 0], //44 + [2, 14, 0, 0], //45 + [2, 18, 0, 0], //46 + [3, 8, 0, 0], //47 + [3, 10, 0, 0], //48 + [3, 12, 0, 0], //49 + [4, 12, 0, 0], //50 + [0, 9, 0, 0], //51 + [0, 9, 1, 0], //52 + [0, 9, 0, 1], //53 + [0, 9, 1, 1], //54 + [1, 9, 0, 0], //55 + [1, 9, 1, 0], //56 + [1, 9, 1, 1], //57 + [4, 10, 0, 0], //58 + [5, 10, 0, 0], //59 + [5, 12, 0, 0], //60 ]; const dpi = 96.0; const gridXPerInch = 4.0; const gridYPerInch = 4.0; -const _pixelXPerGrid = dpi/gridXPerInch; -const _pixelYPerGrid = dpi/gridYPerInch; -const _pixelPerPoint = dpi/72; - +const _pixelXPerGrid = dpi / gridXPerInch; +const _pixelYPerGrid = dpi / gridYPerInch; +const _pixelPerPoint = dpi / 72; + class PDFUnit { - static toFixedFloat(fNum) { - return parseFloat(fNum.toFixed(3)); - } - - static colorCount() { - return kColors.length; - } - - static toPixelX(formX) { - return Math.round(formX * _pixelXPerGrid); - } - - static toPixelY(formY) { - return Math.round(formY * _pixelYPerGrid); - } - - static pointToPixel(point) {// Point unit (1/72 an inch) to pixel units - return point * _pixelPerPoint; - } - - static getColorByIndex(clrId) { - return kColors[clrId]; - } - - static toFormPoint(viewportX, viewportY) { - return [(viewportX / _pixelXPerGrid), (viewportY / _pixelYPerGrid)]; - } - - static toFormX(viewportX) { - return PDFUnit.toFixedFloat(viewportX / _pixelXPerGrid); - } - - static toFormY(viewportY) { - return PDFUnit.toFixedFloat(viewportY / _pixelYPerGrid); - } - - static findColorIndex(color) { - if (color.length === 4) - color += "000"; - //MQZ. 07/29/2013: if color is not in dictionary, just return -1. 
The caller (pdffont, pdffill) will set the actual color - return kColors.indexOf(color); - } - - static dateToIso8601(date) { - // PDF spec p.160 - if (date.slice(0, 2) === 'D:') { // D: prefix is optional - date = date.slice(2); - } - let tz = 'Z'; - let idx = date.search(/[Z+-]/); // timezone is optional - if (idx >= 0) { - tz = date.slice(idx); - if (tz !== 'Z') { // timezone format OHH'mm' - tz = tz.slice(0, 3) + ':' + tz.slice(4, 6); - } - date = date.slice(0, idx); - } - let yr = date.slice(0, 4); // everything after year is optional - let mth = date.slice(4, 6) || '01'; - let day = date.slice(6, 8) || '01'; - let hr = date.slice(8, 10) || '00'; - let min = date.slice(10, 12) || '00'; - let sec = date.slice(12, 14) || '00'; - return yr + '-' + mth + '-' + day + 'T' + hr + ':' + min + ':' + sec + tz; - } + static toFixedFloat(fNum) { + return parseFloat(fNum.toFixed(3)); + } + + static colorCount() { + return kColors.length; + } + + static toPixelX(formX) { + return Math.round(formX * _pixelXPerGrid); + } + + static toPixelY(formY) { + return Math.round(formY * _pixelYPerGrid); + } + + static pointToPixel(point) { + // Point unit (1/72 an inch) to pixel units + return point * _pixelPerPoint; + } + + static getColorByIndex(clrId) { + return kColors[clrId]; + } + + static toFormPoint(viewportX, viewportY) { + return [viewportX / _pixelXPerGrid, viewportY / _pixelYPerGrid]; + } + + static toFormX(viewportX) { + return PDFUnit.toFixedFloat(viewportX / _pixelXPerGrid); + } + + static toFormY(viewportY) { + return PDFUnit.toFixedFloat(viewportY / _pixelYPerGrid); + } + + static findColorIndex(color) { + if (color.length === 4) color += '000'; + //MQZ. 07/29/2013: if color is not in dictionary, just return -1. 
The caller (pdffont, pdffill) will set the actual color + return kColors.indexOf(color); + } + + static dateToIso8601(date) { + // PDF spec p.160 + if (date.slice(0, 2) === 'D:') { + // D: prefix is optional + date = date.slice(2); + } + let tz = 'Z'; + let idx = date.search(/[Z+-]/); // timezone is optional + if (idx >= 0) { + tz = date.slice(idx); + if (tz !== 'Z') { + // timezone format OHH'mm' + tz = tz.slice(0, 3) + ':' + tz.slice(4, 6); + } + date = date.slice(0, idx); + } + let yr = date.slice(0, 4); // everything after year is optional + let mth = date.slice(4, 6) || '01'; + let day = date.slice(6, 8) || '01'; + let hr = date.slice(8, 10) || '00'; + let min = date.slice(10, 12) || '00'; + let sec = date.slice(12, 14) || '00'; + return yr + '-' + mth + '-' + day + 'T' + hr + ':' + min + ':' + sec + tz; + } } class PDFLine { - constructor(x1, y1, x2, y2, lineWidth, color, dashed) { - this.x1 = x1; - this.y1 = y1; - this.x2 = x2; - this.y2 = y2; - this.lineWidth = lineWidth || 1.0; - this.color = color; - this.dashed = dashed; - } - - #setStartPoint(oneLine, x, y) { - oneLine.x = PDFUnit.toFormX(x); - oneLine.y = PDFUnit.toFormY(y); - } - - processLine(targetData) { - const xDelta = Math.abs(this.x2 - this.x1); - const yDelta = Math.abs(this.y2 - this.y1); - const minDelta = this.lineWidth; - - let oneLine = {x:0, y:0, w: PDFUnit.toFixedFloat(this.lineWidth), l:0}; - - //MQZ Aug.28.2013, adding color support, using color dictionary and default to black - const clrId = PDFUnit.findColorIndex(this.color); - const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? 
{clr: clrId} : {oc: this.color}; - oneLine = {...oneLine, ...colorObj}; - - //MQZ Aug.29 dashed line support - if (this.dashed) { - oneLine = oneLine = {...oneLine, dsh: 1}; - } - - if ((yDelta < this.lineWidth) && (xDelta > minDelta)) { //HLine - if (this.lineWidth < 4 && (xDelta / this.lineWidth < 4)) { - nodeUtil.p2jinfo("Skipped: short thick HLine: lineWidth = " + this.lineWidth + ", xDelta = " + xDelta); - return; //skip short thick lines, like PA SPP lines behinds checkbox - } - - oneLine.l = PDFUnit.toFormX(xDelta); - if (this.x1 > this.x2) - this.#setStartPoint(oneLine, this.x2, this.y2); - else - this.#setStartPoint(oneLine, this.x1, this.y1); - targetData.HLines.push(oneLine); - } - else if ((xDelta < this.lineWidth) && (yDelta > minDelta)) {//VLine - if (this.lineWidth < 4 && (yDelta / this.lineWidth < 4)) { - nodeUtil.p2jinfo("Skipped: short thick VLine: lineWidth = " + this.lineWidth + ", yDelta = " + yDelta); - return; //skip short think lines, like PA SPP lines behinds checkbox - } - - oneLine.l = PDFUnit.toFormY(yDelta); - if (this.y1 > this.y2) - this.#setStartPoint(oneLine, this.x2, this.y2); - else - this.#setStartPoint(oneLine, this.x1, this.y1); - targetData.VLines.push(oneLine); - } - } -} - -class PDFFill{ - // constructor - constructor(x, y, width, height, color) { - this.x = x; - this.y = y; - this.width = width; - this.height = height; - this.color = color; - } - - processFill(targetData) { - //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) - const clrId = PDFUnit.findColorIndex(this.color); - const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? 
{clr: clrId} : {oc: this.color}; - - const oneFill = {x:PDFUnit.toFormX(this.x), - y:PDFUnit.toFormY(this.y), - w:PDFUnit.toFormX(this.width), - h:PDFUnit.toFormY(this.height), - ...colorObj}; - - - if (oneFill.w < 2 && oneFill.h < 2) { - nodeUtil.p2jinfo("Skipped: tiny fill: " + oneFill.w + " x " + oneFill.h); + constructor(x1, y1, x2, y2, lineWidth, color, dashed) { + this.x1 = x1; + this.y1 = y1; + this.x2 = x2; + this.y2 = y2; + this.lineWidth = lineWidth || 1.0; + this.color = color; + this.dashed = dashed; + } + + #setStartPoint(oneLine, x, y) { + oneLine.x = PDFUnit.toFormX(x); + oneLine.y = PDFUnit.toFormY(y); + } + + processLine(targetData) { + const xDelta = Math.abs(this.x2 - this.x1); + const yDelta = Math.abs(this.y2 - this.y1); + const minDelta = this.lineWidth; + + let oneLine = { + x: 0, + y: 0, + w: PDFUnit.toFixedFloat(this.lineWidth), + l: 0, + }; + + //MQZ Aug.28.2013, adding color support, using color dictionary and default to black + const clrId = PDFUnit.findColorIndex(this.color); + const colorObj = + clrId > 0 && clrId < PDFUnit.colorCount() + ? 
{ clr: clrId } + : { oc: this.color }; + oneLine = { ...oneLine, ...colorObj }; + + //MQZ Aug.29 dashed line support + if (this.dashed) { + oneLine = oneLine = { ...oneLine, dsh: 1 }; + } + + if (yDelta < this.lineWidth && xDelta > minDelta) { + //HLine + if (this.lineWidth < 4 && xDelta / this.lineWidth < 4) { + nodeUtil.p2jinfo( + 'Skipped: short thick HLine: lineWidth = ' + + this.lineWidth + + ', xDelta = ' + + xDelta + ); return; //skip short thick lines, like PA SPP lines behinds checkbox - } + } + + oneLine.l = PDFUnit.toFormX(xDelta); + if (this.x1 > this.x2) this.#setStartPoint(oneLine, this.x2, this.y2); + else this.#setStartPoint(oneLine, this.x1, this.y1); + targetData.HLines.push(oneLine); + } else if (xDelta < this.lineWidth && yDelta > minDelta) { + //VLine + if (this.lineWidth < 4 && yDelta / this.lineWidth < 4) { + nodeUtil.p2jinfo( + 'Skipped: short thick VLine: lineWidth = ' + + this.lineWidth + + ', yDelta = ' + + yDelta + ); + return; //skip short think lines, like PA SPP lines behinds checkbox + } + + oneLine.l = PDFUnit.toFormY(yDelta); + if (this.y1 > this.y2) this.#setStartPoint(oneLine, this.x2, this.y2); + else this.#setStartPoint(oneLine, this.x1, this.y1); + targetData.VLines.push(oneLine); + } + } +} - targetData.Fills.push(oneFill); - } +class PDFFill { + // constructor + constructor(x, y, width, height, color) { + this.x = x; + this.y = y; + this.width = width; + this.height = height; + this.color = color; + } + + processFill(targetData) { + //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) + const clrId = PDFUnit.findColorIndex(this.color); + const colorObj = + clrId > 0 && clrId < PDFUnit.colorCount() + ? 
{ clr: clrId } + : { oc: this.color }; + + const oneFill = { + x: PDFUnit.toFormX(this.x), + y: PDFUnit.toFormY(this.y), + w: PDFUnit.toFormX(this.width), + h: PDFUnit.toFormY(this.height), + ...colorObj, + }; + + if (oneFill.w < 2 && oneFill.h < 2) { + nodeUtil.p2jinfo( + 'Skipped: tiny fill: ' + oneFill.w + ' x ' + oneFill.h + ); + return; //skip short thick lines, like PA SPP lines behinds checkbox + } + + targetData.Fills.push(oneFill); + } } -const _boldSubNames = ["bd", "bold", "demi", "black"]; -const _stdFonts = ["arial", "helvetica", "sans-serif ", "courier ","monospace ", "ocr "]; +const _boldSubNames = ['bd', 'bold', 'demi', 'black']; +const _stdFonts = [ + 'arial', + 'helvetica', + 'sans-serif ', + 'courier ', + 'monospace ', + 'ocr ', +]; const DISTANCE_DELTA = 0.1; class PDFFont { - #initTypeName() { - let typeName = (this.fontObj.name || this.fontObj.fallbackName); - if (!typeName) { - typeName = kFontFaces[0]; //default font family name - } - typeName = typeName.toLowerCase(); - return typeName; - } - - #initSubType() { - let subType = this.typeName; - let bold = false; - - let nameArray = this.typeName.split('+'); - if (Array.isArray(nameArray) && nameArray.length > 1) { - subType = nameArray[1].split("-"); - if (Array.isArray(subType) && subType.length > 1) { - let subName = subType[1].toLowerCase(); - bold = _boldSubNames.indexOf(subName) >= 0; - subType = subType[0]; - } - } - return {subType, bold}; - } - - #initSymbol() { - let isSymbol = this.typeName.indexOf("symbol") > 0 || kFontFaces[2].indexOf(this.subType) >= 0; - if (this.fontObj.isSymbolicFont) { - let mFonts = _stdFonts.filter( (oneName) => (this.typeName.indexOf(oneName) >= 0) ); - - if (mFonts.length > 0) { - this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) 
reset the flag for now - nodeUtil.p2jinfo("Reset: isSymbolicFont (false) for " + this.fontObj.name); - } - } - else { - if (isSymbol) { - this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c - nodeUtil.p2jinfo("Reset: isSymbolicFont (true) for " + this.fontObj.name); - } - } - return isSymbol; - } - - #initSpaceWidth() { - let spaceWidth = this.fontObj.spaceWidth; - if (!spaceWidth) { - var spaceId = Array.isArray(this.fontObj.toFontChar) ? this.fontObj.toFontChar.indexOf(32) : -1; - spaceWidth = (spaceId >= 0 && Array.isArray(this.fontObj.widths)) ? this.fontObj.widths[spaceId] : 250; - } - spaceWidth = PDFUnit.toFormX(spaceWidth) / 32; - return spaceWidth; - } - - // constructor - constructor(fontObj) { - this.fontObj = fontObj; - - this.typeName = this.#initTypeName(); - - const {subType, bold} = this.#initSubType(); - this.subType = subType; - this.bold = bold; - - this.isSymbol = this.#initSymbol(); - this.spaceWidth = this.#initSpaceWidth(); - - this.fontSize = 1; - this.faceIdx = 0; - this.italic = false; - this.fontStyleId = -1; - } - - /** sort text blocks by y then x */ - static compareBlockPos(t1, t2) { - if (t1.y < t2.y - DISTANCE_DELTA) { + #initTypeName() { + let typeName = this.fontObj.name || this.fontObj.fallbackName; + if (!typeName) { + typeName = kFontFaces[0]; //default font family name + } + typeName = typeName.toLowerCase(); + return typeName; + } + + #initSubType() { + let subType = this.typeName; + let bold = false; + + let nameArray = this.typeName.split('+'); + if (Array.isArray(nameArray) && nameArray.length > 1) { + subType = nameArray[1].split('-'); + if (Array.isArray(subType) && subType.length > 1) { + let subName = subType[1].toLowerCase(); + bold = _boldSubNames.indexOf(subName) >= 0; + subType = subType[0]; + } + } + return { subType, bold }; + } + + #initSymbol() { + let isSymbol = + this.typeName.indexOf('symbol') > 0 || + kFontFaces[2].indexOf(this.subType) >= 0; + if (this.fontObj.isSymbolicFont) { + let mFonts = 
_stdFonts.filter( + (oneName) => this.typeName.indexOf(oneName) >= 0 + ); + + if (mFonts.length > 0) { + this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) reset the flag for now + nodeUtil.p2jinfo( + 'Reset: isSymbolicFont (false) for ' + this.fontObj.name + ); + } + } else { + if (isSymbol) { + this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c + nodeUtil.p2jinfo( + 'Reset: isSymbolicFont (true) for ' + this.fontObj.name + ); + } + } + return isSymbol; + } + + #initSpaceWidth() { + let spaceWidth = this.fontObj.spaceWidth; + if (!spaceWidth) { + var spaceId = Array.isArray(this.fontObj.toFontChar) + ? this.fontObj.toFontChar.indexOf(32) + : -1; + spaceWidth = + spaceId >= 0 && Array.isArray(this.fontObj.widths) + ? this.fontObj.widths[spaceId] + : 250; + } + spaceWidth = PDFUnit.toFormX(spaceWidth) / 32; + return spaceWidth; + } + + // constructor + constructor(fontObj) { + this.fontObj = fontObj; + + this.typeName = this.#initTypeName(); + + const { subType, bold } = this.#initSubType(); + this.subType = subType; + this.bold = bold; + + this.isSymbol = this.#initSymbol(); + this.spaceWidth = this.#initSpaceWidth(); + + this.fontSize = 1; + this.faceIdx = 0; + this.italic = false; + this.fontStyleId = -1; + } + + /** sort text blocks by y then x */ + static compareBlockPos(t1, t2) { + if (t1.y < t2.y - DISTANCE_DELTA) { + return -1; + } + if (Math.abs(t1.y - t2.y) <= DISTANCE_DELTA) { + if (t1.x < t2.x - DISTANCE_DELTA) { return -1; - } - if (Math.abs(t1.y - t2.y) <= DISTANCE_DELTA) { - if (t1.x < t2.x - DISTANCE_DELTA) { - return -1; - } - if (Math.abs(t1.x - t2.x) <= DISTANCE_DELTA) { - return 0; - } - } - return 1; - } - - static haveSameStyle(t1, t2) { - let retVal = t1.R[0].S === t2.R[0].S; - if (retVal && t1.R[0].S < 0) { - for (let i = 0; i < t1.R[0].TS.length; i++) { - if (t1.R[0].TS[i] !== t2.R[0].TS[i]) { - retVal = false; - break; - } - } - } - if (retVal) { // make sure both 
block are not rotated - retVal = (typeof t1.R[0].RA === 'undefined') && (typeof t2.R[0].RA === 'undefined'); - } - - return retVal; - } - - static getSpaceThreshHold(t1) { - return (PDFFont.getFontSize(t1)/12) * t1.sw; - } - - static areAdjacentBlocks(t1, t2) { - const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; - const isDistanceSmallerThanASpace = ((t2.x - t1.x - t1.w) < PDFFont.getSpaceThreshHold(t1)); - - return isInSameLine && isDistanceSmallerThanASpace; - } - - static getFontSize(textBlock) { - const sId = textBlock.R[0].S; - return (sId < 0) ? textBlock.R[0].TS[1] : kFontStyles[sId][1]; - } - - static areDuplicateBlocks(t1, t2) { - return t1.x == t2.x && t1.y == t2.y && t1.R[0].T == t2.R[0].T && PDFFont.haveSameStyle(t1, t2); - } - - // private - #setFaceIndex() { - const fontObj = this.fontObj; - - this.bold = fontObj.bold; - if (!this.bold) { - this.bold = this.typeName.indexOf("bold") >= 0 || this.typeName.indexOf("black") >= 0; - } - this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42 - // Extended the fix for https://github.com/modesty/pdf2json/issues/42 - if (!this.italic) { - this.italic = this.typeName.indexOf("italic") >= 0 || this.typeName.indexOf("oblique") >= 0; - } - // Added detection of hybrid dual bolditalic fonts - if (((!this.bold) || (!this.italic)) && (this.typeName.indexOf("boldobl") >= 0)) { - this.bold = true; - this.italic = true; - } - - let typeName = this.subType; - if (fontObj.isSerifFont) { - if (kFontFaces[1].indexOf(typeName) >= 0) - this.faceIdx = 1; - } - else if (kFontFaces[2].indexOf(this.subType) >= 0) { - this.faceIdx = 2; - } - else if (fontObj.isMonospace) { - this.faceIdx = 3; - - if (kFontFaces[4].indexOf(typeName) >= 0) - this.faceIdx = 4; - else if (kFontFaces[5].indexOf(typeName) >= 0) - this.faceIdx = 5; - } - else if (fontObj.isSymbolicFont) { - this.faceIdx = 2; - } - - if (this.faceIdx == 0) { - if (this.typeName.indexOf("narrow") > 0) - this.faceIdx = 1; - } - -// 
nodeUtil.p2jinfo"typeName = " + typeName + " => faceIdx = " + this.faceIdx); - } - - #getFontStyleIndex(fontSize) { - this.#setFaceIndex(); - - //MQZ Feb.28.2013. Adjust bold text fontsize to work around word spacing issue - this.fontSize = (this.bold && (fontSize > 12)) ? fontSize + 1 : fontSize; - - let fsa = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; - let retVal = -1; - - kFontStyles.forEach(function(element, index, list){ - if (retVal === -1) { - if (element[0] === fsa[0] && element[1] === fsa[1] && - element[2] === fsa[2] && element[3] === fsa[3]) { - retVal = index; - } - } - }); - - return retVal; - } - - #processSymbolicFont(str) { - let retVal = str; - - if (!str || str.length !== 1) - return retVal; - - if (!this.fontObj.isSymbolicFont || !this.isSymbol) { - if (retVal == "C" || retVal == "G") { //prevent symbolic encoding from the client - retVal = " " + retVal + " "; //sample: va_ind_760c - } - return retVal; - } - - switch(str.charCodeAt(0)) { - case 20: retVal = '\u2713'; break; //check mark - case 70: retVal = (this.fontObj.type === "CIDFontType0") ? '\u26A0' : '\u007D'; break; //exclaimation in triangle OR right curly bracket - case 71: retVal = '\u25b6'; break; //right triangle - case 97: retVal = '\u25b6'; break; //right triangle - case 99: retVal = this.isSymbol ? '\u2022' : '\u25b2'; break; //up triangle. set to Bullet Dot for VA SchSCR - case 100: retVal = '\u25bc'; break; //down triangle - case 103: retVal = '\u27A8'; break; //right arrow. 
sample: va_ind_760pff and pmt - case 106: retVal = ''; break; //VA 301: string j character by the checkbox, hide it for now - case 114: retVal = '\u2022'; break; //Bullet dot - case 115: retVal = '\u25b2'; break; //up triangle - case 116: retVal = '\u2022'; break; //Bullet dot - case 118: retVal = '\u2022'; break; //Bullet dot - default: - nodeUtil.p2jinfo(this.fontObj.type + " - SymbolicFont - (" + this.fontObj.name + ") : " + - str.charCodeAt(0) + "::" + str.charCodeAt(1) + " => " + retVal); - } - - return retVal; - } - - #textRotationAngle(matrix2D) { - let retVal = 0; - if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) { - if (matrix2D[0][1] != 0 && matrix2D[1][0] != 0) { - if ((matrix2D[0][1] / matrix2D[1][0]) + 1 < 0.0001) - retVal = 90; + } + if (Math.abs(t1.x - t2.x) <= DISTANCE_DELTA) { + return 0; + } + } + return 1; + } + + static haveSameStyle(t1, t2) { + let retVal = t1.R[0].S === t2.R[0].S; + if (retVal && t1.R[0].S < 0) { + for (let i = 0; i < t1.R[0].TS.length; i++) { + if (t1.R[0].TS[i] !== t2.R[0].TS[i]) { + retVal = false; + break; } - } - else if (matrix2D[0][0] !== 0 && matrix2D[1][1] !== 0) { - let r1 = Math.atan(-matrix2D[0][1] / matrix2D[0][0]); - let r2 = Math.atan(matrix2D[1][0] / matrix2D[1][1]); - if (Math.abs(r1) > 0.0001 && (r1 - r2 < 0.0001)) { - retVal = r1 * 180 / Math.PI; + } + } + if (retVal) { + // make sure both block are not rotated + retVal = + typeof t1.R[0].RA === 'undefined' && + typeof t2.R[0].RA === 'undefined'; + } + + return retVal; + } + + static getSpaceThreshHold(t1) { + return (PDFFont.getFontSize(t1) / 12) * t1.sw; + } + + static areAdjacentBlocks(t1, t2) { + const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; + const isDistanceSmallerThanASpace = + t2.x - t1.x - t1.w < PDFFont.getSpaceThreshHold(t1); + + return isInSameLine && isDistanceSmallerThanASpace; + } + + static getFontSize(textBlock) { + const sId = textBlock.R[0].S; + return sId < 0 ? 
textBlock.R[0].TS[1] : kFontStyles[sId][1]; + } + + static areDuplicateBlocks(t1, t2) { + return ( + t1.x == t2.x && + t1.y == t2.y && + t1.R[0].T == t2.R[0].T && + PDFFont.haveSameStyle(t1, t2) + ); + } + + // private + #setFaceIndex() { + const fontObj = this.fontObj; + + this.bold = fontObj.bold; + if (!this.bold) { + this.bold = + this.typeName.indexOf('bold') >= 0 || + this.typeName.indexOf('black') >= 0; + } + this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42 + // Extended the fix for https://github.com/modesty/pdf2json/issues/42 + if (!this.italic) { + this.italic = + this.typeName.indexOf('italic') >= 0 || + this.typeName.indexOf('oblique') >= 0; + } + // Added detection of hybrid dual bolditalic fonts + if ( + (!this.bold || !this.italic) && + this.typeName.indexOf('boldobl') >= 0 + ) { + this.bold = true; + this.italic = true; + } + + let typeName = this.subType; + if (fontObj.isSerifFont) { + if (kFontFaces[1].indexOf(typeName) >= 0) this.faceIdx = 1; + } else if (kFontFaces[2].indexOf(this.subType) >= 0) { + this.faceIdx = 2; + } else if (fontObj.isMonospace) { + this.faceIdx = 3; + + if (kFontFaces[4].indexOf(typeName) >= 0) this.faceIdx = 4; + else if (kFontFaces[5].indexOf(typeName) >= 0) this.faceIdx = 5; + } else if (fontObj.isSymbolicFont) { + this.faceIdx = 2; + } + + if (this.faceIdx == 0) { + if (this.typeName.indexOf('narrow') > 0) this.faceIdx = 1; + } + + // nodeUtil.p2jinfo"typeName = " + typeName + " => faceIdx = " + this.faceIdx); + } + + #getFontStyleIndex(fontSize) { + this.#setFaceIndex(); + + //MQZ Feb.28.2013. Adjust bold text fontsize to work around word spacing issue + this.fontSize = this.bold && fontSize > 12 ? fontSize + 1 : fontSize; + + let fsa = [ + this.faceIdx, + this.fontSize, + this.bold ? 1 : 0, + this.italic ? 
1 : 0, + ]; + let retVal = -1; + + kFontStyles.forEach(function (element, index, list) { + if (retVal === -1) { + if ( + element[0] === fsa[0] && + element[1] === fsa[1] && + element[2] === fsa[2] && + element[3] === fsa[3] + ) { + retVal = index; } - } - return retVal; - } - - // public instance methods - processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) { - const text = this.#processSymbolicFont(str); - if (!text) { - return; - } - this.fontStyleId = this.#getFontStyleIndex(fontSize); - - // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary - // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; - const TS = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; - - const clrId = PDFUnit.findColorIndex(color); - const colorObj = (clrId >= 0 && clrId < PDFUnit.colorCount()) ? {clr: clrId} : {oc: color}; - - let textRun = { - T: this.flash_encode(text), - S: this.fontStyleId, - TS: TS - }; - const rAngle = this.#textRotationAngle(matrix2D); - if (rAngle != 0) { - nodeUtil.p2jinfo(str + ": rotated " + rAngle + " degree."); - textRun = {...textRun, RA: rAngle}; - } - - const oneText = {x: PDFUnit.toFormX(p.x) - 0.25, - y: PDFUnit.toFormY(p.y) - 0.75, - w: PDFUnit.toFixedFloat(maxWidth), - ...colorObj, //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) - sw: this.spaceWidth, //font space width, use to merge adjacent text blocks - A: "left", - R: [textRun] - }; - - targetData.Texts.push(oneText); - } - - flash_encode(str) { - let retVal = encodeURIComponent(str); - retVal = retVal.replace("%C2%96", "-"); - retVal = retVal.replace("%C2%91", "%27"); - retVal = retVal.replace("%C2%92", "%27"); - retVal = retVal.replace("%C2%82", "%27"); - retVal = retVal.replace("%C2%93", "%22"); - retVal = retVal.replace("%C2%94", "%22"); - retVal = retVal.replace("%C2%84", "%22"); - retVal = retVal.replace("%C2%8B", "%C2%AB"); - retVal = 
retVal.replace("%C2%9B", "%C2%BB"); - - return retVal; - } - - clean() { - this.fontObj = null; - delete this.fontObj; - } + } + }); + + return retVal; + } + + #processSymbolicFont(str) { + let retVal = str; + + if (!str || str.length !== 1) return retVal; + + if (!this.fontObj.isSymbolicFont || !this.isSymbol) { + if (retVal == 'C' || retVal == 'G') { + //prevent symbolic encoding from the client + retVal = ' ' + retVal + ' '; //sample: va_ind_760c + } + return retVal; + } + + switch (str.charCodeAt(0)) { + case 20: + retVal = '\u2713'; + break; //check mark + case 70: + retVal = this.fontObj.type === 'CIDFontType0' ? '\u26A0' : '\u007D'; + break; //exclaimation in triangle OR right curly bracket + case 71: + retVal = '\u25b6'; + break; //right triangle + case 97: + retVal = '\u25b6'; + break; //right triangle + case 99: + retVal = this.isSymbol ? '\u2022' : '\u25b2'; + break; //up triangle. set to Bullet Dot for VA SchSCR + case 100: + retVal = '\u25bc'; + break; //down triangle + case 103: + retVal = '\u27A8'; + break; //right arrow. 
sample: va_ind_760pff and pmt + case 106: + retVal = ''; + break; //VA 301: string j character by the checkbox, hide it for now + case 114: + retVal = '\u2022'; + break; //Bullet dot + case 115: + retVal = '\u25b2'; + break; //up triangle + case 116: + retVal = '\u2022'; + break; //Bullet dot + case 118: + retVal = '\u2022'; + break; //Bullet dot + default: + nodeUtil.p2jinfo( + this.fontObj.type + + ' - SymbolicFont - (' + + this.fontObj.name + + ') : ' + + str.charCodeAt(0) + + '::' + + str.charCodeAt(1) + + ' => ' + + retVal + ); + } + + return retVal; + } + + #textRotationAngle(matrix2D) { + let retVal = 0; + if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) { + if (matrix2D[0][1] != 0 && matrix2D[1][0] != 0) { + if (matrix2D[0][1] / matrix2D[1][0] + 1 < 0.0001) retVal = 90; + } + } else if (matrix2D[0][0] !== 0 && matrix2D[1][1] !== 0) { + let r1 = Math.atan(-matrix2D[0][1] / matrix2D[0][0]); + let r2 = Math.atan(matrix2D[1][0] / matrix2D[1][1]); + if (Math.abs(r1) > 0.0001 && r1 - r2 < 0.0001) { + retVal = (r1 * 180) / Math.PI; + } + } + return retVal; + } + + // public instance methods + processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) { + const text = this.#processSymbolicFont(str); + if (!text) { + return; + } + this.fontStyleId = this.#getFontStyleIndex(fontSize); + + // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary + // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; + const TS = [ + this.faceIdx, + this.fontSize, + this.bold ? 1 : 0, + this.italic ? 1 : 0, + ]; + + const clrId = PDFUnit.findColorIndex(color); + const colorObj = + clrId >= 0 && clrId < PDFUnit.colorCount() + ? 
{ clr: clrId } + : { oc: color }; + + let textRun = { + T: this.flash_encode(text), + S: this.fontStyleId, + TS: TS, + }; + const rAngle = this.#textRotationAngle(matrix2D); + if (rAngle != 0) { + nodeUtil.p2jinfo(str + ': rotated ' + rAngle + ' degree.'); + textRun = { ...textRun, RA: rAngle }; + } + + const oneText = { + x: PDFUnit.toFormX(p.x) - 0.25, + y: PDFUnit.toFormY(p.y) - 0.75, + w: PDFUnit.toFixedFloat(maxWidth), + ...colorObj, //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) + sw: this.spaceWidth, //font space width, use to merge adjacent text blocks + A: 'left', + R: [textRun], + }; + + targetData.Texts.push(oneText); + } + + flash_encode(str) { + let retVal = encodeURIComponent(str); + retVal = retVal.replace('%C2%96', '-'); + retVal = retVal.replace('%C2%91', '%27'); + retVal = retVal.replace('%C2%92', '%27'); + retVal = retVal.replace('%C2%82', '%27'); + retVal = retVal.replace('%C2%93', '%22'); + retVal = retVal.replace('%C2%94', '%22'); + retVal = retVal.replace('%C2%84', '%22'); + retVal = retVal.replace('%C2%8B', '%C2%AB'); + retVal = retVal.replace('%C2%9B', '%C2%BB'); + + return retVal; + } + + clean() { + this.fontObj = null; + delete this.fontObj; + } } // alias some functions to make (compiled) code shorter -const {round: mr, sin: ms, cos: mc, abs, sqrt} = Math; +const { round: mr, sin: ms, cos: mc, abs, sqrt } = Math; // precompute "00" to "FF" const dec2hex = []; for (let i = 0; i < 16; i++) { - for (let j = 0; j < 16; j++) { - dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); - } + for (let j = 0; j < 16; j++) { + dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); + } } function createMatrixIdentity() { - return [ - [1, 0, 0], - [0, 1, 0], - [0, 0, 1] - ]; + return [ + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + ]; } function matrixMultiply(m1, m2) { - let result = createMatrixIdentity(); + let result = createMatrixIdentity(); - for (let x = 0; x < 3; x++) { - for (let y = 0; y < 3; y++) { - let 
sum = 0; + for (let x = 0; x < 3; x++) { + for (let y = 0; y < 3; y++) { + let sum = 0; - for (let z = 0; z < 3; z++) { - sum += m1[x][z] * m2[z][y]; - } + for (let z = 0; z < 3; z++) { + sum += m1[x][z] * m2[z][y]; + } - result[x][y] = sum; - } - } - return result; + result[x][y] = sum; + } + } + return result; } function copyState(o1, o2) { - o2.fillStyle = o1.fillStyle; - o2.lineCap = o1.lineCap; - o2.lineJoin = o1.lineJoin; - o2.lineWidth = o1.lineWidth; - o2.miterLimit = o1.miterLimit; - o2.shadowBlur = o1.shadowBlur; - o2.shadowColor = o1.shadowColor; - o2.shadowOffsetX = o1.shadowOffsetX; - o2.shadowOffsetY = o1.shadowOffsetY; - o2.strokeStyle = o1.strokeStyle; - o2.globalAlpha = o1.globalAlpha; - o2.arcScaleX_ = o1.arcScaleX_; - o2.arcScaleY_ = o1.arcScaleY_; - o2.lineScale_ = o1.lineScale_; - o2.dashArray = o1.dashArray; + o2.fillStyle = o1.fillStyle; + o2.lineCap = o1.lineCap; + o2.lineJoin = o1.lineJoin; + o2.lineWidth = o1.lineWidth; + o2.miterLimit = o1.miterLimit; + o2.shadowBlur = o1.shadowBlur; + o2.shadowColor = o1.shadowColor; + o2.shadowOffsetX = o1.shadowOffsetX; + o2.shadowOffsetY = o1.shadowOffsetY; + o2.strokeStyle = o1.strokeStyle; + o2.globalAlpha = o1.globalAlpha; + o2.arcScaleX_ = o1.arcScaleX_; + o2.arcScaleY_ = o1.arcScaleY_; + o2.lineScale_ = o1.lineScale_; + o2.dashArray = o1.dashArray; } function processStyle(styleString) { - let str, alpha = 1; - - styleString = String(styleString); - if (styleString.substring(0, 3) == 'rgb') { - let start = styleString.indexOf('(', 3); - let end = styleString.indexOf(')', start + 1); - let guts = styleString.substring(start + 1, end).split(','); - - str = '#'; - for (let i = 0; i < 3; i++) { - str += dec2hex[Number(guts[i])]; - } - - if (guts.length == 4 && styleString.substring(3, 4) == 'a') { - alpha = guts[3]; - } - } else { - str = styleString; - } - - return {color:str, alpha:alpha}; + let str, + alpha = 1; + + styleString = String(styleString); + if (styleString.substring(0, 3) == 'rgb') { + 
let start = styleString.indexOf('(', 3); + let end = styleString.indexOf(')', start + 1); + let guts = styleString.substring(start + 1, end).split(','); + + str = '#'; + for (let i = 0; i < 3; i++) { + str += dec2hex[Number(guts[i])]; + } + + if (guts.length == 4 && styleString.substring(3, 4) == 'a') { + alpha = guts[3]; + } + } else { + str = styleString; + } + + return { color: str, alpha: alpha }; +} + +function processLineCap(lineCap) { + switch (lineCap) { + case 'butt': + return 'flat'; + case 'round': + return 'round'; + case 'square': + default: + return 'square'; + } } // Helper function that takes the already fixed cordinates. function bezierCurveToHelper(self, cp1, cp2, p) { - self.currentPath_.push({ - type:'bezierCurveTo', - cp1x:cp1.x, - cp1y:cp1.y, - cp2x:cp2.x, - cp2y:cp2.y, - x:p.x, - y:p.y - }); - self.currentX_ = p.x; - self.currentY_ = p.y; + self.currentPath_.push({ + type: 'bezierCurveTo', + cp1x: cp1.x, + cp1y: cp1.y, + cp2x: cp2.x, + cp2y: cp2.y, + x: p.x, + y: p.y, + }); + self.currentX_ = p.x; + self.currentY_ = p.y; } function matrixIsFinite(m) { - for (let j = 0; j < 3; j++) { - for (let k = 0; k < 2; k++) { - if (!isFinite(m[j][k]) || isNaN(m[j][k])) { - return false; - } - } - } - return true; + for (let j = 0; j < 3; j++) { + for (let k = 0; k < 2; k++) { + if (!isFinite(m[j][k]) || isNaN(m[j][k])) { + return false; + } + } + } + return true; } function setM(ctx, m, updateLineScale) { - if (!matrixIsFinite(m)) { - return; - } - ctx.m_ = m; - - if (updateLineScale) { - // Get the line scale. - // Determinant of this.m_ means how much the area is enlarged by the - // transformation. So its square root can be used as a scale factor - // for width. - let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; - ctx.lineScale_ = sqrt(abs(det)); - } + if (!matrixIsFinite(m)) { + return; + } + ctx.m_ = m; + + if (updateLineScale) { + // Get the line scale. + // Determinant of this.m_ means how much the area is enlarged by the + // transformation. 
So its square root can be used as a scale factor + // for width. + let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; + ctx.lineScale_ = sqrt(abs(det)); + } } class CanvasPattern_ { - constructor() { - } + constructor() {} } // Gradient / Pattern Stubs class CanvasGradient_ { - constructor(aType) { - this.type_ = aType; - this.x0_ = 0; - this.y0_ = 0; - this.r0_ = 0; - this.x1_ = 0; - this.y1_ = 0; - this.r1_ = 0; - this.colors_ = []; - } - addColorStop(aOffset, aColor) { - aColor = processStyle(aColor); - this.colors_.push({offset:aOffset, - color:aColor.color, - alpha:aColor.alpha}); - } + constructor(aType) { + this.type_ = aType; + this.x0_ = 0; + this.y0_ = 0; + this.r0_ = 0; + this.x1_ = 0; + this.y1_ = 0; + this.r1_ = 0; + this.colors_ = []; + } + addColorStop(aOffset, aColor) { + aColor = processStyle(aColor); + this.colors_.push({ + offset: aOffset, + color: aColor.color, + alpha: aColor.alpha, + }); + } } - /** * This class implements CanvasRenderingContext2D interface as described by * the WHATWG. 
@@ -758,428 +872,464 @@ class CanvasGradient_ { * be associated with */ class CanvasRenderingContext2D_ { - constructor(canvasTarget, scaledWidth, scaledHeight) { - this.m_ = createMatrixIdentity(); - - this.mStack_ = []; - this.aStack_ = []; - this.currentPath_ = []; - - // Canvas context properties - this.strokeStyle = '#000'; - this.fillStyle = '#000'; - - this.lineWidth = 1; - this.lineJoin = 'miter'; - this.lineCap = 'butt'; - this.dashArray = []; - this.miterLimit = 1; - this.globalAlpha = 1; - - if (!("HLines" in canvasTarget) || !Array.isArray(canvasTarget.HLines)) - canvasTarget.HLines = []; - if (!("VLines" in canvasTarget) || !Array.isArray(canvasTarget.VLines)) - canvasTarget.VLines = []; - if (!("Fills" in canvasTarget) || !Array.isArray(canvasTarget.Fills)) - canvasTarget.Fills = []; - if (!("Texts" in canvasTarget) || !Array.isArray(canvasTarget.Texts)) - canvasTarget.Texts = []; - - this.canvas = canvasTarget; - - this.width = scaledWidth; - this.height = scaledHeight; - - this.arcScaleX_ = 1; - this.arcScaleY_ = 1; - this.lineScale_ = 1; - - this.currentFont = null; - } - - //private helper methods - #drawPDFLine(p1, p2, lineWidth, color) { - let dashedLine = Array.isArray(this.dashArray) && (this.dashArray.length > 1); - let pL = new PDFLine(p1.x, p1.y, p2.x, p2.y, lineWidth, color, dashedLine); - pL.processLine(this.canvas); - } - - #drawPDFFill(cp, min, max, color) { - let width = max.x - min.x; - let height = max.y - min.y; - let pF = new PDFFill(cp.x, cp.y, width, height, color); - pF.processFill(this.canvas); - } - - #needRemoveRect(x, y, w, h) { - let retVal = (Math.abs(w - Math.abs(h)) < 1 && w < 13); - if (retVal) { - nodeUtil.p2jinfo("Skipped: tiny rect: w=" + w + ", h=" + h); - } - return retVal; - } - - getContext(ctxType) { - return (ctxType === "2d") ? 
this : null; - } - - setLineDash(lineDash) { - this.dashArray = lineDash; - } - - getLineDash() { - return this.dashArray; - } - - fillText(text, x, y, maxWidth, fontSize) { - if (!text || text.trim().length < 1) - return; - let p = this.getCoords_(x, y); - - let a = processStyle(this.fillStyle || this.strokeStyle); - let color = (!!a) ? a.color : '#000000'; - - this.currentFont.processText(p, text, maxWidth, color, fontSize, this.canvas, this.m_); - }; - - strokeText(text, x, y, maxWidth) { - //MQZ. 10/23/2012, yeah, no hollow text for now - this.fillText(text, x, y, maxWidth); - } - - measureText(text) { - console.warn("to be implemented: contextPrototype.measureText - ", text); - let chars = text.length || 1; - return {width: chars * (this.currentFont.spaceWidth || 5)}; - } - - setFont(fontObj) { - if ((!!this.currentFont) && typeof(this.currentFont.clean) === "function") { - this.currentFont.clean(); - this.currentFont = null; - } - - this.currentFont = new PDFFont(fontObj); - } - - clearRect() { - console.warn("to be implemented: contextPrototype.clearRect"); - } - - beginPath() { - // TODO: Branch current matrix so that save/restore has no effect - // as per safari docs. 
- this.currentPath_ = []; - } - - moveTo(aX, aY) { - let p = this.getCoords_(aX, aY); - this.currentPath_.push({type:'moveTo', x:p.x, y:p.y}); - this.currentX_ = p.x; - this.currentY_ = p.y; - } - - lineTo(aX, aY) { - let p = this.getCoords_(aX, aY); - this.currentPath_.push({type:'lineTo', x:p.x, y:p.y}); - - this.currentX_ = p.x; - this.currentY_ = p.y; - } - - bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { - let p = this.getCoords_(aX, aY); - let cp1 = this.getCoords_(aCP1x, aCP1y); - let cp2 = this.getCoords_(aCP2x, aCP2y); - bezierCurveToHelper(this, cp1, cp2, p); - } - - quadraticCurveTo(aCPx, aCPy, aX, aY) { - // the following is lifted almost directly from - // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes - - let cp = this.getCoords_(aCPx, aCPy); - let p = this.getCoords_(aX, aY); - - let cp1 = { - x:this.currentX_ + 2.0 / 3.0 * (cp.x - this.currentX_), - y:this.currentY_ + 2.0 / 3.0 * (cp.y - this.currentY_) - }; - let cp2 = { - x:cp1.x + (p.x - this.currentX_) / 3.0, - y:cp1.y + (p.y - this.currentY_) / 3.0 - }; - - bezierCurveToHelper(this, cp1, cp2, p); - } - - arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { - let arcType = aClockwise ? 'at' : 'wa'; - - let xStart = aX + mc(aStartAngle) * aRadius; - let yStart = aY + ms(aStartAngle) * aRadius; - - let xEnd = aX + mc(aEndAngle) * aRadius; - let yEnd = aY + ms(aEndAngle) * aRadius; - - // IE won't render arches drawn counter clockwise if xStart == xEnd. - if (xStart == xEnd && !aClockwise) { - xStart += 0.125; // Offset xStart by 1/80 of a pixel. 
Use something - // that can be represented in binary - } - - let p = this.getCoords_(aX, aY); - let pStart = this.getCoords_(xStart, yStart); - let pEnd = this.getCoords_(xEnd, yEnd); - - this.currentPath_.push({type:arcType, - x:p.x, - y:p.y, - radius:aRadius, - xStart:pStart.x, - yStart:pStart.y, - xEnd:pEnd.x, - yEnd:pEnd.y}); - } - - rect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - } - - strokeRect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } - - let oldPath = this.currentPath_; - this.beginPath(); - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - this.stroke(); - - this.currentPath_ = oldPath; - } - - fillRect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } - - let oldPath = this.currentPath_; - this.beginPath(); - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - this.fill(); - - this.currentPath_ = oldPath; - } - - createLinearGradient(aX0, aY0, aX1, aY1) { - let gradient = new CanvasGradient_('gradient'); - gradient.x0_ = aX0; - gradient.y0_ = aY0; - gradient.x1_ = aX1; - gradient.y1_ = aY1; - return gradient; - } - - createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { - let gradient = new CanvasGradient_('gradientradial'); - gradient.x0_ = aX0; - gradient.y0_ = aY0; - gradient.r0_ = aR0; - gradient.x1_ = aX1; - gradient.y1_ = aY1; - gradient.r1_ = aR1; - return 
gradient; - } - - drawImage(image, var_args) { - //MQZ. no image drawing support for now - } - - getImageData(x, y, w, h) { - //MQZ. returns empty data buffer for now - return { - width:w, - height:h, - data:new Uint8Array(w * h * 4) - }; - } - - stroke(aFill) { - if (this.currentPath_.length < 2) { - return; - } - - let a = processStyle(aFill ? this.fillStyle : this.strokeStyle); - let color = a.color; -// let opacity = a.alpha * this.globalAlpha; - let lineWidth = this.lineScale_ * this.lineWidth; - - let min = {x:null, y:null}; - let max = {x:null, y:null}; - - for (let i = 0; i < this.currentPath_.length; i++) { - let p = this.currentPath_[i]; - - switch (p.type) { - case 'moveTo': - break; - case 'lineTo': - if (!aFill) { //lines - if (i > 0) { - this.#drawPDFLine(this.currentPath_[i-1], p, lineWidth, color); - } - } - break; - case 'close': - if (!aFill) { //lines - if (i > 0) { - this.#drawPDFLine(this.currentPath_[i-1], this.currentPath_[0], lineWidth, color); - } - } - p = null; - break; + constructor(canvasTarget, scaledWidth, scaledHeight) { + this.m_ = createMatrixIdentity(); + + this.mStack_ = []; + this.aStack_ = []; + this.currentPath_ = []; + + // Canvas context properties + this.strokeStyle = '#000'; + this.fillStyle = '#000'; + + this.lineWidth = 1; + this.lineJoin = 'miter'; + this.lineCap = 'butt'; + this.dashArray = []; + this.miterLimit = 1; + this.globalAlpha = 1; + + if (!('HLines' in canvasTarget) || !Array.isArray(canvasTarget.HLines)) + canvasTarget.HLines = []; + if (!('VLines' in canvasTarget) || !Array.isArray(canvasTarget.VLines)) + canvasTarget.VLines = []; + if (!('Fills' in canvasTarget) || !Array.isArray(canvasTarget.Fills)) + canvasTarget.Fills = []; + if (!('Texts' in canvasTarget) || !Array.isArray(canvasTarget.Texts)) + canvasTarget.Texts = []; + + this.canvas = canvasTarget; + + this.width = scaledWidth; + this.height = scaledHeight; + + this.arcScaleX_ = 1; + this.arcScaleY_ = 1; + this.lineScale_ = 1; + + this.currentFont = 
null; + } + + //private helper methods + #drawPDFLine(p1, p2, lineWidth, color) { + let dashedLine = + Array.isArray(this.dashArray) && this.dashArray.length > 1; + let pL = new PDFLine( + p1.x, + p1.y, + p2.x, + p2.y, + lineWidth, + color, + dashedLine + ); + pL.processLine(this.canvas); + } + + #drawPDFFill(cp, min, max, color) { + let width = max.x - min.x; + let height = max.y - min.y; + let pF = new PDFFill(cp.x, cp.y, width, height, color); + pF.processFill(this.canvas); + } + + #needRemoveRect(x, y, w, h) { + let retVal = Math.abs(w - Math.abs(h)) < 1 && w < 13; + if (retVal) { + nodeUtil.p2jinfo('Skipped: tiny rect: w=' + w + ', h=' + h); + } + return retVal; + } + + getContext(ctxType) { + return ctxType === '2d' ? this : null; + } + + setLineDash(lineDash) { + this.dashArray = lineDash; + } + + getLineDash() { + return this.dashArray; + } + + fillText(text, x, y, maxWidth, fontSize) { + if (!text || text.trim().length < 1) return; + let p = this.getCoords_(x, y); + + let a = processStyle(this.fillStyle || this.strokeStyle); + let color = !!a ? a.color : '#000000'; + + this.currentFont.processText( + p, + text, + maxWidth, + color, + fontSize, + this.canvas, + this.m_ + ); + } + + strokeText(text, x, y, maxWidth) { + //MQZ. 10/23/2012, yeah, no hollow text for now + this.fillText(text, x, y, maxWidth); + } + + measureText(text) { + console.warn('to be implemented: contextPrototype.measureText - ', text); + let chars = text.length || 1; + return { width: chars * (this.currentFont.spaceWidth || 5) }; + } + + setFont(fontObj) { + if (!!this.currentFont && typeof this.currentFont.clean === 'function') { + this.currentFont.clean(); + this.currentFont = null; + } + + this.currentFont = new PDFFont(fontObj); + } + + clearRect() { + console.warn('to be implemented: contextPrototype.clearRect'); + } + + beginPath() { + // TODO: Branch current matrix so that save/restore has no effect + // as per safari docs. 
+ this.currentPath_ = []; + } + + moveTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({ type: 'moveTo', x: p.x, y: p.y }); + this.currentX_ = p.x; + this.currentY_ = p.y; + } + + lineTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({ type: 'lineTo', x: p.x, y: p.y }); + + this.currentX_ = p.x; + this.currentY_ = p.y; + } + + bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { + let p = this.getCoords_(aX, aY); + let cp1 = this.getCoords_(aCP1x, aCP1y); + let cp2 = this.getCoords_(aCP2x, aCP2y); + bezierCurveToHelper(this, cp1, cp2, p); + } + + quadraticCurveTo(aCPx, aCPy, aX, aY) { + // the following is lifted almost directly from + // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes + + let cp = this.getCoords_(aCPx, aCPy); + let p = this.getCoords_(aX, aY); + + let cp1 = { + x: this.currentX_ + (2.0 / 3.0) * (cp.x - this.currentX_), + y: this.currentY_ + (2.0 / 3.0) * (cp.y - this.currentY_), + }; + let cp2 = { + x: cp1.x + (p.x - this.currentX_) / 3.0, + y: cp1.y + (p.y - this.currentY_) / 3.0, + }; + + bezierCurveToHelper(this, cp1, cp2, p); + } + + arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { + let arcType = aClockwise ? 'at' : 'wa'; + + let xStart = aX + mc(aStartAngle) * aRadius; + let yStart = aY + ms(aStartAngle) * aRadius; + + let xEnd = aX + mc(aEndAngle) * aRadius; + let yEnd = aY + ms(aEndAngle) * aRadius; + + // IE won't render arches drawn counter clockwise if xStart == xEnd. + if (xStart == xEnd && !aClockwise) { + xStart += 0.125; // Offset xStart by 1/80 of a pixel. 
Use something + // that can be represented in binary + } + + let p = this.getCoords_(aX, aY); + let pStart = this.getCoords_(xStart, yStart); + let pEnd = this.getCoords_(xEnd, yEnd); + + this.currentPath_.push({ + type: arcType, + x: p.x, + y: p.y, + radius: aRadius, + xStart: pStart.x, + yStart: pStart.y, + xEnd: pEnd.x, + yEnd: pEnd.y, + }); + } + + rect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return; //try to remove the rectangle behind radio buttons and checkboxes + } + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + } + + strokeRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return; //try to remove the rectangle behind radio buttons and checkboxes + } + + let oldPath = this.currentPath_; + this.beginPath(); + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.stroke(); + + this.currentPath_ = oldPath; + } + + fillRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return; //try to remove the rectangle behind radio buttons and checkboxes + } + + let oldPath = this.currentPath_; + this.beginPath(); + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.fill(); + + this.currentPath_ = oldPath; + } + + createLinearGradient(aX0, aY0, aX1, aY1) { + let gradient = new CanvasGradient_('gradient'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + return gradient; + } + + createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { + let gradient = new CanvasGradient_('gradientradial'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.r0_ = aR0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + gradient.r1_ 
= aR1; + return gradient; + } + + drawImage(image, var_args) { + //MQZ. no image drawing support for now + } + + getImageData(x, y, w, h) { + //MQZ. returns empty data buffer for now + return { + width: w, + height: h, + data: new Uint8Array(w * h * 4), + }; + } + + stroke(aFill) { + if (this.currentPath_.length < 2) { + return; + } + + let a = processStyle(aFill ? this.fillStyle : this.strokeStyle); + let color = a.color; + // let opacity = a.alpha * this.globalAlpha; + let lineWidth = this.lineScale_ * this.lineWidth; + + let min = { x: null, y: null }; + let max = { x: null, y: null }; + + for (let i = 0; i < this.currentPath_.length; i++) { + let p = this.currentPath_[i]; + + switch (p.type) { + case 'moveTo': + break; + case 'lineTo': + if (!aFill) { + //lines + if (i > 0) { + this.#drawPDFLine( + this.currentPath_[i - 1], + p, + lineWidth, + color + ); + } + } + break; + case 'close': + if (!aFill) { + //lines + if (i > 0) { + this.#drawPDFLine( + this.currentPath_[i - 1], + this.currentPath_[0], + lineWidth, + color + ); + } + } + p = null; + break; + case 'bezierCurveTo': + break; + case 'at': + case 'wa': + break; + } + + // Figure out dimensions so we can set fills' coordinates correctly + if (aFill && p) { + if (min.x == null || p.x < min.x) { + min.x = p.x; } - - // Figure out dimensions so we can set fills' coordinates correctly - if (aFill && p) { - if (min.x == null || p.x < min.x) { - min.x = p.x; - } - if (max.x == null || p.x > max.x) { - max.x = p.x; - } - if (min.y == null || p.y < min.y) { - min.y = p.y; - } - if (max.y == null || p.y > max.y) { - max.y = p.y; - } + if (max.x == null || p.x > max.x) { + max.x = p.x; } - } - - if (aFill) { //fill - this.#drawPDFFill(min, min, max, color); - } - } - - fill() { - this.stroke(true); - } - - closePath() { - this.currentPath_.push({type:'close'}); - } - - /** - * @private - */ - getCoords_ (aX, aY) { - let m = this.m_; - return { - x: (aX * m[0][0] + aY * m[1][0] + m[2][0]), - y: (aX * m[0][1] + aY * 
m[1][1] + m[2][1]) - }; - } - - save() { - let o = {}; - copyState(this, o); - this.aStack_.push(o); - this.mStack_.push(this.m_); - this.m_ = matrixMultiply(createMatrixIdentity(), this.m_); - } - - restore() { - copyState(this.aStack_.pop(), this); - this.m_ = this.mStack_.pop(); - } - - translate(aX, aY) { - let m1 = [ - [1, 0, 0], - [0, 1, 0], - [aX, aY, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), false); - } - - rotate(aRot) { - let c = mc(aRot); - let s = ms(aRot); - - let m1 = [ - [c, s, 0], - [-s, c, 0], - [0, 0, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), false); - } - - scale(aX, aY) { - this.arcScaleX_ *= aX; - this.arcScaleY_ *= aY; - let m1 = [ - [aX, 0, 0], - [0, aY, 0], - [0, 0, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), true); - } - - transform(m11, m12, m21, m22, dx, dy) { - let m1 = [ - [m11, m12, 0], - [m21, m22, 0], - [dx, dy, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), true); - } - - setTransform(m11, m12, m21, m22, dx, dy) { - let m = [ - [m11, m12, 0], - [m21, m22, 0], - [dx, dy, 1] - ]; - - setM(this, m, true); - } - - /******** STUBS ********/ - clip() { - // TODO: Implement - } - - arcTo() { - // TODO: Implement - } - - createPattern() { - return new CanvasPattern_(); - } + if (min.y == null || p.y < min.y) { + min.y = p.y; + } + if (max.y == null || p.y > max.y) { + max.y = p.y; + } + } + } + + if (aFill) { + //fill + this.#drawPDFFill(min, min, max, color); + } + } + + fill() { + this.stroke(true); + } + + closePath() { + this.currentPath_.push({ type: 'close' }); + } + + /** + * @private + */ + getCoords_(aX, aY) { + let m = this.m_; + return { + x: aX * m[0][0] + aY * m[1][0] + m[2][0], + y: aX * m[0][1] + aY * m[1][1] + m[2][1], + }; + } + + save() { + let o = {}; + copyState(this, o); + this.aStack_.push(o); + this.mStack_.push(this.m_); + this.m_ = matrixMultiply(createMatrixIdentity(), this.m_); + } + + restore() { + copyState(this.aStack_.pop(), this); + this.m_ = this.mStack_.pop(); + } + + 
translate(aX, aY) { + let m1 = [ + [1, 0, 0], + [0, 1, 0], + [aX, aY, 1], + ]; + + setM(this, matrixMultiply(m1, this.m_), false); + } + + rotate(aRot) { + let c = mc(aRot); + let s = ms(aRot); + + let m1 = [ + [c, s, 0], + [-s, c, 0], + [0, 0, 1], + ]; + + setM(this, matrixMultiply(m1, this.m_), false); + } + + scale(aX, aY) { + this.arcScaleX_ *= aX; + this.arcScaleY_ *= aY; + let m1 = [ + [aX, 0, 0], + [0, aY, 0], + [0, 0, 1], + ]; + + setM(this, matrixMultiply(m1, this.m_), true); + } + + transform(m11, m12, m21, m22, dx, dy) { + let m1 = [ + [m11, m12, 0], + [m21, m22, 0], + [dx, dy, 1], + ]; + + setM(this, matrixMultiply(m1, this.m_), true); + } + + setTransform(m11, m12, m21, m22, dx, dy) { + let m = [ + [m11, m12, 0], + [m21, m22, 0], + [dx, dy, 1], + ]; + + setM(this, m, true); + } + + /******** STUBS ********/ + clip() { + // TODO: Implement + } + + arcTo() { + // TODO: Implement + } + + createPattern() { + return new CanvasPattern_(); + } } const kFBANotOverridable = 0x00000400; // indicates the field is read only by the user @@ -1187,1017 +1337,1385 @@ const kFBARequired = 0x00000010; // indicates the field is required const kMinHeight = 20; class PDFField { - static tabIndex = 0; - - static isWidgetSupported(field) { - let retVal = false; - - switch(field.fieldType) { - case 'Tx': retVal = true; break; //text input - case 'Btn': - if (field.fieldFlags & 32768) { - field.fieldType = 'Rd'; //radio button - } - else if (field.fieldFlags & 65536) { - field.fieldType = 'Btn'; //push button - } - else { - field.fieldType = 'Cb'; //checkbox - } - retVal = true; - break; - case 'Ch': retVal = true; break; //drop down - case 'Sig': retVal = true; break; //signature - default: - nodeUtil.p2jwarn("Unsupported: field.fieldType of " + field.fieldType); - break; - } - - return retVal; - } - - static isFormElement(field) { - let retVal = false; - switch(field.subtype) { - case 'Widget': retVal = PDFField.isWidgetSupported(field); break; - default: - 
nodeUtil.p2jwarn("Unsupported: field.type of " + field.subtype); - break; - } - return retVal; - } - - // constructor - constructor(field, viewport, Fields, Boxsets) { - this.field = field; - this.viewport = viewport; - this.Fields = Fields; - this.Boxsets = Boxsets; - } - - // Normalize rectangle rect=[x1, y1, x2, y2] so that (x1,y1) < (x2,y2) - // For coordinate systems whose origin lies in the bottom-left, this - // means normalization to (BL,TR) ordering. For systems with origin in the - // top-left, this means (TL,BR) ordering. - static #normalizeRect(rect) { - const r = rect.slice(0); // clone rect - if (rect[0] > rect[2]) { - r[0] = rect[2]; - r[2] = rect[0]; - } - if (rect[1] > rect[3]) { - r[1] = rect[3]; - r[3] = rect[1]; - } - return r; - } - - #getFieldPosition(field) { - let viewPort = this.viewport; - let fieldRect = viewPort.convertToViewportRectangle(field.rect); - let rect = PDFField.#normalizeRect(fieldRect); - - let height = rect[3] - rect[1]; - if (field.fieldType === 'Tx') { - if (height > kMinHeight + 2) { - rect[1] += 2; - height -= 2; + static tabIndex = 0; + + static isWidgetSupported(field) { + let retVal = false; + + switch (field.fieldType) { + case 'Tx': + retVal = true; + break; //text input + case 'Btn': + if (field.fieldFlags & 32768) { + field.fieldType = 'Rd'; //radio button + } else if (field.fieldFlags & 65536) { + field.fieldType = 'Btn'; //push button + } else { + field.fieldType = 'Cb'; //checkbox } - } - else if (field.fieldType !== 'Ch') { //checkbox, radio button, and link button - rect[1] -= 3; - } - - height = (height >= kMinHeight) ? 
height : kMinHeight; - - return { - x: PDFUnit.toFormX(rect[0]), - y: PDFUnit.toFormY(rect[1]), - w: PDFUnit.toFormX(rect[2] - rect[0]), - h: PDFUnit.toFormY(height) - }; - } - - #getFieldBaseData(field) { - let attributeMask = 0; - //PDF Spec p.676 TABLE 8.70 Field flags common to all field types - if (field.fieldFlags & 0x00000001) { - attributeMask |= kFBANotOverridable; - } - if (field.fieldFlags & 0x00000002) { - attributeMask |= kFBARequired; - } - - let anData = { - id: { Id: field.fullName, EN: 0}, - TI: field.TI, - AM: attributeMask - }; - //PDF Spec p.675: add TU (AlternativeText) fields to provide accessibility info - if (field.alternativeText && field.alternativeText.length > 1) { - anData.TU = field.alternativeText; - } - - if (field.alternativeID && field.alternativeID.length > 1) { - anData.TM = field.alternativeID; - } - - return Object.assign(anData, this.#getFieldPosition(field)); - } - - #addAlpha(field) { - const anData = Object.assign({ + retVal = true; + break; + case 'Ch': + retVal = true; + break; //drop down + case 'Sig': + retVal = true; + break; //signature + default: + nodeUtil.p2jwarn( + 'Unsupported: field.fieldType of ' + field.fieldType + ); + break; + } + + return retVal; + } + + static isFormElement(field) { + let retVal = false; + switch (field.subtype) { + case 'Widget': + retVal = PDFField.isWidgetSupported(field); + break; + default: + nodeUtil.p2jwarn('Unsupported: field.type of ' + field.subtype); + break; + } + return retVal; + } + + // constructor + constructor(field, viewport, Fields, Boxsets) { + this.field = field; + this.viewport = viewport; + this.Fields = Fields; + this.Boxsets = Boxsets; + } + + // Normalize rectangle rect=[x1, y1, x2, y2] so that (x1,y1) < (x2,y2) + // For coordinate systems whose origin lies in the bottom-left, this + // means normalization to (BL,TR) ordering. For systems with origin in the + // top-left, this means (TL,BR) ordering. 
+ static #normalizeRect(rect) { + const r = rect.slice(0); // clone rect + if (rect[0] > rect[2]) { + r[0] = rect[2]; + r[2] = rect[0]; + } + if (rect[1] > rect[3]) { + r[1] = rect[3]; + r[3] = rect[1]; + } + return r; + } + + #getFieldPosition(field) { + let viewPort = this.viewport; + let fieldRect = viewPort.convertToViewportRectangle(field.rect); + let rect = PDFField.#normalizeRect(fieldRect); + + let height = rect[3] - rect[1]; + if (field.fieldType === 'Tx') { + if (height > kMinHeight + 2) { + rect[1] += 2; + height -= 2; + } + } else if (field.fieldType !== 'Ch') { + //checkbox, radio button, and link button + rect[1] -= 3; + } + + height = height >= kMinHeight ? height : kMinHeight; + + return { + x: PDFUnit.toFormX(rect[0]), + y: PDFUnit.toFormY(rect[1]), + w: PDFUnit.toFormX(rect[2] - rect[0]), + h: PDFUnit.toFormY(height), + }; + } + + #getFieldBaseData(field) { + let attributeMask = 0; + //PDF Spec p.676 TABLE 8.70 Field flags common to all field types + if (field.fieldFlags & 0x00000001) { + attributeMask |= kFBANotOverridable; + } + if (field.fieldFlags & 0x00000002) { + attributeMask |= kFBARequired; + } + + let anData = { + id: { Id: field.fullName, EN: 0 }, + TI: field.TI, + AM: attributeMask, + }; + //PDF Spec p.675: add TU (AlternativeText) fields to provide accessibility info + if (field.alternativeText && field.alternativeText.length > 1) { + anData.TU = field.alternativeText; + } + + if (field.alternativeID && field.alternativeID.length > 1) { + anData.TM = field.alternativeID; + } + + return Object.assign(anData, this.#getFieldPosition(field)); + } + + #addAlpha(field) { + const anData = Object.assign( + { style: 48, T: { - Name: field.TName || "alpha", - TypeInfo: {} - } - }, this.#getFieldBaseData(field)); - - if (field.MV) { //field attributes: arbitrary mask value - anData.MV = field.MV; - } - if (field.fieldValue) { - anData.V = field.fieldValue; //read-only field value, like "self-prepared" - } - - this.Fields.push(anData); - } - - 
#addCheckBox(box) { - const anData = Object.assign({ + Name: field.TName || 'alpha', + TypeInfo: {}, + }, + }, + this.#getFieldBaseData(field) + ); + + if (field.MV) { + //field attributes: arbitrary mask value + anData.MV = field.MV; + } + if (field.fieldValue) { + anData.V = field.fieldValue; //read-only field value, like "self-prepared" + } + + this.Fields.push(anData); + } + + #addCheckBox(box) { + const anData = Object.assign( + { style: 48, T: { - Name: "box", - TypeInfo: {} - } - }, this.#getFieldBaseData(box)); - if(box.fieldValue) { - anData.checked = box.fieldValue !== 'Off'; - } - - this.Boxsets.push({boxes:[anData]}); - } - - #addRadioButton(box) { - const anData = Object.assign({ + Name: 'box', + TypeInfo: {}, + }, + }, + this.#getFieldBaseData(box) + ); + if (box.fieldValue) { + anData.checked = box.fieldValue !== 'Off'; + } + + this.Boxsets.push({ boxes: [anData] }); + } + + #addRadioButton(box) { + const anData = Object.assign( + { style: 48, T: { - Name: "box", - TypeInfo: {} - } - }, this.#getFieldBaseData(box)); - - anData.id.Id = box.value; - if ('checked' in box) { - anData.checked = box.checked; - } - - const rdGroup = this.Boxsets.filter(boxset => ('id' in boxset) && ('Id' in boxset.id) && (boxset.id.Id === box.fullName))[0]; - if ((!!rdGroup) && ('boxes' in rdGroup)) { - rdGroup.boxes.push(anData); - } - else { - this.Boxsets.push({boxes:[anData], id: { Id: box.fullName, EN: 0}}); - } - } - - #addLinkButton(field) { - const anData = Object.assign({ + Name: 'box', + TypeInfo: {}, + }, + }, + this.#getFieldBaseData(box) + ); + + anData.id.Id = box.value; + if ('checked' in box) { + anData.checked = box.checked; + } + + const rdGroup = this.Boxsets.filter( + (boxset) => + 'id' in boxset && 'Id' in boxset.id && boxset.id.Id === box.fullName + )[0]; + if (!!rdGroup && 'boxes' in rdGroup) { + rdGroup.boxes.push(anData); + } else { + this.Boxsets.push({ + boxes: [anData], + id: { Id: box.fullName, EN: 0 }, + }); + } + } + + #addLinkButton(field) { 
+ const anData = Object.assign( + { style: 48, T: { - Name: "link" + Name: 'link', }, FL: { - form: {Id: field.FL} - } - }, this.#getFieldBaseData(field)); + form: { Id: field.FL }, + }, + }, + this.#getFieldBaseData(field) + ); - this.Fields.push(anData); - } + this.Fields.push(anData); + } - #addSelect(field) { - const anData = Object.assign({ + #addSelect(field) { + const anData = Object.assign( + { style: 48, T: { - Name: "alpha", - TypeInfo: {} - } - }, this.#getFieldBaseData(field)); - - anData.w -= 0.5; //adjust combobox width - anData.PL = {V: [], D: []}; - field.value.forEach( (ele, idx) => { - if (Array.isArray(ele)) { - anData.PL.D.push(ele[0]); - anData.PL.V.push(ele[1]); - } else { - anData.PL.D.push(ele); - anData.PL.V.push(ele); - } - }); - - // add field value to the object - if (field.fieldValue) { - anData.V = field.fieldValue; - } - this.Fields.push(anData); - }; - - #addSignature(field) { - const anData = Object.assign({ + Name: 'alpha', + TypeInfo: {}, + }, + }, + this.#getFieldBaseData(field) + ); + + anData.w -= 0.5; //adjust combobox width + anData.PL = { V: [], D: [] }; + field.value.forEach((ele, idx) => { + if (Array.isArray(ele)) { + anData.PL.D.push(ele[0]); + anData.PL.V.push(ele[1]); + } else { + anData.PL.D.push(ele); + anData.PL.V.push(ele); + } + }); + + // add field value to the object + if (field.fieldValue) { + anData.V = field.fieldValue; + } + this.Fields.push(anData); + } + + #addSignature(field) { + const anData = Object.assign( + { style: 48, T: { - Name: "signature", - TypeInfo: {} + Name: 'signature', + TypeInfo: {}, + }, + }, + this.#getFieldBaseData(field) + ); + + if (field.Sig) { + anData.Sig = {}; + if (field.Sig.Name) anData.Sig.Name = field.Sig.Name; + if (field.Sig.M) anData.Sig.M = PDFUnit.dateToIso8601(field.Sig.M); + if (field.Sig.Location) anData.Sig.Location = field.Sig.Location; + if (field.Sig.Reason) anData.Sig.Reason = field.Sig.Reason; + if (field.Sig.ContactInfo) + anData.Sig.ContactInfo = 
field.Sig.ContactInfo; + } + + this.Fields.push(anData); + } + + // public instance methods + processField() { + this.field.TI = PDFField.tabIndex++; + + switch (this.field.fieldType) { + case 'Tx': + this.#addAlpha(this.field); + break; + case 'Cb': + this.#addCheckBox(this.field); + break; + case 'Rd': + this.#addRadioButton(this.field); + break; + case 'Btn': + this.#addLinkButton(this.field); + break; + case 'Ch': + this.#addSelect(this.field); + break; + case 'Sig': + this.#addSignature(this.field); + break; + } + + this.clean(); + } + + clean() { + delete this.field; + delete this.viewport; + delete this.Fields; + delete this.Boxsets; + } + + //static public method to generate fieldsType object based on parser result + static getAllFieldsTypes(data) { + const isFieldReadOnly = (field) => { + return field.AM & kFBANotOverridable ? true : false; + }; + + const getFieldBase = (field) => { + return { + id: field.id.Id, + type: field.T.Name, + calc: isFieldReadOnly(field), + value: field.V || '', + }; + }; + + let retVal = []; + data.Pages.forEach((page) => { + page.Boxsets.forEach((boxsets) => { + if (boxsets.boxes.length > 1) { + //radio button + boxsets.boxes.forEach((box) => { + retVal.push({ + id: boxsets.id.Id, + type: 'radio', + calc: isFieldReadOnly(box), + value: box.id.Id, + }); + }); + } else { + //checkbox + retVal.push(getFieldBase(boxsets.boxes[0])); } - }, this.#getFieldBaseData(field)); - - if (field.Sig) { - anData.Sig = {}; - if (field.Sig.Name) anData.Sig.Name = field.Sig.Name; - if (field.Sig.M) anData.Sig.M = PDFUnit.dateToIso8601(field.Sig.M); - if (field.Sig.Location) anData.Sig.Location = field.Sig.Location; - if (field.Sig.Reason) anData.Sig.Reason = field.Sig.Reason; - if (field.Sig.ContactInfo) anData.Sig.ContactInfo = field.Sig.ContactInfo; - } - - this.Fields.push(anData); - } - - // public instance methods - processField() { - this.field.TI = PDFField.tabIndex++; - - switch(this.field.fieldType) { - case 'Tx': 
this.#addAlpha(this.field); break; - case 'Cb': this.#addCheckBox(this.field); break; - case 'Rd': this.#addRadioButton(this.field);break; - case 'Btn':this.#addLinkButton(this.field); break; - case 'Ch': this.#addSelect(this.field); break; - case 'Sig': this.#addSignature(this.field); break; - } - - this.clean(); - } - - clean() { - delete this.field; - delete this.viewport; - delete this.Fields; - delete this.Boxsets; - } - - //static public method to generate fieldsType object based on parser result - static getAllFieldsTypes(data) { - const isFieldReadOnly = field => { - return (field.AM & kFBANotOverridable) ? true : false; - }; - - const getFieldBase = field => { - return {id: field.id.Id, type: field.T.Name, calc: isFieldReadOnly(field), value: field.V || ""}; - }; - - let retVal = []; - data.Pages.forEach( page => { - page.Boxsets.forEach( boxsets => { - if (boxsets.boxes.length > 1) { //radio button - boxsets.boxes.forEach( box => { - retVal.push({id: boxsets.id.Id, type: "radio", calc: isFieldReadOnly(box), value: box.id.Id}); - }); - } - else { //checkbox - retVal.push(getFieldBase(boxsets.boxes[0])); - } - }); - - page.Fields.forEach(field => retVal.push(getFieldBase(field))); - - }); - return retVal; - } + }); + + page.Fields.forEach((field) => retVal.push(getFieldBase(field))); + }); + return retVal; + } +} + +//BEGIN - MQZ 9/19/2012. 
Helper functions to parse acroForm elements +function setupRadioButton(annotation, item) { + let asName = ''; + //PDF Spec p.689: parent item's DV holds the item's value that is selected by default + let po = annotation.get('Parent'); + if (po) { + po.forEach(function (key, val) { + if (key === 'DV') { + asName = val.name || ''; + } else if (key === 'TU') { + //radio buttons use the alternative text from the parent + item.alternativeText = val; + } else if (key == 'TM') { + item.alternativeID = val; + } + }); + } + + //PDF Spec p.606: get appearance dictionary + let ap = annotation.get('AP'); + //PDF Spec p.614 get normal appearance + let nVal = ap.get('N'); + //PDF Spec p.689 + nVal.forEach(function (key, value) { + if (key.toLowerCase() != 'off') { + //value if selected + item.value = key; //export value + item.checked = key === asName; //initial selection state + } + }); + + if (!item.value) item.value = 'off'; +} + +function setupPushButton(annotation, item) { + //button label: PDF Spec p.640 + let mk = annotation.get('MK'); + if (mk) { + item.value = mk.get('CA') || ''; + } + + //button action: url when mouse up: PDF Spec:p.642 + item.FL = ''; + let ap = annotation.get('A'); + if (ap) { + let sp = ap.get('S'); + item.FL = ap.get(sp.name); + } +} + +function setupCheckBox(annotation, item) { + //PDF Spec p.606: get appearance dictionary + let ap = annotation.get('AP'); + //PDF Spec p.614 get normal appearance + let nVal = ap.get('N'); + + //PDF Spec p.689 + let i = 0; + nVal.forEach(function (key, value) { + i++; + if (i == 1) + //initial selection state + item.value = key; + }); +} + +function setupDropDown(annotation, item) { + //PDF Spec p.688 + item.value = annotation.get('Opt') || []; +} + +function setupFieldAttributes(annotation, item) { + //MQZ. Jan.03.2013. additional-actions dictionary + //PDF Spec P.648. 8.5.2. 
Trigger Events + let aa = annotation.get('AA'); + if (!aa) { + return; + } + + //PDF Spec p.651 get format dictionary + let nVal = aa.get('F'); + if (!nVal) { + nVal = aa.get('K'); + if (!nVal) return; + } + + nVal.forEach(function (key, value) { + if (key === 'JS') { + processFieldAttribute(value, item); + } + }); +} + +const AFSpecial_Format = ['zip', 'zip', 'phone', 'ssn', '']; +// let AFNumber_Format = ['nDec', 'sepStyle', 'negStyle', 'currStyle', 'strCurrency', 'bCurrencyPrepend']; +//– nDec is the number of places after the decimal point; +//– sepStyle is an integer denoting whether to use a separator or not. If sepStyle=0, use commas. If sepStyle=1, do not separate. +//– negStyle is the formatting used for negative numbers: 0 = MinusBlack, 1 = Red, 2 = ParensBlack, 3 = ParensRed +//– currStyle is the currency style - not used +//- strCurrency is the currency symbol +//– bCurrencyPrepend +// let AFDate_FormatEx = ["m/d", "m/d/yy", "mm/dd/yy", "mm/yy", "d-mmm", "d-mmm-yy", "dd-mmm-yy", "yymm-dd", "mmm-yy", "mmmm-yy", "mmm d, yyyy", "mmmm d, yyyy", "m/d/yy h:MM tt", "m/d/yy HH:MM"]; + +function processFieldAttribute(jsFuncName, item) { + if (item.hasOwnProperty('TName')) return; + + if (!jsFuncName.split) return; + + let vParts = jsFuncName.split('('); + if (vParts.length !== 2) return; + + let funcName = vParts[0]; + let funcParam = vParts[1].split(')')[0]; + + switch (funcName) { + case 'AFSpecial_Format': + item.TName = AFSpecial_Format[Number(funcParam)]; + break; + case 'AFNumber_Format': + // nfs = funcParam.split(','); + //set the Money fields to use the Number type with no decimal places after, no commas, and bCurrencyPrepend is set as true; (o use a negative sign (fits the PDF layout and our print formatting as well). 
+ // if (nfs[0] === '0' && nfs[1] === '1' && nfs[5]) + // item.TName = 'money'; + // else + item.TName = 'number'; + break; + case 'AFDate_FormatEx': + item.TName = 'date'; + item.MV = funcParam.replace(/^'+|^"+|'+$|"+$/g, ''); //mask value + break; + case 'AFSpecial_KeystrokeEx': //special format: "arbitrary mask" + let maskValue = funcParam.replace(/^'+|^"+|'+$|"+$/g, ''); //mask value + if (!!maskValue && maskValue.length > 0 && maskValue.length < 64) { + item.TName = 'mask'; //fixed length input + item.MV = maskValue; + } + break; + case 'AFPercent_Format': + item.TName = 'percent'; //funcParam => 2, 0, will specified how many decimal places + break; + } +} + +function setupSignature(annotation, item) { + //PDF Spec p.695: field value is signature dict if signed + let sig = annotation.get('V'); + if (!sig) return; + + //PDF Spec p.728: get signature information + item.Sig = {}; + let name = sig.get('Name'); + if (name) item.Sig.Name = name; + let time = sig.get('M'); + if (time) item.Sig.M = time; + let location = sig.get('Location'); + if (location) item.Sig.Location = location; + let reason = sig.get('Reason'); + if (reason) item.Sig.Reason = reason; + let contactInfo = sig.get('ContactInfo'); + if (contactInfo) item.Sig.ContactInfo = contactInfo; +} + +//END - MQZ 9/19/2012. 
Helper functions to parse acroForm elements + +class PDFAnno { + static processAnnotation(annotation, item) { + if (item.fieldType == 'Btn') { + //PDF Spec p.675 + if (item.fieldFlags & 32768) { + setupRadioButton(annotation, item); + } else if (item.fieldFlags & 65536) { + setupPushButton(annotation, item); + } else { + setupCheckBox(annotation, item); + } + } else if (item.fieldType == 'Ch') { + setupDropDown(annotation, item); + } else if (item.fieldType == 'Tx') { + setupFieldAttributes(annotation, item); + } else if (item.fieldType === 'Sig') { + setupSignature(annotation, item); + } else { + nodeUtil.p2jwarn('Unknown fieldType: ', item); + } + } +} + +class PDFImage { + #_src = ''; + #_onload = null; + + set onload(val) { + this.#_onload = typeof val === 'function' ? val : null; + } + + get onload() { + return this.#_onload; + } + + set src(val) { + this.#_src = val; + if (this.#_onload) this.#_onload(); + } + + get src() { + return this.#_src; + } + + btoa(val) { + if (typeof window === 'undefined') { + return new Buffer.from(val, 'ascii').toString('base64'); + } else if (typeof window.btoa === 'function') return window.btoa(val); + + return ''; + } } class PTIXmlParser { - xmlData = null; - ptiPageArray = []; - - // constructor - constructor() { - this.xmlData = null; - this.ptiPageArray = []; - } - - parseXml(filePath, callback) { - fs.readFile(filePath, 'utf8', (err, data) => { - if (err) { - callback(err); - } - else { - this.xmlData = data; - - var parser = new xmldom.DOMParser(); - var dom = parser.parseFromString(this.xmlData); - var root = dom.documentElement; - - var xmlFields = root.getElementsByTagName("field"); - var fields = []; - - for(var i=0;i { + if (err) { + callback(err); + } else { + this.xmlData = data; + + var parser = new xmldom.DOMParser(); + var dom = parser.parseFromString(this.xmlData); + var root = dom.documentElement; + + var xmlFields = root.getElementsByTagName('field'); + var fields = []; + + for (var i = 0; i < 
xmlFields.length; i++) { + var id = xmlFields[i].getAttribute('id'); + var xPos = xmlFields[i].getAttribute('x'); + var yPos = xmlFields[i].getAttribute('y'); + var width = xmlFields[i].getAttribute('width'); + var height = xmlFields[i].getAttribute('height'); + var type = xmlFields[i].getAttribute('xsi:type'); + var page = xmlFields[i].getAttribute('page'); + var fontName = xmlFields[i].getAttribute('fontName'); + var fontSize = xmlFields[i].getAttribute('fontSize'); + + var item = {}; + + var rectLeft = parseInt(xPos) - 21; //was 23.5 + var rectTop = parseInt(yPos) - 20; //was 23 + var rectRight = parseInt(rectLeft) + parseInt(width) - 4; + var rectBottom = parseInt(rectTop) + parseInt(height) - 4; + + item.fieldType = 'Tx'; + if (type == 'Boolean') { + item.fieldType = 'Btn'; + } else if (type == 'SSN' || type == 'Phone' || type == 'zip') { + item.TName = type.toLowerCase(); + } + item.alternativeText = ''; + item.fullName = id; + item.fontSize = fontSize; + item.subtype = 'Widget'; + + item.rect = [rectLeft, rectTop, rectRight, rectBottom]; + + fields.push(item); + + this.ptiPageArray[parseInt(page)] = fields; + } + } + callback(); + }); + } + + getFields(pageNum) { + return this.ptiPageArray[pageNum]; + } } -const __filename$2 = url.fileURLToPath((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.src || new URL('pdfparser.cjs', document.baseURI).href))); +const __filename$2 = url.fileURLToPath( + typeof document === 'undefined' + ? 
require('u' + 'rl').pathToFileURL(__filename).href + : (_documentCurrentScript && _documentCurrentScript.src) || + new URL('pdfparser.cjs', document.baseURI).href +); const __dirname$2 = path.dirname(__filename$2); -const pkInfo = JSON.parse(fs.readFileSync(`${__dirname$2}/package.json`, 'utf8')); +const pkInfo = JSON.parse( + fs.readFileSync(`${__dirname$2}/package.json`, 'utf8') +); const _PARSER_SIG = `${pkInfo.name}@${pkInfo.version} [${pkInfo.homepage}]`; -const __filename$1 = url.fileURLToPath((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.src || new URL('pdfparser.cjs', document.baseURI).href))); +const __filename$1 = url.fileURLToPath( + typeof document === 'undefined' + ? require('u' + 'rl').pathToFileURL(__filename).href + : (_documentCurrentScript && _documentCurrentScript.src) || + new URL('pdfparser.cjs', document.baseURI).href +); const __dirname$1 = path.dirname(__filename$1); const _pdfjsFiles = [ - 'shared/util.js', - 'shared/colorspace.js', - 'shared/pattern.js', - 'shared/function.js', - 'shared/annotation.js', - - 'core/core.js', - 'core/obj.js', - 'core/charsets.js', - 'core/crypto.js', - 'core/evaluator.js', - 'core/fonts.js', - 'core/font_renderer.js', - 'core/glyphlist.js', - 'core/image.js', - 'core/metrics.js', - 'core/parser.js', - 'core/stream.js', - 'core/worker.js', - 'core/jpx.js', - 'core/jbig2.js', - 'core/bidi.js', - 'core/jpg.js', - 'core/chunked_stream.js', - 'core/pdf_manager.js', - 'core/cmap.js', - 'core/cidmaps.js', - - 'display/canvas.js', - 'display/font_loader.js', - 'display/metadata.js', - 'display/api.js' + 'shared/util.js', + 'shared/colorspace.js', + 'shared/pattern.js', + 'shared/function.js', + 'shared/annotation.js', + + 'core/core.js', + 'core/obj.js', + 'core/charsets.js', + 'core/crypto.js', + 'core/evaluator.js', + 'core/fonts.js', + 'core/font_renderer.js', + 'core/glyphlist.js', + 'core/image.js', + 'core/metrics.js', + 
'core/parser.js', + 'core/stream.js', + 'core/worker.js', + 'core/jpx.js', + 'core/jbig2.js', + 'core/bidi.js', + 'core/jpg.js', + 'core/chunked_stream.js', + 'core/pdf_manager.js', + 'core/cmap.js', + 'core/cidmaps.js', + + 'display/canvas.js', + 'display/font_loader.js', + 'display/metadata.js', + 'display/api.js', ]; //////replacing HTML5 canvas with PDFCanvas (in-memory canvas) -function createScratchCanvas(width, height) { return new CanvasRenderingContext2D_({}, width, height); } +function createScratchCanvas(width, height) { + return new CanvasRenderingContext2D_({}, width, height); +} const PDFJS = {}; +const globalScope = { console }; const baseDir = `${__dirname$1}/base/`; -const _baseCode = _pdfjsFiles.reduce( (preContent, fileName, idx, arr) => preContent += fs.readFileSync(baseDir + fileName, 'utf8'), ""); -(function (globalScope = {}) { - (function (globalScope = {}) { - eval(_baseCode); - })(); - })(); +const _baseCode = _pdfjsFiles.reduce( + (preContent, fileName, idx, arr) => + (preContent += fs.readFileSync(baseDir + fileName, 'utf8')), + '' +); +eval(_baseCode); ////////////////////////////////start of helper classes class PDFPageParser { - //static - static RenderingStates = { - INITIAL: 0, - RUNNING: 1, - PAUSED: 2, - FINISHED: 3 - }; - - //public - id = -1; - pdfPage = null; - ptiParser = null; - scale = 0; - viewport = null; - renderingState = -1; - - Fields = null; - Boxsets = null; - ctxCanvas = null; - - #_addField (field) { - if (!PDFField.isFormElement(field)) { - nodeUtil.p2jwarn("NOT valid form element", field); - return; - } - - const oneField = new PDFField(field, this.viewport, this.Fields, this.Boxsets); - oneField.processField(); - } - - // constructor - constructor(pdfPage, id, scale, ptiParser) { - // public, this instance copies - this.id = id; - this.pdfPage = pdfPage; - this.ptiParser = ptiParser; - - this.scale = scale || 1.0; - - //leave out the 2nd parameter in order to use page's default rotation (for both portrait and 
landscape form) - this.viewport = this.pdfPage.getViewport(this.scale); - - this.renderingState = PDFPageParser.RenderingStates.INITIAL; - - //form elements other than radio buttons and check boxes - this.Fields = []; - //form elements: radio buttons and check boxes - this.Boxsets = []; - this.ctxCanvas = {}; - } - - get width() { return PDFUnit.toFormX(this.viewport.width); } - get height() { return PDFUnit.toFormY(this.viewport.height); } - get HLines() { return this.ctxCanvas.HLines; } - get VLines() { return this.ctxCanvas.VLines; } - get Fills() { return this.ctxCanvas.Fills; } - get Texts() { return this.ctxCanvas.Texts; } - - destroy() { - this.pdfPage.destroy(); - this.pdfPage = null; - - this.ptiParser = null; - this.Fields = null; - this.Boxsets = null; - this.ctxCanvas = null; - } - - getPagePoint(x, y) { - return this.viewport.convertToPdfPoint(x, y); - } - - parsePage(callback, errorCallBack) { - if (this.renderingState !== PDFPageParser.RenderingStates.INITIAL) { - errorCallBack('Must be in new state before drawing'); - return; - } - - this.renderingState = PDFPageParser.RenderingStates.RUNNING; - - const canvas = createScratchCanvas(1, 1); - const ctx = canvas.getContext('2d'); - - function pageViewDrawCallback(error) { - this.renderingState = PDFPageParser.RenderingStates.FINISHED; - - if (error) { - console.error(error); - errorCallBack(`Error: Page ${this.id + 1}: ${error.message}`); - } - else { - if (this.ptiParser) { - const extraFields = this.ptiParser.getFields(parseInt(this.id) + 1); - extraFields.forEach( field => this.#_addField(field) ); - } - - this.ctxCanvas = ctx.canvas; - this.stats = this.pdfPage.stats; - - nodeUtil.p2jinfo(`Success: Page ${this.id + 1}`); - callback(); + //static + static RenderingStates = { + INITIAL: 0, + RUNNING: 1, + PAUSED: 2, + FINISHED: 3, + }; + + //public + id = -1; + pdfPage = null; + ptiParser = null; + scale = 0; + viewport = null; + renderingState = -1; + + Fields = null; + Boxsets = null; + ctxCanvas = 
null; + + #_addField(field) { + if (!PDFField.isFormElement(field)) { + nodeUtil.p2jwarn('NOT valid form element', field); + return; + } + + const oneField = new PDFField( + field, + this.viewport, + this.Fields, + this.Boxsets + ); + oneField.processField(); + } + + // constructor + constructor(pdfPage, id, scale, ptiParser) { + // public, this instance copies + this.id = id; + this.pdfPage = pdfPage; + this.ptiParser = ptiParser; + + this.scale = scale || 1.0; + + //leave out the 2nd parameter in order to use page's default rotation (for both portrait and landscape form) + this.viewport = this.pdfPage.getViewport(this.scale); + + this.renderingState = PDFPageParser.RenderingStates.INITIAL; + + //form elements other than radio buttons and check boxes + this.Fields = []; + //form elements: radio buttons and check boxes + this.Boxsets = []; + this.ctxCanvas = {}; + } + + get width() { + return PDFUnit.toFormX(this.viewport.width); + } + get height() { + return PDFUnit.toFormY(this.viewport.height); + } + get HLines() { + return this.ctxCanvas.HLines; + } + get VLines() { + return this.ctxCanvas.VLines; + } + get Fills() { + return this.ctxCanvas.Fills; + } + get Texts() { + return this.ctxCanvas.Texts; + } + + destroy() { + this.pdfPage.destroy(); + this.pdfPage = null; + + this.ptiParser = null; + this.Fields = null; + this.Boxsets = null; + this.ctxCanvas = null; + } + + getPagePoint(x, y) { + return this.viewport.convertToPdfPoint(x, y); + } + + parsePage(callback, errorCallBack) { + if (this.renderingState !== PDFPageParser.RenderingStates.INITIAL) { + errorCallBack('Must be in new state before drawing'); + return; + } + + this.renderingState = PDFPageParser.RenderingStates.RUNNING; + + const canvas = createScratchCanvas(1, 1); + const ctx = canvas.getContext('2d'); + + function pageViewDrawCallback(error) { + this.renderingState = PDFPageParser.RenderingStates.FINISHED; + + if (error) { + console.error(error); + errorCallBack(`Error: Page ${this.id + 1}: 
${error.message}`); + } else { + if (this.ptiParser) { + const extraFields = this.ptiParser.getFields( + parseInt(this.id) + 1 + ); + extraFields.forEach((field) => this.#_addField(field)); } - } - - const renderContext = { - canvasContext:ctx, - viewport:this.viewport - }; - - this.pdfPage.render(renderContext).then( - data => { - this.pdfPage.getAnnotations().then( - fields => { - fields.forEach(field => this.#_addField(field)); - pageViewDrawCallback.call(this, null); - }, - err => errorCallBack("pdfPage.getAnnotations error:" + err)); - }, - err => pageViewDrawCallback.call(this, err) - ); - } + + this.ctxCanvas = ctx.canvas; + this.stats = this.pdfPage.stats; + + nodeUtil.p2jinfo(`Success: Page ${this.id + 1}`); + callback(); + } + } + + const renderContext = { + canvasContext: ctx, + viewport: this.viewport, + }; + + this.pdfPage.render(renderContext).then( + (data) => { + this.pdfPage.getAnnotations().then( + (fields) => { + fields.forEach((field) => this.#_addField(field)); + pageViewDrawCallback.call(this, null); + }, + (err) => errorCallBack('pdfPage.getAnnotations error:' + err) + ); + }, + (err) => pageViewDrawCallback.call(this, err) + ); + } } ////////////////////////////////Start of Node.js Module class PDFJSClass extends events.EventEmitter { - pdfDocument = null; - pages = null; - rawTextContents = null; - - needRawText = null; - - // constructor - constructor(needRawText) { - super(); - - // public, this instance copies - this.pdfDocument = null; - this.pages = []; - this.rawTextContents = []; - - this.needRawText = needRawText; - } - - raiseErrorEvent(errMsg) { - console.error(errMsg); - process.nextTick( () => this.emit("pdfjs_parseDataError", errMsg)); - // this.emit("error", errMsg); - return errMsg; - } - - raiseReadyEvent(data) { - process.nextTick( () => this.emit("pdfjs_parseDataReady", data) ); - return data; - } - - - parsePDFData(arrayBuffer, password) { - this.pdfDocument = null; - - const parameters = {password: password, data: 
arrayBuffer}; - PDFJS.getDocument(parameters).then( - pdfDocument => this.load(pdfDocument, 1), - error => this.raiseErrorEvent(error) - ); - }; - - tryLoadFieldInfoXML(pdfFilePath) { - const _sufInfo = "_fieldInfo.xml"; - const fieldInfoXMLPath = pdfFilePath.replace(".pdf", _sufInfo); - if ((fieldInfoXMLPath.indexOf(_sufInfo) < 1) || (!fs.existsSync(fieldInfoXMLPath))) { - return; - } - nodeUtil.p2jinfo("About to load fieldInfo XML : " + fieldInfoXMLPath); - - this.ptiParser = new PTIXmlParser(); - this.ptiParser.parseXml(fieldInfoXMLPath, err => { - if (err) { - nodeUtil.p2jwarn("fieldInfo XML Error: " + JSON.stringify(err)); - this.ptiParser = null; + pdfDocument = null; + pages = null; + rawTextContents = null; + + needRawText = null; + + // constructor + constructor(needRawText) { + super(); + + // public, this instance copies + this.pdfDocument = null; + this.pages = []; + this.rawTextContents = []; + + this.needRawText = needRawText; + } + + raiseErrorEvent(errMsg) { + console.error(errMsg); + process.nextTick(() => this.emit('pdfjs_parseDataError', errMsg)); + // this.emit("error", errMsg); + return errMsg; + } + + raiseReadyEvent(data) { + process.nextTick(() => this.emit('pdfjs_parseDataReady', data)); + return data; + } + + parsePDFData(arrayBuffer, password) { + this.pdfDocument = null; + + const parameters = { password: password, data: arrayBuffer }; + PDFJS.getDocument(parameters).then( + (pdfDocument) => this.load(pdfDocument, 1), + (error) => this.raiseErrorEvent(error) + ); + } + + tryLoadFieldInfoXML(pdfFilePath) { + const _sufInfo = '_fieldInfo.xml'; + const fieldInfoXMLPath = pdfFilePath.replace('.pdf', _sufInfo); + if ( + fieldInfoXMLPath.indexOf(_sufInfo) < 1 || + !fs.existsSync(fieldInfoXMLPath) + ) { + return; + } + nodeUtil.p2jinfo('About to load fieldInfo XML : ' + fieldInfoXMLPath); + + this.ptiParser = new PTIXmlParser(); + this.ptiParser.parseXml(fieldInfoXMLPath, (err) => { + if (err) { + nodeUtil.p2jwarn('fieldInfo XML Error: ' + 
JSON.stringify(err)); + this.ptiParser = null; + } else { + nodeUtil.p2jinfo('fieldInfo XML loaded.'); + } + }); + } + + load(pdfDocument, scale) { + this.pdfDocument = pdfDocument; + + return this.loadMetaData().then( + () => this.loadPages(), + (error) => this.raiseErrorEvent('loadMetaData error: ' + error) + ); + } + + loadMetaData() { + return this.pdfDocument.getMetadata().then( + (data) => { + this.documentInfo = data.info; + this.metadata = data.metadata?.metadata ?? {}; + this.parseMetaData(); + }, + (error) => + this.raiseErrorEvent('pdfDocument.getMetadata error: ' + error) + ); + } + + parseMetaData() { + const meta = { + Transcoder: _PARSER_SIG, + Meta: { ...this.documentInfo, Metadata: this.metadata }, + }; + this.raiseReadyEvent(meta); + this.emit('readable', meta); + } + + loadPages() { + const pagesCount = this.pdfDocument.numPages; + const pagePromises = []; + for (let i = 1; i <= pagesCount; i++) + pagePromises.push(this.pdfDocument.getPage(i)); + + const pagesPromise = PDFJS.Promise.all(pagePromises); + + nodeUtil.p2jinfo('PDF loaded. 
pagesCount = ' + pagesCount); + + return pagesPromise.then( + (promisedPages) => this.parsePage(promisedPages, 0, 1.5), + (error) => this.raiseErrorEvent('pagesPromise error: ' + error) + ); + } + + parsePage(promisedPages, id, scale) { + nodeUtil.p2jinfo('start to parse page:' + (id + 1)); + + const pdfPage = promisedPages[id]; + const pageParser = new PDFPageParser(pdfPage, id, scale, this.ptiParser); + + function continueOnNextPage() { + nodeUtil.p2jinfo('complete parsing page:' + (id + 1)); + if (id === this.pdfDocument.numPages - 1) { + this.raiseReadyEvent({ Pages: this.pages }); + //v1.1.2: signal end of parsed data with null + process.nextTick(() => this.raiseReadyEvent(null)); + this.emit('data', null); + } else { + process.nextTick(() => this.parsePage(promisedPages, ++id, scale)); + } + } + + pageParser.parsePage( + (data) => { + const page = { + Width: pageParser.width, + Height: pageParser.height, + HLines: pageParser.HLines, + VLines: pageParser.VLines, + Fills: pageParser.Fills, + //needs to keep current default output format, text content will output to a separate file if '-c' command line argument is set + // Content:pdfPage.getTextContent(), + Texts: pageParser.Texts, + Fields: pageParser.Fields, + Boxsets: pageParser.Boxsets, + }; + + this.pages.push(page); + this.emit('data', page); + + if (this.needRawText) { + pdfPage.getTextContent().then( + (textContent) => { + this.rawTextContents.push(textContent); + nodeUtil.p2jinfo( + 'complete parsing raw text content:' + (id + 1) + ); + continueOnNextPage.call(this); + }, + (error) => + this.raiseErrorEvent( + 'pdfPage.getTextContent error: ' + error + ) + ); + } else { + continueOnNextPage.call(this); } - else { - nodeUtil.p2jinfo("fieldInfo XML loaded."); + }, + (errMsg) => this.raiseErrorEvent(errMsg) + ); + } + + getRawTextContent() { + let retVal = ''; + if (!this.needRawText) return retVal; + + this.rawTextContents.forEach((textContent, index) => { + let prevText = null; + 
textContent.bidiTexts.forEach((textObj, idx) => { + if (prevText) { + if (Math.abs(textObj.y - prevText.y) <= 9) { + prevText.str += textObj.str; + } else { + retVal += prevText.str + '\r\n'; + prevText = textObj; + } + } else { + prevText = textObj; } - }); - } - - load(pdfDocument, scale) { - this.pdfDocument = pdfDocument; - - return this.loadMetaData().then( - () => this.loadPages(), - error => this.raiseErrorEvent("loadMetaData error: " + error) - ); - } - - loadMetaData() { - return this.pdfDocument.getMetadata().then( - data => { - this.documentInfo = data.info; - this.metadata = data.metadata?.metadata ?? {}; - this.parseMetaData(); - }, - error => this.raiseErrorEvent("pdfDocument.getMetadata error: " + error) - ); - } - - parseMetaData() { - const meta = {Transcoder: _PARSER_SIG, Meta: {...this.documentInfo, Metadata: this.metadata}}; - this.raiseReadyEvent(meta); - this.emit("readable", meta); - } - - loadPages() { - const pagesCount = this.pdfDocument.numPages; - const pagePromises = []; - for (let i = 1; i <= pagesCount; i++) - pagePromises.push(this.pdfDocument.getPage(i)); - - const pagesPromise = PDFJS.Promise.all(pagePromises); - - nodeUtil.p2jinfo("PDF loaded. 
pagesCount = " + pagesCount); - - return pagesPromise.then( - promisedPages => this.parsePage(promisedPages, 0, 1.5), - error => this.raiseErrorEvent("pagesPromise error: " + error) - ); - } - - parsePage(promisedPages, id, scale) { - nodeUtil.p2jinfo("start to parse page:" + (id+1)); - - const pdfPage = promisedPages[id]; - const pageParser = new PDFPageParser(pdfPage, id, scale, this.ptiParser); - - function continueOnNextPage() { - nodeUtil.p2jinfo("complete parsing page:" + (id+1)); - if (id === (this.pdfDocument.numPages - 1) ) { - this.raiseReadyEvent({Pages:this.pages}); - //v1.1.2: signal end of parsed data with null - process.nextTick(() => this.raiseReadyEvent(null)); - this.emit("data", null); + }); + if (prevText) { + retVal += prevText.str; + } + retVal += + '\r\n----------------Page (' + + index + + ') Break----------------\r\n'; + }); + + return retVal; + } + + getAllFieldsTypes() { + return PDFField.getAllFieldsTypes({ Pages: this.pages || [] }); + } + + getMergedTextBlocksIfNeeded() { + for (let p = 0; p < this.pages.length; p++) { + let prevText = null; + let page = this.pages[p]; + + page.Texts.sort(PDFFont.compareBlockPos); + page.Texts = page.Texts.filter((t, j) => { + let isDup = + j > 0 && PDFFont.areDuplicateBlocks(page.Texts[j - 1], t); + if (isDup) { + nodeUtil.p2jinfo( + 'skipped: dup text block: ' + decodeURIComponent(t.R[0].T) + ); } - else { - process.nextTick(() => this.parsePage(promisedPages, ++id, scale)); + return !isDup; + }); + + for (let i = 0; i < page.Texts.length; i++) { + let text = page.Texts[i]; + + if (prevText) { + if ( + PDFFont.areAdjacentBlocks(prevText, text) && + PDFFont.haveSameStyle(prevText, text) + ) { + let preT = decodeURIComponent(prevText.R[0].T); + let curT = decodeURIComponent(text.R[0].T); + + prevText.R[0].T += text.R[0].T; + prevText.w += text.w; + text.merged = true; + + let mergedText = decodeURIComponent(prevText.R[0].T); + nodeUtil.p2jinfo( + `merged text block: ${preT} + ${curT} => ${mergedText}` 
+ ); + prevText = null; //yeah, only merge two blocks for now + } else { + prevText = text; + } + } else { + prevText = text; } - } - - pageParser.parsePage( - data => { - const page = { - Width: pageParser.width, - Height: pageParser.height, - HLines: pageParser.HLines, - VLines: pageParser.VLines, - Fills: pageParser.Fills, - //needs to keep current default output format, text content will output to a separate file if '-c' command line argument is set - // Content:pdfPage.getTextContent(), - Texts: pageParser.Texts, - Fields: pageParser.Fields, - Boxsets: pageParser.Boxsets - }; - - this.pages.push(page); - this.emit("data", page); - - if (this.needRawText) { - pdfPage.getTextContent().then( - textContent => { - this.rawTextContents.push(textContent); - nodeUtil.p2jinfo("complete parsing raw text content:" + (id+1)); - continueOnNextPage.call(this); - }, - error => this.raiseErrorEvent("pdfPage.getTextContent error: " + error) - ); - } - else { - continueOnNextPage.call(this); - } - }, - errMsg => this.raiseErrorEvent(errMsg) - ); - } - - getRawTextContent() { - let retVal = ""; - if (!this.needRawText) - return retVal; - - this.rawTextContents.forEach( (textContent, index) => { - let prevText = null; - textContent.bidiTexts.forEach( (textObj, idx) => { - if (prevText) { - if (Math.abs(textObj.y - prevText.y) <= 9) { - prevText.str += textObj.str; - } - else { - retVal += prevText.str + "\r\n"; - prevText = textObj; - } - } - else { - prevText = textObj; - } - - }); - if (prevText) { - retVal += prevText.str; - } - retVal += "\r\n----------------Page (" + index + ") Break----------------\r\n"; - }); - - return retVal; - } - - getAllFieldsTypes() { - return PDFField.getAllFieldsTypes({Pages:this.pages || []}); - } - - getMergedTextBlocksIfNeeded() { - for (let p = 0; p < this.pages.length; p++) { - let prevText = null; - let page = this.pages[p]; - - page.Texts.sort(PDFFont.compareBlockPos); - page.Texts = page.Texts.filter( (t, j) => { - let isDup = (j > 0) && 
PDFFont.areDuplicateBlocks(page.Texts[j-1], t); - if (isDup) { - nodeUtil.p2jinfo("skipped: dup text block: " + decodeURIComponent(t.R[0].T)); - } - return !isDup; - }); - - for (let i = 0; i < page.Texts.length; i++) { - let text = page.Texts[i]; - - if (prevText) { - if (PDFFont.areAdjacentBlocks(prevText, text) && PDFFont.haveSameStyle(prevText, text)) { - let preT = decodeURIComponent(prevText.R[0].T); - let curT = decodeURIComponent(text.R[0].T); - - prevText.R[0].T += text.R[0].T; - prevText.w += text.w; - text.merged = true; - - let mergedText = decodeURIComponent(prevText.R[0].T); - nodeUtil.p2jinfo(`merged text block: ${preT} + ${curT} => ${mergedText}`); - prevText = null; //yeah, only merge two blocks for now - } - else { - prevText = text; - } - } - else { - prevText = text; - } - } - - page.Texts = page.Texts.filter( t => !t.merged); - } - - return {Pages:this.pages}; - } - - destroy() { - this.removeAllListeners(); - - if (this.pdfDocument) - this.pdfDocument.destroy(); - this.pdfDocument = null; - - this.pages = null; - this.rawTextContents = null; - } + } + + page.Texts = page.Texts.filter((t) => !t.merged); + } + + return { Pages: this.pages }; + } + + destroy() { + this.removeAllListeners(); + + if (this.pdfDocument) this.pdfDocument.destroy(); + this.pdfDocument = null; + this.pages = null; + this.rawTextContents = null; + } } class ParserStream extends stream.Transform { - static createContentStream(jsonObj) { - const rStream = new stream.Readable({objectMode: true}); - rStream.push(jsonObj); - rStream.push(null); - return rStream; - } - - static createOutputStream(outputPath, resolve, reject) { - const outputStream = fs.createWriteStream(outputPath); - outputStream.on('finish', () => resolve(outputPath)); - outputStream.on('error', err => reject(err) ); - return outputStream; - } - - #pdfParser = null; - #chunks = []; - #parsedData = {Pages:[]}; - #_flush_callback = null; - - constructor(pdfParser, options) { - super(options); - this.#pdfParser 
= pdfParser; - - this.#chunks = []; - - // this.#pdfParser.on("pdfParser_dataReady", evtData => { - // this.push(evtData); - // this.#_flush_callback(); - // this.emit('end', null); - // }); - this.#pdfParser.on("readable", meta => this.#parsedData = {...meta, Pages:[]}); - this.#pdfParser.on("data", page => { - if (!page) { - this.push(this.#parsedData); - this.#_flush_callback(); - } - else - this.#parsedData.Pages.push(page); - }); - } - - //implements transform stream - _transform(chunk, enc, callback) { - this.#chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, enc)); - callback(); - } - - _flush(callback) { - this.#_flush_callback = callback; - this.#pdfParser.parseBuffer(Buffer.concat(this.#chunks)); - } - - _destroy() { - super.removeAllListeners(); - this.#pdfParser = null; - this.#chunks = []; - } + static createContentStream(jsonObj) { + const rStream = new stream.Readable({ objectMode: true }); + rStream.push(jsonObj); + rStream.push(null); + return rStream; + } + + static createOutputStream(outputPath, resolve, reject) { + const outputStream = fs.createWriteStream(outputPath); + outputStream.on('finish', () => resolve(outputPath)); + outputStream.on('error', (err) => reject(err)); + return outputStream; + } + + #pdfParser = null; + #chunks = []; + #parsedData = { Pages: [] }; + #_flush_callback = null; + + constructor(pdfParser, options) { + super(options); + this.#pdfParser = pdfParser; + + this.#chunks = []; + + // this.#pdfParser.on("pdfParser_dataReady", evtData => { + // this.push(evtData); + // this.#_flush_callback(); + // this.emit('end', null); + // }); + this.#pdfParser.on( + 'readable', + (meta) => (this.#parsedData = { ...meta, Pages: [] }) + ); + this.#pdfParser.on('data', (page) => { + if (!page) { + this.push(this.#parsedData); + this.#_flush_callback(); + } else this.#parsedData.Pages.push(page); + }); + } + + //implements transform stream + _transform(chunk, enc, callback) { + this.#chunks.push( + Buffer.isBuffer(chunk) ? 
chunk : Buffer.from(chunk, enc) + ); + callback(); + } + + _flush(callback) { + this.#_flush_callback = callback; + this.#pdfParser.parseBuffer(Buffer.concat(this.#chunks)); + } + + _destroy() { + super.removeAllListeners(); + this.#pdfParser = null; + this.#chunks = []; + } } -class PDFParser extends events.EventEmitter { // inherit from event emitter - //public static - static get colorDict() {return kColors; } - static get fontFaceDict() { return kFontFaces; } - static get fontStyleDict() { return kFontStyles; } - - //private static - static #maxBinBufferCount = 10; - static #binBuffer = {}; - - //private - #password = ""; - - #context = null; // service context object, only used in Web Service project; null in command line - - #pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started - #pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache - #data = null; //if file read success, data is PDF content; if failed, data is "err" object - #PDFJS = null; //will be initialized in constructor - #processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) - - // constructor - constructor(context, needRawText, password) { - //call constructor for super class - super(); - - // private - // service context object, only used in Web Service project; null in command line - this.#context = context; - - this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started - this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache - this.#data = null; //if file read success, data is PDF content; if failed, data is "err" object - this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) - - this.#PDFJS = new PDFJSClass(needRawText); - this.#password = password; - } - - //private methods, needs to invoked 
by [funcName].call(this, ...) - #onPDFJSParseDataReady(data) { - if (!data) { //v1.1.2: data===null means end of parsed data - nodeUtil.p2jinfo("PDF parsing completed."); - this.emit("pdfParser_dataReady", this.#data); - } - else { - this.#data = {...this.#data, ...data}; - } - } - - #onPDFJSParserDataError(err) { - this.#data = null; - this.emit("pdfParser_dataError", {"parserError": err}); - // this.emit("error", err); - } - - #startParsingPDF(buffer) { - this.#data = {}; - - this.#PDFJS.on("pdfjs_parseDataReady", data => this.#onPDFJSParseDataReady(data)); - this.#PDFJS.on("pdfjs_parseDataError", err => this.#onPDFJSParserDataError(err)); - - //v1.3.0 the following Readable Stream-like events are replacement for the top two custom events - this.#PDFJS.on("readable", meta => this.emit("readable", meta)); - this.#PDFJS.on("data", data => this.emit("data", data)); - this.#PDFJS.on("error", err => this.#onPDFJSParserDataError(err)); - - this.#PDFJS.parsePDFData(buffer || PDFParser.#binBuffer[this.binBufferKey], this.#password); - } - - #processBinaryCache() { - if (this.binBufferKey in PDFParser.#binBuffer) { - this.#startParsingPDF(); - return true; - } - - const allKeys = Object.keys(PDFParser.#binBuffer); - if (allKeys.length > PDFParser.#maxBinBufferCount) { - const idx = this.id % PDFParser.#maxBinBufferCount; - const key = allKeys[idx]; - PDFParser.#binBuffer[key] = null; - delete PDFParser.#binBuffer[key]; - - nodeUtil.p2jinfo("re-cycled cache for " + key); - } - - return false; - } - - //public getter - get data() { return this.#data; } - get binBufferKey() { return this.#pdfFilePath + this.#pdfFileMTime; } - - //public APIs - createParserStream() { - return new ParserStream(this, {objectMode: true, bufferSize: 64 * 1024}); - } - - async loadPDF(pdfFilePath, verbosity) { - nodeUtil.verbosity(verbosity || 0); - nodeUtil.p2jinfo("about to load PDF file " + pdfFilePath); - - this.#pdfFilePath = pdfFilePath; - - try { - this.#pdfFileMTime = 
fs.statSync(pdfFilePath).mtimeMs; - if (this.#processFieldInfoXML) { - this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath); - } +class StringifyStream extends stream.Transform { + constructor(options) { + super(options); + + this._readableState.objectMode = false; + this._writableState.objectMode = true; + } + + _transform(obj, encoding, callback) { + this.push(JSON.stringify(obj)); + callback(); + } +} - if (this.#processBinaryCache()) - return; - - PDFParser.#binBuffer[this.binBufferKey] = await promises.readFile(pdfFilePath); - nodeUtil.p2jinfo(`Load OK: ${pdfFilePath}`); - this.#startParsingPDF(); - } - catch(err) { - nodeUtil.p2jerror(`Load Failed: ${pdfFilePath} - ${err}`); - this.emit("pdfParser_dataError", err); - } - } - - // Introduce a way to directly process buffers without the need to write it to a temporary file - parseBuffer(pdfBuffer) { - this.#startParsingPDF(pdfBuffer); - } - - getRawTextContent() { return this.#PDFJS.getRawTextContent(); } - getRawTextContentStream() { return ParserStream.createContentStream(this.getRawTextContent()); } - - getAllFieldsTypes() { return this.#PDFJS.getAllFieldsTypes(); }; - getAllFieldsTypesStream() { return ParserStream.createContentStream(this.getAllFieldsTypes()); } - - getMergedTextBlocksIfNeeded() { return this.#PDFJS.getMergedTextBlocksIfNeeded(); } - getMergedTextBlocksStream() { return ParserStream.createContentStream(this.getMergedTextBlocksIfNeeded()) } - - destroy() { // invoked with stream transform process - super.removeAllListeners(); - - //context object will be set in Web Service project, but not in command line utility - if (this.#context) { - this.#context.destroy(); - this.#context = null; - } - - this.#pdfFilePath = null; - this.#pdfFileMTime = null; - this.#data = null; - this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) - - this.#PDFJS.destroy(); - this.#PDFJS = null; - } +class PDFParser extends events.EventEmitter { + // inherit from 
event emitter + //public static + static get colorDict() { + return kColors; + } + static get fontFaceDict() { + return kFontFaces; + } + static get fontStyleDict() { + return kFontStyles; + } + + //private static + static #maxBinBufferCount = 10; + static #binBuffer = {}; + + //private + #password = ''; + + #context = null; // service context object, only used in Web Service project; null in command line + + #pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + #pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache + #data = null; //if file read success, data is PDF content; if failed, data is "err" object + #PDFJS = null; //will be initialized in constructor + #processFieldInfoXML = false; //disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + // constructor + constructor(context, needRawText, password) { + //call constructor for super class + super(); + + // private + // service context object, only used in Web Service project; null in command line + this.#context = context; + + this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache + this.#data = null; //if file read success, data is PDF content; if failed, data is "err" object + this.#processFieldInfoXML = false; //disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + this.#PDFJS = new PDFJSClass(needRawText); + this.#password = password; + } + + //private methods, needs to invoked by [funcName].call(this, ...) 
+ #onPDFJSParseDataReady(data) { + if (!data) { + //v1.1.2: data===null means end of parsed data + nodeUtil.p2jinfo('PDF parsing completed.'); + this.emit('pdfParser_dataReady', this.#data); + } else { + this.#data = { ...this.#data, ...data }; + } + } + + #onPDFJSParserDataError(err) { + this.#data = null; + this.emit('pdfParser_dataError', { parserError: err }); + // this.emit("error", err); + } + + #startParsingPDF(buffer) { + this.#data = {}; + + this.#PDFJS.on('pdfjs_parseDataReady', (data) => + this.#onPDFJSParseDataReady(data) + ); + this.#PDFJS.on('pdfjs_parseDataError', (err) => + this.#onPDFJSParserDataError(err) + ); + + //v1.3.0 the following Readable Stream-like events are replacement for the top two custom events + this.#PDFJS.on('readable', (meta) => this.emit('readable', meta)); + this.#PDFJS.on('data', (data) => this.emit('data', data)); + this.#PDFJS.on('error', (err) => this.#onPDFJSParserDataError(err)); + + this.#PDFJS.parsePDFData( + buffer || PDFParser.#binBuffer[this.binBufferKey], + this.#password + ); + } + + #processBinaryCache() { + if (this.binBufferKey in PDFParser.#binBuffer) { + this.#startParsingPDF(); + return true; + } + + const allKeys = Object.keys(PDFParser.#binBuffer); + if (allKeys.length > PDFParser.#maxBinBufferCount) { + const idx = this.id % PDFParser.#maxBinBufferCount; + const key = allKeys[idx]; + PDFParser.#binBuffer[key] = null; + delete PDFParser.#binBuffer[key]; + + nodeUtil.p2jinfo('re-cycled cache for ' + key); + } + + return false; + } + + //public getter + get data() { + return this.#data; + } + get binBufferKey() { + return this.#pdfFilePath + this.#pdfFileMTime; + } + + //public APIs + createParserStream() { + return new ParserStream(this, { + objectMode: true, + bufferSize: 64 * 1024, + }); + } + + async loadPDF(pdfFilePath, verbosity) { + nodeUtil.verbosity(verbosity || 0); + nodeUtil.p2jinfo('about to load PDF file ' + pdfFilePath); + + this.#pdfFilePath = pdfFilePath; + + try { + this.#pdfFileMTime = 
fs.statSync(pdfFilePath).mtimeMs; + if (this.#processFieldInfoXML) { + this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath); + } + + if (this.#processBinaryCache()) return; + + PDFParser.#binBuffer[this.binBufferKey] = await promises.readFile( + pdfFilePath + ); + nodeUtil.p2jinfo(`Load OK: ${pdfFilePath}`); + this.#startParsingPDF(); + } catch (err) { + nodeUtil.p2jerror(`Load Failed: ${pdfFilePath} - ${err}`); + this.emit('pdfParser_dataError', err); + } + } + + // Introduce a way to directly process buffers without the need to write it to a temporary file + parseBuffer(pdfBuffer) { + this.#startParsingPDF(pdfBuffer); + } + + getRawTextContent() { + return this.#PDFJS.getRawTextContent(); + } + getRawTextContentStream() { + return ParserStream.createContentStream(this.getRawTextContent()); + } + + getAllFieldsTypes() { + return this.#PDFJS.getAllFieldsTypes(); + } + getAllFieldsTypesStream() { + return ParserStream.createContentStream(this.getAllFieldsTypes()); + } + + getMergedTextBlocksIfNeeded() { + return this.#PDFJS.getMergedTextBlocksIfNeeded(); + } + getMergedTextBlocksStream() { + return ParserStream.createContentStream( + this.getMergedTextBlocksIfNeeded() + ); + } + + destroy() { + // invoked with stream transform process + super.removeAllListeners(); + + //context object will be set in Web Service project, but not in command line utility + if (this.#context) { + this.#context.destroy(); + this.#context = null; + } + + this.#pdfFilePath = null; + this.#pdfFileMTime = null; + this.#data = null; + this.#processFieldInfoXML = false; //disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + this.#PDFJS.destroy(); + this.#PDFJS = null; + } } module.exports = PDFParser; diff --git a/pdfparser.mjs b/pdfparser.mjs deleted file mode 100644 index d80caaf1..00000000 --- a/pdfparser.mjs +++ /dev/null @@ -1,2196 +0,0 @@ -import fs from 'fs'; -import nodeUtil from 'util'; -import { readFile } from 'fs/promises'; -import { EventEmitter } from 'events'; 
-import path from 'path'; -import { fileURLToPath } from 'url'; -import 'buffer'; -import { DOMParser } from '@xmldom/xmldom'; -import { Transform, Readable } from 'stream'; - -const kColors = [ - '#000000', // 0 - '#ffffff', // 1 - '#4c4c4c', // 2 - '#808080', // 3 - '#999999', // 4 - '#c0c0c0', // 5 - '#cccccc', // 6 - '#e5e5e5', // 7 - '#f2f2f2', // 8 - '#008000', // 9 - '#00ff00', // 10 - '#bfffa0', // 11 - '#ffd629', // 12 - '#ff99cc', // 13 - '#004080', // 14 - '#9fc0e1', // 15 - '#5580ff', // 16 - '#a9c9fa', // 17 - '#ff0080', // 18 - '#800080', // 19 - '#ffbfff', // 20 - '#e45b21', // 21 - '#ffbfaa', // 22 - '#008080', // 23 - '#ff0000', // 24 - '#fdc59f', // 25 - '#808000', // 26 - '#bfbf00', // 27 - '#824100', // 28 - '#007256', // 29 - '#008000', // 30 - '#000080', // Last + 1 - '#008080', // Last + 2 - '#800080', // Last + 3 - '#ff0000', // Last + 4 - '#0000ff', // Last + 5 - '#008000' // Last + 6 -]; - -const kFontFaces = [ - "quicktype,arial,helvetica,sans-serif", // 00 - QuickType - sans-serif variable font - "quicktype condensed,arial narrow,arial,helvetica,sans-serif", // 01 - QuickType Condensed - thin sans-serif variable font - "quicktypepi,quicktypeiipi", // 02 - QuickType Pi - "quicktype mono,courier new,courier,monospace", // 03 - QuickType Mono - san-serif fixed font - "ocr-a,courier new,courier,monospace", // 04 - OCR-A - OCR readable san-serif fixed font - "ocr b mt,courier new,courier,monospace" // 05 - OCR-B MT - OCR readable san-serif fixed font - ]; - - const kFontStyles = [ - // Face Size Bold Italic StyleID(Comment) - // ----- ---- ---- ----- ----------------- - [0, 6, 0, 0], //00 - [0, 8, 0, 0], //01 - [0, 10, 0, 0], //02 - [0, 12, 0, 0], //03 - [0, 14, 0, 0], //04 - [0, 18, 0, 0], //05 - [0, 6, 1, 0], //06 - [0, 8, 1, 0], //07 - [0, 10, 1, 0], //08 - [0, 12, 1, 0], //09 - [0, 14, 1, 0], //10 - [0, 18, 1, 0], //11 - [0, 6, 0, 1], //12 - [0, 8, 0, 1], //13 - [0, 10, 0, 1], //14 - [0, 12, 0, 1], //15 - [0, 14, 0, 1], //16 - [0, 18, 0, 
1], //17 - [0, 6, 1, 1], //18 - [0, 8, 1, 1], //19 - [0, 10, 1, 1], //20 - [0, 12, 1, 1], //21 - [0, 14, 1, 1], //22 - [0, 18, 1, 1], //23 - [1, 6, 0, 0], //24 - [1, 8, 0, 0], //25 - [1, 10, 0, 0], //26 - [1, 12, 0, 0], //27 - [1, 14, 0, 0], //28 - [1, 18, 0, 0], //29 - [1, 6, 1, 0], //30 - [1, 8, 1, 0], //31 - [1, 10, 1, 0], //32 - [1, 12, 1, 0], //33 - [1, 14, 1, 0], //34 - [1, 18, 1, 0], //35 - [1, 6, 0, 1], //36 - [1, 8, 0, 1], //37 - [1, 10, 0, 1], //38 - [1, 12, 0, 1], //39 - [1, 14, 0, 1], //40 - [1, 18, 0, 1], //41 - [2, 8, 0, 0], //42 - [2, 10, 0, 0], //43 - [2, 12, 0, 0], //44 - [2, 14, 0, 0], //45 - [2, 18, 0, 0], //46 - [3, 8, 0, 0], //47 - [3, 10, 0, 0], //48 - [3, 12, 0, 0], //49 - [4, 12, 0, 0], //50 - [0, 9, 0, 0], //51 - [0, 9, 1, 0], //52 - [0, 9, 0, 1], //53 - [0, 9, 1, 1], //54 - [1, 9, 0, 0], //55 - [1, 9, 1, 0], //56 - [1, 9, 1, 1], //57 - [4, 10, 0, 0], //58 - [5, 10, 0, 0], //59 - [5, 12, 0, 0] //60 -]; - -const dpi = 96.0; -const gridXPerInch = 4.0; -const gridYPerInch = 4.0; - -const _pixelXPerGrid = dpi/gridXPerInch; -const _pixelYPerGrid = dpi/gridYPerInch; -const _pixelPerPoint = dpi/72; - -class PDFUnit { - static toFixedFloat(fNum) { - return parseFloat(fNum.toFixed(3)); - } - - static colorCount() { - return kColors.length; - } - - static toPixelX(formX) { - return Math.round(formX * _pixelXPerGrid); - } - - static toPixelY(formY) { - return Math.round(formY * _pixelYPerGrid); - } - - static pointToPixel(point) {// Point unit (1/72 an inch) to pixel units - return point * _pixelPerPoint; - } - - static getColorByIndex(clrId) { - return kColors[clrId]; - } - - static toFormPoint(viewportX, viewportY) { - return [(viewportX / _pixelXPerGrid), (viewportY / _pixelYPerGrid)]; - } - - static toFormX(viewportX) { - return PDFUnit.toFixedFloat(viewportX / _pixelXPerGrid); - } - - static toFormY(viewportY) { - return PDFUnit.toFixedFloat(viewportY / _pixelYPerGrid); - } - - static findColorIndex(color) { - if (color.length === 4) - color += 
"000"; - //MQZ. 07/29/2013: if color is not in dictionary, just return -1. The caller (pdffont, pdffill) will set the actual color - return kColors.indexOf(color); - } - - static dateToIso8601(date) { - // PDF spec p.160 - if (date.slice(0, 2) === 'D:') { // D: prefix is optional - date = date.slice(2); - } - let tz = 'Z'; - let idx = date.search(/[Z+-]/); // timezone is optional - if (idx >= 0) { - tz = date.slice(idx); - if (tz !== 'Z') { // timezone format OHH'mm' - tz = tz.slice(0, 3) + ':' + tz.slice(4, 6); - } - date = date.slice(0, idx); - } - let yr = date.slice(0, 4); // everything after year is optional - let mth = date.slice(4, 6) || '01'; - let day = date.slice(6, 8) || '01'; - let hr = date.slice(8, 10) || '00'; - let min = date.slice(10, 12) || '00'; - let sec = date.slice(12, 14) || '00'; - return yr + '-' + mth + '-' + day + 'T' + hr + ':' + min + ':' + sec + tz; - } -} - -class PDFLine { - constructor(x1, y1, x2, y2, lineWidth, color, dashed) { - this.x1 = x1; - this.y1 = y1; - this.x2 = x2; - this.y2 = y2; - this.lineWidth = lineWidth || 1.0; - this.color = color; - this.dashed = dashed; - } - - #setStartPoint(oneLine, x, y) { - oneLine.x = PDFUnit.toFormX(x); - oneLine.y = PDFUnit.toFormY(y); - } - - processLine(targetData) { - const xDelta = Math.abs(this.x2 - this.x1); - const yDelta = Math.abs(this.y2 - this.y1); - const minDelta = this.lineWidth; - - let oneLine = {x:0, y:0, w: PDFUnit.toFixedFloat(this.lineWidth), l:0}; - - //MQZ Aug.28.2013, adding color support, using color dictionary and default to black - const clrId = PDFUnit.findColorIndex(this.color); - const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? 
{clr: clrId} : {oc: this.color}; - oneLine = {...oneLine, ...colorObj}; - - //MQZ Aug.29 dashed line support - if (this.dashed) { - oneLine = oneLine = {...oneLine, dsh: 1}; - } - - if ((yDelta < this.lineWidth) && (xDelta > minDelta)) { //HLine - if (this.lineWidth < 4 && (xDelta / this.lineWidth < 4)) { - nodeUtil.p2jinfo("Skipped: short thick HLine: lineWidth = " + this.lineWidth + ", xDelta = " + xDelta); - return; //skip short thick lines, like PA SPP lines behinds checkbox - } - - oneLine.l = PDFUnit.toFormX(xDelta); - if (this.x1 > this.x2) - this.#setStartPoint(oneLine, this.x2, this.y2); - else - this.#setStartPoint(oneLine, this.x1, this.y1); - targetData.HLines.push(oneLine); - } - else if ((xDelta < this.lineWidth) && (yDelta > minDelta)) {//VLine - if (this.lineWidth < 4 && (yDelta / this.lineWidth < 4)) { - nodeUtil.p2jinfo("Skipped: short thick VLine: lineWidth = " + this.lineWidth + ", yDelta = " + yDelta); - return; //skip short think lines, like PA SPP lines behinds checkbox - } - - oneLine.l = PDFUnit.toFormY(yDelta); - if (this.y1 > this.y2) - this.#setStartPoint(oneLine, this.x2, this.y2); - else - this.#setStartPoint(oneLine, this.x1, this.y1); - targetData.VLines.push(oneLine); - } - } -} - -class PDFFill{ - // constructor - constructor(x, y, width, height, color) { - this.x = x; - this.y = y; - this.width = width; - this.height = height; - this.color = color; - } - - processFill(targetData) { - //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) - const clrId = PDFUnit.findColorIndex(this.color); - const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? 
{clr: clrId} : {oc: this.color}; - - const oneFill = {x:PDFUnit.toFormX(this.x), - y:PDFUnit.toFormY(this.y), - w:PDFUnit.toFormX(this.width), - h:PDFUnit.toFormY(this.height), - ...colorObj}; - - - if (oneFill.w < 2 && oneFill.h < 2) { - nodeUtil.p2jinfo("Skipped: tiny fill: " + oneFill.w + " x " + oneFill.h); - return; //skip short thick lines, like PA SPP lines behinds checkbox - } - - targetData.Fills.push(oneFill); - } -} - -const _boldSubNames = ["bd", "bold", "demi", "black"]; -const _stdFonts = ["arial", "helvetica", "sans-serif ", "courier ","monospace ", "ocr "]; -const DISTANCE_DELTA = 0.1; - -class PDFFont { - #initTypeName() { - let typeName = (this.fontObj.name || this.fontObj.fallbackName); - if (!typeName) { - typeName = kFontFaces[0]; //default font family name - } - typeName = typeName.toLowerCase(); - return typeName; - } - - #initSubType() { - let subType = this.typeName; - let bold = false; - - let nameArray = this.typeName.split('+'); - if (Array.isArray(nameArray) && nameArray.length > 1) { - subType = nameArray[1].split("-"); - if (Array.isArray(subType) && subType.length > 1) { - let subName = subType[1].toLowerCase(); - bold = _boldSubNames.indexOf(subName) >= 0; - subType = subType[0]; - } - } - return {subType, bold}; - } - - #initSymbol() { - let isSymbol = this.typeName.indexOf("symbol") > 0 || kFontFaces[2].indexOf(this.subType) >= 0; - if (this.fontObj.isSymbolicFont) { - let mFonts = _stdFonts.filter( (oneName) => (this.typeName.indexOf(oneName) >= 0) ); - - if (mFonts.length > 0) { - this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) 
reset the flag for now - nodeUtil.p2jinfo("Reset: isSymbolicFont (false) for " + this.fontObj.name); - } - } - else { - if (isSymbol) { - this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c - nodeUtil.p2jinfo("Reset: isSymbolicFont (true) for " + this.fontObj.name); - } - } - return isSymbol; - } - - #initSpaceWidth() { - let spaceWidth = this.fontObj.spaceWidth; - if (!spaceWidth) { - var spaceId = Array.isArray(this.fontObj.toFontChar) ? this.fontObj.toFontChar.indexOf(32) : -1; - spaceWidth = (spaceId >= 0 && Array.isArray(this.fontObj.widths)) ? this.fontObj.widths[spaceId] : 250; - } - spaceWidth = PDFUnit.toFormX(spaceWidth) / 32; - return spaceWidth; - } - - // constructor - constructor(fontObj) { - this.fontObj = fontObj; - - this.typeName = this.#initTypeName(); - - const {subType, bold} = this.#initSubType(); - this.subType = subType; - this.bold = bold; - - this.isSymbol = this.#initSymbol(); - this.spaceWidth = this.#initSpaceWidth(); - - this.fontSize = 1; - this.faceIdx = 0; - this.italic = false; - this.fontStyleId = -1; - } - - /** sort text blocks by y then x */ - static compareBlockPos(t1, t2) { - if (t1.y < t2.y - DISTANCE_DELTA) { - return -1; - } - if (Math.abs(t1.y - t2.y) <= DISTANCE_DELTA) { - if (t1.x < t2.x - DISTANCE_DELTA) { - return -1; - } - if (Math.abs(t1.x - t2.x) <= DISTANCE_DELTA) { - return 0; - } - } - return 1; - } - - static haveSameStyle(t1, t2) { - let retVal = t1.R[0].S === t2.R[0].S; - if (retVal && t1.R[0].S < 0) { - for (let i = 0; i < t1.R[0].TS.length; i++) { - if (t1.R[0].TS[i] !== t2.R[0].TS[i]) { - retVal = false; - break; - } - } - } - if (retVal) { // make sure both block are not rotated - retVal = (typeof t1.R[0].RA === 'undefined') && (typeof t2.R[0].RA === 'undefined'); - } - - return retVal; - } - - static getSpaceThreshHold(t1) { - return (PDFFont.getFontSize(t1)/12) * t1.sw; - } - - static areAdjacentBlocks(t1, t2) { - const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; - const 
isDistanceSmallerThanASpace = ((t2.x - t1.x - t1.w) < PDFFont.getSpaceThreshHold(t1)); - - return isInSameLine && isDistanceSmallerThanASpace; - } - - static getFontSize(textBlock) { - const sId = textBlock.R[0].S; - return (sId < 0) ? textBlock.R[0].TS[1] : kFontStyles[sId][1]; - } - - static areDuplicateBlocks(t1, t2) { - return t1.x == t2.x && t1.y == t2.y && t1.R[0].T == t2.R[0].T && PDFFont.haveSameStyle(t1, t2); - } - - // private - #setFaceIndex() { - const fontObj = this.fontObj; - - this.bold = fontObj.bold; - if (!this.bold) { - this.bold = this.typeName.indexOf("bold") >= 0 || this.typeName.indexOf("black") >= 0; - } - this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42 - // Extended the fix for https://github.com/modesty/pdf2json/issues/42 - if (!this.italic) { - this.italic = this.typeName.indexOf("italic") >= 0 || this.typeName.indexOf("oblique") >= 0; - } - // Added detection of hybrid dual bolditalic fonts - if (((!this.bold) || (!this.italic)) && (this.typeName.indexOf("boldobl") >= 0)) { - this.bold = true; - this.italic = true; - } - - let typeName = this.subType; - if (fontObj.isSerifFont) { - if (kFontFaces[1].indexOf(typeName) >= 0) - this.faceIdx = 1; - } - else if (kFontFaces[2].indexOf(this.subType) >= 0) { - this.faceIdx = 2; - } - else if (fontObj.isMonospace) { - this.faceIdx = 3; - - if (kFontFaces[4].indexOf(typeName) >= 0) - this.faceIdx = 4; - else if (kFontFaces[5].indexOf(typeName) >= 0) - this.faceIdx = 5; - } - else if (fontObj.isSymbolicFont) { - this.faceIdx = 2; - } - - if (this.faceIdx == 0) { - if (this.typeName.indexOf("narrow") > 0) - this.faceIdx = 1; - } - -// nodeUtil.p2jinfo"typeName = " + typeName + " => faceIdx = " + this.faceIdx); - } - - #getFontStyleIndex(fontSize) { - this.#setFaceIndex(); - - //MQZ Feb.28.2013. Adjust bold text fontsize to work around word spacing issue - this.fontSize = (this.bold && (fontSize > 12)) ? 
fontSize + 1 : fontSize; - - let fsa = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; - let retVal = -1; - - kFontStyles.forEach(function(element, index, list){ - if (retVal === -1) { - if (element[0] === fsa[0] && element[1] === fsa[1] && - element[2] === fsa[2] && element[3] === fsa[3]) { - retVal = index; - } - } - }); - - return retVal; - } - - #processSymbolicFont(str) { - let retVal = str; - - if (!str || str.length !== 1) - return retVal; - - if (!this.fontObj.isSymbolicFont || !this.isSymbol) { - if (retVal == "C" || retVal == "G") { //prevent symbolic encoding from the client - retVal = " " + retVal + " "; //sample: va_ind_760c - } - return retVal; - } - - switch(str.charCodeAt(0)) { - case 20: retVal = '\u2713'; break; //check mark - case 70: retVal = (this.fontObj.type === "CIDFontType0") ? '\u26A0' : '\u007D'; break; //exclaimation in triangle OR right curly bracket - case 71: retVal = '\u25b6'; break; //right triangle - case 97: retVal = '\u25b6'; break; //right triangle - case 99: retVal = this.isSymbol ? '\u2022' : '\u25b2'; break; //up triangle. set to Bullet Dot for VA SchSCR - case 100: retVal = '\u25bc'; break; //down triangle - case 103: retVal = '\u27A8'; break; //right arrow. 
sample: va_ind_760pff and pmt - case 106: retVal = ''; break; //VA 301: string j character by the checkbox, hide it for now - case 114: retVal = '\u2022'; break; //Bullet dot - case 115: retVal = '\u25b2'; break; //up triangle - case 116: retVal = '\u2022'; break; //Bullet dot - case 118: retVal = '\u2022'; break; //Bullet dot - default: - nodeUtil.p2jinfo(this.fontObj.type + " - SymbolicFont - (" + this.fontObj.name + ") : " + - str.charCodeAt(0) + "::" + str.charCodeAt(1) + " => " + retVal); - } - - return retVal; - } - - #textRotationAngle(matrix2D) { - let retVal = 0; - if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) { - if (matrix2D[0][1] != 0 && matrix2D[1][0] != 0) { - if ((matrix2D[0][1] / matrix2D[1][0]) + 1 < 0.0001) - retVal = 90; - } - } - else if (matrix2D[0][0] !== 0 && matrix2D[1][1] !== 0) { - let r1 = Math.atan(-matrix2D[0][1] / matrix2D[0][0]); - let r2 = Math.atan(matrix2D[1][0] / matrix2D[1][1]); - if (Math.abs(r1) > 0.0001 && (r1 - r2 < 0.0001)) { - retVal = r1 * 180 / Math.PI; - } - } - return retVal; - } - - // public instance methods - processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) { - const text = this.#processSymbolicFont(str); - if (!text) { - return; - } - this.fontStyleId = this.#getFontStyleIndex(fontSize); - - // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary - // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; - const TS = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; - - const clrId = PDFUnit.findColorIndex(color); - const colorObj = (clrId >= 0 && clrId < PDFUnit.colorCount()) ? 
{clr: clrId} : {oc: color}; - - let textRun = { - T: this.flash_encode(text), - S: this.fontStyleId, - TS: TS - }; - const rAngle = this.#textRotationAngle(matrix2D); - if (rAngle != 0) { - nodeUtil.p2jinfo(str + ": rotated " + rAngle + " degree."); - textRun = {...textRun, RA: rAngle}; - } - - const oneText = {x: PDFUnit.toFormX(p.x) - 0.25, - y: PDFUnit.toFormY(p.y) - 0.75, - w: PDFUnit.toFixedFloat(maxWidth), - ...colorObj, //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) - sw: this.spaceWidth, //font space width, use to merge adjacent text blocks - A: "left", - R: [textRun] - }; - - targetData.Texts.push(oneText); - } - - flash_encode(str) { - let retVal = encodeURIComponent(str); - retVal = retVal.replace("%C2%96", "-"); - retVal = retVal.replace("%C2%91", "%27"); - retVal = retVal.replace("%C2%92", "%27"); - retVal = retVal.replace("%C2%82", "%27"); - retVal = retVal.replace("%C2%93", "%22"); - retVal = retVal.replace("%C2%94", "%22"); - retVal = retVal.replace("%C2%84", "%22"); - retVal = retVal.replace("%C2%8B", "%C2%AB"); - retVal = retVal.replace("%C2%9B", "%C2%BB"); - - return retVal; - } - - clean() { - this.fontObj = null; - delete this.fontObj; - } -} - -// alias some functions to make (compiled) code shorter -const {round: mr, sin: ms, cos: mc, abs, sqrt} = Math; - -// precompute "00" to "FF" -const dec2hex = []; -for (let i = 0; i < 16; i++) { - for (let j = 0; j < 16; j++) { - dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); - } -} - -function createMatrixIdentity() { - return [ - [1, 0, 0], - [0, 1, 0], - [0, 0, 1] - ]; -} - -function matrixMultiply(m1, m2) { - let result = createMatrixIdentity(); - - for (let x = 0; x < 3; x++) { - for (let y = 0; y < 3; y++) { - let sum = 0; - - for (let z = 0; z < 3; z++) { - sum += m1[x][z] * m2[z][y]; - } - - result[x][y] = sum; - } - } - return result; -} - -function copyState(o1, o2) { - o2.fillStyle = o1.fillStyle; - o2.lineCap = o1.lineCap; - o2.lineJoin = 
o1.lineJoin; - o2.lineWidth = o1.lineWidth; - o2.miterLimit = o1.miterLimit; - o2.shadowBlur = o1.shadowBlur; - o2.shadowColor = o1.shadowColor; - o2.shadowOffsetX = o1.shadowOffsetX; - o2.shadowOffsetY = o1.shadowOffsetY; - o2.strokeStyle = o1.strokeStyle; - o2.globalAlpha = o1.globalAlpha; - o2.arcScaleX_ = o1.arcScaleX_; - o2.arcScaleY_ = o1.arcScaleY_; - o2.lineScale_ = o1.lineScale_; - o2.dashArray = o1.dashArray; -} - -function processStyle(styleString) { - let str, alpha = 1; - - styleString = String(styleString); - if (styleString.substring(0, 3) == 'rgb') { - let start = styleString.indexOf('(', 3); - let end = styleString.indexOf(')', start + 1); - let guts = styleString.substring(start + 1, end).split(','); - - str = '#'; - for (let i = 0; i < 3; i++) { - str += dec2hex[Number(guts[i])]; - } - - if (guts.length == 4 && styleString.substring(3, 4) == 'a') { - alpha = guts[3]; - } - } else { - str = styleString; - } - - return {color:str, alpha:alpha}; -} - -// Helper function that takes the already fixed cordinates. -function bezierCurveToHelper(self, cp1, cp2, p) { - self.currentPath_.push({ - type:'bezierCurveTo', - cp1x:cp1.x, - cp1y:cp1.y, - cp2x:cp2.x, - cp2y:cp2.y, - x:p.x, - y:p.y - }); - self.currentX_ = p.x; - self.currentY_ = p.y; -} - -function matrixIsFinite(m) { - for (let j = 0; j < 3; j++) { - for (let k = 0; k < 2; k++) { - if (!isFinite(m[j][k]) || isNaN(m[j][k])) { - return false; - } - } - } - return true; -} - -function setM(ctx, m, updateLineScale) { - if (!matrixIsFinite(m)) { - return; - } - ctx.m_ = m; - - if (updateLineScale) { - // Get the line scale. - // Determinant of this.m_ means how much the area is enlarged by the - // transformation. So its square root can be used as a scale factor - // for width. 
- let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; - ctx.lineScale_ = sqrt(abs(det)); - } -} - -class CanvasPattern_ { - constructor() { - } -} - -// Gradient / Pattern Stubs -class CanvasGradient_ { - constructor(aType) { - this.type_ = aType; - this.x0_ = 0; - this.y0_ = 0; - this.r0_ = 0; - this.x1_ = 0; - this.y1_ = 0; - this.r1_ = 0; - this.colors_ = []; - } - addColorStop(aOffset, aColor) { - aColor = processStyle(aColor); - this.colors_.push({offset:aOffset, - color:aColor.color, - alpha:aColor.alpha}); - } -} - - -/** - * This class implements CanvasRenderingContext2D interface as described by - * the WHATWG. - * @param {HTMLElement} surfaceElement The element that the 2D context should - * be associated with - */ -class CanvasRenderingContext2D_ { - constructor(canvasTarget, scaledWidth, scaledHeight) { - this.m_ = createMatrixIdentity(); - - this.mStack_ = []; - this.aStack_ = []; - this.currentPath_ = []; - - // Canvas context properties - this.strokeStyle = '#000'; - this.fillStyle = '#000'; - - this.lineWidth = 1; - this.lineJoin = 'miter'; - this.lineCap = 'butt'; - this.dashArray = []; - this.miterLimit = 1; - this.globalAlpha = 1; - - if (!("HLines" in canvasTarget) || !Array.isArray(canvasTarget.HLines)) - canvasTarget.HLines = []; - if (!("VLines" in canvasTarget) || !Array.isArray(canvasTarget.VLines)) - canvasTarget.VLines = []; - if (!("Fills" in canvasTarget) || !Array.isArray(canvasTarget.Fills)) - canvasTarget.Fills = []; - if (!("Texts" in canvasTarget) || !Array.isArray(canvasTarget.Texts)) - canvasTarget.Texts = []; - - this.canvas = canvasTarget; - - this.width = scaledWidth; - this.height = scaledHeight; - - this.arcScaleX_ = 1; - this.arcScaleY_ = 1; - this.lineScale_ = 1; - - this.currentFont = null; - } - - //private helper methods - #drawPDFLine(p1, p2, lineWidth, color) { - let dashedLine = Array.isArray(this.dashArray) && (this.dashArray.length > 1); - let pL = new PDFLine(p1.x, p1.y, p2.x, p2.y, lineWidth, color, dashedLine); - 
pL.processLine(this.canvas); - } - - #drawPDFFill(cp, min, max, color) { - let width = max.x - min.x; - let height = max.y - min.y; - let pF = new PDFFill(cp.x, cp.y, width, height, color); - pF.processFill(this.canvas); - } - - #needRemoveRect(x, y, w, h) { - let retVal = (Math.abs(w - Math.abs(h)) < 1 && w < 13); - if (retVal) { - nodeUtil.p2jinfo("Skipped: tiny rect: w=" + w + ", h=" + h); - } - return retVal; - } - - getContext(ctxType) { - return (ctxType === "2d") ? this : null; - } - - setLineDash(lineDash) { - this.dashArray = lineDash; - } - - getLineDash() { - return this.dashArray; - } - - fillText(text, x, y, maxWidth, fontSize) { - if (!text || text.trim().length < 1) - return; - let p = this.getCoords_(x, y); - - let a = processStyle(this.fillStyle || this.strokeStyle); - let color = (!!a) ? a.color : '#000000'; - - this.currentFont.processText(p, text, maxWidth, color, fontSize, this.canvas, this.m_); - }; - - strokeText(text, x, y, maxWidth) { - //MQZ. 10/23/2012, yeah, no hollow text for now - this.fillText(text, x, y, maxWidth); - } - - measureText(text) { - console.warn("to be implemented: contextPrototype.measureText - ", text); - let chars = text.length || 1; - return {width: chars * (this.currentFont.spaceWidth || 5)}; - } - - setFont(fontObj) { - if ((!!this.currentFont) && typeof(this.currentFont.clean) === "function") { - this.currentFont.clean(); - this.currentFont = null; - } - - this.currentFont = new PDFFont(fontObj); - } - - clearRect() { - console.warn("to be implemented: contextPrototype.clearRect"); - } - - beginPath() { - // TODO: Branch current matrix so that save/restore has no effect - // as per safari docs. 
- this.currentPath_ = []; - } - - moveTo(aX, aY) { - let p = this.getCoords_(aX, aY); - this.currentPath_.push({type:'moveTo', x:p.x, y:p.y}); - this.currentX_ = p.x; - this.currentY_ = p.y; - } - - lineTo(aX, aY) { - let p = this.getCoords_(aX, aY); - this.currentPath_.push({type:'lineTo', x:p.x, y:p.y}); - - this.currentX_ = p.x; - this.currentY_ = p.y; - } - - bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { - let p = this.getCoords_(aX, aY); - let cp1 = this.getCoords_(aCP1x, aCP1y); - let cp2 = this.getCoords_(aCP2x, aCP2y); - bezierCurveToHelper(this, cp1, cp2, p); - } - - quadraticCurveTo(aCPx, aCPy, aX, aY) { - // the following is lifted almost directly from - // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes - - let cp = this.getCoords_(aCPx, aCPy); - let p = this.getCoords_(aX, aY); - - let cp1 = { - x:this.currentX_ + 2.0 / 3.0 * (cp.x - this.currentX_), - y:this.currentY_ + 2.0 / 3.0 * (cp.y - this.currentY_) - }; - let cp2 = { - x:cp1.x + (p.x - this.currentX_) / 3.0, - y:cp1.y + (p.y - this.currentY_) / 3.0 - }; - - bezierCurveToHelper(this, cp1, cp2, p); - } - - arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { - let arcType = aClockwise ? 'at' : 'wa'; - - let xStart = aX + mc(aStartAngle) * aRadius; - let yStart = aY + ms(aStartAngle) * aRadius; - - let xEnd = aX + mc(aEndAngle) * aRadius; - let yEnd = aY + ms(aEndAngle) * aRadius; - - // IE won't render arches drawn counter clockwise if xStart == xEnd. - if (xStart == xEnd && !aClockwise) { - xStart += 0.125; // Offset xStart by 1/80 of a pixel. 
Use something - // that can be represented in binary - } - - let p = this.getCoords_(aX, aY); - let pStart = this.getCoords_(xStart, yStart); - let pEnd = this.getCoords_(xEnd, yEnd); - - this.currentPath_.push({type:arcType, - x:p.x, - y:p.y, - radius:aRadius, - xStart:pStart.x, - yStart:pStart.y, - xEnd:pEnd.x, - yEnd:pEnd.y}); - } - - rect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - } - - strokeRect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } - - let oldPath = this.currentPath_; - this.beginPath(); - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - this.stroke(); - - this.currentPath_ = oldPath; - } - - fillRect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } - - let oldPath = this.currentPath_; - this.beginPath(); - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - this.fill(); - - this.currentPath_ = oldPath; - } - - createLinearGradient(aX0, aY0, aX1, aY1) { - let gradient = new CanvasGradient_('gradient'); - gradient.x0_ = aX0; - gradient.y0_ = aY0; - gradient.x1_ = aX1; - gradient.y1_ = aY1; - return gradient; - } - - createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { - let gradient = new CanvasGradient_('gradientradial'); - gradient.x0_ = aX0; - gradient.y0_ = aY0; - gradient.r0_ = aR0; - gradient.x1_ = aX1; - gradient.y1_ = aY1; - gradient.r1_ = aR1; - return 
gradient; - } - - drawImage(image, var_args) { - //MQZ. no image drawing support for now - } - - getImageData(x, y, w, h) { - //MQZ. returns empty data buffer for now - return { - width:w, - height:h, - data:new Uint8Array(w * h * 4) - }; - } - - stroke(aFill) { - if (this.currentPath_.length < 2) { - return; - } - - let a = processStyle(aFill ? this.fillStyle : this.strokeStyle); - let color = a.color; -// let opacity = a.alpha * this.globalAlpha; - let lineWidth = this.lineScale_ * this.lineWidth; - - let min = {x:null, y:null}; - let max = {x:null, y:null}; - - for (let i = 0; i < this.currentPath_.length; i++) { - let p = this.currentPath_[i]; - - switch (p.type) { - case 'moveTo': - break; - case 'lineTo': - if (!aFill) { //lines - if (i > 0) { - this.#drawPDFLine(this.currentPath_[i-1], p, lineWidth, color); - } - } - break; - case 'close': - if (!aFill) { //lines - if (i > 0) { - this.#drawPDFLine(this.currentPath_[i-1], this.currentPath_[0], lineWidth, color); - } - } - p = null; - break; - } - - // Figure out dimensions so we can set fills' coordinates correctly - if (aFill && p) { - if (min.x == null || p.x < min.x) { - min.x = p.x; - } - if (max.x == null || p.x > max.x) { - max.x = p.x; - } - if (min.y == null || p.y < min.y) { - min.y = p.y; - } - if (max.y == null || p.y > max.y) { - max.y = p.y; - } - } - } - - if (aFill) { //fill - this.#drawPDFFill(min, min, max, color); - } - } - - fill() { - this.stroke(true); - } - - closePath() { - this.currentPath_.push({type:'close'}); - } - - /** - * @private - */ - getCoords_ (aX, aY) { - let m = this.m_; - return { - x: (aX * m[0][0] + aY * m[1][0] + m[2][0]), - y: (aX * m[0][1] + aY * m[1][1] + m[2][1]) - }; - } - - save() { - let o = {}; - copyState(this, o); - this.aStack_.push(o); - this.mStack_.push(this.m_); - this.m_ = matrixMultiply(createMatrixIdentity(), this.m_); - } - - restore() { - copyState(this.aStack_.pop(), this); - this.m_ = this.mStack_.pop(); - } - - translate(aX, aY) { - let m1 = [ - 
[1, 0, 0], - [0, 1, 0], - [aX, aY, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), false); - } - - rotate(aRot) { - let c = mc(aRot); - let s = ms(aRot); - - let m1 = [ - [c, s, 0], - [-s, c, 0], - [0, 0, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), false); - } - - scale(aX, aY) { - this.arcScaleX_ *= aX; - this.arcScaleY_ *= aY; - let m1 = [ - [aX, 0, 0], - [0, aY, 0], - [0, 0, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), true); - } - - transform(m11, m12, m21, m22, dx, dy) { - let m1 = [ - [m11, m12, 0], - [m21, m22, 0], - [dx, dy, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), true); - } - - setTransform(m11, m12, m21, m22, dx, dy) { - let m = [ - [m11, m12, 0], - [m21, m22, 0], - [dx, dy, 1] - ]; - - setM(this, m, true); - } - - /******** STUBS ********/ - clip() { - // TODO: Implement - } - - arcTo() { - // TODO: Implement - } - - createPattern() { - return new CanvasPattern_(); - } -} - -const kFBANotOverridable = 0x00000400; // indicates the field is read only by the user -const kFBARequired = 0x00000010; // indicates the field is required -const kMinHeight = 20; - -class PDFField { - static tabIndex = 0; - - static isWidgetSupported(field) { - let retVal = false; - - switch(field.fieldType) { - case 'Tx': retVal = true; break; //text input - case 'Btn': - if (field.fieldFlags & 32768) { - field.fieldType = 'Rd'; //radio button - } - else if (field.fieldFlags & 65536) { - field.fieldType = 'Btn'; //push button - } - else { - field.fieldType = 'Cb'; //checkbox - } - retVal = true; - break; - case 'Ch': retVal = true; break; //drop down - case 'Sig': retVal = true; break; //signature - default: - nodeUtil.p2jwarn("Unsupported: field.fieldType of " + field.fieldType); - break; - } - - return retVal; - } - - static isFormElement(field) { - let retVal = false; - switch(field.subtype) { - case 'Widget': retVal = PDFField.isWidgetSupported(field); break; - default: - nodeUtil.p2jwarn("Unsupported: field.type of " + field.subtype); - break; - 
} - return retVal; - } - - // constructor - constructor(field, viewport, Fields, Boxsets) { - this.field = field; - this.viewport = viewport; - this.Fields = Fields; - this.Boxsets = Boxsets; - } - - // Normalize rectangle rect=[x1, y1, x2, y2] so that (x1,y1) < (x2,y2) - // For coordinate systems whose origin lies in the bottom-left, this - // means normalization to (BL,TR) ordering. For systems with origin in the - // top-left, this means (TL,BR) ordering. - static #normalizeRect(rect) { - const r = rect.slice(0); // clone rect - if (rect[0] > rect[2]) { - r[0] = rect[2]; - r[2] = rect[0]; - } - if (rect[1] > rect[3]) { - r[1] = rect[3]; - r[3] = rect[1]; - } - return r; - } - - #getFieldPosition(field) { - let viewPort = this.viewport; - let fieldRect = viewPort.convertToViewportRectangle(field.rect); - let rect = PDFField.#normalizeRect(fieldRect); - - let height = rect[3] - rect[1]; - if (field.fieldType === 'Tx') { - if (height > kMinHeight + 2) { - rect[1] += 2; - height -= 2; - } - } - else if (field.fieldType !== 'Ch') { //checkbox, radio button, and link button - rect[1] -= 3; - } - - height = (height >= kMinHeight) ? 
height : kMinHeight; - - return { - x: PDFUnit.toFormX(rect[0]), - y: PDFUnit.toFormY(rect[1]), - w: PDFUnit.toFormX(rect[2] - rect[0]), - h: PDFUnit.toFormY(height) - }; - } - - #getFieldBaseData(field) { - let attributeMask = 0; - //PDF Spec p.676 TABLE 8.70 Field flags common to all field types - if (field.fieldFlags & 0x00000001) { - attributeMask |= kFBANotOverridable; - } - if (field.fieldFlags & 0x00000002) { - attributeMask |= kFBARequired; - } - - let anData = { - id: { Id: field.fullName, EN: 0}, - TI: field.TI, - AM: attributeMask - }; - //PDF Spec p.675: add TU (AlternativeText) fields to provide accessibility info - if (field.alternativeText && field.alternativeText.length > 1) { - anData.TU = field.alternativeText; - } - - if (field.alternativeID && field.alternativeID.length > 1) { - anData.TM = field.alternativeID; - } - - return Object.assign(anData, this.#getFieldPosition(field)); - } - - #addAlpha(field) { - const anData = Object.assign({ - style: 48, - T: { - Name: field.TName || "alpha", - TypeInfo: {} - } - }, this.#getFieldBaseData(field)); - - if (field.MV) { //field attributes: arbitrary mask value - anData.MV = field.MV; - } - if (field.fieldValue) { - anData.V = field.fieldValue; //read-only field value, like "self-prepared" - } - - this.Fields.push(anData); - } - - #addCheckBox(box) { - const anData = Object.assign({ - style: 48, - T: { - Name: "box", - TypeInfo: {} - } - }, this.#getFieldBaseData(box)); - if(box.fieldValue) { - anData.checked = box.fieldValue !== 'Off'; - } - - this.Boxsets.push({boxes:[anData]}); - } - - #addRadioButton(box) { - const anData = Object.assign({ - style: 48, - T: { - Name: "box", - TypeInfo: {} - } - }, this.#getFieldBaseData(box)); - - anData.id.Id = box.value; - if ('checked' in box) { - anData.checked = box.checked; - } - - const rdGroup = this.Boxsets.filter(boxset => ('id' in boxset) && ('Id' in boxset.id) && (boxset.id.Id === box.fullName))[0]; - if ((!!rdGroup) && ('boxes' in rdGroup)) { - 
rdGroup.boxes.push(anData); - } - else { - this.Boxsets.push({boxes:[anData], id: { Id: box.fullName, EN: 0}}); - } - } - - #addLinkButton(field) { - const anData = Object.assign({ - style: 48, - T: { - Name: "link" - }, - FL: { - form: {Id: field.FL} - } - }, this.#getFieldBaseData(field)); - - this.Fields.push(anData); - } - - #addSelect(field) { - const anData = Object.assign({ - style: 48, - T: { - Name: "alpha", - TypeInfo: {} - } - }, this.#getFieldBaseData(field)); - - anData.w -= 0.5; //adjust combobox width - anData.PL = {V: [], D: []}; - field.value.forEach( (ele, idx) => { - if (Array.isArray(ele)) { - anData.PL.D.push(ele[0]); - anData.PL.V.push(ele[1]); - } else { - anData.PL.D.push(ele); - anData.PL.V.push(ele); - } - }); - - // add field value to the object - if (field.fieldValue) { - anData.V = field.fieldValue; - } - this.Fields.push(anData); - }; - - #addSignature(field) { - const anData = Object.assign({ - style: 48, - T: { - Name: "signature", - TypeInfo: {} - } - }, this.#getFieldBaseData(field)); - - if (field.Sig) { - anData.Sig = {}; - if (field.Sig.Name) anData.Sig.Name = field.Sig.Name; - if (field.Sig.M) anData.Sig.M = PDFUnit.dateToIso8601(field.Sig.M); - if (field.Sig.Location) anData.Sig.Location = field.Sig.Location; - if (field.Sig.Reason) anData.Sig.Reason = field.Sig.Reason; - if (field.Sig.ContactInfo) anData.Sig.ContactInfo = field.Sig.ContactInfo; - } - - this.Fields.push(anData); - } - - // public instance methods - processField() { - this.field.TI = PDFField.tabIndex++; - - switch(this.field.fieldType) { - case 'Tx': this.#addAlpha(this.field); break; - case 'Cb': this.#addCheckBox(this.field); break; - case 'Rd': this.#addRadioButton(this.field);break; - case 'Btn':this.#addLinkButton(this.field); break; - case 'Ch': this.#addSelect(this.field); break; - case 'Sig': this.#addSignature(this.field); break; - } - - this.clean(); - } - - clean() { - delete this.field; - delete this.viewport; - delete this.Fields; - delete 
this.Boxsets; - } - - //static public method to generate fieldsType object based on parser result - static getAllFieldsTypes(data) { - const isFieldReadOnly = field => { - return (field.AM & kFBANotOverridable) ? true : false; - }; - - const getFieldBase = field => { - return {id: field.id.Id, type: field.T.Name, calc: isFieldReadOnly(field), value: field.V || ""}; - }; - - let retVal = []; - data.Pages.forEach( page => { - page.Boxsets.forEach( boxsets => { - if (boxsets.boxes.length > 1) { //radio button - boxsets.boxes.forEach( box => { - retVal.push({id: boxsets.id.Id, type: "radio", calc: isFieldReadOnly(box), value: box.id.Id}); - }); - } - else { //checkbox - retVal.push(getFieldBase(boxsets.boxes[0])); - } - }); - - page.Fields.forEach(field => retVal.push(getFieldBase(field))); - - }); - return retVal; - } -} - -class PTIXmlParser { - xmlData = null; - ptiPageArray = []; - - // constructor - constructor() { - this.xmlData = null; - this.ptiPageArray = []; - } - - parseXml(filePath, callback) { - fs.readFile(filePath, 'utf8', (err, data) => { - if (err) { - callback(err); - } - else { - this.xmlData = data; - - var parser = new DOMParser(); - var dom = parser.parseFromString(this.xmlData); - var root = dom.documentElement; - - var xmlFields = root.getElementsByTagName("field"); - var fields = []; - - for(var i=0;i preContent += fs.readFileSync(baseDir + fileName, 'utf8'), ""); -eval(_baseCode); - -////////////////////////////////start of helper classes -class PDFPageParser { - //static - static RenderingStates = { - INITIAL: 0, - RUNNING: 1, - PAUSED: 2, - FINISHED: 3 - }; - - //public - id = -1; - pdfPage = null; - ptiParser = null; - scale = 0; - viewport = null; - renderingState = -1; - - Fields = null; - Boxsets = null; - ctxCanvas = null; - - #_addField (field) { - if (!PDFField.isFormElement(field)) { - nodeUtil.p2jwarn("NOT valid form element", field); - return; - } - - const oneField = new PDFField(field, this.viewport, this.Fields, this.Boxsets); - 
oneField.processField(); - } - - // constructor - constructor(pdfPage, id, scale, ptiParser) { - // public, this instance copies - this.id = id; - this.pdfPage = pdfPage; - this.ptiParser = ptiParser; - - this.scale = scale || 1.0; - - //leave out the 2nd parameter in order to use page's default rotation (for both portrait and landscape form) - this.viewport = this.pdfPage.getViewport(this.scale); - - this.renderingState = PDFPageParser.RenderingStates.INITIAL; - - //form elements other than radio buttons and check boxes - this.Fields = []; - //form elements: radio buttons and check boxes - this.Boxsets = []; - this.ctxCanvas = {}; - } - - get width() { return PDFUnit.toFormX(this.viewport.width); } - get height() { return PDFUnit.toFormY(this.viewport.height); } - get HLines() { return this.ctxCanvas.HLines; } - get VLines() { return this.ctxCanvas.VLines; } - get Fills() { return this.ctxCanvas.Fills; } - get Texts() { return this.ctxCanvas.Texts; } - - destroy() { - this.pdfPage.destroy(); - this.pdfPage = null; - - this.ptiParser = null; - this.Fields = null; - this.Boxsets = null; - this.ctxCanvas = null; - } - - getPagePoint(x, y) { - return this.viewport.convertToPdfPoint(x, y); - } - - parsePage(callback, errorCallBack) { - if (this.renderingState !== PDFPageParser.RenderingStates.INITIAL) { - errorCallBack('Must be in new state before drawing'); - return; - } - - this.renderingState = PDFPageParser.RenderingStates.RUNNING; - - const canvas = createScratchCanvas(1, 1); - const ctx = canvas.getContext('2d'); - - function pageViewDrawCallback(error) { - this.renderingState = PDFPageParser.RenderingStates.FINISHED; - - if (error) { - console.error(error); - errorCallBack(`Error: Page ${this.id + 1}: ${error.message}`); - } - else { - if (this.ptiParser) { - const extraFields = this.ptiParser.getFields(parseInt(this.id) + 1); - extraFields.forEach( field => this.#_addField(field) ); - } - - this.ctxCanvas = ctx.canvas; - this.stats = this.pdfPage.stats; - - 
nodeUtil.p2jinfo(`Success: Page ${this.id + 1}`); - callback(); - } - } - - const renderContext = { - canvasContext:ctx, - viewport:this.viewport - }; - - this.pdfPage.render(renderContext).then( - data => { - this.pdfPage.getAnnotations().then( - fields => { - fields.forEach(field => this.#_addField(field)); - pageViewDrawCallback.call(this, null); - }, - err => errorCallBack("pdfPage.getAnnotations error:" + err)); - }, - err => pageViewDrawCallback.call(this, err) - ); - } -} - -////////////////////////////////Start of Node.js Module -class PDFJSClass extends EventEmitter { - pdfDocument = null; - pages = null; - rawTextContents = null; - - needRawText = null; - - // constructor - constructor(needRawText) { - super(); - - // public, this instance copies - this.pdfDocument = null; - this.pages = []; - this.rawTextContents = []; - - this.needRawText = needRawText; - } - - raiseErrorEvent(errMsg) { - console.error(errMsg); - process.nextTick( () => this.emit("pdfjs_parseDataError", errMsg)); - // this.emit("error", errMsg); - return errMsg; - } - - raiseReadyEvent(data) { - process.nextTick( () => this.emit("pdfjs_parseDataReady", data) ); - return data; - } - - - parsePDFData(arrayBuffer, password) { - this.pdfDocument = null; - - const parameters = {password: password, data: arrayBuffer}; - PDFJS.getDocument(parameters).then( - pdfDocument => this.load(pdfDocument, 1), - error => this.raiseErrorEvent(error) - ); - }; - - tryLoadFieldInfoXML(pdfFilePath) { - const _sufInfo = "_fieldInfo.xml"; - const fieldInfoXMLPath = pdfFilePath.replace(".pdf", _sufInfo); - if ((fieldInfoXMLPath.indexOf(_sufInfo) < 1) || (!fs.existsSync(fieldInfoXMLPath))) { - return; - } - nodeUtil.p2jinfo("About to load fieldInfo XML : " + fieldInfoXMLPath); - - this.ptiParser = new PTIXmlParser(); - this.ptiParser.parseXml(fieldInfoXMLPath, err => { - if (err) { - nodeUtil.p2jwarn("fieldInfo XML Error: " + JSON.stringify(err)); - this.ptiParser = null; - } - else { - 
nodeUtil.p2jinfo("fieldInfo XML loaded."); - } - }); - } - - load(pdfDocument, scale) { - this.pdfDocument = pdfDocument; - - return this.loadMetaData().then( - () => this.loadPages(), - error => this.raiseErrorEvent("loadMetaData error: " + error) - ); - } - - loadMetaData() { - return this.pdfDocument.getMetadata().then( - data => { - this.documentInfo = data.info; - this.metadata = data.metadata?.metadata ?? {}; - this.parseMetaData(); - }, - error => this.raiseErrorEvent("pdfDocument.getMetadata error: " + error) - ); - } - - parseMetaData() { - const meta = {Transcoder: _PARSER_SIG, Meta: {...this.documentInfo, Metadata: this.metadata}}; - this.raiseReadyEvent(meta); - this.emit("readable", meta); - } - - loadPages() { - const pagesCount = this.pdfDocument.numPages; - const pagePromises = []; - for (let i = 1; i <= pagesCount; i++) - pagePromises.push(this.pdfDocument.getPage(i)); - - const pagesPromise = PDFJS.Promise.all(pagePromises); - - nodeUtil.p2jinfo("PDF loaded. pagesCount = " + pagesCount); - - return pagesPromise.then( - promisedPages => this.parsePage(promisedPages, 0, 1.5), - error => this.raiseErrorEvent("pagesPromise error: " + error) - ); - } - - parsePage(promisedPages, id, scale) { - nodeUtil.p2jinfo("start to parse page:" + (id+1)); - - const pdfPage = promisedPages[id]; - const pageParser = new PDFPageParser(pdfPage, id, scale, this.ptiParser); - - function continueOnNextPage() { - nodeUtil.p2jinfo("complete parsing page:" + (id+1)); - if (id === (this.pdfDocument.numPages - 1) ) { - this.raiseReadyEvent({Pages:this.pages}); - //v1.1.2: signal end of parsed data with null - process.nextTick(() => this.raiseReadyEvent(null)); - this.emit("data", null); - } - else { - process.nextTick(() => this.parsePage(promisedPages, ++id, scale)); - } - } - - pageParser.parsePage( - data => { - const page = { - Width: pageParser.width, - Height: pageParser.height, - HLines: pageParser.HLines, - VLines: pageParser.VLines, - Fills: pageParser.Fills, - 
//needs to keep current default output format, text content will output to a separate file if '-c' command line argument is set - // Content:pdfPage.getTextContent(), - Texts: pageParser.Texts, - Fields: pageParser.Fields, - Boxsets: pageParser.Boxsets - }; - - this.pages.push(page); - this.emit("data", page); - - if (this.needRawText) { - pdfPage.getTextContent().then( - textContent => { - this.rawTextContents.push(textContent); - nodeUtil.p2jinfo("complete parsing raw text content:" + (id+1)); - continueOnNextPage.call(this); - }, - error => this.raiseErrorEvent("pdfPage.getTextContent error: " + error) - ); - } - else { - continueOnNextPage.call(this); - } - }, - errMsg => this.raiseErrorEvent(errMsg) - ); - } - - getRawTextContent() { - let retVal = ""; - if (!this.needRawText) - return retVal; - - this.rawTextContents.forEach( (textContent, index) => { - let prevText = null; - textContent.bidiTexts.forEach( (textObj, idx) => { - if (prevText) { - if (Math.abs(textObj.y - prevText.y) <= 9) { - prevText.str += textObj.str; - } - else { - retVal += prevText.str + "\r\n"; - prevText = textObj; - } - } - else { - prevText = textObj; - } - - }); - if (prevText) { - retVal += prevText.str; - } - retVal += "\r\n----------------Page (" + index + ") Break----------------\r\n"; - }); - - return retVal; - } - - getAllFieldsTypes() { - return PDFField.getAllFieldsTypes({Pages:this.pages || []}); - } - - getMergedTextBlocksIfNeeded() { - for (let p = 0; p < this.pages.length; p++) { - let prevText = null; - let page = this.pages[p]; - - page.Texts.sort(PDFFont.compareBlockPos); - page.Texts = page.Texts.filter( (t, j) => { - let isDup = (j > 0) && PDFFont.areDuplicateBlocks(page.Texts[j-1], t); - if (isDup) { - nodeUtil.p2jinfo("skipped: dup text block: " + decodeURIComponent(t.R[0].T)); - } - return !isDup; - }); - - for (let i = 0; i < page.Texts.length; i++) { - let text = page.Texts[i]; - - if (prevText) { - if (PDFFont.areAdjacentBlocks(prevText, text) && 
PDFFont.haveSameStyle(prevText, text)) { - let preT = decodeURIComponent(prevText.R[0].T); - let curT = decodeURIComponent(text.R[0].T); - - prevText.R[0].T += text.R[0].T; - prevText.w += text.w; - text.merged = true; - - let mergedText = decodeURIComponent(prevText.R[0].T); - nodeUtil.p2jinfo(`merged text block: ${preT} + ${curT} => ${mergedText}`); - prevText = null; //yeah, only merge two blocks for now - } - else { - prevText = text; - } - } - else { - prevText = text; - } - } - - page.Texts = page.Texts.filter( t => !t.merged); - } - - return {Pages:this.pages}; - } - - destroy() { - this.removeAllListeners(); - - if (this.pdfDocument) - this.pdfDocument.destroy(); - this.pdfDocument = null; - - this.pages = null; - this.rawTextContents = null; - } - -} - -class ParserStream extends Transform { - static createContentStream(jsonObj) { - const rStream = new Readable({objectMode: true}); - rStream.push(jsonObj); - rStream.push(null); - return rStream; - } - - static createOutputStream(outputPath, resolve, reject) { - const outputStream = fs.createWriteStream(outputPath); - outputStream.on('finish', () => resolve(outputPath)); - outputStream.on('error', err => reject(err) ); - return outputStream; - } - - #pdfParser = null; - #chunks = []; - #parsedData = {Pages:[]}; - #_flush_callback = null; - - constructor(pdfParser, options) { - super(options); - this.#pdfParser = pdfParser; - - this.#chunks = []; - - // this.#pdfParser.on("pdfParser_dataReady", evtData => { - // this.push(evtData); - // this.#_flush_callback(); - // this.emit('end', null); - // }); - this.#pdfParser.on("readable", meta => this.#parsedData = {...meta, Pages:[]}); - this.#pdfParser.on("data", page => { - if (!page) { - this.push(this.#parsedData); - this.#_flush_callback(); - } - else - this.#parsedData.Pages.push(page); - }); - } - - //implements transform stream - _transform(chunk, enc, callback) { - this.#chunks.push(Buffer.isBuffer(chunk) ? 
chunk : Buffer.from(chunk, enc)); - callback(); - } - - _flush(callback) { - this.#_flush_callback = callback; - this.#pdfParser.parseBuffer(Buffer.concat(this.#chunks)); - } - - _destroy() { - super.removeAllListeners(); - this.#pdfParser = null; - this.#chunks = []; - } -} - -class PDFParser extends EventEmitter { // inherit from event emitter - //public static - static get colorDict() {return kColors; } - static get fontFaceDict() { return kFontFaces; } - static get fontStyleDict() { return kFontStyles; } - - //private static - static #maxBinBufferCount = 10; - static #binBuffer = {}; - - //private - #password = ""; - - #context = null; // service context object, only used in Web Service project; null in command line - - #pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started - #pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache - #data = null; //if file read success, data is PDF content; if failed, data is "err" object - #PDFJS = null; //will be initialized in constructor - #processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) - - // constructor - constructor(context, needRawText, password) { - //call constructor for super class - super(); - - // private - // service context object, only used in Web Service project; null in command line - this.#context = context; - - this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started - this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache - this.#data = null; //if file read success, data is PDF content; if failed, data is "err" object - this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) - - this.#PDFJS = new PDFJSClass(needRawText); - this.#password = password; - } - - //private methods, needs to invoked by 
[funcName].call(this, ...) - #onPDFJSParseDataReady(data) { - if (!data) { //v1.1.2: data===null means end of parsed data - nodeUtil.p2jinfo("PDF parsing completed."); - this.emit("pdfParser_dataReady", this.#data); - } - else { - this.#data = {...this.#data, ...data}; - } - } - - #onPDFJSParserDataError(err) { - this.#data = null; - this.emit("pdfParser_dataError", {"parserError": err}); - // this.emit("error", err); - } - - #startParsingPDF(buffer) { - this.#data = {}; - - this.#PDFJS.on("pdfjs_parseDataReady", data => this.#onPDFJSParseDataReady(data)); - this.#PDFJS.on("pdfjs_parseDataError", err => this.#onPDFJSParserDataError(err)); - - //v1.3.0 the following Readable Stream-like events are replacement for the top two custom events - this.#PDFJS.on("readable", meta => this.emit("readable", meta)); - this.#PDFJS.on("data", data => this.emit("data", data)); - this.#PDFJS.on("error", err => this.#onPDFJSParserDataError(err)); - - this.#PDFJS.parsePDFData(buffer || PDFParser.#binBuffer[this.binBufferKey], this.#password); - } - - #processBinaryCache() { - if (this.binBufferKey in PDFParser.#binBuffer) { - this.#startParsingPDF(); - return true; - } - - const allKeys = Object.keys(PDFParser.#binBuffer); - if (allKeys.length > PDFParser.#maxBinBufferCount) { - const idx = this.id % PDFParser.#maxBinBufferCount; - const key = allKeys[idx]; - PDFParser.#binBuffer[key] = null; - delete PDFParser.#binBuffer[key]; - - nodeUtil.p2jinfo("re-cycled cache for " + key); - } - - return false; - } - - //public getter - get data() { return this.#data; } - get binBufferKey() { return this.#pdfFilePath + this.#pdfFileMTime; } - - //public APIs - createParserStream() { - return new ParserStream(this, {objectMode: true, bufferSize: 64 * 1024}); - } - - async loadPDF(pdfFilePath, verbosity) { - nodeUtil.verbosity(verbosity || 0); - nodeUtil.p2jinfo("about to load PDF file " + pdfFilePath); - - this.#pdfFilePath = pdfFilePath; - - try { - this.#pdfFileMTime = 
fs.statSync(pdfFilePath).mtimeMs; - if (this.#processFieldInfoXML) { - this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath); - } - - if (this.#processBinaryCache()) - return; - - PDFParser.#binBuffer[this.binBufferKey] = await readFile(pdfFilePath); - nodeUtil.p2jinfo(`Load OK: ${pdfFilePath}`); - this.#startParsingPDF(); - } - catch(err) { - nodeUtil.p2jerror(`Load Failed: ${pdfFilePath} - ${err}`); - this.emit("pdfParser_dataError", err); - } - } - - // Introduce a way to directly process buffers without the need to write it to a temporary file - parseBuffer(pdfBuffer) { - this.#startParsingPDF(pdfBuffer); - } - - getRawTextContent() { return this.#PDFJS.getRawTextContent(); } - getRawTextContentStream() { return ParserStream.createContentStream(this.getRawTextContent()); } - - getAllFieldsTypes() { return this.#PDFJS.getAllFieldsTypes(); }; - getAllFieldsTypesStream() { return ParserStream.createContentStream(this.getAllFieldsTypes()); } - - getMergedTextBlocksIfNeeded() { return this.#PDFJS.getMergedTextBlocksIfNeeded(); } - getMergedTextBlocksStream() { return ParserStream.createContentStream(this.getMergedTextBlocksIfNeeded()) } - - destroy() { // invoked with stream transform process - super.removeAllListeners(); - - //context object will be set in Web Service project, but not in command line utility - if (this.#context) { - this.#context.destroy(); - this.#context = null; - } - - this.#pdfFilePath = null; - this.#pdfFileMTime = null; - this.#data = null; - this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) - - this.#PDFJS.destroy(); - this.#PDFJS = null; - } -} - -export { PDFParser as default }; diff --git a/rollup.config.js b/rollup.config.js index 7ed65e6e..9b462295 100644 --- a/rollup.config.js +++ b/rollup.config.js @@ -21,38 +21,7 @@ export default [ name: 'pdfparser', exports: 'default', }, - plugins: [ - replace({ - '../base': '/base/', - delimiters: ['/', '/'], - }), - replace({ - 
'eval(_baseCode);': `(function (globalScope = {}) { - eval(_baseCode); - })();`, - delimiters: ['', ''], - preventAssignment: false, - }), - builtins(), - ], - }, - { - input: './pdfparser.js', - external: [ - 'fs', - 'util', - 'fs/promises', - 'events', - 'path', - 'url', - 'buffer', - '@xmldom/xmldom', - 'stream', - ], - output: { - file: 'pdfparser.mjs', - format: 'es', - }, + treeshake: false, plugins: [ replace({ '../base': '/base/', diff --git a/test/index.js b/test/index.js index 384f6fc0..8df253c2 100644 --- a/test/index.js +++ b/test/index.js @@ -7,8 +7,8 @@ var vows = require('vows'), (nodeEvents = require('events')), (_ = require('lodash')), (PdfParser_js = require('../pdfparser.js')); -PdfParser_cjs = require('../dist/pdfparser.cjs'); -PdfParser_mjs = require('../dist/pdfparser.mjs'); +PdfParser_cjs = require('../pdfparser.cjs'); +PdfParser_mjs = require('../pdfparser.mjs'); testSuite(PdfParser_js); testSuite(PdfParser_cjs); From 2cc982317b7cb97537395290e3e697bf3c18b6fd Mon Sep 17 00:00:00 2001 From: Isaac <91521821+isimisi@users.noreply.github.com> Date: Tue, 26 Sep 2023 15:22:19 +0200 Subject: [PATCH 4/5] Fixed small bug Found why the DOMParser was not globally available - CJS works as ESM now. 
--- base/display/metadata.js | 18 +- package.json | 5 +- pdfparser.cjs | 3960 +++++++++++++++----------------- rollup.config.js | 4 +- rollup/addDestructedImports.js | 46 + 5 files changed, 1923 insertions(+), 2110 deletions(-) create mode 100644 rollup/addDestructedImports.js diff --git a/base/display/metadata.js b/base/display/metadata.js index b27c8d54..6b1c3c26 100755 --- a/base/display/metadata.js +++ b/base/display/metadata.js @@ -18,22 +18,6 @@ 'use strict'; -// since this gets evaluated in pdfparser and DOMParser somehow does not exist in a CJS environment -// we catch the error early and use @xmldom/xmldom DOMParser - -// Not exactly sure how DOMParser is available globally in a node environment in the first place -// feel free to change this if a better method is to be found -try { - var DomParser = DOMParser; -} catch (error) { - if (typeof module === 'object' && typeof module.exports === 'object') { - // This is a CommonJS (CJS) environment - DomParser = require('@xmldom/xmldom').DOMParser; - } else { - throw error; - } -} - var Metadata = (PDFJS.Metadata = (function MetadataClosure() { function fixMetadata(meta) { return meta.replace(/>\\376\\377([^<]+)/g, function (all, codes) { @@ -65,7 +49,7 @@ var Metadata = (PDFJS.Metadata = (function MetadataClosure() { // Ghostscript produces invalid metadata meta = fixMetadata(meta); - var parser = new DomParser(); + var parser = new DOMParser(); meta = parser.parseFromString(meta, 'application/xml'); } else if (!(meta instanceof Document)) { error('Metadata: Invalid metadata object'); diff --git a/package.json b/package.json index f5f88a26..10953d77 100644 --- a/package.json +++ b/package.json @@ -47,7 +47,8 @@ "parse-e2": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc", "parse-e3": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i200_test.pdf -o ./test/target/misc", "build:rollup": "rollup -c 
./rollup.config.js", - "build": "npm run build:rollup" + "build:add-destructured-imports-to-cjs": "node rollup/addDestructedImports.js", + "build": "npm run build:rollup && npm run build:add-destructured-imports-to-cjs" }, "engines": { "node": ">=18.12.1", @@ -61,6 +62,8 @@ "@xmldom/xmldom": "^0.8.8" }, "devDependencies": { + "@rollup/plugin-commonjs": "^25.0.4", + "@rollup/plugin-node-resolve": "^15.2.1", "@rollup/plugin-replace": "^5.0.2", "@types/node": "^20.3.1", "lodash": "^4.17.21", diff --git a/pdfparser.cjs b/pdfparser.cjs index 606b1385..974ee4ca 100644 --- a/pdfparser.cjs +++ b/pdfparser.cjs @@ -6,1774 +6,1589 @@ var promises = require('fs/promises'); var events = require('events'); var path = require('path'); var url = require('url'); -require('buffer'); +var { Blob } = require('buffer'); var xmldom = require('@xmldom/xmldom'); +var DOMParser = xmldom.DOMParser; var stream = require('stream'); -var _documentCurrentScript = - typeof document !== 'undefined' ? document.currentScript : null; +var _documentCurrentScript = typeof document !== 'undefined' ? 
document.currentScript : null; const kColors = [ - '#000000', // 0 - '#ffffff', // 1 - '#4c4c4c', // 2 - '#808080', // 3 - '#999999', // 4 - '#c0c0c0', // 5 - '#cccccc', // 6 - '#e5e5e5', // 7 - '#f2f2f2', // 8 - '#008000', // 9 - '#00ff00', // 10 - '#bfffa0', // 11 - '#ffd629', // 12 - '#ff99cc', // 13 - '#004080', // 14 - '#9fc0e1', // 15 - '#5580ff', // 16 - '#a9c9fa', // 17 - '#ff0080', // 18 - '#800080', // 19 - '#ffbfff', // 20 - '#e45b21', // 21 - '#ffbfaa', // 22 - '#008080', // 23 - '#ff0000', // 24 - '#fdc59f', // 25 - '#808000', // 26 - '#bfbf00', // 27 - '#824100', // 28 - '#007256', // 29 - '#008000', // 30 - '#000080', // Last + 1 - '#008080', // Last + 2 - '#800080', // Last + 3 - '#ff0000', // Last + 4 - '#0000ff', // Last + 5 - '#008000', // Last + 6 + '#000000', // 0 + '#ffffff', // 1 + '#4c4c4c', // 2 + '#808080', // 3 + '#999999', // 4 + '#c0c0c0', // 5 + '#cccccc', // 6 + '#e5e5e5', // 7 + '#f2f2f2', // 8 + '#008000', // 9 + '#00ff00', // 10 + '#bfffa0', // 11 + '#ffd629', // 12 + '#ff99cc', // 13 + '#004080', // 14 + '#9fc0e1', // 15 + '#5580ff', // 16 + '#a9c9fa', // 17 + '#ff0080', // 18 + '#800080', // 19 + '#ffbfff', // 20 + '#e45b21', // 21 + '#ffbfaa', // 22 + '#008080', // 23 + '#ff0000', // 24 + '#fdc59f', // 25 + '#808000', // 26 + '#bfbf00', // 27 + '#824100', // 28 + '#007256', // 29 + '#008000', // 30 + '#000080', // Last + 1 + '#008080', // Last + 2 + '#800080', // Last + 3 + '#ff0000', // Last + 4 + '#0000ff', // Last + 5 + '#008000' // Last + 6 ]; const kFontFaces = [ - 'quicktype,arial,helvetica,sans-serif', // 00 - QuickType - sans-serif variable font - 'quicktype condensed,arial narrow,arial,helvetica,sans-serif', // 01 - QuickType Condensed - thin sans-serif variable font - 'quicktypepi,quicktypeiipi', // 02 - QuickType Pi - 'quicktype mono,courier new,courier,monospace', // 03 - QuickType Mono - san-serif fixed font - 'ocr-a,courier new,courier,monospace', // 04 - OCR-A - OCR readable san-serif fixed font - 'ocr b 
mt,courier new,courier,monospace', // 05 - OCR-B MT - OCR readable san-serif fixed font -]; - -const kFontStyles = [ - // Face Size Bold Italic StyleID(Comment) - // ----- ---- ---- ----- ----------------- - [0, 6, 0, 0], //00 - [0, 8, 0, 0], //01 - [0, 10, 0, 0], //02 - [0, 12, 0, 0], //03 - [0, 14, 0, 0], //04 - [0, 18, 0, 0], //05 - [0, 6, 1, 0], //06 - [0, 8, 1, 0], //07 - [0, 10, 1, 0], //08 - [0, 12, 1, 0], //09 - [0, 14, 1, 0], //10 - [0, 18, 1, 0], //11 - [0, 6, 0, 1], //12 - [0, 8, 0, 1], //13 - [0, 10, 0, 1], //14 - [0, 12, 0, 1], //15 - [0, 14, 0, 1], //16 - [0, 18, 0, 1], //17 - [0, 6, 1, 1], //18 - [0, 8, 1, 1], //19 - [0, 10, 1, 1], //20 - [0, 12, 1, 1], //21 - [0, 14, 1, 1], //22 - [0, 18, 1, 1], //23 - [1, 6, 0, 0], //24 - [1, 8, 0, 0], //25 - [1, 10, 0, 0], //26 - [1, 12, 0, 0], //27 - [1, 14, 0, 0], //28 - [1, 18, 0, 0], //29 - [1, 6, 1, 0], //30 - [1, 8, 1, 0], //31 - [1, 10, 1, 0], //32 - [1, 12, 1, 0], //33 - [1, 14, 1, 0], //34 - [1, 18, 1, 0], //35 - [1, 6, 0, 1], //36 - [1, 8, 0, 1], //37 - [1, 10, 0, 1], //38 - [1, 12, 0, 1], //39 - [1, 14, 0, 1], //40 - [1, 18, 0, 1], //41 - [2, 8, 0, 0], //42 - [2, 10, 0, 0], //43 - [2, 12, 0, 0], //44 - [2, 14, 0, 0], //45 - [2, 18, 0, 0], //46 - [3, 8, 0, 0], //47 - [3, 10, 0, 0], //48 - [3, 12, 0, 0], //49 - [4, 12, 0, 0], //50 - [0, 9, 0, 0], //51 - [0, 9, 1, 0], //52 - [0, 9, 0, 1], //53 - [0, 9, 1, 1], //54 - [1, 9, 0, 0], //55 - [1, 9, 1, 0], //56 - [1, 9, 1, 1], //57 - [4, 10, 0, 0], //58 - [5, 10, 0, 0], //59 - [5, 12, 0, 0], //60 + "quicktype,arial,helvetica,sans-serif", // 00 - QuickType - sans-serif variable font + "quicktype condensed,arial narrow,arial,helvetica,sans-serif", // 01 - QuickType Condensed - thin sans-serif variable font + "quicktypepi,quicktypeiipi", // 02 - QuickType Pi + "quicktype mono,courier new,courier,monospace", // 03 - QuickType Mono - san-serif fixed font + "ocr-a,courier new,courier,monospace", // 04 - OCR-A - OCR readable san-serif fixed font + "ocr b mt,courier 
new,courier,monospace" // 05 - OCR-B MT - OCR readable san-serif fixed font + ]; + + const kFontStyles = [ + // Face Size Bold Italic StyleID(Comment) + // ----- ---- ---- ----- ----------------- + [0, 6, 0, 0], //00 + [0, 8, 0, 0], //01 + [0, 10, 0, 0], //02 + [0, 12, 0, 0], //03 + [0, 14, 0, 0], //04 + [0, 18, 0, 0], //05 + [0, 6, 1, 0], //06 + [0, 8, 1, 0], //07 + [0, 10, 1, 0], //08 + [0, 12, 1, 0], //09 + [0, 14, 1, 0], //10 + [0, 18, 1, 0], //11 + [0, 6, 0, 1], //12 + [0, 8, 0, 1], //13 + [0, 10, 0, 1], //14 + [0, 12, 0, 1], //15 + [0, 14, 0, 1], //16 + [0, 18, 0, 1], //17 + [0, 6, 1, 1], //18 + [0, 8, 1, 1], //19 + [0, 10, 1, 1], //20 + [0, 12, 1, 1], //21 + [0, 14, 1, 1], //22 + [0, 18, 1, 1], //23 + [1, 6, 0, 0], //24 + [1, 8, 0, 0], //25 + [1, 10, 0, 0], //26 + [1, 12, 0, 0], //27 + [1, 14, 0, 0], //28 + [1, 18, 0, 0], //29 + [1, 6, 1, 0], //30 + [1, 8, 1, 0], //31 + [1, 10, 1, 0], //32 + [1, 12, 1, 0], //33 + [1, 14, 1, 0], //34 + [1, 18, 1, 0], //35 + [1, 6, 0, 1], //36 + [1, 8, 0, 1], //37 + [1, 10, 0, 1], //38 + [1, 12, 0, 1], //39 + [1, 14, 0, 1], //40 + [1, 18, 0, 1], //41 + [2, 8, 0, 0], //42 + [2, 10, 0, 0], //43 + [2, 12, 0, 0], //44 + [2, 14, 0, 0], //45 + [2, 18, 0, 0], //46 + [3, 8, 0, 0], //47 + [3, 10, 0, 0], //48 + [3, 12, 0, 0], //49 + [4, 12, 0, 0], //50 + [0, 9, 0, 0], //51 + [0, 9, 1, 0], //52 + [0, 9, 0, 1], //53 + [0, 9, 1, 1], //54 + [1, 9, 0, 0], //55 + [1, 9, 1, 0], //56 + [1, 9, 1, 1], //57 + [4, 10, 0, 0], //58 + [5, 10, 0, 0], //59 + [5, 12, 0, 0] //60 ]; const dpi = 96.0; const gridXPerInch = 4.0; const gridYPerInch = 4.0; -const _pixelXPerGrid = dpi / gridXPerInch; -const _pixelYPerGrid = dpi / gridYPerInch; -const _pixelPerPoint = dpi / 72; - +const _pixelXPerGrid = dpi/gridXPerInch; +const _pixelYPerGrid = dpi/gridYPerInch; +const _pixelPerPoint = dpi/72; + class PDFUnit { - static toFixedFloat(fNum) { - return parseFloat(fNum.toFixed(3)); - } - - static colorCount() { - return kColors.length; - } - - static toPixelX(formX) 
{ - return Math.round(formX * _pixelXPerGrid); - } - - static toPixelY(formY) { - return Math.round(formY * _pixelYPerGrid); - } - - static pointToPixel(point) { - // Point unit (1/72 an inch) to pixel units - return point * _pixelPerPoint; - } - - static getColorByIndex(clrId) { - return kColors[clrId]; - } - - static toFormPoint(viewportX, viewportY) { - return [viewportX / _pixelXPerGrid, viewportY / _pixelYPerGrid]; - } - - static toFormX(viewportX) { - return PDFUnit.toFixedFloat(viewportX / _pixelXPerGrid); - } - - static toFormY(viewportY) { - return PDFUnit.toFixedFloat(viewportY / _pixelYPerGrid); - } - - static findColorIndex(color) { - if (color.length === 4) color += '000'; - //MQZ. 07/29/2013: if color is not in dictionary, just return -1. The caller (pdffont, pdffill) will set the actual color - return kColors.indexOf(color); - } - - static dateToIso8601(date) { - // PDF spec p.160 - if (date.slice(0, 2) === 'D:') { - // D: prefix is optional - date = date.slice(2); - } - let tz = 'Z'; - let idx = date.search(/[Z+-]/); // timezone is optional - if (idx >= 0) { - tz = date.slice(idx); - if (tz !== 'Z') { - // timezone format OHH'mm' - tz = tz.slice(0, 3) + ':' + tz.slice(4, 6); - } - date = date.slice(0, idx); - } - let yr = date.slice(0, 4); // everything after year is optional - let mth = date.slice(4, 6) || '01'; - let day = date.slice(6, 8) || '01'; - let hr = date.slice(8, 10) || '00'; - let min = date.slice(10, 12) || '00'; - let sec = date.slice(12, 14) || '00'; - return yr + '-' + mth + '-' + day + 'T' + hr + ':' + min + ':' + sec + tz; - } + static toFixedFloat(fNum) { + return parseFloat(fNum.toFixed(3)); + } + + static colorCount() { + return kColors.length; + } + + static toPixelX(formX) { + return Math.round(formX * _pixelXPerGrid); + } + + static toPixelY(formY) { + return Math.round(formY * _pixelYPerGrid); + } + + static pointToPixel(point) {// Point unit (1/72 an inch) to pixel units + return point * _pixelPerPoint; + } + + static 
getColorByIndex(clrId) { + return kColors[clrId]; + } + + static toFormPoint(viewportX, viewportY) { + return [(viewportX / _pixelXPerGrid), (viewportY / _pixelYPerGrid)]; + } + + static toFormX(viewportX) { + return PDFUnit.toFixedFloat(viewportX / _pixelXPerGrid); + } + + static toFormY(viewportY) { + return PDFUnit.toFixedFloat(viewportY / _pixelYPerGrid); + } + + static findColorIndex(color) { + if (color.length === 4) + color += "000"; + //MQZ. 07/29/2013: if color is not in dictionary, just return -1. The caller (pdffont, pdffill) will set the actual color + return kColors.indexOf(color); + } + + static dateToIso8601(date) { + // PDF spec p.160 + if (date.slice(0, 2) === 'D:') { // D: prefix is optional + date = date.slice(2); + } + let tz = 'Z'; + let idx = date.search(/[Z+-]/); // timezone is optional + if (idx >= 0) { + tz = date.slice(idx); + if (tz !== 'Z') { // timezone format OHH'mm' + tz = tz.slice(0, 3) + ':' + tz.slice(4, 6); + } + date = date.slice(0, idx); + } + let yr = date.slice(0, 4); // everything after year is optional + let mth = date.slice(4, 6) || '01'; + let day = date.slice(6, 8) || '01'; + let hr = date.slice(8, 10) || '00'; + let min = date.slice(10, 12) || '00'; + let sec = date.slice(12, 14) || '00'; + return yr + '-' + mth + '-' + day + 'T' + hr + ':' + min + ':' + sec + tz; + } } class PDFLine { - constructor(x1, y1, x2, y2, lineWidth, color, dashed) { - this.x1 = x1; - this.y1 = y1; - this.x2 = x2; - this.y2 = y2; - this.lineWidth = lineWidth || 1.0; - this.color = color; - this.dashed = dashed; - } - - #setStartPoint(oneLine, x, y) { - oneLine.x = PDFUnit.toFormX(x); - oneLine.y = PDFUnit.toFormY(y); - } - - processLine(targetData) { - const xDelta = Math.abs(this.x2 - this.x1); - const yDelta = Math.abs(this.y2 - this.y1); - const minDelta = this.lineWidth; - - let oneLine = { - x: 0, - y: 0, - w: PDFUnit.toFixedFloat(this.lineWidth), - l: 0, - }; - - //MQZ Aug.28.2013, adding color support, using color dictionary and default 
to black - const clrId = PDFUnit.findColorIndex(this.color); - const colorObj = - clrId > 0 && clrId < PDFUnit.colorCount() - ? { clr: clrId } - : { oc: this.color }; - oneLine = { ...oneLine, ...colorObj }; - - //MQZ Aug.29 dashed line support - if (this.dashed) { - oneLine = oneLine = { ...oneLine, dsh: 1 }; - } - - if (yDelta < this.lineWidth && xDelta > minDelta) { - //HLine - if (this.lineWidth < 4 && xDelta / this.lineWidth < 4) { - nodeUtil.p2jinfo( - 'Skipped: short thick HLine: lineWidth = ' + - this.lineWidth + - ', xDelta = ' + - xDelta - ); - return; //skip short thick lines, like PA SPP lines behinds checkbox - } + constructor(x1, y1, x2, y2, lineWidth, color, dashed) { + this.x1 = x1; + this.y1 = y1; + this.x2 = x2; + this.y2 = y2; + this.lineWidth = lineWidth || 1.0; + this.color = color; + this.dashed = dashed; + } + + #setStartPoint(oneLine, x, y) { + oneLine.x = PDFUnit.toFormX(x); + oneLine.y = PDFUnit.toFormY(y); + } + + processLine(targetData) { + const xDelta = Math.abs(this.x2 - this.x1); + const yDelta = Math.abs(this.y2 - this.y1); + const minDelta = this.lineWidth; + + let oneLine = {x:0, y:0, w: PDFUnit.toFixedFloat(this.lineWidth), l:0}; + + //MQZ Aug.28.2013, adding color support, using color dictionary and default to black + const clrId = PDFUnit.findColorIndex(this.color); + const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? 
{clr: clrId} : {oc: this.color}; + oneLine = {...oneLine, ...colorObj}; + + //MQZ Aug.29 dashed line support + if (this.dashed) { + oneLine = oneLine = {...oneLine, dsh: 1}; + } + + if ((yDelta < this.lineWidth) && (xDelta > minDelta)) { //HLine + if (this.lineWidth < 4 && (xDelta / this.lineWidth < 4)) { + nodeUtil.p2jinfo("Skipped: short thick HLine: lineWidth = " + this.lineWidth + ", xDelta = " + xDelta); + return; //skip short thick lines, like PA SPP lines behinds checkbox + } - oneLine.l = PDFUnit.toFormX(xDelta); - if (this.x1 > this.x2) this.#setStartPoint(oneLine, this.x2, this.y2); - else this.#setStartPoint(oneLine, this.x1, this.y1); - targetData.HLines.push(oneLine); - } else if (xDelta < this.lineWidth && yDelta > minDelta) { - //VLine - if (this.lineWidth < 4 && yDelta / this.lineWidth < 4) { - nodeUtil.p2jinfo( - 'Skipped: short thick VLine: lineWidth = ' + - this.lineWidth + - ', yDelta = ' + - yDelta - ); - return; //skip short think lines, like PA SPP lines behinds checkbox - } + oneLine.l = PDFUnit.toFormX(xDelta); + if (this.x1 > this.x2) + this.#setStartPoint(oneLine, this.x2, this.y2); + else + this.#setStartPoint(oneLine, this.x1, this.y1); + targetData.HLines.push(oneLine); + } + else if ((xDelta < this.lineWidth) && (yDelta > minDelta)) {//VLine + if (this.lineWidth < 4 && (yDelta / this.lineWidth < 4)) { + nodeUtil.p2jinfo("Skipped: short thick VLine: lineWidth = " + this.lineWidth + ", yDelta = " + yDelta); + return; //skip short think lines, like PA SPP lines behinds checkbox + } - oneLine.l = PDFUnit.toFormY(yDelta); - if (this.y1 > this.y2) this.#setStartPoint(oneLine, this.x2, this.y2); - else this.#setStartPoint(oneLine, this.x1, this.y1); - targetData.VLines.push(oneLine); - } - } + oneLine.l = PDFUnit.toFormY(yDelta); + if (this.y1 > this.y2) + this.#setStartPoint(oneLine, this.x2, this.y2); + else + this.#setStartPoint(oneLine, this.x1, this.y1); + targetData.VLines.push(oneLine); + } + } } -class PDFFill { - // constructor - 
constructor(x, y, width, height, color) { - this.x = x; - this.y = y; - this.width = width; - this.height = height; - this.color = color; - } - - processFill(targetData) { - //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) - const clrId = PDFUnit.findColorIndex(this.color); - const colorObj = - clrId > 0 && clrId < PDFUnit.colorCount() - ? { clr: clrId } - : { oc: this.color }; - - const oneFill = { - x: PDFUnit.toFormX(this.x), - y: PDFUnit.toFormY(this.y), - w: PDFUnit.toFormX(this.width), - h: PDFUnit.toFormY(this.height), - ...colorObj, - }; - - if (oneFill.w < 2 && oneFill.h < 2) { - nodeUtil.p2jinfo( - 'Skipped: tiny fill: ' + oneFill.w + ' x ' + oneFill.h - ); - return; //skip short thick lines, like PA SPP lines behinds checkbox - } +class PDFFill{ + // constructor + constructor(x, y, width, height, color) { + this.x = x; + this.y = y; + this.width = width; + this.height = height; + this.color = color; + } + + processFill(targetData) { + //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) + const clrId = PDFUnit.findColorIndex(this.color); + const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? 
{clr: clrId} : {oc: this.color}; + + const oneFill = {x:PDFUnit.toFormX(this.x), + y:PDFUnit.toFormY(this.y), + w:PDFUnit.toFormX(this.width), + h:PDFUnit.toFormY(this.height), + ...colorObj}; + + + if (oneFill.w < 2 && oneFill.h < 2) { + nodeUtil.p2jinfo("Skipped: tiny fill: " + oneFill.w + " x " + oneFill.h); + return; //skip short thick lines, like PA SPP lines behinds checkbox + } - targetData.Fills.push(oneFill); - } + targetData.Fills.push(oneFill); + } } -const _boldSubNames = ['bd', 'bold', 'demi', 'black']; -const _stdFonts = [ - 'arial', - 'helvetica', - 'sans-serif ', - 'courier ', - 'monospace ', - 'ocr ', -]; +const _boldSubNames = ["bd", "bold", "demi", "black"]; +const _stdFonts = ["arial", "helvetica", "sans-serif ", "courier ","monospace ", "ocr "]; const DISTANCE_DELTA = 0.1; class PDFFont { - #initTypeName() { - let typeName = this.fontObj.name || this.fontObj.fallbackName; - if (!typeName) { - typeName = kFontFaces[0]; //default font family name - } - typeName = typeName.toLowerCase(); - return typeName; - } - - #initSubType() { - let subType = this.typeName; - let bold = false; - - let nameArray = this.typeName.split('+'); - if (Array.isArray(nameArray) && nameArray.length > 1) { - subType = nameArray[1].split('-'); - if (Array.isArray(subType) && subType.length > 1) { - let subName = subType[1].toLowerCase(); - bold = _boldSubNames.indexOf(subName) >= 0; - subType = subType[0]; - } - } - return { subType, bold }; - } - - #initSymbol() { - let isSymbol = - this.typeName.indexOf('symbol') > 0 || - kFontFaces[2].indexOf(this.subType) >= 0; - if (this.fontObj.isSymbolicFont) { - let mFonts = _stdFonts.filter( - (oneName) => this.typeName.indexOf(oneName) >= 0 - ); - - if (mFonts.length > 0) { - this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) 
reset the flag for now - nodeUtil.p2jinfo( - 'Reset: isSymbolicFont (false) for ' + this.fontObj.name - ); - } - } else { - if (isSymbol) { - this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c - nodeUtil.p2jinfo( - 'Reset: isSymbolicFont (true) for ' + this.fontObj.name - ); - } - } - return isSymbol; - } - - #initSpaceWidth() { - let spaceWidth = this.fontObj.spaceWidth; - if (!spaceWidth) { - var spaceId = Array.isArray(this.fontObj.toFontChar) - ? this.fontObj.toFontChar.indexOf(32) - : -1; - spaceWidth = - spaceId >= 0 && Array.isArray(this.fontObj.widths) - ? this.fontObj.widths[spaceId] - : 250; - } - spaceWidth = PDFUnit.toFormX(spaceWidth) / 32; - return spaceWidth; - } - - // constructor - constructor(fontObj) { - this.fontObj = fontObj; - - this.typeName = this.#initTypeName(); - - const { subType, bold } = this.#initSubType(); - this.subType = subType; - this.bold = bold; - - this.isSymbol = this.#initSymbol(); - this.spaceWidth = this.#initSpaceWidth(); - - this.fontSize = 1; - this.faceIdx = 0; - this.italic = false; - this.fontStyleId = -1; - } - - /** sort text blocks by y then x */ - static compareBlockPos(t1, t2) { - if (t1.y < t2.y - DISTANCE_DELTA) { - return -1; - } - if (Math.abs(t1.y - t2.y) <= DISTANCE_DELTA) { - if (t1.x < t2.x - DISTANCE_DELTA) { + #initTypeName() { + let typeName = (this.fontObj.name || this.fontObj.fallbackName); + if (!typeName) { + typeName = kFontFaces[0]; //default font family name + } + typeName = typeName.toLowerCase(); + return typeName; + } + + #initSubType() { + let subType = this.typeName; + let bold = false; + + let nameArray = this.typeName.split('+'); + if (Array.isArray(nameArray) && nameArray.length > 1) { + subType = nameArray[1].split("-"); + if (Array.isArray(subType) && subType.length > 1) { + let subName = subType[1].toLowerCase(); + bold = _boldSubNames.indexOf(subName) >= 0; + subType = subType[0]; + } + } + return {subType, bold}; + } + + #initSymbol() { + let isSymbol = 
this.typeName.indexOf("symbol") > 0 || kFontFaces[2].indexOf(this.subType) >= 0; + if (this.fontObj.isSymbolicFont) { + let mFonts = _stdFonts.filter( (oneName) => (this.typeName.indexOf(oneName) >= 0) ); + + if (mFonts.length > 0) { + this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) reset the flag for now + nodeUtil.p2jinfo("Reset: isSymbolicFont (false) for " + this.fontObj.name); + } + } + else { + if (isSymbol) { + this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c + nodeUtil.p2jinfo("Reset: isSymbolicFont (true) for " + this.fontObj.name); + } + } + return isSymbol; + } + + #initSpaceWidth() { + let spaceWidth = this.fontObj.spaceWidth; + if (!spaceWidth) { + var spaceId = Array.isArray(this.fontObj.toFontChar) ? this.fontObj.toFontChar.indexOf(32) : -1; + spaceWidth = (spaceId >= 0 && Array.isArray(this.fontObj.widths)) ? this.fontObj.widths[spaceId] : 250; + } + spaceWidth = PDFUnit.toFormX(spaceWidth) / 32; + return spaceWidth; + } + + // constructor + constructor(fontObj) { + this.fontObj = fontObj; + + this.typeName = this.#initTypeName(); + + const {subType, bold} = this.#initSubType(); + this.subType = subType; + this.bold = bold; + + this.isSymbol = this.#initSymbol(); + this.spaceWidth = this.#initSpaceWidth(); + + this.fontSize = 1; + this.faceIdx = 0; + this.italic = false; + this.fontStyleId = -1; + } + + /** sort text blocks by y then x */ + static compareBlockPos(t1, t2) { + if (t1.y < t2.y - DISTANCE_DELTA) { return -1; - } - if (Math.abs(t1.x - t2.x) <= DISTANCE_DELTA) { - return 0; - } - } - return 1; - } - - static haveSameStyle(t1, t2) { - let retVal = t1.R[0].S === t2.R[0].S; - if (retVal && t1.R[0].S < 0) { - for (let i = 0; i < t1.R[0].TS.length; i++) { - if (t1.R[0].TS[i] !== t2.R[0].TS[i]) { - retVal = false; - break; + } + if (Math.abs(t1.y - t2.y) <= DISTANCE_DELTA) { + if (t1.x < t2.x - DISTANCE_DELTA) { + return -1; } - } - } - if (retVal) { - // make sure 
both block are not rotated - retVal = - typeof t1.R[0].RA === 'undefined' && - typeof t2.R[0].RA === 'undefined'; - } - - return retVal; - } - - static getSpaceThreshHold(t1) { - return (PDFFont.getFontSize(t1) / 12) * t1.sw; - } - - static areAdjacentBlocks(t1, t2) { - const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; - const isDistanceSmallerThanASpace = - t2.x - t1.x - t1.w < PDFFont.getSpaceThreshHold(t1); - - return isInSameLine && isDistanceSmallerThanASpace; - } - - static getFontSize(textBlock) { - const sId = textBlock.R[0].S; - return sId < 0 ? textBlock.R[0].TS[1] : kFontStyles[sId][1]; - } - - static areDuplicateBlocks(t1, t2) { - return ( - t1.x == t2.x && - t1.y == t2.y && - t1.R[0].T == t2.R[0].T && - PDFFont.haveSameStyle(t1, t2) - ); - } - - // private - #setFaceIndex() { - const fontObj = this.fontObj; - - this.bold = fontObj.bold; - if (!this.bold) { - this.bold = - this.typeName.indexOf('bold') >= 0 || - this.typeName.indexOf('black') >= 0; - } - this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42 - // Extended the fix for https://github.com/modesty/pdf2json/issues/42 - if (!this.italic) { - this.italic = - this.typeName.indexOf('italic') >= 0 || - this.typeName.indexOf('oblique') >= 0; - } - // Added detection of hybrid dual bolditalic fonts - if ( - (!this.bold || !this.italic) && - this.typeName.indexOf('boldobl') >= 0 - ) { - this.bold = true; - this.italic = true; - } - - let typeName = this.subType; - if (fontObj.isSerifFont) { - if (kFontFaces[1].indexOf(typeName) >= 0) this.faceIdx = 1; - } else if (kFontFaces[2].indexOf(this.subType) >= 0) { - this.faceIdx = 2; - } else if (fontObj.isMonospace) { - this.faceIdx = 3; - - if (kFontFaces[4].indexOf(typeName) >= 0) this.faceIdx = 4; - else if (kFontFaces[5].indexOf(typeName) >= 0) this.faceIdx = 5; - } else if (fontObj.isSymbolicFont) { - this.faceIdx = 2; - } - - if (this.faceIdx == 0) { - if (this.typeName.indexOf('narrow') > 0) this.faceIdx = 1; 
- } - - // nodeUtil.p2jinfo"typeName = " + typeName + " => faceIdx = " + this.faceIdx); - } - - #getFontStyleIndex(fontSize) { - this.#setFaceIndex(); - - //MQZ Feb.28.2013. Adjust bold text fontsize to work around word spacing issue - this.fontSize = this.bold && fontSize > 12 ? fontSize + 1 : fontSize; - - let fsa = [ - this.faceIdx, - this.fontSize, - this.bold ? 1 : 0, - this.italic ? 1 : 0, - ]; - let retVal = -1; - - kFontStyles.forEach(function (element, index, list) { - if (retVal === -1) { - if ( - element[0] === fsa[0] && - element[1] === fsa[1] && - element[2] === fsa[2] && - element[3] === fsa[3] - ) { - retVal = index; + if (Math.abs(t1.x - t2.x) <= DISTANCE_DELTA) { + return 0; } - } - }); - - return retVal; - } - - #processSymbolicFont(str) { - let retVal = str; - - if (!str || str.length !== 1) return retVal; - - if (!this.fontObj.isSymbolicFont || !this.isSymbol) { - if (retVal == 'C' || retVal == 'G') { - //prevent symbolic encoding from the client - retVal = ' ' + retVal + ' '; //sample: va_ind_760c - } - return retVal; - } - - switch (str.charCodeAt(0)) { - case 20: - retVal = '\u2713'; - break; //check mark - case 70: - retVal = this.fontObj.type === 'CIDFontType0' ? '\u26A0' : '\u007D'; - break; //exclaimation in triangle OR right curly bracket - case 71: - retVal = '\u25b6'; - break; //right triangle - case 97: - retVal = '\u25b6'; - break; //right triangle - case 99: - retVal = this.isSymbol ? '\u2022' : '\u25b2'; - break; //up triangle. set to Bullet Dot for VA SchSCR - case 100: - retVal = '\u25bc'; - break; //down triangle - case 103: - retVal = '\u27A8'; - break; //right arrow. 
sample: va_ind_760pff and pmt - case 106: - retVal = ''; - break; //VA 301: string j character by the checkbox, hide it for now - case 114: - retVal = '\u2022'; - break; //Bullet dot - case 115: - retVal = '\u25b2'; - break; //up triangle - case 116: - retVal = '\u2022'; - break; //Bullet dot - case 118: - retVal = '\u2022'; - break; //Bullet dot - default: - nodeUtil.p2jinfo( - this.fontObj.type + - ' - SymbolicFont - (' + - this.fontObj.name + - ') : ' + - str.charCodeAt(0) + - '::' + - str.charCodeAt(1) + - ' => ' + - retVal - ); - } - - return retVal; - } - - #textRotationAngle(matrix2D) { - let retVal = 0; - if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) { - if (matrix2D[0][1] != 0 && matrix2D[1][0] != 0) { - if (matrix2D[0][1] / matrix2D[1][0] + 1 < 0.0001) retVal = 90; - } - } else if (matrix2D[0][0] !== 0 && matrix2D[1][1] !== 0) { - let r1 = Math.atan(-matrix2D[0][1] / matrix2D[0][0]); - let r2 = Math.atan(matrix2D[1][0] / matrix2D[1][1]); - if (Math.abs(r1) > 0.0001 && r1 - r2 < 0.0001) { - retVal = (r1 * 180) / Math.PI; - } - } - return retVal; - } - - // public instance methods - processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) { - const text = this.#processSymbolicFont(str); - if (!text) { - return; - } - this.fontStyleId = this.#getFontStyleIndex(fontSize); - - // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary - // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; - const TS = [ - this.faceIdx, - this.fontSize, - this.bold ? 1 : 0, - this.italic ? 1 : 0, - ]; - - const clrId = PDFUnit.findColorIndex(color); - const colorObj = - clrId >= 0 && clrId < PDFUnit.colorCount() - ? 
{ clr: clrId } - : { oc: color }; - - let textRun = { - T: this.flash_encode(text), - S: this.fontStyleId, - TS: TS, - }; - const rAngle = this.#textRotationAngle(matrix2D); - if (rAngle != 0) { - nodeUtil.p2jinfo(str + ': rotated ' + rAngle + ' degree.'); - textRun = { ...textRun, RA: rAngle }; - } + } + return 1; + } + + static haveSameStyle(t1, t2) { + let retVal = t1.R[0].S === t2.R[0].S; + if (retVal && t1.R[0].S < 0) { + for (let i = 0; i < t1.R[0].TS.length; i++) { + if (t1.R[0].TS[i] !== t2.R[0].TS[i]) { + retVal = false; + break; + } + } + } + if (retVal) { // make sure both block are not rotated + retVal = (typeof t1.R[0].RA === 'undefined') && (typeof t2.R[0].RA === 'undefined'); + } + + return retVal; + } + + static getSpaceThreshHold(t1) { + return (PDFFont.getFontSize(t1)/12) * t1.sw; + } + + static areAdjacentBlocks(t1, t2) { + const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; + const isDistanceSmallerThanASpace = ((t2.x - t1.x - t1.w) < PDFFont.getSpaceThreshHold(t1)); + + return isInSameLine && isDistanceSmallerThanASpace; + } + + static getFontSize(textBlock) { + const sId = textBlock.R[0].S; + return (sId < 0) ? 
textBlock.R[0].TS[1] : kFontStyles[sId][1]; + } + + static areDuplicateBlocks(t1, t2) { + return t1.x == t2.x && t1.y == t2.y && t1.R[0].T == t2.R[0].T && PDFFont.haveSameStyle(t1, t2); + } + + // private + #setFaceIndex() { + const fontObj = this.fontObj; + + this.bold = fontObj.bold; + if (!this.bold) { + this.bold = this.typeName.indexOf("bold") >= 0 || this.typeName.indexOf("black") >= 0; + } + this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42 + // Extended the fix for https://github.com/modesty/pdf2json/issues/42 + if (!this.italic) { + this.italic = this.typeName.indexOf("italic") >= 0 || this.typeName.indexOf("oblique") >= 0; + } + // Added detection of hybrid dual bolditalic fonts + if (((!this.bold) || (!this.italic)) && (this.typeName.indexOf("boldobl") >= 0)) { + this.bold = true; + this.italic = true; + } + + let typeName = this.subType; + if (fontObj.isSerifFont) { + if (kFontFaces[1].indexOf(typeName) >= 0) + this.faceIdx = 1; + } + else if (kFontFaces[2].indexOf(this.subType) >= 0) { + this.faceIdx = 2; + } + else if (fontObj.isMonospace) { + this.faceIdx = 3; + + if (kFontFaces[4].indexOf(typeName) >= 0) + this.faceIdx = 4; + else if (kFontFaces[5].indexOf(typeName) >= 0) + this.faceIdx = 5; + } + else if (fontObj.isSymbolicFont) { + this.faceIdx = 2; + } + + if (this.faceIdx == 0) { + if (this.typeName.indexOf("narrow") > 0) + this.faceIdx = 1; + } + +// nodeUtil.p2jinfo"typeName = " + typeName + " => faceIdx = " + this.faceIdx); + } + + #getFontStyleIndex(fontSize) { + this.#setFaceIndex(); + + //MQZ Feb.28.2013. Adjust bold text fontsize to work around word spacing issue + this.fontSize = (this.bold && (fontSize > 12)) ? 
fontSize + 1 : fontSize; + + let fsa = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; + let retVal = -1; + + kFontStyles.forEach(function(element, index, list){ + if (retVal === -1) { + if (element[0] === fsa[0] && element[1] === fsa[1] && + element[2] === fsa[2] && element[3] === fsa[3]) { + retVal = index; + } + } + }); - const oneText = { - x: PDFUnit.toFormX(p.x) - 0.25, - y: PDFUnit.toFormY(p.y) - 0.75, - w: PDFUnit.toFixedFloat(maxWidth), - ...colorObj, //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) - sw: this.spaceWidth, //font space width, use to merge adjacent text blocks - A: 'left', - R: [textRun], - }; + return retVal; + } - targetData.Texts.push(oneText); - } + #processSymbolicFont(str) { + let retVal = str; - flash_encode(str) { - let retVal = encodeURIComponent(str); - retVal = retVal.replace('%C2%96', '-'); - retVal = retVal.replace('%C2%91', '%27'); - retVal = retVal.replace('%C2%92', '%27'); - retVal = retVal.replace('%C2%82', '%27'); - retVal = retVal.replace('%C2%93', '%22'); - retVal = retVal.replace('%C2%94', '%22'); - retVal = retVal.replace('%C2%84', '%22'); - retVal = retVal.replace('%C2%8B', '%C2%AB'); - retVal = retVal.replace('%C2%9B', '%C2%BB'); + if (!str || str.length !== 1) + return retVal; - return retVal; - } - - clean() { - this.fontObj = null; - delete this.fontObj; - } + if (!this.fontObj.isSymbolicFont || !this.isSymbol) { + if (retVal == "C" || retVal == "G") { //prevent symbolic encoding from the client + retVal = " " + retVal + " "; //sample: va_ind_760c + } + return retVal; + } + + switch(str.charCodeAt(0)) { + case 20: retVal = '\u2713'; break; //check mark + case 70: retVal = (this.fontObj.type === "CIDFontType0") ? '\u26A0' : '\u007D'; break; //exclaimation in triangle OR right curly bracket + case 71: retVal = '\u25b6'; break; //right triangle + case 97: retVal = '\u25b6'; break; //right triangle + case 99: retVal = this.isSymbol ? 
'\u2022' : '\u25b2'; break; //up triangle. set to Bullet Dot for VA SchSCR + case 100: retVal = '\u25bc'; break; //down triangle + case 103: retVal = '\u27A8'; break; //right arrow. sample: va_ind_760pff and pmt + case 106: retVal = ''; break; //VA 301: string j character by the checkbox, hide it for now + case 114: retVal = '\u2022'; break; //Bullet dot + case 115: retVal = '\u25b2'; break; //up triangle + case 116: retVal = '\u2022'; break; //Bullet dot + case 118: retVal = '\u2022'; break; //Bullet dot + default: + nodeUtil.p2jinfo(this.fontObj.type + " - SymbolicFont - (" + this.fontObj.name + ") : " + + str.charCodeAt(0) + "::" + str.charCodeAt(1) + " => " + retVal); + } + + return retVal; + } + + #textRotationAngle(matrix2D) { + let retVal = 0; + if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) { + if (matrix2D[0][1] != 0 && matrix2D[1][0] != 0) { + if ((matrix2D[0][1] / matrix2D[1][0]) + 1 < 0.0001) + retVal = 90; + } + } + else if (matrix2D[0][0] !== 0 && matrix2D[1][1] !== 0) { + let r1 = Math.atan(-matrix2D[0][1] / matrix2D[0][0]); + let r2 = Math.atan(matrix2D[1][0] / matrix2D[1][1]); + if (Math.abs(r1) > 0.0001 && (r1 - r2 < 0.0001)) { + retVal = r1 * 180 / Math.PI; + } + } + return retVal; + } + + // public instance methods + processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) { + const text = this.#processSymbolicFont(str); + if (!text) { + return; + } + this.fontStyleId = this.#getFontStyleIndex(fontSize); + + // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary + // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; + const TS = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; + + const clrId = PDFUnit.findColorIndex(color); + const colorObj = (clrId >= 0 && clrId < PDFUnit.colorCount()) ? 
{clr: clrId} : {oc: color}; + + let textRun = { + T: this.flash_encode(text), + S: this.fontStyleId, + TS: TS + }; + const rAngle = this.#textRotationAngle(matrix2D); + if (rAngle != 0) { + nodeUtil.p2jinfo(str + ": rotated " + rAngle + " degree."); + textRun = {...textRun, RA: rAngle}; + } + + const oneText = {x: PDFUnit.toFormX(p.x) - 0.25, + y: PDFUnit.toFormY(p.y) - 0.75, + w: PDFUnit.toFixedFloat(maxWidth), + ...colorObj, //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) + sw: this.spaceWidth, //font space width, use to merge adjacent text blocks + A: "left", + R: [textRun] + }; + + targetData.Texts.push(oneText); + } + + flash_encode(str) { + let retVal = encodeURIComponent(str); + retVal = retVal.replace("%C2%96", "-"); + retVal = retVal.replace("%C2%91", "%27"); + retVal = retVal.replace("%C2%92", "%27"); + retVal = retVal.replace("%C2%82", "%27"); + retVal = retVal.replace("%C2%93", "%22"); + retVal = retVal.replace("%C2%94", "%22"); + retVal = retVal.replace("%C2%84", "%22"); + retVal = retVal.replace("%C2%8B", "%C2%AB"); + retVal = retVal.replace("%C2%9B", "%C2%BB"); + + return retVal; + } + + clean() { + this.fontObj = null; + delete this.fontObj; + } } // alias some functions to make (compiled) code shorter -const { round: mr, sin: ms, cos: mc, abs, sqrt } = Math; +const {round: mr, sin: ms, cos: mc, abs, sqrt} = Math; // precompute "00" to "FF" const dec2hex = []; for (let i = 0; i < 16; i++) { - for (let j = 0; j < 16; j++) { - dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); - } + for (let j = 0; j < 16; j++) { + dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); + } } function createMatrixIdentity() { - return [ - [1, 0, 0], - [0, 1, 0], - [0, 0, 1], - ]; + return [ + [1, 0, 0], + [0, 1, 0], + [0, 0, 1] + ]; } function matrixMultiply(m1, m2) { - let result = createMatrixIdentity(); + let result = createMatrixIdentity(); - for (let x = 0; x < 3; x++) { - for (let y = 0; y < 3; y++) { - let sum = 0; + for 
(let x = 0; x < 3; x++) { + for (let y = 0; y < 3; y++) { + let sum = 0; - for (let z = 0; z < 3; z++) { - sum += m1[x][z] * m2[z][y]; - } + for (let z = 0; z < 3; z++) { + sum += m1[x][z] * m2[z][y]; + } - result[x][y] = sum; - } - } - return result; + result[x][y] = sum; + } + } + return result; } function copyState(o1, o2) { - o2.fillStyle = o1.fillStyle; - o2.lineCap = o1.lineCap; - o2.lineJoin = o1.lineJoin; - o2.lineWidth = o1.lineWidth; - o2.miterLimit = o1.miterLimit; - o2.shadowBlur = o1.shadowBlur; - o2.shadowColor = o1.shadowColor; - o2.shadowOffsetX = o1.shadowOffsetX; - o2.shadowOffsetY = o1.shadowOffsetY; - o2.strokeStyle = o1.strokeStyle; - o2.globalAlpha = o1.globalAlpha; - o2.arcScaleX_ = o1.arcScaleX_; - o2.arcScaleY_ = o1.arcScaleY_; - o2.lineScale_ = o1.lineScale_; - o2.dashArray = o1.dashArray; + o2.fillStyle = o1.fillStyle; + o2.lineCap = o1.lineCap; + o2.lineJoin = o1.lineJoin; + o2.lineWidth = o1.lineWidth; + o2.miterLimit = o1.miterLimit; + o2.shadowBlur = o1.shadowBlur; + o2.shadowColor = o1.shadowColor; + o2.shadowOffsetX = o1.shadowOffsetX; + o2.shadowOffsetY = o1.shadowOffsetY; + o2.strokeStyle = o1.strokeStyle; + o2.globalAlpha = o1.globalAlpha; + o2.arcScaleX_ = o1.arcScaleX_; + o2.arcScaleY_ = o1.arcScaleY_; + o2.lineScale_ = o1.lineScale_; + o2.dashArray = o1.dashArray; } function processStyle(styleString) { - let str, - alpha = 1; - - styleString = String(styleString); - if (styleString.substring(0, 3) == 'rgb') { - let start = styleString.indexOf('(', 3); - let end = styleString.indexOf(')', start + 1); - let guts = styleString.substring(start + 1, end).split(','); - - str = '#'; - for (let i = 0; i < 3; i++) { - str += dec2hex[Number(guts[i])]; - } - - if (guts.length == 4 && styleString.substring(3, 4) == 'a') { - alpha = guts[3]; - } - } else { - str = styleString; - } - - return { color: str, alpha: alpha }; + let str, alpha = 1; + + styleString = String(styleString); + if (styleString.substring(0, 3) == 'rgb') { + let start = 
styleString.indexOf('(', 3); + let end = styleString.indexOf(')', start + 1); + let guts = styleString.substring(start + 1, end).split(','); + + str = '#'; + for (let i = 0; i < 3; i++) { + str += dec2hex[Number(guts[i])]; + } + + if (guts.length == 4 && styleString.substring(3, 4) == 'a') { + alpha = guts[3]; + } + } else { + str = styleString; + } + + return {color:str, alpha:alpha}; } function processLineCap(lineCap) { - switch (lineCap) { - case 'butt': - return 'flat'; - case 'round': - return 'round'; - case 'square': - default: - return 'square'; - } + switch (lineCap) { + case 'butt': + return 'flat'; + case 'round': + return 'round'; + case 'square': + default: + return 'square'; + } } // Helper function that takes the already fixed cordinates. function bezierCurveToHelper(self, cp1, cp2, p) { - self.currentPath_.push({ - type: 'bezierCurveTo', - cp1x: cp1.x, - cp1y: cp1.y, - cp2x: cp2.x, - cp2y: cp2.y, - x: p.x, - y: p.y, - }); - self.currentX_ = p.x; - self.currentY_ = p.y; -} - -function matrixIsFinite(m) { - for (let j = 0; j < 3; j++) { - for (let k = 0; k < 2; k++) { - if (!isFinite(m[j][k]) || isNaN(m[j][k])) { - return false; - } - } - } - return true; -} - -function setM(ctx, m, updateLineScale) { - if (!matrixIsFinite(m)) { - return; - } - ctx.m_ = m; - - if (updateLineScale) { - // Get the line scale. - // Determinant of this.m_ means how much the area is enlarged by the - // transformation. So its square root can be used as a scale factor - // for width. 
- let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; - ctx.lineScale_ = sqrt(abs(det)); - } + self.currentPath_.push({ + type:'bezierCurveTo', + cp1x:cp1.x, + cp1y:cp1.y, + cp2x:cp2.x, + cp2y:cp2.y, + x:p.x, + y:p.y + }); + self.currentX_ = p.x; + self.currentY_ = p.y; } -class CanvasPattern_ { - constructor() {} -} - -// Gradient / Pattern Stubs -class CanvasGradient_ { - constructor(aType) { - this.type_ = aType; - this.x0_ = 0; - this.y0_ = 0; - this.r0_ = 0; - this.x1_ = 0; - this.y1_ = 0; - this.r1_ = 0; - this.colors_ = []; - } - addColorStop(aOffset, aColor) { - aColor = processStyle(aColor); - this.colors_.push({ - offset: aOffset, - color: aColor.color, - alpha: aColor.alpha, - }); - } -} - -/** - * This class implements CanvasRenderingContext2D interface as described by - * the WHATWG. - * @param {HTMLElement} surfaceElement The element that the 2D context should - * be associated with - */ -class CanvasRenderingContext2D_ { - constructor(canvasTarget, scaledWidth, scaledHeight) { - this.m_ = createMatrixIdentity(); - - this.mStack_ = []; - this.aStack_ = []; - this.currentPath_ = []; - - // Canvas context properties - this.strokeStyle = '#000'; - this.fillStyle = '#000'; - - this.lineWidth = 1; - this.lineJoin = 'miter'; - this.lineCap = 'butt'; - this.dashArray = []; - this.miterLimit = 1; - this.globalAlpha = 1; - - if (!('HLines' in canvasTarget) || !Array.isArray(canvasTarget.HLines)) - canvasTarget.HLines = []; - if (!('VLines' in canvasTarget) || !Array.isArray(canvasTarget.VLines)) - canvasTarget.VLines = []; - if (!('Fills' in canvasTarget) || !Array.isArray(canvasTarget.Fills)) - canvasTarget.Fills = []; - if (!('Texts' in canvasTarget) || !Array.isArray(canvasTarget.Texts)) - canvasTarget.Texts = []; - - this.canvas = canvasTarget; - - this.width = scaledWidth; - this.height = scaledHeight; - - this.arcScaleX_ = 1; - this.arcScaleY_ = 1; - this.lineScale_ = 1; - - this.currentFont = null; - } - - //private helper methods - #drawPDFLine(p1, p2, 
lineWidth, color) { - let dashedLine = - Array.isArray(this.dashArray) && this.dashArray.length > 1; - let pL = new PDFLine( - p1.x, - p1.y, - p2.x, - p2.y, - lineWidth, - color, - dashedLine - ); - pL.processLine(this.canvas); - } - - #drawPDFFill(cp, min, max, color) { - let width = max.x - min.x; - let height = max.y - min.y; - let pF = new PDFFill(cp.x, cp.y, width, height, color); - pF.processFill(this.canvas); - } - - #needRemoveRect(x, y, w, h) { - let retVal = Math.abs(w - Math.abs(h)) < 1 && w < 13; - if (retVal) { - nodeUtil.p2jinfo('Skipped: tiny rect: w=' + w + ', h=' + h); - } - return retVal; - } - - getContext(ctxType) { - return ctxType === '2d' ? this : null; - } - - setLineDash(lineDash) { - this.dashArray = lineDash; - } - - getLineDash() { - return this.dashArray; - } - - fillText(text, x, y, maxWidth, fontSize) { - if (!text || text.trim().length < 1) return; - let p = this.getCoords_(x, y); - - let a = processStyle(this.fillStyle || this.strokeStyle); - let color = !!a ? a.color : '#000000'; - - this.currentFont.processText( - p, - text, - maxWidth, - color, - fontSize, - this.canvas, - this.m_ - ); - } - - strokeText(text, x, y, maxWidth) { - //MQZ. 10/23/2012, yeah, no hollow text for now - this.fillText(text, x, y, maxWidth); - } - - measureText(text) { - console.warn('to be implemented: contextPrototype.measureText - ', text); - let chars = text.length || 1; - return { width: chars * (this.currentFont.spaceWidth || 5) }; - } - - setFont(fontObj) { - if (!!this.currentFont && typeof this.currentFont.clean === 'function') { - this.currentFont.clean(); - this.currentFont = null; - } - - this.currentFont = new PDFFont(fontObj); - } - - clearRect() { - console.warn('to be implemented: contextPrototype.clearRect'); - } - - beginPath() { - // TODO: Branch current matrix so that save/restore has no effect - // as per safari docs. 
- this.currentPath_ = []; - } - - moveTo(aX, aY) { - let p = this.getCoords_(aX, aY); - this.currentPath_.push({ type: 'moveTo', x: p.x, y: p.y }); - this.currentX_ = p.x; - this.currentY_ = p.y; - } - - lineTo(aX, aY) { - let p = this.getCoords_(aX, aY); - this.currentPath_.push({ type: 'lineTo', x: p.x, y: p.y }); - - this.currentX_ = p.x; - this.currentY_ = p.y; - } - - bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { - let p = this.getCoords_(aX, aY); - let cp1 = this.getCoords_(aCP1x, aCP1y); - let cp2 = this.getCoords_(aCP2x, aCP2y); - bezierCurveToHelper(this, cp1, cp2, p); - } - - quadraticCurveTo(aCPx, aCPy, aX, aY) { - // the following is lifted almost directly from - // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes - - let cp = this.getCoords_(aCPx, aCPy); - let p = this.getCoords_(aX, aY); - - let cp1 = { - x: this.currentX_ + (2.0 / 3.0) * (cp.x - this.currentX_), - y: this.currentY_ + (2.0 / 3.0) * (cp.y - this.currentY_), - }; - let cp2 = { - x: cp1.x + (p.x - this.currentX_) / 3.0, - y: cp1.y + (p.y - this.currentY_) / 3.0, - }; - - bezierCurveToHelper(this, cp1, cp2, p); - } - - arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { - let arcType = aClockwise ? 'at' : 'wa'; - - let xStart = aX + mc(aStartAngle) * aRadius; - let yStart = aY + ms(aStartAngle) * aRadius; - - let xEnd = aX + mc(aEndAngle) * aRadius; - let yEnd = aY + ms(aEndAngle) * aRadius; - - // IE won't render arches drawn counter clockwise if xStart == xEnd. - if (xStart == xEnd && !aClockwise) { - xStart += 0.125; // Offset xStart by 1/80 of a pixel. 
Use something - // that can be represented in binary - } - - let p = this.getCoords_(aX, aY); - let pStart = this.getCoords_(xStart, yStart); - let pEnd = this.getCoords_(xEnd, yEnd); - - this.currentPath_.push({ - type: arcType, - x: p.x, - y: p.y, - radius: aRadius, - xStart: pStart.x, - yStart: pStart.y, - xEnd: pEnd.x, - yEnd: pEnd.y, - }); - } - - rect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return; //try to remove the rectangle behind radio buttons and checkboxes - } - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - } - - strokeRect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return; //try to remove the rectangle behind radio buttons and checkboxes - } - - let oldPath = this.currentPath_; - this.beginPath(); - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - this.stroke(); - - this.currentPath_ = oldPath; - } - - fillRect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return; //try to remove the rectangle behind radio buttons and checkboxes - } - - let oldPath = this.currentPath_; - this.beginPath(); - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - this.fill(); - - this.currentPath_ = oldPath; - } - - createLinearGradient(aX0, aY0, aX1, aY1) { - let gradient = new CanvasGradient_('gradient'); - gradient.x0_ = aX0; - gradient.y0_ = aY0; - gradient.x1_ = aX1; - gradient.y1_ = aY1; - return gradient; - } - - createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { - let gradient = new CanvasGradient_('gradientradial'); - gradient.x0_ = aX0; - gradient.y0_ = aY0; - gradient.r0_ = aR0; - gradient.x1_ = aX1; - gradient.y1_ = aY1; - gradient.r1_ 
= aR1; - return gradient; - } - - drawImage(image, var_args) { - //MQZ. no image drawing support for now - } - - getImageData(x, y, w, h) { - //MQZ. returns empty data buffer for now - return { - width: w, - height: h, - data: new Uint8Array(w * h * 4), - }; - } - - stroke(aFill) { - if (this.currentPath_.length < 2) { - return; - } - - let a = processStyle(aFill ? this.fillStyle : this.strokeStyle); - let color = a.color; - // let opacity = a.alpha * this.globalAlpha; - let lineWidth = this.lineScale_ * this.lineWidth; - - let min = { x: null, y: null }; - let max = { x: null, y: null }; - - for (let i = 0; i < this.currentPath_.length; i++) { - let p = this.currentPath_[i]; - - switch (p.type) { - case 'moveTo': - break; - case 'lineTo': - if (!aFill) { - //lines - if (i > 0) { - this.#drawPDFLine( - this.currentPath_[i - 1], - p, - lineWidth, - color - ); - } - } - break; - case 'close': - if (!aFill) { - //lines - if (i > 0) { - this.#drawPDFLine( - this.currentPath_[i - 1], - this.currentPath_[0], - lineWidth, - color - ); - } - } - p = null; - break; - case 'bezierCurveTo': - break; - case 'at': - case 'wa': - break; - } - - // Figure out dimensions so we can set fills' coordinates correctly - if (aFill && p) { - if (min.x == null || p.x < min.x) { - min.x = p.x; - } - if (max.x == null || p.x > max.x) { - max.x = p.x; - } - if (min.y == null || p.y < min.y) { - min.y = p.y; - } - if (max.y == null || p.y > max.y) { - max.y = p.y; - } - } - } - - if (aFill) { - //fill - this.#drawPDFFill(min, min, max, color); - } - } - - fill() { - this.stroke(true); - } - - closePath() { - this.currentPath_.push({ type: 'close' }); - } - - /** - * @private - */ - getCoords_(aX, aY) { - let m = this.m_; - return { - x: aX * m[0][0] + aY * m[1][0] + m[2][0], - y: aX * m[0][1] + aY * m[1][1] + m[2][1], - }; - } - - save() { - let o = {}; - copyState(this, o); - this.aStack_.push(o); - this.mStack_.push(this.m_); - this.m_ = matrixMultiply(createMatrixIdentity(), this.m_); - } 
- - restore() { - copyState(this.aStack_.pop(), this); - this.m_ = this.mStack_.pop(); - } - - translate(aX, aY) { - let m1 = [ - [1, 0, 0], - [0, 1, 0], - [aX, aY, 1], - ]; - - setM(this, matrixMultiply(m1, this.m_), false); - } - - rotate(aRot) { - let c = mc(aRot); - let s = ms(aRot); - - let m1 = [ - [c, s, 0], - [-s, c, 0], - [0, 0, 1], - ]; - - setM(this, matrixMultiply(m1, this.m_), false); - } - - scale(aX, aY) { - this.arcScaleX_ *= aX; - this.arcScaleY_ *= aY; - let m1 = [ - [aX, 0, 0], - [0, aY, 0], - [0, 0, 1], - ]; - - setM(this, matrixMultiply(m1, this.m_), true); - } - - transform(m11, m12, m21, m22, dx, dy) { - let m1 = [ - [m11, m12, 0], - [m21, m22, 0], - [dx, dy, 1], - ]; - - setM(this, matrixMultiply(m1, this.m_), true); - } - - setTransform(m11, m12, m21, m22, dx, dy) { - let m = [ - [m11, m12, 0], - [m21, m22, 0], - [dx, dy, 1], - ]; - - setM(this, m, true); - } - - /******** STUBS ********/ - clip() { - // TODO: Implement - } - - arcTo() { - // TODO: Implement - } - - createPattern() { - return new CanvasPattern_(); - } -} - -const kFBANotOverridable = 0x00000400; // indicates the field is read only by the user -const kFBARequired = 0x00000010; // indicates the field is required -const kMinHeight = 20; - -class PDFField { - static tabIndex = 0; - - static isWidgetSupported(field) { - let retVal = false; - - switch (field.fieldType) { - case 'Tx': - retVal = true; - break; //text input - case 'Btn': - if (field.fieldFlags & 32768) { - field.fieldType = 'Rd'; //radio button - } else if (field.fieldFlags & 65536) { - field.fieldType = 'Btn'; //push button - } else { - field.fieldType = 'Cb'; //checkbox - } - retVal = true; - break; - case 'Ch': - retVal = true; - break; //drop down - case 'Sig': - retVal = true; - break; //signature - default: - nodeUtil.p2jwarn( - 'Unsupported: field.fieldType of ' + field.fieldType - ); - break; - } - - return retVal; - } - - static isFormElement(field) { - let retVal = false; - switch (field.subtype) { - case 
'Widget': - retVal = PDFField.isWidgetSupported(field); - break; - default: - nodeUtil.p2jwarn('Unsupported: field.type of ' + field.subtype); - break; - } - return retVal; - } - - // constructor - constructor(field, viewport, Fields, Boxsets) { - this.field = field; - this.viewport = viewport; - this.Fields = Fields; - this.Boxsets = Boxsets; - } - - // Normalize rectangle rect=[x1, y1, x2, y2] so that (x1,y1) < (x2,y2) - // For coordinate systems whose origin lies in the bottom-left, this - // means normalization to (BL,TR) ordering. For systems with origin in the - // top-left, this means (TL,BR) ordering. - static #normalizeRect(rect) { - const r = rect.slice(0); // clone rect - if (rect[0] > rect[2]) { - r[0] = rect[2]; - r[2] = rect[0]; - } - if (rect[1] > rect[3]) { - r[1] = rect[3]; - r[3] = rect[1]; - } - return r; - } - - #getFieldPosition(field) { - let viewPort = this.viewport; - let fieldRect = viewPort.convertToViewportRectangle(field.rect); - let rect = PDFField.#normalizeRect(fieldRect); +function matrixIsFinite(m) { + for (let j = 0; j < 3; j++) { + for (let k = 0; k < 2; k++) { + if (!isFinite(m[j][k]) || isNaN(m[j][k])) { + return false; + } + } + } + return true; +} - let height = rect[3] - rect[1]; - if (field.fieldType === 'Tx') { - if (height > kMinHeight + 2) { - rect[1] += 2; - height -= 2; - } - } else if (field.fieldType !== 'Ch') { - //checkbox, radio button, and link button - rect[1] -= 3; - } +function setM(ctx, m, updateLineScale) { + if (!matrixIsFinite(m)) { + return; + } + ctx.m_ = m; + + if (updateLineScale) { + // Get the line scale. + // Determinant of this.m_ means how much the area is enlarged by the + // transformation. So its square root can be used as a scale factor + // for width. + let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; + ctx.lineScale_ = sqrt(abs(det)); + } +} - height = height >= kMinHeight ? 
height : kMinHeight; +class CanvasPattern_ { + constructor() { + } +} - return { - x: PDFUnit.toFormX(rect[0]), - y: PDFUnit.toFormY(rect[1]), - w: PDFUnit.toFormX(rect[2] - rect[0]), - h: PDFUnit.toFormY(height), - }; - } +// Gradient / Pattern Stubs +class CanvasGradient_ { + constructor(aType) { + this.type_ = aType; + this.x0_ = 0; + this.y0_ = 0; + this.r0_ = 0; + this.x1_ = 0; + this.y1_ = 0; + this.r1_ = 0; + this.colors_ = []; + } + addColorStop(aOffset, aColor) { + aColor = processStyle(aColor); + this.colors_.push({offset:aOffset, + color:aColor.color, + alpha:aColor.alpha}); + } +} - #getFieldBaseData(field) { - let attributeMask = 0; - //PDF Spec p.676 TABLE 8.70 Field flags common to all field types - if (field.fieldFlags & 0x00000001) { - attributeMask |= kFBANotOverridable; - } - if (field.fieldFlags & 0x00000002) { - attributeMask |= kFBARequired; - } - let anData = { - id: { Id: field.fullName, EN: 0 }, - TI: field.TI, - AM: attributeMask, - }; - //PDF Spec p.675: add TU (AlternativeText) fields to provide accessibility info - if (field.alternativeText && field.alternativeText.length > 1) { - anData.TU = field.alternativeText; - } +/** + * This class implements CanvasRenderingContext2D interface as described by + * the WHATWG. 
+ * @param {HTMLElement} surfaceElement The element that the 2D context should + * be associated with + */ +class CanvasRenderingContext2D_ { + constructor(canvasTarget, scaledWidth, scaledHeight) { + this.m_ = createMatrixIdentity(); + + this.mStack_ = []; + this.aStack_ = []; + this.currentPath_ = []; + + // Canvas context properties + this.strokeStyle = '#000'; + this.fillStyle = '#000'; + + this.lineWidth = 1; + this.lineJoin = 'miter'; + this.lineCap = 'butt'; + this.dashArray = []; + this.miterLimit = 1; + this.globalAlpha = 1; + + if (!("HLines" in canvasTarget) || !Array.isArray(canvasTarget.HLines)) + canvasTarget.HLines = []; + if (!("VLines" in canvasTarget) || !Array.isArray(canvasTarget.VLines)) + canvasTarget.VLines = []; + if (!("Fills" in canvasTarget) || !Array.isArray(canvasTarget.Fills)) + canvasTarget.Fills = []; + if (!("Texts" in canvasTarget) || !Array.isArray(canvasTarget.Texts)) + canvasTarget.Texts = []; + + this.canvas = canvasTarget; + + this.width = scaledWidth; + this.height = scaledHeight; + + this.arcScaleX_ = 1; + this.arcScaleY_ = 1; + this.lineScale_ = 1; + + this.currentFont = null; + } + + //private helper methods + #drawPDFLine(p1, p2, lineWidth, color) { + let dashedLine = Array.isArray(this.dashArray) && (this.dashArray.length > 1); + let pL = new PDFLine(p1.x, p1.y, p2.x, p2.y, lineWidth, color, dashedLine); + pL.processLine(this.canvas); + } + + #drawPDFFill(cp, min, max, color) { + let width = max.x - min.x; + let height = max.y - min.y; + let pF = new PDFFill(cp.x, cp.y, width, height, color); + pF.processFill(this.canvas); + } + + #needRemoveRect(x, y, w, h) { + let retVal = (Math.abs(w - Math.abs(h)) < 1 && w < 13); + if (retVal) { + nodeUtil.p2jinfo("Skipped: tiny rect: w=" + w + ", h=" + h); + } + return retVal; + } + + getContext(ctxType) { + return (ctxType === "2d") ? 
this : null; + } + + setLineDash(lineDash) { + this.dashArray = lineDash; + } + + getLineDash() { + return this.dashArray; + } + + fillText(text, x, y, maxWidth, fontSize) { + if (!text || text.trim().length < 1) + return; + let p = this.getCoords_(x, y); + + let a = processStyle(this.fillStyle || this.strokeStyle); + let color = (!!a) ? a.color : '#000000'; + + this.currentFont.processText(p, text, maxWidth, color, fontSize, this.canvas, this.m_); + }; + + strokeText(text, x, y, maxWidth) { + //MQZ. 10/23/2012, yeah, no hollow text for now + this.fillText(text, x, y, maxWidth); + } + + measureText(text) { + console.warn("to be implemented: contextPrototype.measureText - ", text); + let chars = text.length || 1; + return {width: chars * (this.currentFont.spaceWidth || 5)}; + } + + setFont(fontObj) { + if ((!!this.currentFont) && typeof(this.currentFont.clean) === "function") { + this.currentFont.clean(); + this.currentFont = null; + } + + this.currentFont = new PDFFont(fontObj); + } + + clearRect() { + console.warn("to be implemented: contextPrototype.clearRect"); + } + + beginPath() { + // TODO: Branch current matrix so that save/restore has no effect + // as per safari docs. 
+ this.currentPath_ = []; + } + + moveTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({type:'moveTo', x:p.x, y:p.y}); + this.currentX_ = p.x; + this.currentY_ = p.y; + } + + lineTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({type:'lineTo', x:p.x, y:p.y}); + + this.currentX_ = p.x; + this.currentY_ = p.y; + } + + bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { + let p = this.getCoords_(aX, aY); + let cp1 = this.getCoords_(aCP1x, aCP1y); + let cp2 = this.getCoords_(aCP2x, aCP2y); + bezierCurveToHelper(this, cp1, cp2, p); + } + + quadraticCurveTo(aCPx, aCPy, aX, aY) { + // the following is lifted almost directly from + // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes + + let cp = this.getCoords_(aCPx, aCPy); + let p = this.getCoords_(aX, aY); + + let cp1 = { + x:this.currentX_ + 2.0 / 3.0 * (cp.x - this.currentX_), + y:this.currentY_ + 2.0 / 3.0 * (cp.y - this.currentY_) + }; + let cp2 = { + x:cp1.x + (p.x - this.currentX_) / 3.0, + y:cp1.y + (p.y - this.currentY_) / 3.0 + }; + + bezierCurveToHelper(this, cp1, cp2, p); + } + + arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { + let arcType = aClockwise ? 'at' : 'wa'; + + let xStart = aX + mc(aStartAngle) * aRadius; + let yStart = aY + ms(aStartAngle) * aRadius; + + let xEnd = aX + mc(aEndAngle) * aRadius; + let yEnd = aY + ms(aEndAngle) * aRadius; + + // IE won't render arches drawn counter clockwise if xStart == xEnd. + if (xStart == xEnd && !aClockwise) { + xStart += 0.125; // Offset xStart by 1/80 of a pixel. 
Use something + // that can be represented in binary + } + + let p = this.getCoords_(aX, aY); + let pStart = this.getCoords_(xStart, yStart); + let pEnd = this.getCoords_(xEnd, yEnd); + + this.currentPath_.push({type:arcType, + x:p.x, + y:p.y, + radius:aRadius, + xStart:pStart.x, + yStart:pStart.y, + xEnd:pEnd.x, + yEnd:pEnd.y}); + } + + rect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return;//try to remove the rectangle behind radio buttons and checkboxes + } + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + } + + strokeRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return;//try to remove the rectangle behind radio buttons and checkboxes + } + + let oldPath = this.currentPath_; + this.beginPath(); + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.stroke(); + + this.currentPath_ = oldPath; + } + + fillRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return;//try to remove the rectangle behind radio buttons and checkboxes + } + + let oldPath = this.currentPath_; + this.beginPath(); + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.fill(); + + this.currentPath_ = oldPath; + } + + createLinearGradient(aX0, aY0, aX1, aY1) { + let gradient = new CanvasGradient_('gradient'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + return gradient; + } + + createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { + let gradient = new CanvasGradient_('gradientradial'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.r0_ = aR0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + gradient.r1_ = aR1; + return 
gradient; + } + + drawImage(image, var_args) { + //MQZ. no image drawing support for now + } + + getImageData(x, y, w, h) { + //MQZ. returns empty data buffer for now + return { + width:w, + height:h, + data:new Uint8Array(w * h * 4) + }; + } + + stroke(aFill) { + if (this.currentPath_.length < 2) { + return; + } + + let a = processStyle(aFill ? this.fillStyle : this.strokeStyle); + let color = a.color; +// let opacity = a.alpha * this.globalAlpha; + let lineWidth = this.lineScale_ * this.lineWidth; + + let min = {x:null, y:null}; + let max = {x:null, y:null}; + + for (let i = 0; i < this.currentPath_.length; i++) { + let p = this.currentPath_[i]; + + switch (p.type) { + case 'moveTo': + break; + case 'lineTo': + if (!aFill) { //lines + if (i > 0) { + this.#drawPDFLine(this.currentPath_[i-1], p, lineWidth, color); + } + } + break; + case 'close': + if (!aFill) { //lines + if (i > 0) { + this.#drawPDFLine(this.currentPath_[i-1], this.currentPath_[0], lineWidth, color); + } + } + p = null; + break; + case 'bezierCurveTo': + break; + case 'at': + case 'wa': + break; + } - if (field.alternativeID && field.alternativeID.length > 1) { - anData.TM = field.alternativeID; - } + // Figure out dimensions so we can set fills' coordinates correctly + if (aFill && p) { + if (min.x == null || p.x < min.x) { + min.x = p.x; + } + if (max.x == null || p.x > max.x) { + max.x = p.x; + } + if (min.y == null || p.y < min.y) { + min.y = p.y; + } + if (max.y == null || p.y > max.y) { + max.y = p.y; + } + } + } + + if (aFill) { //fill + this.#drawPDFFill(min, min, max, color); + } + } + + fill() { + this.stroke(true); + } + + closePath() { + this.currentPath_.push({type:'close'}); + } + + /** + * @private + */ + getCoords_ (aX, aY) { + let m = this.m_; + return { + x: (aX * m[0][0] + aY * m[1][0] + m[2][0]), + y: (aX * m[0][1] + aY * m[1][1] + m[2][1]) + }; + } + + save() { + let o = {}; + copyState(this, o); + this.aStack_.push(o); + this.mStack_.push(this.m_); + this.m_ = 
matrixMultiply(createMatrixIdentity(), this.m_); + } + + restore() { + copyState(this.aStack_.pop(), this); + this.m_ = this.mStack_.pop(); + } + + translate(aX, aY) { + let m1 = [ + [1, 0, 0], + [0, 1, 0], + [aX, aY, 1] + ]; + + setM(this, matrixMultiply(m1, this.m_), false); + } + + rotate(aRot) { + let c = mc(aRot); + let s = ms(aRot); + + let m1 = [ + [c, s, 0], + [-s, c, 0], + [0, 0, 1] + ]; + + setM(this, matrixMultiply(m1, this.m_), false); + } + + scale(aX, aY) { + this.arcScaleX_ *= aX; + this.arcScaleY_ *= aY; + let m1 = [ + [aX, 0, 0], + [0, aY, 0], + [0, 0, 1] + ]; + + setM(this, matrixMultiply(m1, this.m_), true); + } + + transform(m11, m12, m21, m22, dx, dy) { + let m1 = [ + [m11, m12, 0], + [m21, m22, 0], + [dx, dy, 1] + ]; + + setM(this, matrixMultiply(m1, this.m_), true); + } + + setTransform(m11, m12, m21, m22, dx, dy) { + let m = [ + [m11, m12, 0], + [m21, m22, 0], + [dx, dy, 1] + ]; + + setM(this, m, true); + } + + /******** STUBS ********/ + clip() { + // TODO: Implement + } + + arcTo() { + // TODO: Implement + } + + createPattern() { + return new CanvasPattern_(); + } +} - return Object.assign(anData, this.#getFieldPosition(field)); - } +const kFBANotOverridable = 0x00000400; // indicates the field is read only by the user +const kFBARequired = 0x00000010; // indicates the field is required +const kMinHeight = 20; - #addAlpha(field) { - const anData = Object.assign( - { +class PDFField { + static tabIndex = 0; + + static isWidgetSupported(field) { + let retVal = false; + + switch(field.fieldType) { + case 'Tx': retVal = true; break; //text input + case 'Btn': + if (field.fieldFlags & 32768) { + field.fieldType = 'Rd'; //radio button + } + else if (field.fieldFlags & 65536) { + field.fieldType = 'Btn'; //push button + } + else { + field.fieldType = 'Cb'; //checkbox + } + retVal = true; + break; + case 'Ch': retVal = true; break; //drop down + case 'Sig': retVal = true; break; //signature + default: + nodeUtil.p2jwarn("Unsupported: 
field.fieldType of " + field.fieldType); + break; + } + + return retVal; + } + + static isFormElement(field) { + let retVal = false; + switch(field.subtype) { + case 'Widget': retVal = PDFField.isWidgetSupported(field); break; + default: + nodeUtil.p2jwarn("Unsupported: field.type of " + field.subtype); + break; + } + return retVal; + } + + // constructor + constructor(field, viewport, Fields, Boxsets) { + this.field = field; + this.viewport = viewport; + this.Fields = Fields; + this.Boxsets = Boxsets; + } + + // Normalize rectangle rect=[x1, y1, x2, y2] so that (x1,y1) < (x2,y2) + // For coordinate systems whose origin lies in the bottom-left, this + // means normalization to (BL,TR) ordering. For systems with origin in the + // top-left, this means (TL,BR) ordering. + static #normalizeRect(rect) { + const r = rect.slice(0); // clone rect + if (rect[0] > rect[2]) { + r[0] = rect[2]; + r[2] = rect[0]; + } + if (rect[1] > rect[3]) { + r[1] = rect[3]; + r[3] = rect[1]; + } + return r; + } + + #getFieldPosition(field) { + let viewPort = this.viewport; + let fieldRect = viewPort.convertToViewportRectangle(field.rect); + let rect = PDFField.#normalizeRect(fieldRect); + + let height = rect[3] - rect[1]; + if (field.fieldType === 'Tx') { + if (height > kMinHeight + 2) { + rect[1] += 2; + height -= 2; + } + } + else if (field.fieldType !== 'Ch') { //checkbox, radio button, and link button + rect[1] -= 3; + } + + height = (height >= kMinHeight) ? 
height : kMinHeight; + + return { + x: PDFUnit.toFormX(rect[0]), + y: PDFUnit.toFormY(rect[1]), + w: PDFUnit.toFormX(rect[2] - rect[0]), + h: PDFUnit.toFormY(height) + }; + } + + #getFieldBaseData(field) { + let attributeMask = 0; + //PDF Spec p.676 TABLE 8.70 Field flags common to all field types + if (field.fieldFlags & 0x00000001) { + attributeMask |= kFBANotOverridable; + } + if (field.fieldFlags & 0x00000002) { + attributeMask |= kFBARequired; + } + + let anData = { + id: { Id: field.fullName, EN: 0}, + TI: field.TI, + AM: attributeMask + }; + //PDF Spec p.675: add TU (AlternativeText) fields to provide accessibility info + if (field.alternativeText && field.alternativeText.length > 1) { + anData.TU = field.alternativeText; + } + + if (field.alternativeID && field.alternativeID.length > 1) { + anData.TM = field.alternativeID; + } + + return Object.assign(anData, this.#getFieldPosition(field)); + } + + #addAlpha(field) { + const anData = Object.assign({ style: 48, T: { - Name: field.TName || 'alpha', - TypeInfo: {}, - }, - }, - this.#getFieldBaseData(field) - ); + Name: field.TName || "alpha", + TypeInfo: {} + } + }, this.#getFieldBaseData(field)); - if (field.MV) { - //field attributes: arbitrary mask value - anData.MV = field.MV; - } - if (field.fieldValue) { - anData.V = field.fieldValue; //read-only field value, like "self-prepared" - } + if (field.MV) { //field attributes: arbitrary mask value + anData.MV = field.MV; + } + if (field.fieldValue) { + anData.V = field.fieldValue; //read-only field value, like "self-prepared" + } - this.Fields.push(anData); - } + this.Fields.push(anData); + } - #addCheckBox(box) { - const anData = Object.assign( - { + #addCheckBox(box) { + const anData = Object.assign({ style: 48, T: { - Name: 'box', - TypeInfo: {}, - }, - }, - this.#getFieldBaseData(box) - ); - if (box.fieldValue) { - anData.checked = box.fieldValue !== 'Off'; - } + Name: "box", + TypeInfo: {} + } + }, this.#getFieldBaseData(box)); + if(box.fieldValue) { + 
anData.checked = box.fieldValue !== 'Off'; + } - this.Boxsets.push({ boxes: [anData] }); - } + this.Boxsets.push({boxes:[anData]}); + } - #addRadioButton(box) { - const anData = Object.assign( - { + #addRadioButton(box) { + const anData = Object.assign({ style: 48, T: { - Name: 'box', - TypeInfo: {}, - }, - }, - this.#getFieldBaseData(box) - ); - - anData.id.Id = box.value; - if ('checked' in box) { - anData.checked = box.checked; - } - - const rdGroup = this.Boxsets.filter( - (boxset) => - 'id' in boxset && 'Id' in boxset.id && boxset.id.Id === box.fullName - )[0]; - if (!!rdGroup && 'boxes' in rdGroup) { - rdGroup.boxes.push(anData); - } else { - this.Boxsets.push({ - boxes: [anData], - id: { Id: box.fullName, EN: 0 }, - }); - } - } - - #addLinkButton(field) { - const anData = Object.assign( - { + Name: "box", + TypeInfo: {} + } + }, this.#getFieldBaseData(box)); + + anData.id.Id = box.value; + if ('checked' in box) { + anData.checked = box.checked; + } + + const rdGroup = this.Boxsets.filter(boxset => ('id' in boxset) && ('Id' in boxset.id) && (boxset.id.Id === box.fullName))[0]; + if ((!!rdGroup) && ('boxes' in rdGroup)) { + rdGroup.boxes.push(anData); + } + else { + this.Boxsets.push({boxes:[anData], id: { Id: box.fullName, EN: 0}}); + } + } + + #addLinkButton(field) { + const anData = Object.assign({ style: 48, T: { - Name: 'link', + Name: "link" }, FL: { - form: { Id: field.FL }, - }, - }, - this.#getFieldBaseData(field) - ); + form: {Id: field.FL} + } + }, this.#getFieldBaseData(field)); - this.Fields.push(anData); - } + this.Fields.push(anData); + } - #addSelect(field) { - const anData = Object.assign( - { + #addSelect(field) { + const anData = Object.assign({ style: 48, T: { - Name: 'alpha', - TypeInfo: {}, - }, - }, - this.#getFieldBaseData(field) - ); - - anData.w -= 0.5; //adjust combobox width - anData.PL = { V: [], D: [] }; - field.value.forEach((ele, idx) => { - if (Array.isArray(ele)) { - anData.PL.D.push(ele[0]); - anData.PL.V.push(ele[1]); - } 
else { - anData.PL.D.push(ele); - anData.PL.V.push(ele); - } - }); - - // add field value to the object - if (field.fieldValue) { - anData.V = field.fieldValue; - } - this.Fields.push(anData); - } - - #addSignature(field) { - const anData = Object.assign( - { + Name: "alpha", + TypeInfo: {} + } + }, this.#getFieldBaseData(field)); + + anData.w -= 0.5; //adjust combobox width + anData.PL = {V: [], D: []}; + field.value.forEach( (ele, idx) => { + if (Array.isArray(ele)) { + anData.PL.D.push(ele[0]); + anData.PL.V.push(ele[1]); + } else { + anData.PL.D.push(ele); + anData.PL.V.push(ele); + } + }); + + // add field value to the object + if (field.fieldValue) { + anData.V = field.fieldValue; + } + this.Fields.push(anData); + }; + + #addSignature(field) { + const anData = Object.assign({ style: 48, T: { - Name: 'signature', - TypeInfo: {}, - }, - }, - this.#getFieldBaseData(field) - ); - - if (field.Sig) { - anData.Sig = {}; - if (field.Sig.Name) anData.Sig.Name = field.Sig.Name; - if (field.Sig.M) anData.Sig.M = PDFUnit.dateToIso8601(field.Sig.M); - if (field.Sig.Location) anData.Sig.Location = field.Sig.Location; - if (field.Sig.Reason) anData.Sig.Reason = field.Sig.Reason; - if (field.Sig.ContactInfo) - anData.Sig.ContactInfo = field.Sig.ContactInfo; - } - - this.Fields.push(anData); - } - - // public instance methods - processField() { - this.field.TI = PDFField.tabIndex++; - - switch (this.field.fieldType) { - case 'Tx': - this.#addAlpha(this.field); - break; - case 'Cb': - this.#addCheckBox(this.field); - break; - case 'Rd': - this.#addRadioButton(this.field); - break; - case 'Btn': - this.#addLinkButton(this.field); - break; - case 'Ch': - this.#addSelect(this.field); - break; - case 'Sig': - this.#addSignature(this.field); - break; - } - - this.clean(); - } - - clean() { - delete this.field; - delete this.viewport; - delete this.Fields; - delete this.Boxsets; - } - - //static public method to generate fieldsType object based on parser result - static 
getAllFieldsTypes(data) { - const isFieldReadOnly = (field) => { - return field.AM & kFBANotOverridable ? true : false; - }; - - const getFieldBase = (field) => { - return { - id: field.id.Id, - type: field.T.Name, - calc: isFieldReadOnly(field), - value: field.V || '', - }; - }; - - let retVal = []; - data.Pages.forEach((page) => { - page.Boxsets.forEach((boxsets) => { - if (boxsets.boxes.length > 1) { - //radio button - boxsets.boxes.forEach((box) => { - retVal.push({ - id: boxsets.id.Id, - type: 'radio', - calc: isFieldReadOnly(box), - value: box.id.Id, - }); - }); - } else { - //checkbox - retVal.push(getFieldBase(boxsets.boxes[0])); + Name: "signature", + TypeInfo: {} } - }); - - page.Fields.forEach((field) => retVal.push(getFieldBase(field))); - }); - return retVal; - } + }, this.#getFieldBaseData(field)); + + if (field.Sig) { + anData.Sig = {}; + if (field.Sig.Name) anData.Sig.Name = field.Sig.Name; + if (field.Sig.M) anData.Sig.M = PDFUnit.dateToIso8601(field.Sig.M); + if (field.Sig.Location) anData.Sig.Location = field.Sig.Location; + if (field.Sig.Reason) anData.Sig.Reason = field.Sig.Reason; + if (field.Sig.ContactInfo) anData.Sig.ContactInfo = field.Sig.ContactInfo; + } + + this.Fields.push(anData); + } + + // public instance methods + processField() { + this.field.TI = PDFField.tabIndex++; + + switch(this.field.fieldType) { + case 'Tx': this.#addAlpha(this.field); break; + case 'Cb': this.#addCheckBox(this.field); break; + case 'Rd': this.#addRadioButton(this.field);break; + case 'Btn':this.#addLinkButton(this.field); break; + case 'Ch': this.#addSelect(this.field); break; + case 'Sig': this.#addSignature(this.field); break; + } + + this.clean(); + } + + clean() { + delete this.field; + delete this.viewport; + delete this.Fields; + delete this.Boxsets; + } + + //static public method to generate fieldsType object based on parser result + static getAllFieldsTypes(data) { + const isFieldReadOnly = field => { + return (field.AM & kFBANotOverridable) ? 
true : false; + }; + + const getFieldBase = field => { + return {id: field.id.Id, type: field.T.Name, calc: isFieldReadOnly(field), value: field.V || ""}; + }; + + let retVal = []; + data.Pages.forEach( page => { + page.Boxsets.forEach( boxsets => { + if (boxsets.boxes.length > 1) { //radio button + boxsets.boxes.forEach( box => { + retVal.push({id: boxsets.id.Id, type: "radio", calc: isFieldReadOnly(box), value: box.id.Id}); + }); + } + else { //checkbox + retVal.push(getFieldBase(boxsets.boxes[0])); + } + }); + + page.Fields.forEach(field => retVal.push(getFieldBase(field))); + + }); + return retVal; + } } //BEGIN - MQZ 9/19/2012. Helper functions to parse acroForm elements function setupRadioButton(annotation, item) { - let asName = ''; - //PDF Spec p.689: parent item's DV holds the item's value that is selected by default - let po = annotation.get('Parent'); - if (po) { - po.forEach(function (key, val) { - if (key === 'DV') { - asName = val.name || ''; - } else if (key === 'TU') { - //radio buttons use the alternative text from the parent - item.alternativeText = val; - } else if (key == 'TM') { - item.alternativeID = val; - } - }); - } - - //PDF Spec p.606: get appearance dictionary - let ap = annotation.get('AP'); - //PDF Spec p.614 get normal appearance - let nVal = ap.get('N'); - //PDF Spec p.689 - nVal.forEach(function (key, value) { - if (key.toLowerCase() != 'off') { - //value if selected - item.value = key; //export value - item.checked = key === asName; //initial selection state - } - }); - - if (!item.value) item.value = 'off'; + let asName = ''; + //PDF Spec p.689: parent item's DV holds the item's value that is selected by default + let po = annotation.get('Parent'); + if (po) { + po.forEach(function(key, val){ + if (key === 'DV') { + asName = val.name || ''; + } + else if (key === 'TU') { + //radio buttons use the alternative text from the parent + item.alternativeText = val; + } else if( key == 'TM') { + item.alternativeID = val; + } + }); + } + + 
//PDF Spec p.606: get appearance dictionary + let ap = annotation.get('AP'); + //PDF Spec p.614 get normal appearance + let nVal = ap.get('N'); + //PDF Spec p.689 + nVal.forEach(function (key, value) { + if (key.toLowerCase() != "off") { + //value if selected + item.value = key; //export value + item.checked = (key === asName); //initial selection state + } + }); + + if (!item.value) + item.value = "off"; } function setupPushButton(annotation, item) { - //button label: PDF Spec p.640 - let mk = annotation.get('MK'); - if (mk) { - item.value = mk.get('CA') || ''; - } - - //button action: url when mouse up: PDF Spec:p.642 - item.FL = ''; - let ap = annotation.get('A'); - if (ap) { - let sp = ap.get('S'); - item.FL = ap.get(sp.name); - } + //button label: PDF Spec p.640 + let mk = annotation.get('MK'); + if(mk) { + item.value = mk.get('CA') || ''; + } + + //button action: url when mouse up: PDF Spec:p.642 + item.FL = ""; + let ap = annotation.get('A'); + if (ap) { + let sp = ap.get('S'); + item.FL = ap.get(sp.name); + } } function setupCheckBox(annotation, item) { - //PDF Spec p.606: get appearance dictionary - let ap = annotation.get('AP'); - //PDF Spec p.614 get normal appearance - let nVal = ap.get('N'); - - //PDF Spec p.689 - let i = 0; - nVal.forEach(function (key, value) { - i++; - if (i == 1) - //initial selection state - item.value = key; - }); + //PDF Spec p.606: get appearance dictionary + let ap = annotation.get('AP'); + //PDF Spec p.614 get normal appearance + let nVal = ap.get('N'); + + //PDF Spec p.689 + let i = 0; + nVal.forEach(function (key, value) { + i++; + if (i == 1) //initial selection state + item.value = key; + }); } function setupDropDown(annotation, item) { - //PDF Spec p.688 - item.value = annotation.get('Opt') || []; + //PDF Spec p.688 + item.value = annotation.get('Opt') || []; } function setupFieldAttributes(annotation, item) { - //MQZ. Jan.03.2013. additional-actions dictionary - //PDF Spec P.648. 8.5.2. 
Trigger Events - let aa = annotation.get('AA'); - if (!aa) { - return; - } - - //PDF Spec p.651 get format dictionary - let nVal = aa.get('F'); - if (!nVal) { - nVal = aa.get('K'); - if (!nVal) return; - } - - nVal.forEach(function (key, value) { - if (key === 'JS') { - processFieldAttribute(value, item); - } - }); + //MQZ. Jan.03.2013. additional-actions dictionary + //PDF Spec P.648. 8.5.2. Trigger Events + let aa = annotation.get('AA'); + if (!aa) { + return; + } + + //PDF Spec p.651 get format dictionary + let nVal = aa.get('F'); + if (!nVal) { + nVal = aa.get('K'); + if (!nVal) + return; + } + + nVal.forEach(function (key, value) { + if (key === "JS") { + processFieldAttribute(value, item); + } + }); } const AFSpecial_Format = ['zip', 'zip', 'phone', 'ssn', '']; @@ -1787,207 +1602,209 @@ const AFSpecial_Format = ['zip', 'zip', 'phone', 'ssn', '']; // let AFDate_FormatEx = ["m/d", "m/d/yy", "mm/dd/yy", "mm/yy", "d-mmm", "d-mmm-yy", "dd-mmm-yy", "yymm-dd", "mmm-yy", "mmmm-yy", "mmm d, yyyy", "mmmm d, yyyy", "m/d/yy h:MM tt", "m/d/yy HH:MM"]; function processFieldAttribute(jsFuncName, item) { - if (item.hasOwnProperty('TName')) return; - - if (!jsFuncName.split) return; - - let vParts = jsFuncName.split('('); - if (vParts.length !== 2) return; - - let funcName = vParts[0]; - let funcParam = vParts[1].split(')')[0]; - - switch (funcName) { - case 'AFSpecial_Format': - item.TName = AFSpecial_Format[Number(funcParam)]; - break; - case 'AFNumber_Format': - // nfs = funcParam.split(','); - //set the Money fields to use the Number type with no decimal places after, no commas, and bCurrencyPrepend is set as true; (o use a negative sign (fits the PDF layout and our print formatting as well). 
- // if (nfs[0] === '0' && nfs[1] === '1' && nfs[5]) - // item.TName = 'money'; - // else - item.TName = 'number'; - break; - case 'AFDate_FormatEx': - item.TName = 'date'; - item.MV = funcParam.replace(/^'+|^"+|'+$|"+$/g, ''); //mask value - break; - case 'AFSpecial_KeystrokeEx': //special format: "arbitrary mask" - let maskValue = funcParam.replace(/^'+|^"+|'+$|"+$/g, ''); //mask value - if (!!maskValue && maskValue.length > 0 && maskValue.length < 64) { - item.TName = 'mask'; //fixed length input - item.MV = maskValue; - } - break; - case 'AFPercent_Format': - item.TName = 'percent'; //funcParam => 2, 0, will specified how many decimal places - break; - } + if (item.hasOwnProperty('TName')) + return; + + if(!jsFuncName.split) + return; + + let vParts = jsFuncName.split('('); + if (vParts.length !== 2) + return; + + let funcName = vParts[0]; + let funcParam = vParts[1].split(')')[0]; + + switch (funcName) { + case 'AFSpecial_Format': + item.TName = AFSpecial_Format[Number(funcParam)]; + break; + case 'AFNumber_Format': +// nfs = funcParam.split(','); +//set the Money fields to use the Number type with no decimal places after, no commas, and bCurrencyPrepend is set as true; (o use a negative sign (fits the PDF layout and our print formatting as well). 
+// if (nfs[0] === '0' && nfs[1] === '1' && nfs[5]) +// item.TName = 'money'; +// else + item.TName = 'number'; + break; + case 'AFDate_FormatEx': + item.TName = 'date'; + item.MV = funcParam.replace(/^'+|^"+|'+$|"+$/g,''); //mask value + break; + case 'AFSpecial_KeystrokeEx': //special format: "arbitrary mask" + let maskValue = funcParam.replace(/^'+|^"+|'+$|"+$/g,''); //mask value + if ((!!maskValue) && maskValue.length > 0 && maskValue.length < 64) { + item.TName = 'mask'; //fixed length input + item.MV = maskValue; + } + break; + case 'AFPercent_Format': + item.TName = 'percent'; //funcParam => 2, 0, will specified how many decimal places + break; + } } function setupSignature(annotation, item) { - //PDF Spec p.695: field value is signature dict if signed - let sig = annotation.get('V'); - if (!sig) return; - - //PDF Spec p.728: get signature information - item.Sig = {}; - let name = sig.get('Name'); - if (name) item.Sig.Name = name; - let time = sig.get('M'); - if (time) item.Sig.M = time; - let location = sig.get('Location'); - if (location) item.Sig.Location = location; - let reason = sig.get('Reason'); - if (reason) item.Sig.Reason = reason; - let contactInfo = sig.get('ContactInfo'); - if (contactInfo) item.Sig.ContactInfo = contactInfo; + //PDF Spec p.695: field value is signature dict if signed + let sig = annotation.get('V'); + if (!sig) return; + + //PDF Spec p.728: get signature information + item.Sig = {}; + let name = sig.get('Name'); + if (name) item.Sig.Name = name; + let time = sig.get('M'); + if (time) item.Sig.M = time; + let location = sig.get('Location'); + if (location) item.Sig.Location = location; + let reason = sig.get('Reason'); + if (reason) item.Sig.Reason = reason; + let contactInfo = sig.get('ContactInfo'); + if (contactInfo) item.Sig.ContactInfo = contactInfo; } //END - MQZ 9/19/2012. 
Helper functions to parse acroForm elements class PDFAnno { - static processAnnotation(annotation, item) { - if (item.fieldType == 'Btn') { - //PDF Spec p.675 - if (item.fieldFlags & 32768) { - setupRadioButton(annotation, item); - } else if (item.fieldFlags & 65536) { - setupPushButton(annotation, item); - } else { - setupCheckBox(annotation, item); - } - } else if (item.fieldType == 'Ch') { - setupDropDown(annotation, item); - } else if (item.fieldType == 'Tx') { - setupFieldAttributes(annotation, item); - } else if (item.fieldType === 'Sig') { - setupSignature(annotation, item); - } else { - nodeUtil.p2jwarn('Unknown fieldType: ', item); - } - } + static processAnnotation(annotation, item) { + if (item.fieldType == 'Btn') { //PDF Spec p.675 + if (item.fieldFlags & 32768) { + setupRadioButton(annotation, item); + } + else if (item.fieldFlags & 65536) { + setupPushButton(annotation, item); + } + else { + setupCheckBox(annotation, item); + } + } + else if (item.fieldType == 'Ch') { + setupDropDown(annotation, item); + } + else if (item.fieldType == 'Tx') { + setupFieldAttributes(annotation, item); + } + else if (item.fieldType === 'Sig') { + setupSignature(annotation, item); + } + else { + nodeUtil.p2jwarn("Unknown fieldType: ", item); + } + } } class PDFImage { - #_src = ''; - #_onload = null; + #_src = ''; + #_onload = null; - set onload(val) { - this.#_onload = typeof val === 'function' ? val : null; - } + set onload(val) { + this.#_onload = typeof val === 'function' ? 
val : null; + } - get onload() { - return this.#_onload; - } + get onload() { + return this.#_onload; + } - set src(val) { - this.#_src = val; - if (this.#_onload) this.#_onload(); - } + set src(val) { + this.#_src = val; + if (this.#_onload) this.#_onload(); + } - get src() { - return this.#_src; - } + get src() { + return this.#_src; + } - btoa(val) { - if (typeof window === 'undefined') { - return new Buffer.from(val, 'ascii').toString('base64'); - } else if (typeof window.btoa === 'function') return window.btoa(val); + btoa(val) { + if (typeof window === 'undefined') { + return (new Buffer.from(val, 'ascii')).toString('base64'); + } + else if (typeof window.btoa === 'function') + return window.btoa(val); + + return ""; + } - return ''; - } } class PTIXmlParser { - xmlData = null; - ptiPageArray = []; - - // constructor - constructor() { - this.xmlData = null; - this.ptiPageArray = []; - } - - parseXml(filePath, callback) { - fs.readFile(filePath, 'utf8', (err, data) => { - if (err) { - callback(err); - } else { - this.xmlData = data; - - var parser = new xmldom.DOMParser(); - var dom = parser.parseFromString(this.xmlData); - var root = dom.documentElement; - - var xmlFields = root.getElementsByTagName('field'); - var fields = []; - - for (var i = 0; i < xmlFields.length; i++) { - var id = xmlFields[i].getAttribute('id'); - var xPos = xmlFields[i].getAttribute('x'); - var yPos = xmlFields[i].getAttribute('y'); - var width = xmlFields[i].getAttribute('width'); - var height = xmlFields[i].getAttribute('height'); - var type = xmlFields[i].getAttribute('xsi:type'); - var page = xmlFields[i].getAttribute('page'); - var fontName = xmlFields[i].getAttribute('fontName'); - var fontSize = xmlFields[i].getAttribute('fontSize'); - - var item = {}; - - var rectLeft = parseInt(xPos) - 21; //was 23.5 - var rectTop = parseInt(yPos) - 20; //was 23 - var rectRight = parseInt(rectLeft) + parseInt(width) - 4; - var rectBottom = parseInt(rectTop) + parseInt(height) - 4; - - 
item.fieldType = 'Tx'; - if (type == 'Boolean') { - item.fieldType = 'Btn'; - } else if (type == 'SSN' || type == 'Phone' || type == 'zip') { - item.TName = type.toLowerCase(); - } - item.alternativeText = ''; - item.fullName = id; - item.fontSize = fontSize; - item.subtype = 'Widget'; - - item.rect = [rectLeft, rectTop, rectRight, rectBottom]; - - fields.push(item); - - this.ptiPageArray[parseInt(page)] = fields; - } - } - callback(); - }); - } - - getFields(pageNum) { - return this.ptiPageArray[pageNum]; - } + xmlData = null; + ptiPageArray = []; + + // constructor + constructor() { + this.xmlData = null; + this.ptiPageArray = []; + } + + parseXml(filePath, callback) { + fs.readFile(filePath, 'utf8', (err, data) => { + if (err) { + callback(err); + } + else { + this.xmlData = data; + + var parser = new xmldom.DOMParser(); + var dom = parser.parseFromString(this.xmlData); + var root = dom.documentElement; + + var xmlFields = root.getElementsByTagName("field"); + var fields = []; + + for(var i=0;i resolve(outputPath)); - outputStream.on('error', (err) => reject(err)); - return outputStream; - } - - #pdfParser = null; - #chunks = []; - #parsedData = { Pages: [] }; - #_flush_callback = null; - - constructor(pdfParser, options) { - super(options); - this.#pdfParser = pdfParser; - - this.#chunks = []; - - // this.#pdfParser.on("pdfParser_dataReady", evtData => { - // this.push(evtData); - // this.#_flush_callback(); - // this.emit('end', null); - // }); - this.#pdfParser.on( - 'readable', - (meta) => (this.#parsedData = { ...meta, Pages: [] }) - ); - this.#pdfParser.on('data', (page) => { - if (!page) { - this.push(this.#parsedData); - this.#_flush_callback(); - } else this.#parsedData.Pages.push(page); - }); - } - - //implements transform stream - _transform(chunk, enc, callback) { - this.#chunks.push( - Buffer.isBuffer(chunk) ? 
chunk : Buffer.from(chunk, enc) - ); - callback(); - } - - _flush(callback) { - this.#_flush_callback = callback; - this.#pdfParser.parseBuffer(Buffer.concat(this.#chunks)); - } + static createContentStream(jsonObj) { + const rStream = new stream.Readable({objectMode: true}); + rStream.push(jsonObj); + rStream.push(null); + return rStream; + } + + static createOutputStream(outputPath, resolve, reject) { + const outputStream = fs.createWriteStream(outputPath); + outputStream.on('finish', () => resolve(outputPath)); + outputStream.on('error', err => reject(err) ); + return outputStream; + } + + #pdfParser = null; + #chunks = []; + #parsedData = {Pages:[]}; + #_flush_callback = null; + + constructor(pdfParser, options) { + super(options); + this.#pdfParser = pdfParser; + + this.#chunks = []; + + // this.#pdfParser.on("pdfParser_dataReady", evtData => { + // this.push(evtData); + // this.#_flush_callback(); + // this.emit('end', null); + // }); + this.#pdfParser.on("readable", meta => this.#parsedData = {...meta, Pages:[]}); + this.#pdfParser.on("data", page => { + if (!page) { + this.push(this.#parsedData); + this.#_flush_callback(); + } + else + this.#parsedData.Pages.push(page); + }); + } + + //implements transform stream + _transform(chunk, enc, callback) { + this.#chunks.push(Buffer.isBuffer(chunk) ? 
chunk : Buffer.from(chunk, enc)); + callback(); + } + + _flush(callback) { + this.#_flush_callback = callback; + this.#pdfParser.parseBuffer(Buffer.concat(this.#chunks)); + } + + _destroy() { + super.removeAllListeners(); + this.#pdfParser = null; + this.#chunks = []; + } +} - _destroy() { - super.removeAllListeners(); - this.#pdfParser = null; - this.#chunks = []; - } -} class StringifyStream extends stream.Transform { - constructor(options) { - super(options); + constructor(options) { + super(options); - this._readableState.objectMode = false; - this._writableState.objectMode = true; - } + this._readableState.objectMode = false; + this._writableState.objectMode = true; + } - _transform(obj, encoding, callback) { - this.push(JSON.stringify(obj)); - callback(); - } + _transform(obj, encoding, callback){ + this.push(JSON.stringify(obj)); + callback(); + } } -class PDFParser extends events.EventEmitter { - // inherit from event emitter - //public static - static get colorDict() { - return kColors; - } - static get fontFaceDict() { - return kFontFaces; - } - static get fontStyleDict() { - return kFontStyles; - } - - //private static - static #maxBinBufferCount = 10; - static #binBuffer = {}; - - //private - #password = ''; - - #context = null; // service context object, only used in Web Service project; null in command line - - #pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started - #pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache - #data = null; //if file read success, data is PDF content; if failed, data is "err" object - #PDFJS = null; //will be initialized in constructor - #processFieldInfoXML = false; //disable additional _fieldInfo.xml parsing and merging (do NOT set to true) - - // constructor - constructor(context, needRawText, password) { - //call constructor for super class - super(); - - // private - // service context object, only used in Web Service 
project; null in command line - this.#context = context; - - this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started - this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache - this.#data = null; //if file read success, data is PDF content; if failed, data is "err" object - this.#processFieldInfoXML = false; //disable additional _fieldInfo.xml parsing and merging (do NOT set to true) - - this.#PDFJS = new PDFJSClass(needRawText); - this.#password = password; - } - - //private methods, needs to invoked by [funcName].call(this, ...) - #onPDFJSParseDataReady(data) { - if (!data) { - //v1.1.2: data===null means end of parsed data - nodeUtil.p2jinfo('PDF parsing completed.'); - this.emit('pdfParser_dataReady', this.#data); - } else { - this.#data = { ...this.#data, ...data }; - } - } - - #onPDFJSParserDataError(err) { - this.#data = null; - this.emit('pdfParser_dataError', { parserError: err }); - // this.emit("error", err); - } - - #startParsingPDF(buffer) { - this.#data = {}; - - this.#PDFJS.on('pdfjs_parseDataReady', (data) => - this.#onPDFJSParseDataReady(data) - ); - this.#PDFJS.on('pdfjs_parseDataError', (err) => - this.#onPDFJSParserDataError(err) - ); - - //v1.3.0 the following Readable Stream-like events are replacement for the top two custom events - this.#PDFJS.on('readable', (meta) => this.emit('readable', meta)); - this.#PDFJS.on('data', (data) => this.emit('data', data)); - this.#PDFJS.on('error', (err) => this.#onPDFJSParserDataError(err)); - - this.#PDFJS.parsePDFData( - buffer || PDFParser.#binBuffer[this.binBufferKey], - this.#password - ); - } - - #processBinaryCache() { - if (this.binBufferKey in PDFParser.#binBuffer) { - this.#startParsingPDF(); - return true; - } - - const allKeys = Object.keys(PDFParser.#binBuffer); - if (allKeys.length > PDFParser.#maxBinBufferCount) { - const idx = this.id % PDFParser.#maxBinBufferCount; - const key = 
allKeys[idx]; - PDFParser.#binBuffer[key] = null; - delete PDFParser.#binBuffer[key]; - - nodeUtil.p2jinfo('re-cycled cache for ' + key); - } - - return false; - } - - //public getter - get data() { - return this.#data; - } - get binBufferKey() { - return this.#pdfFilePath + this.#pdfFileMTime; - } - - //public APIs - createParserStream() { - return new ParserStream(this, { - objectMode: true, - bufferSize: 64 * 1024, - }); - } - - async loadPDF(pdfFilePath, verbosity) { - nodeUtil.verbosity(verbosity || 0); - nodeUtil.p2jinfo('about to load PDF file ' + pdfFilePath); - - this.#pdfFilePath = pdfFilePath; - - try { - this.#pdfFileMTime = fs.statSync(pdfFilePath).mtimeMs; - if (this.#processFieldInfoXML) { - this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath); - } - - if (this.#processBinaryCache()) return; - - PDFParser.#binBuffer[this.binBufferKey] = await promises.readFile( - pdfFilePath - ); - nodeUtil.p2jinfo(`Load OK: ${pdfFilePath}`); - this.#startParsingPDF(); - } catch (err) { - nodeUtil.p2jerror(`Load Failed: ${pdfFilePath} - ${err}`); - this.emit('pdfParser_dataError', err); - } - } - - // Introduce a way to directly process buffers without the need to write it to a temporary file - parseBuffer(pdfBuffer) { - this.#startParsingPDF(pdfBuffer); - } - - getRawTextContent() { - return this.#PDFJS.getRawTextContent(); - } - getRawTextContentStream() { - return ParserStream.createContentStream(this.getRawTextContent()); - } - - getAllFieldsTypes() { - return this.#PDFJS.getAllFieldsTypes(); - } - getAllFieldsTypesStream() { - return ParserStream.createContentStream(this.getAllFieldsTypes()); - } - - getMergedTextBlocksIfNeeded() { - return this.#PDFJS.getMergedTextBlocksIfNeeded(); - } - getMergedTextBlocksStream() { - return ParserStream.createContentStream( - this.getMergedTextBlocksIfNeeded() - ); - } - - destroy() { - // invoked with stream transform process - super.removeAllListeners(); - - //context object will be set in Web Service project, but not in command 
line utility - if (this.#context) { - this.#context.destroy(); - this.#context = null; - } - - this.#pdfFilePath = null; - this.#pdfFileMTime = null; - this.#data = null; - this.#processFieldInfoXML = false; //disable additional _fieldInfo.xml parsing and merging (do NOT set to true) +class PDFParser extends events.EventEmitter { // inherit from event emitter + //public static + static get colorDict() {return kColors; } + static get fontFaceDict() { return kFontFaces; } + static get fontStyleDict() { return kFontStyles; } + + //private static + static #maxBinBufferCount = 10; + static #binBuffer = {}; + + //private + #password = ""; + + #context = null; // service context object, only used in Web Service project; null in command line + + #pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + #pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache + #data = null; //if file read success, data is PDF content; if failed, data is "err" object + #PDFJS = null; //will be initialized in constructor + #processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + // constructor + constructor(context, needRawText, password) { + //call constructor for super class + super(); + + // private + // service context object, only used in Web Service project; null in command line + this.#context = context; + + this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache + this.#data = null; //if file read success, data is PDF content; if failed, data is "err" object + this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + this.#PDFJS = new PDFJSClass(needRawText); + this.#password = password; + } + + //private methods, needs to 
invoked by [funcName].call(this, ...) + #onPDFJSParseDataReady(data) { + if (!data) { //v1.1.2: data===null means end of parsed data + nodeUtil.p2jinfo("PDF parsing completed."); + this.emit("pdfParser_dataReady", this.#data); + } + else { + this.#data = {...this.#data, ...data}; + } + } + + #onPDFJSParserDataError(err) { + this.#data = null; + this.emit("pdfParser_dataError", {"parserError": err}); + // this.emit("error", err); + } + + #startParsingPDF(buffer) { + this.#data = {}; + + this.#PDFJS.on("pdfjs_parseDataReady", data => this.#onPDFJSParseDataReady(data)); + this.#PDFJS.on("pdfjs_parseDataError", err => this.#onPDFJSParserDataError(err)); + + //v1.3.0 the following Readable Stream-like events are replacement for the top two custom events + this.#PDFJS.on("readable", meta => this.emit("readable", meta)); + this.#PDFJS.on("data", data => this.emit("data", data)); + this.#PDFJS.on("error", err => this.#onPDFJSParserDataError(err)); + + this.#PDFJS.parsePDFData(buffer || PDFParser.#binBuffer[this.binBufferKey], this.#password); + } + + #processBinaryCache() { + if (this.binBufferKey in PDFParser.#binBuffer) { + this.#startParsingPDF(); + return true; + } + + const allKeys = Object.keys(PDFParser.#binBuffer); + if (allKeys.length > PDFParser.#maxBinBufferCount) { + const idx = this.id % PDFParser.#maxBinBufferCount; + const key = allKeys[idx]; + PDFParser.#binBuffer[key] = null; + delete PDFParser.#binBuffer[key]; + + nodeUtil.p2jinfo("re-cycled cache for " + key); + } + + return false; + } + + //public getter + get data() { return this.#data; } + get binBufferKey() { return this.#pdfFilePath + this.#pdfFileMTime; } + + //public APIs + createParserStream() { + return new ParserStream(this, {objectMode: true, bufferSize: 64 * 1024}); + } + + async loadPDF(pdfFilePath, verbosity) { + nodeUtil.verbosity(verbosity || 0); + nodeUtil.p2jinfo("about to load PDF file " + pdfFilePath); + + this.#pdfFilePath = pdfFilePath; + + try { + this.#pdfFileMTime = 
fs.statSync(pdfFilePath).mtimeMs; + if (this.#processFieldInfoXML) { + this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath); + } - this.#PDFJS.destroy(); - this.#PDFJS = null; - } + if (this.#processBinaryCache()) + return; + + PDFParser.#binBuffer[this.binBufferKey] = await promises.readFile(pdfFilePath); + nodeUtil.p2jinfo(`Load OK: ${pdfFilePath}`); + this.#startParsingPDF(); + } + catch(err) { + nodeUtil.p2jerror(`Load Failed: ${pdfFilePath} - ${err}`); + this.emit("pdfParser_dataError", err); + } + } + + // Introduce a way to directly process buffers without the need to write it to a temporary file + parseBuffer(pdfBuffer) { + this.#startParsingPDF(pdfBuffer); + } + + getRawTextContent() { return this.#PDFJS.getRawTextContent(); } + getRawTextContentStream() { return ParserStream.createContentStream(this.getRawTextContent()); } + + getAllFieldsTypes() { return this.#PDFJS.getAllFieldsTypes(); }; + getAllFieldsTypesStream() { return ParserStream.createContentStream(this.getAllFieldsTypes()); } + + getMergedTextBlocksIfNeeded() { return this.#PDFJS.getMergedTextBlocksIfNeeded(); } + getMergedTextBlocksStream() { return ParserStream.createContentStream(this.getMergedTextBlocksIfNeeded()) } + + destroy() { // invoked with stream transform process + super.removeAllListeners(); + + //context object will be set in Web Service project, but not in command line utility + if (this.#context) { + this.#context.destroy(); + this.#context = null; + } + + this.#pdfFilePath = null; + this.#pdfFileMTime = null; + this.#data = null; + this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) + + this.#PDFJS.destroy(); + this.#PDFJS = null; + } } module.exports = PDFParser; diff --git a/rollup.config.js b/rollup.config.js index 9b462295..7e0a3b39 100644 --- a/rollup.config.js +++ b/rollup.config.js @@ -1,3 +1,4 @@ +import nodeResolve from '@rollup/plugin-node-resolve'; import replace from '@rollup/plugin-replace'; import builtins from 
'rollup-plugin-node-builtins'; @@ -18,8 +19,7 @@ export default [ output: { file: 'pdfparser.cjs', format: 'cjs', - name: 'pdfparser', - exports: 'default', + // exports: 'named', }, treeshake: false, plugins: [ diff --git a/rollup/addDestructedImports.js b/rollup/addDestructedImports.js new file mode 100644 index 00000000..79121441 --- /dev/null +++ b/rollup/addDestructedImports.js @@ -0,0 +1,46 @@ +import fs from 'fs'; +import path from 'path'; +import { fileURLToPath } from 'url'; +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +// Specify the directory where you want to search for pdfparser.cjs +const directoryToSearch = path.join(__dirname, '..'); + +// Function to search for pdfparser.cjs and update the file +function updatePdfParser(filePath) { + // Read the content of the file + const content = fs.readFileSync(filePath, 'utf8'); + + // Check if the file contains the line we want to modify + if (content.includes("var xmldom = require('@xmldom/xmldom');")) { + // Add the line underneath + const updatedContent = content.replace( + "var xmldom = require('@xmldom/xmldom');", + "var xmldom = require('@xmldom/xmldom');\nvar DOMParser = xmldom.DOMParser;" + ).replace( + "require('buffer');", + "var { Blob } = require('buffer');" + ).replace("const PDFJS = {};", "const PDFJS = {};\nvar Image = PDFImage") + + // Write the updated content back to the file + fs.writeFileSync(filePath, updatedContent, 'utf8'); + console.log(`Updated ${filePath} successfully.`); + } else { + console.log(`File ${filePath} does not contain the required line.`); + } +} + +// Function to recursively search for pdfparser.cjs +function searchForPdfParser(directory) { + fs.readdirSync(directory).forEach((file) => { + const filePath = path.join(directory, file); + if (fs.statSync(filePath).isDirectory()) { + searchForPdfParser(filePath); + } else if (file === 'pdfparser.cjs') { + updatePdfParser(filePath); + } + }); +} + +// Start searching from 
the specified directory +searchForPdfParser(directoryToSearch); From cb66f23b2a97575962630f57c161a461a274f5d6 Mon Sep 17 00:00:00 2001 From: Isaac <91521821+isimisi@users.noreply.github.com> Date: Fri, 29 Sep 2023 11:24:45 +0200 Subject: [PATCH 5/5] A standalone single space will be included --- lib/pdfcanvas.js | 1124 ++++++++++++++++---------------- lib/pdffont.js | 691 +++++++++++--------- pdfparser.cjs | 1595 +++++++++++++++++++++++++--------------------- 3 files changed, 1814 insertions(+), 1596 deletions(-) diff --git a/lib/pdfcanvas.js b/lib/pdfcanvas.js index beb45731..4283c4f5 100644 --- a/lib/pdfcanvas.js +++ b/lib/pdfcanvas.js @@ -1,166 +1,167 @@ -import nodeUtil from "util"; -import PDFLine from "./pdfline.js"; -import PDFFill from "./pdffill.js"; -import PDFFont from "./pdffont.js"; +import nodeUtil from 'util'; +import PDFLine from './pdfline.js'; +import PDFFill from './pdffill.js'; +import PDFFont from './pdffont.js'; // alias some functions to make (compiled) code shorter -const {round: mr, sin: ms, cos: mc, abs, sqrt} = Math; +const { round: mr, sin: ms, cos: mc, abs, sqrt } = Math; // precompute "00" to "FF" const dec2hex = []; for (let i = 0; i < 16; i++) { - for (let j = 0; j < 16; j++) { - dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); - } + for (let j = 0; j < 16; j++) { + dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); + } } function createMatrixIdentity() { - return [ - [1, 0, 0], - [0, 1, 0], - [0, 0, 1] - ]; + return [ + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + ]; } function matrixMultiply(m1, m2) { - let result = createMatrixIdentity(); + let result = createMatrixIdentity(); - for (let x = 0; x < 3; x++) { - for (let y = 0; y < 3; y++) { - let sum = 0; + for (let x = 0; x < 3; x++) { + for (let y = 0; y < 3; y++) { + let sum = 0; - for (let z = 0; z < 3; z++) { - sum += m1[x][z] * m2[z][y]; - } + for (let z = 0; z < 3; z++) { + sum += m1[x][z] * m2[z][y]; + } - result[x][y] = sum; - } - } - return result; + result[x][y] = 
sum; + } + } + return result; } function copyState(o1, o2) { - o2.fillStyle = o1.fillStyle; - o2.lineCap = o1.lineCap; - o2.lineJoin = o1.lineJoin; - o2.lineWidth = o1.lineWidth; - o2.miterLimit = o1.miterLimit; - o2.shadowBlur = o1.shadowBlur; - o2.shadowColor = o1.shadowColor; - o2.shadowOffsetX = o1.shadowOffsetX; - o2.shadowOffsetY = o1.shadowOffsetY; - o2.strokeStyle = o1.strokeStyle; - o2.globalAlpha = o1.globalAlpha; - o2.arcScaleX_ = o1.arcScaleX_; - o2.arcScaleY_ = o1.arcScaleY_; - o2.lineScale_ = o1.lineScale_; - o2.dashArray = o1.dashArray; + o2.fillStyle = o1.fillStyle; + o2.lineCap = o1.lineCap; + o2.lineJoin = o1.lineJoin; + o2.lineWidth = o1.lineWidth; + o2.miterLimit = o1.miterLimit; + o2.shadowBlur = o1.shadowBlur; + o2.shadowColor = o1.shadowColor; + o2.shadowOffsetX = o1.shadowOffsetX; + o2.shadowOffsetY = o1.shadowOffsetY; + o2.strokeStyle = o1.strokeStyle; + o2.globalAlpha = o1.globalAlpha; + o2.arcScaleX_ = o1.arcScaleX_; + o2.arcScaleY_ = o1.arcScaleY_; + o2.lineScale_ = o1.lineScale_; + o2.dashArray = o1.dashArray; } function processStyle(styleString) { - let str, alpha = 1; - - styleString = String(styleString); - if (styleString.substring(0, 3) == 'rgb') { - let start = styleString.indexOf('(', 3); - let end = styleString.indexOf(')', start + 1); - let guts = styleString.substring(start + 1, end).split(','); - - str = '#'; - for (let i = 0; i < 3; i++) { - str += dec2hex[Number(guts[i])]; - } - - if (guts.length == 4 && styleString.substring(3, 4) == 'a') { - alpha = guts[3]; - } - } else { - str = styleString; - } - - return {color:str, alpha:alpha}; + let str, + alpha = 1; + + styleString = String(styleString); + if (styleString.substring(0, 3) == 'rgb') { + let start = styleString.indexOf('(', 3); + let end = styleString.indexOf(')', start + 1); + let guts = styleString.substring(start + 1, end).split(','); + + str = '#'; + for (let i = 0; i < 3; i++) { + str += dec2hex[Number(guts[i])]; + } + + if (guts.length == 4 && 
styleString.substring(3, 4) == 'a') { + alpha = guts[3]; + } + } else { + str = styleString; + } + + return { color: str, alpha: alpha }; } function processLineCap(lineCap) { - switch (lineCap) { - case 'butt': - return 'flat'; - case 'round': - return 'round'; - case 'square': - default: - return 'square'; - } + switch (lineCap) { + case 'butt': + return 'flat'; + case 'round': + return 'round'; + case 'square': + default: + return 'square'; + } } // Helper function that takes the already fixed cordinates. function bezierCurveToHelper(self, cp1, cp2, p) { - self.currentPath_.push({ - type:'bezierCurveTo', - cp1x:cp1.x, - cp1y:cp1.y, - cp2x:cp2.x, - cp2y:cp2.y, - x:p.x, - y:p.y - }); - self.currentX_ = p.x; - self.currentY_ = p.y; + self.currentPath_.push({ + type: 'bezierCurveTo', + cp1x: cp1.x, + cp1y: cp1.y, + cp2x: cp2.x, + cp2y: cp2.y, + x: p.x, + y: p.y, + }); + self.currentX_ = p.x; + self.currentY_ = p.y; } function matrixIsFinite(m) { - for (let j = 0; j < 3; j++) { - for (let k = 0; k < 2; k++) { - if (!isFinite(m[j][k]) || isNaN(m[j][k])) { - return false; - } - } - } - return true; + for (let j = 0; j < 3; j++) { + for (let k = 0; k < 2; k++) { + if (!isFinite(m[j][k]) || isNaN(m[j][k])) { + return false; + } + } + } + return true; } function setM(ctx, m, updateLineScale) { - if (!matrixIsFinite(m)) { - return; - } - ctx.m_ = m; - - if (updateLineScale) { - // Get the line scale. - // Determinant of this.m_ means how much the area is enlarged by the - // transformation. So its square root can be used as a scale factor - // for width. - let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; - ctx.lineScale_ = sqrt(abs(det)); - } + if (!matrixIsFinite(m)) { + return; + } + ctx.m_ = m; + + if (updateLineScale) { + // Get the line scale. + // Determinant of this.m_ means how much the area is enlarged by the + // transformation. So its square root can be used as a scale factor + // for width. 
+ let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; + ctx.lineScale_ = sqrt(abs(det)); + } } class CanvasPattern_ { - constructor() { - } + constructor() {} } // Gradient / Pattern Stubs class CanvasGradient_ { - constructor(aType) { - this.type_ = aType; - this.x0_ = 0; - this.y0_ = 0; - this.r0_ = 0; - this.x1_ = 0; - this.y1_ = 0; - this.r1_ = 0; - this.colors_ = []; - } - addColorStop(aOffset, aColor) { - aColor = processStyle(aColor); - this.colors_.push({offset:aOffset, - color:aColor.color, - alpha:aColor.alpha}); - } + constructor(aType) { + this.type_ = aType; + this.x0_ = 0; + this.y0_ = 0; + this.r0_ = 0; + this.x1_ = 0; + this.y1_ = 0; + this.r1_ = 0; + this.colors_ = []; + } + addColorStop(aOffset, aColor) { + aColor = processStyle(aColor); + this.colors_.push({ + offset: aOffset, + color: aColor.color, + alpha: aColor.alpha, + }); + } } - /** * This class implements CanvasRenderingContext2D interface as described by * the WHATWG. @@ -168,431 +169,462 @@ class CanvasGradient_ { * be associated with */ export default class CanvasRenderingContext2D_ { - constructor(canvasTarget, scaledWidth, scaledHeight) { - this.m_ = createMatrixIdentity(); - - this.mStack_ = []; - this.aStack_ = []; - this.currentPath_ = []; - - // Canvas context properties - this.strokeStyle = '#000'; - this.fillStyle = '#000'; - - this.lineWidth = 1; - this.lineJoin = 'miter'; - this.lineCap = 'butt'; - this.dashArray = []; - this.miterLimit = 1; - this.globalAlpha = 1; - - if (!("HLines" in canvasTarget) || !Array.isArray(canvasTarget.HLines)) - canvasTarget.HLines = []; - if (!("VLines" in canvasTarget) || !Array.isArray(canvasTarget.VLines)) - canvasTarget.VLines = []; - if (!("Fills" in canvasTarget) || !Array.isArray(canvasTarget.Fills)) - canvasTarget.Fills = []; - if (!("Texts" in canvasTarget) || !Array.isArray(canvasTarget.Texts)) - canvasTarget.Texts = []; - - this.canvas = canvasTarget; - - this.width = scaledWidth; - this.height = scaledHeight; - - this.arcScaleX_ = 1; - 
this.arcScaleY_ = 1; - this.lineScale_ = 1; - - this.currentFont = null; - } - - //private helper methods - #drawPDFLine(p1, p2, lineWidth, color) { - let dashedLine = Array.isArray(this.dashArray) && (this.dashArray.length > 1); - let pL = new PDFLine(p1.x, p1.y, p2.x, p2.y, lineWidth, color, dashedLine); - pL.processLine(this.canvas); - } - - #drawPDFFill(cp, min, max, color) { - let width = max.x - min.x; - let height = max.y - min.y; - let pF = new PDFFill(cp.x, cp.y, width, height, color); - pF.processFill(this.canvas); - } - - #needRemoveRect(x, y, w, h) { - let retVal = (Math.abs(w - Math.abs(h)) < 1 && w < 13); - if (retVal) { - nodeUtil.p2jinfo("Skipped: tiny rect: w=" + w + ", h=" + h); - } - return retVal; - } - - getContext(ctxType) { - return (ctxType === "2d") ? this : null; - } - - setLineDash(lineDash) { - this.dashArray = lineDash; - } - - getLineDash() { - return this.dashArray; - } - - fillText(text, x, y, maxWidth, fontSize) { - if (!text || text.trim().length < 1) - return; - let p = this.getCoords_(x, y); - - let a = processStyle(this.fillStyle || this.strokeStyle); - let color = (!!a) ? a.color : '#000000'; - - this.currentFont.processText(p, text, maxWidth, color, fontSize, this.canvas, this.m_); - }; - - strokeText(text, x, y, maxWidth) { - //MQZ. 10/23/2012, yeah, no hollow text for now - this.fillText(text, x, y, maxWidth); - } - - measureText(text) { - console.warn("to be implemented: contextPrototype.measureText - ", text); - let chars = text.length || 1; - return {width: chars * (this.currentFont.spaceWidth || 5)}; - } - - setFont(fontObj) { - if ((!!this.currentFont) && typeof(this.currentFont.clean) === "function") { - this.currentFont.clean(); - this.currentFont = null; - } - - this.currentFont = new PDFFont(fontObj); - } - - clearRect() { - console.warn("to be implemented: contextPrototype.clearRect"); - } - - beginPath() { - // TODO: Branch current matrix so that save/restore has no effect - // as per safari docs. 
- this.currentPath_ = []; - } - - moveTo(aX, aY) { - let p = this.getCoords_(aX, aY); - this.currentPath_.push({type:'moveTo', x:p.x, y:p.y}); - this.currentX_ = p.x; - this.currentY_ = p.y; - } - - lineTo(aX, aY) { - let p = this.getCoords_(aX, aY); - this.currentPath_.push({type:'lineTo', x:p.x, y:p.y}); - - this.currentX_ = p.x; - this.currentY_ = p.y; - } - - bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { - let p = this.getCoords_(aX, aY); - let cp1 = this.getCoords_(aCP1x, aCP1y); - let cp2 = this.getCoords_(aCP2x, aCP2y); - bezierCurveToHelper(this, cp1, cp2, p); - } - - quadraticCurveTo(aCPx, aCPy, aX, aY) { - // the following is lifted almost directly from - // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes - - let cp = this.getCoords_(aCPx, aCPy); - let p = this.getCoords_(aX, aY); - - let cp1 = { - x:this.currentX_ + 2.0 / 3.0 * (cp.x - this.currentX_), - y:this.currentY_ + 2.0 / 3.0 * (cp.y - this.currentY_) - }; - let cp2 = { - x:cp1.x + (p.x - this.currentX_) / 3.0, - y:cp1.y + (p.y - this.currentY_) / 3.0 - }; - - bezierCurveToHelper(this, cp1, cp2, p); - } - - arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { - let arcType = aClockwise ? 'at' : 'wa'; - - let xStart = aX + mc(aStartAngle) * aRadius; - let yStart = aY + ms(aStartAngle) * aRadius; - - let xEnd = aX + mc(aEndAngle) * aRadius; - let yEnd = aY + ms(aEndAngle) * aRadius; - - // IE won't render arches drawn counter clockwise if xStart == xEnd. - if (xStart == xEnd && !aClockwise) { - xStart += 0.125; // Offset xStart by 1/80 of a pixel. 
Use something - // that can be represented in binary - } - - let p = this.getCoords_(aX, aY); - let pStart = this.getCoords_(xStart, yStart); - let pEnd = this.getCoords_(xEnd, yEnd); - - this.currentPath_.push({type:arcType, - x:p.x, - y:p.y, - radius:aRadius, - xStart:pStart.x, - yStart:pStart.y, - xEnd:pEnd.x, - yEnd:pEnd.y}); - } - - rect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - } - - strokeRect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } - - let oldPath = this.currentPath_; - this.beginPath(); - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - this.stroke(); - - this.currentPath_ = oldPath; - } - - fillRect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } - - let oldPath = this.currentPath_; - this.beginPath(); - - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - this.fill(); - - this.currentPath_ = oldPath; - } - - createLinearGradient(aX0, aY0, aX1, aY1) { - let gradient = new CanvasGradient_('gradient'); - gradient.x0_ = aX0; - gradient.y0_ = aY0; - gradient.x1_ = aX1; - gradient.y1_ = aY1; - return gradient; - } - - createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { - let gradient = new CanvasGradient_('gradientradial'); - gradient.x0_ = aX0; - gradient.y0_ = aY0; - gradient.r0_ = aR0; - gradient.x1_ = aX1; - gradient.y1_ = aY1; - gradient.r1_ = aR1; - return 
gradient; - } - - drawImage(image, var_args) { - //MQZ. no image drawing support for now - } - - getImageData(x, y, w, h) { - //MQZ. returns empty data buffer for now - return { - width:w, - height:h, - data:new Uint8Array(w * h * 4) - }; - } - - stroke(aFill) { - if (this.currentPath_.length < 2) { - return; - } - - let a = processStyle(aFill ? this.fillStyle : this.strokeStyle); - let color = a.color; -// let opacity = a.alpha * this.globalAlpha; - let lineWidth = this.lineScale_ * this.lineWidth; - - let min = {x:null, y:null}; - let max = {x:null, y:null}; - - for (let i = 0; i < this.currentPath_.length; i++) { - let p = this.currentPath_[i]; - - switch (p.type) { - case 'moveTo': - break; - case 'lineTo': - if (!aFill) { //lines - if (i > 0) { - this.#drawPDFLine(this.currentPath_[i-1], p, lineWidth, color); - } - } - break; - case 'close': - if (!aFill) { //lines - if (i > 0) { - this.#drawPDFLine(this.currentPath_[i-1], this.currentPath_[0], lineWidth, color); - } - } - p = null; - break; - case 'bezierCurveTo': - break; - case 'at': - case 'wa': - break; + constructor(canvasTarget, scaledWidth, scaledHeight) { + this.m_ = createMatrixIdentity(); + + this.mStack_ = []; + this.aStack_ = []; + this.currentPath_ = []; + + // Canvas context properties + this.strokeStyle = '#000'; + this.fillStyle = '#000'; + + this.lineWidth = 1; + this.lineJoin = 'miter'; + this.lineCap = 'butt'; + this.dashArray = []; + this.miterLimit = 1; + this.globalAlpha = 1; + + if (!('HLines' in canvasTarget) || !Array.isArray(canvasTarget.HLines)) + canvasTarget.HLines = []; + if (!('VLines' in canvasTarget) || !Array.isArray(canvasTarget.VLines)) + canvasTarget.VLines = []; + if (!('Fills' in canvasTarget) || !Array.isArray(canvasTarget.Fills)) + canvasTarget.Fills = []; + if (!('Texts' in canvasTarget) || !Array.isArray(canvasTarget.Texts)) + canvasTarget.Texts = []; + + this.canvas = canvasTarget; + + this.width = scaledWidth; + this.height = scaledHeight; + + this.arcScaleX_ = 1; 
+ this.arcScaleY_ = 1; + this.lineScale_ = 1; + + this.currentFont = null; + } + + //private helper methods + #drawPDFLine(p1, p2, lineWidth, color) { + let dashedLine = + Array.isArray(this.dashArray) && this.dashArray.length > 1; + let pL = new PDFLine( + p1.x, + p1.y, + p2.x, + p2.y, + lineWidth, + color, + dashedLine + ); + pL.processLine(this.canvas); + } + + #drawPDFFill(cp, min, max, color) { + let width = max.x - min.x; + let height = max.y - min.y; + let pF = new PDFFill(cp.x, cp.y, width, height, color); + pF.processFill(this.canvas); + } + + #needRemoveRect(x, y, w, h) { + let retVal = Math.abs(w - Math.abs(h)) < 1 && w < 13; + if (retVal) { + nodeUtil.p2jinfo('Skipped: tiny rect: w=' + w + ', h=' + h); + } + return retVal; + } + + getContext(ctxType) { + return ctxType === '2d' ? this : null; + } + + setLineDash(lineDash) { + this.dashArray = lineDash; + } + + getLineDash() { + return this.dashArray; + } + + fillText(text, x, y, maxWidth, fontSize) { + if (!text || (!text.length === 1 && text.trim().length < 1)) return; + let p = this.getCoords_(x, y); + + let a = processStyle(this.fillStyle || this.strokeStyle); + let color = !!a ? a.color : '#000000'; + + this.currentFont.processText( + p, + text, + maxWidth, + color, + fontSize, + this.canvas, + this.m_ + ); + } + + strokeText(text, x, y, maxWidth) { + //MQZ. 
10/23/2012, yeah, no hollow text for now + this.fillText(text, x, y, maxWidth); + } + + measureText(text) { + console.warn('to be implemented: contextPrototype.measureText - ', text); + let chars = text.length || 1; + return { width: chars * (this.currentFont.spaceWidth || 5) }; + } + + setFont(fontObj) { + if (!!this.currentFont && typeof this.currentFont.clean === 'function') { + this.currentFont.clean(); + this.currentFont = null; + } + + this.currentFont = new PDFFont(fontObj); + } + + clearRect() { + console.warn('to be implemented: contextPrototype.clearRect'); + } + + beginPath() { + // TODO: Branch current matrix so that save/restore has no effect + // as per safari docs. + this.currentPath_ = []; + } + + moveTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({ type: 'moveTo', x: p.x, y: p.y }); + this.currentX_ = p.x; + this.currentY_ = p.y; + } + + lineTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({ type: 'lineTo', x: p.x, y: p.y }); + + this.currentX_ = p.x; + this.currentY_ = p.y; + } + + bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { + let p = this.getCoords_(aX, aY); + let cp1 = this.getCoords_(aCP1x, aCP1y); + let cp2 = this.getCoords_(aCP2x, aCP2y); + bezierCurveToHelper(this, cp1, cp2, p); + } + + quadraticCurveTo(aCPx, aCPy, aX, aY) { + // the following is lifted almost directly from + // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes + + let cp = this.getCoords_(aCPx, aCPy); + let p = this.getCoords_(aX, aY); + + let cp1 = { + x: this.currentX_ + (2.0 / 3.0) * (cp.x - this.currentX_), + y: this.currentY_ + (2.0 / 3.0) * (cp.y - this.currentY_), + }; + let cp2 = { + x: cp1.x + (p.x - this.currentX_) / 3.0, + y: cp1.y + (p.y - this.currentY_) / 3.0, + }; + + bezierCurveToHelper(this, cp1, cp2, p); + } + + arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { + let arcType = aClockwise ? 
'at' : 'wa'; + + let xStart = aX + mc(aStartAngle) * aRadius; + let yStart = aY + ms(aStartAngle) * aRadius; + + let xEnd = aX + mc(aEndAngle) * aRadius; + let yEnd = aY + ms(aEndAngle) * aRadius; + + // IE won't render arches drawn counter clockwise if xStart == xEnd. + if (xStart == xEnd && !aClockwise) { + xStart += 0.125; // Offset xStart by 1/80 of a pixel. Use something + // that can be represented in binary + } + + let p = this.getCoords_(aX, aY); + let pStart = this.getCoords_(xStart, yStart); + let pEnd = this.getCoords_(xEnd, yEnd); + + this.currentPath_.push({ + type: arcType, + x: p.x, + y: p.y, + radius: aRadius, + xStart: pStart.x, + yStart: pStart.y, + xEnd: pEnd.x, + yEnd: pEnd.y, + }); + } + + rect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return; //try to remove the rectangle behind radio buttons and checkboxes + } + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + } + + strokeRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return; //try to remove the rectangle behind radio buttons and checkboxes + } + + let oldPath = this.currentPath_; + this.beginPath(); + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.stroke(); + + this.currentPath_ = oldPath; + } + + fillRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return; //try to remove the rectangle behind radio buttons and checkboxes + } + + let oldPath = this.currentPath_; + this.beginPath(); + + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.fill(); + + this.currentPath_ = oldPath; + } + + createLinearGradient(aX0, aY0, aX1, aY1) { + let gradient = new 
CanvasGradient_('gradient'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + return gradient; + } + + createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { + let gradient = new CanvasGradient_('gradientradial'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.r0_ = aR0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + gradient.r1_ = aR1; + return gradient; + } + + drawImage(image, var_args) { + //MQZ. no image drawing support for now + } + + getImageData(x, y, w, h) { + //MQZ. returns empty data buffer for now + return { + width: w, + height: h, + data: new Uint8Array(w * h * 4), + }; + } + + stroke(aFill) { + if (this.currentPath_.length < 2) { + return; + } + + let a = processStyle(aFill ? this.fillStyle : this.strokeStyle); + let color = a.color; + // let opacity = a.alpha * this.globalAlpha; + let lineWidth = this.lineScale_ * this.lineWidth; + + let min = { x: null, y: null }; + let max = { x: null, y: null }; + + for (let i = 0; i < this.currentPath_.length; i++) { + let p = this.currentPath_[i]; + + switch (p.type) { + case 'moveTo': + break; + case 'lineTo': + if (!aFill) { + //lines + if (i > 0) { + this.#drawPDFLine( + this.currentPath_[i - 1], + p, + lineWidth, + color + ); + } + } + break; + case 'close': + if (!aFill) { + //lines + if (i > 0) { + this.#drawPDFLine( + this.currentPath_[i - 1], + this.currentPath_[0], + lineWidth, + color + ); + } + } + p = null; + break; + case 'bezierCurveTo': + break; + case 'at': + case 'wa': + break; + } + + // Figure out dimensions so we can set fills' coordinates correctly + if (aFill && p) { + if (min.x == null || p.x < min.x) { + min.x = p.x; } - - // Figure out dimensions so we can set fills' coordinates correctly - if (aFill && p) { - if (min.x == null || p.x < min.x) { - min.x = p.x; - } - if (max.x == null || p.x > max.x) { - max.x = p.x; - } - if (min.y == null || p.y < min.y) { - min.y = p.y; - } - if (max.y == null || p.y > max.y) { - max.y = p.y; - } + if 
(max.x == null || p.x > max.x) { + max.x = p.x; } - } - - if (aFill) { //fill - this.#drawPDFFill(min, min, max, color); - } - } - - fill() { - this.stroke(true); - } - - closePath() { - this.currentPath_.push({type:'close'}); - } - - /** - * @private - */ - getCoords_ (aX, aY) { - let m = this.m_; - return { - x: (aX * m[0][0] + aY * m[1][0] + m[2][0]), - y: (aX * m[0][1] + aY * m[1][1] + m[2][1]) - }; - } - - save() { - let o = {}; - copyState(this, o); - this.aStack_.push(o); - this.mStack_.push(this.m_); - this.m_ = matrixMultiply(createMatrixIdentity(), this.m_); - } - - restore() { - copyState(this.aStack_.pop(), this); - this.m_ = this.mStack_.pop(); - } - - translate(aX, aY) { - let m1 = [ - [1, 0, 0], - [0, 1, 0], - [aX, aY, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), false); - } - - rotate(aRot) { - let c = mc(aRot); - let s = ms(aRot); - - let m1 = [ - [c, s, 0], - [-s, c, 0], - [0, 0, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), false); - } - - scale(aX, aY) { - this.arcScaleX_ *= aX; - this.arcScaleY_ *= aY; - let m1 = [ - [aX, 0, 0], - [0, aY, 0], - [0, 0, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), true); - } - - transform(m11, m12, m21, m22, dx, dy) { - let m1 = [ - [m11, m12, 0], - [m21, m22, 0], - [dx, dy, 1] - ]; - - setM(this, matrixMultiply(m1, this.m_), true); - } - - setTransform(m11, m12, m21, m22, dx, dy) { - let m = [ - [m11, m12, 0], - [m21, m22, 0], - [dx, dy, 1] - ]; - - setM(this, m, true); - } - - /******** STUBS ********/ - clip() { - // TODO: Implement - } - - arcTo() { - // TODO: Implement - } - - createPattern() { - return new CanvasPattern_(); - } -} \ No newline at end of file + if (min.y == null || p.y < min.y) { + min.y = p.y; + } + if (max.y == null || p.y > max.y) { + max.y = p.y; + } + } + } + + if (aFill) { + //fill + this.#drawPDFFill(min, min, max, color); + } + } + + fill() { + this.stroke(true); + } + + closePath() { + this.currentPath_.push({ type: 'close' }); + } + + /** + * @private + */ + 
getCoords_(aX, aY) { + let m = this.m_; + return { + x: aX * m[0][0] + aY * m[1][0] + m[2][0], + y: aX * m[0][1] + aY * m[1][1] + m[2][1], + }; + } + + save() { + let o = {}; + copyState(this, o); + this.aStack_.push(o); + this.mStack_.push(this.m_); + this.m_ = matrixMultiply(createMatrixIdentity(), this.m_); + } + + restore() { + copyState(this.aStack_.pop(), this); + this.m_ = this.mStack_.pop(); + } + + translate(aX, aY) { + let m1 = [ + [1, 0, 0], + [0, 1, 0], + [aX, aY, 1], + ]; + + setM(this, matrixMultiply(m1, this.m_), false); + } + + rotate(aRot) { + let c = mc(aRot); + let s = ms(aRot); + + let m1 = [ + [c, s, 0], + [-s, c, 0], + [0, 0, 1], + ]; + + setM(this, matrixMultiply(m1, this.m_), false); + } + + scale(aX, aY) { + this.arcScaleX_ *= aX; + this.arcScaleY_ *= aY; + let m1 = [ + [aX, 0, 0], + [0, aY, 0], + [0, 0, 1], + ]; + + setM(this, matrixMultiply(m1, this.m_), true); + } + + transform(m11, m12, m21, m22, dx, dy) { + let m1 = [ + [m11, m12, 0], + [m21, m22, 0], + [dx, dy, 1], + ]; + + setM(this, matrixMultiply(m1, this.m_), true); + } + + setTransform(m11, m12, m21, m22, dx, dy) { + let m = [ + [m11, m12, 0], + [m21, m22, 0], + [dx, dy, 1], + ]; + + setM(this, m, true); + } + + /******** STUBS ********/ + clip() { + // TODO: Implement + } + + arcTo() { + // TODO: Implement + } + + createPattern() { + return new CanvasPattern_(); + } +} diff --git a/lib/pdffont.js b/lib/pdffont.js index f8790dc1..ade4b4a0 100644 --- a/lib/pdffont.js +++ b/lib/pdffont.js @@ -1,313 +1,390 @@ -import nodeUtil from "util"; -import PDFUnit from "./pdfunit.js"; -import {kFontFaces, kFontStyles} from "./pdfconst.js"; - -const _boldSubNames = ["bd", "bold", "demi", "black"]; -const _stdFonts = ["arial", "helvetica", "sans-serif ", "courier ","monospace ", "ocr "]; +import nodeUtil from 'util'; +import PDFUnit from './pdfunit.js'; +import { kFontFaces, kFontStyles } from './pdfconst.js'; + +const _boldSubNames = ['bd', 'bold', 'demi', 'black']; +const _stdFonts = [ + 
'arial', + 'helvetica', + 'sans-serif ', + 'courier ', + 'monospace ', + 'ocr ', +]; const DISTANCE_DELTA = 0.1; export default class PDFFont { - #initTypeName() { - let typeName = (this.fontObj.name || this.fontObj.fallbackName); - if (!typeName) { - typeName = kFontFaces[0]; //default font family name - } - typeName = typeName.toLowerCase(); - return typeName; - } - - #initSubType() { - let subType = this.typeName; - let bold = false; - - let nameArray = this.typeName.split('+'); - if (Array.isArray(nameArray) && nameArray.length > 1) { - subType = nameArray[1].split("-"); - if (Array.isArray(subType) && subType.length > 1) { - let subName = subType[1].toLowerCase(); - bold = _boldSubNames.indexOf(subName) >= 0; - subType = subType[0]; - } - } - return {subType, bold}; - } - - #initSymbol() { - let isSymbol = this.typeName.indexOf("symbol") > 0 || kFontFaces[2].indexOf(this.subType) >= 0; - if (this.fontObj.isSymbolicFont) { - let mFonts = _stdFonts.filter( (oneName) => (this.typeName.indexOf(oneName) >= 0) ); - - if (mFonts.length > 0) { - this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) reset the flag for now - nodeUtil.p2jinfo("Reset: isSymbolicFont (false) for " + this.fontObj.name); - } - } - else { - if (isSymbol) { - this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c - nodeUtil.p2jinfo("Reset: isSymbolicFont (true) for " + this.fontObj.name); - } - } - return isSymbol; - } - - #initSpaceWidth() { - let spaceWidth = this.fontObj.spaceWidth; - if (!spaceWidth) { - var spaceId = Array.isArray(this.fontObj.toFontChar) ? this.fontObj.toFontChar.indexOf(32) : -1; - spaceWidth = (spaceId >= 0 && Array.isArray(this.fontObj.widths)) ? 
this.fontObj.widths[spaceId] : 250; - } - spaceWidth = PDFUnit.toFormX(spaceWidth) / 32; - return spaceWidth; - } - - // constructor - constructor(fontObj) { - this.fontObj = fontObj; - - this.typeName = this.#initTypeName(); - - const {subType, bold} = this.#initSubType(); - this.subType = subType; - this.bold = bold; - - this.isSymbol = this.#initSymbol(); - this.spaceWidth = this.#initSpaceWidth(); - - this.fontSize = 1; - this.faceIdx = 0; - this.italic = false; - this.fontStyleId = -1; - } - - /** sort text blocks by y then x */ - static compareBlockPos(t1, t2) { - if (t1.y < t2.y - DISTANCE_DELTA) { + #initTypeName() { + let typeName = this.fontObj.name || this.fontObj.fallbackName; + if (!typeName) { + typeName = kFontFaces[0]; //default font family name + } + typeName = typeName.toLowerCase(); + return typeName; + } + + #initSubType() { + let subType = this.typeName; + let bold = false; + + let nameArray = this.typeName.split('+'); + if (Array.isArray(nameArray) && nameArray.length > 1) { + subType = nameArray[1].split('-'); + if (Array.isArray(subType) && subType.length > 1) { + let subName = subType[1].toLowerCase(); + bold = _boldSubNames.indexOf(subName) >= 0; + subType = subType[0]; + } + } + return { subType, bold }; + } + + #initSymbol() { + let isSymbol = + this.typeName.indexOf('symbol') > 0 || + kFontFaces[2].indexOf(this.subType) >= 0; + if (this.fontObj.isSymbolicFont) { + let mFonts = _stdFonts.filter( + (oneName) => this.typeName.indexOf(oneName) >= 0 + ); + + if (mFonts.length > 0) { + this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) 
reset the flag for now + nodeUtil.p2jinfo( + 'Reset: isSymbolicFont (false) for ' + this.fontObj.name + ); + } + } else { + if (isSymbol) { + this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c + nodeUtil.p2jinfo( + 'Reset: isSymbolicFont (true) for ' + this.fontObj.name + ); + } + } + return isSymbol; + } + + #initSpaceWidth() { + let spaceWidth = this.fontObj.spaceWidth; + if (!spaceWidth) { + var spaceId = Array.isArray(this.fontObj.toFontChar) + ? this.fontObj.toFontChar.indexOf(32) + : -1; + spaceWidth = + spaceId >= 0 && Array.isArray(this.fontObj.widths) + ? this.fontObj.widths[spaceId] + : 250; + } + spaceWidth = PDFUnit.toFormX(spaceWidth) / 32; + return spaceWidth; + } + + // constructor + constructor(fontObj) { + this.fontObj = fontObj; + + this.typeName = this.#initTypeName(); + + const { subType, bold } = this.#initSubType(); + this.subType = subType; + this.bold = bold; + + this.isSymbol = this.#initSymbol(); + this.spaceWidth = this.#initSpaceWidth(); + + this.fontSize = 1; + this.faceIdx = 0; + this.italic = false; + this.fontStyleId = -1; + } + + /** sort text blocks by y then x */ + static compareBlockPos(t1, t2) { + if (t1.y < t2.y - DISTANCE_DELTA) { + return -1; + } + if (Math.abs(t1.y - t2.y) <= DISTANCE_DELTA) { + if (t1.x < t2.x - DISTANCE_DELTA) { return -1; - } - if (Math.abs(t1.y - t2.y) <= DISTANCE_DELTA) { - if (t1.x < t2.x - DISTANCE_DELTA) { - return -1; - } - if (Math.abs(t1.x - t2.x) <= DISTANCE_DELTA) { - return 0; - } - } - return 1; - } - - static haveSameStyle(t1, t2) { - let retVal = t1.R[0].S === t2.R[0].S; - if (retVal && t1.R[0].S < 0) { - for (let i = 0; i < t1.R[0].TS.length; i++) { - if (t1.R[0].TS[i] !== t2.R[0].TS[i]) { - retVal = false; - break; - } - } - } - if (retVal) { // make sure both block are not rotated - retVal = (typeof t1.R[0].RA === 'undefined') && (typeof t2.R[0].RA === 'undefined'); - } - - return retVal; - } - - static getSpaceThreshHold(t1) { - return (PDFFont.getFontSize(t1)/12) * t1.sw; - } - 
- static areAdjacentBlocks(t1, t2) { - const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; - const isDistanceSmallerThanASpace = ((t2.x - t1.x - t1.w) < PDFFont.getSpaceThreshHold(t1)); - - return isInSameLine && isDistanceSmallerThanASpace; - } - - static getFontSize(textBlock) { - const sId = textBlock.R[0].S; - return (sId < 0) ? textBlock.R[0].TS[1] : kFontStyles[sId][1]; - } - - static areDuplicateBlocks(t1, t2) { - return t1.x == t2.x && t1.y == t2.y && t1.R[0].T == t2.R[0].T && PDFFont.haveSameStyle(t1, t2); - } - - // private - #setFaceIndex() { - const fontObj = this.fontObj; - - this.bold = fontObj.bold; - if (!this.bold) { - this.bold = this.typeName.indexOf("bold") >= 0 || this.typeName.indexOf("black") >= 0; - } - this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42 - // Extended the fix for https://github.com/modesty/pdf2json/issues/42 - if (!this.italic) { - this.italic = this.typeName.indexOf("italic") >= 0 || this.typeName.indexOf("oblique") >= 0; - } - // Added detection of hybrid dual bolditalic fonts - if (((!this.bold) || (!this.italic)) && (this.typeName.indexOf("boldobl") >= 0)) { - this.bold = true; - this.italic = true; - } - - let typeName = this.subType; - if (fontObj.isSerifFont) { - if (kFontFaces[1].indexOf(typeName) >= 0) - this.faceIdx = 1; - } - else if (kFontFaces[2].indexOf(this.subType) >= 0) { - this.faceIdx = 2; - } - else if (fontObj.isMonospace) { - this.faceIdx = 3; - - if (kFontFaces[4].indexOf(typeName) >= 0) - this.faceIdx = 4; - else if (kFontFaces[5].indexOf(typeName) >= 0) - this.faceIdx = 5; - } - else if (fontObj.isSymbolicFont) { - this.faceIdx = 2; - } - - if (this.faceIdx == 0) { - if (this.typeName.indexOf("narrow") > 0) - this.faceIdx = 1; - } - -// nodeUtil.p2jinfo"typeName = " + typeName + " => faceIdx = " + this.faceIdx); - } - - #getFontStyleIndex(fontSize) { - this.#setFaceIndex(); - - //MQZ Feb.28.2013. 
Adjust bold text fontsize to work around word spacing issue - this.fontSize = (this.bold && (fontSize > 12)) ? fontSize + 1 : fontSize; - - let fsa = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; - let retVal = -1; - - kFontStyles.forEach(function(element, index, list){ - if (retVal === -1) { - if (element[0] === fsa[0] && element[1] === fsa[1] && - element[2] === fsa[2] && element[3] === fsa[3]) { - retVal = index; - } - } - }); - - return retVal; - } - - #processSymbolicFont(str) { - let retVal = str; - - if (!str || str.length !== 1) - return retVal; - - if (!this.fontObj.isSymbolicFont || !this.isSymbol) { - if (retVal == "C" || retVal == "G") { //prevent symbolic encoding from the client - retVal = " " + retVal + " "; //sample: va_ind_760c - } - return retVal; - } - - switch(str.charCodeAt(0)) { - case 20: retVal = '\u2713'; break; //check mark - case 70: retVal = (this.fontObj.type === "CIDFontType0") ? '\u26A0' : '\u007D'; break; //exclaimation in triangle OR right curly bracket - case 71: retVal = '\u25b6'; break; //right triangle - case 97: retVal = '\u25b6'; break; //right triangle - case 99: retVal = this.isSymbol ? '\u2022' : '\u25b2'; break; //up triangle. set to Bullet Dot for VA SchSCR - case 100: retVal = '\u25bc'; break; //down triangle - case 103: retVal = '\u27A8'; break; //right arrow. 
sample: va_ind_760pff and pmt - case 106: retVal = ''; break; //VA 301: string j character by the checkbox, hide it for now - case 114: retVal = '\u2022'; break; //Bullet dot - case 115: retVal = '\u25b2'; break; //up triangle - case 116: retVal = '\u2022'; break; //Bullet dot - case 118: retVal = '\u2022'; break; //Bullet dot - default: - nodeUtil.p2jinfo(this.fontObj.type + " - SymbolicFont - (" + this.fontObj.name + ") : " + - str.charCodeAt(0) + "::" + str.charCodeAt(1) + " => " + retVal); - } - - return retVal; - } - - #textRotationAngle(matrix2D) { - let retVal = 0; - if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) { - if (matrix2D[0][1] != 0 && matrix2D[1][0] != 0) { - if ((matrix2D[0][1] / matrix2D[1][0]) + 1 < 0.0001) - retVal = 90; + } + if (Math.abs(t1.x - t2.x) <= DISTANCE_DELTA) { + return 0; + } + } + return 1; + } + + static haveSameStyle(t1, t2) { + let retVal = t1.R[0].S === t2.R[0].S; + if (retVal && t1.R[0].S < 0) { + for (let i = 0; i < t1.R[0].TS.length; i++) { + if (t1.R[0].TS[i] !== t2.R[0].TS[i]) { + retVal = false; + break; } - } - else if (matrix2D[0][0] !== 0 && matrix2D[1][1] !== 0) { - let r1 = Math.atan(-matrix2D[0][1] / matrix2D[0][0]); - let r2 = Math.atan(matrix2D[1][0] / matrix2D[1][1]); - if (Math.abs(r1) > 0.0001 && (r1 - r2 < 0.0001)) { - retVal = r1 * 180 / Math.PI; + } + } + if (retVal) { + // make sure both block are not rotated + retVal = + typeof t1.R[0].RA === 'undefined' && + typeof t2.R[0].RA === 'undefined'; + } + + return retVal; + } + + static getSpaceThreshHold(t1) { + return (PDFFont.getFontSize(t1) / 12) * t1.sw; + } + + static areAdjacentBlocks(t1, t2) { + const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; + const isDistanceSmallerThanASpace = + t2.x - t1.x - t1.w < PDFFont.getSpaceThreshHold(t1); + + return isInSameLine && isDistanceSmallerThanASpace; + } + + static getFontSize(textBlock) { + const sId = textBlock.R[0].S; + return sId < 0 ? 
textBlock.R[0].TS[1] : kFontStyles[sId][1]; + } + + static areDuplicateBlocks(t1, t2) { + return ( + t1.x == t2.x && + t1.y == t2.y && + t1.R[0].T == t2.R[0].T && + PDFFont.haveSameStyle(t1, t2) + ); + } + + // private + #setFaceIndex() { + const fontObj = this.fontObj; + + this.bold = fontObj.bold; + if (!this.bold) { + this.bold = + this.typeName.indexOf('bold') >= 0 || + this.typeName.indexOf('black') >= 0; + } + this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42 + // Extended the fix for https://github.com/modesty/pdf2json/issues/42 + if (!this.italic) { + this.italic = + this.typeName.indexOf('italic') >= 0 || + this.typeName.indexOf('oblique') >= 0; + } + // Added detection of hybrid dual bolditalic fonts + if ( + (!this.bold || !this.italic) && + this.typeName.indexOf('boldobl') >= 0 + ) { + this.bold = true; + this.italic = true; + } + + let typeName = this.subType; + if (fontObj.isSerifFont) { + if (kFontFaces[1].indexOf(typeName) >= 0) this.faceIdx = 1; + } else if (kFontFaces[2].indexOf(this.subType) >= 0) { + this.faceIdx = 2; + } else if (fontObj.isMonospace) { + this.faceIdx = 3; + + if (kFontFaces[4].indexOf(typeName) >= 0) this.faceIdx = 4; + else if (kFontFaces[5].indexOf(typeName) >= 0) this.faceIdx = 5; + } else if (fontObj.isSymbolicFont) { + this.faceIdx = 2; + } + + if (this.faceIdx == 0) { + if (this.typeName.indexOf('narrow') > 0) this.faceIdx = 1; + } + + // nodeUtil.p2jinfo"typeName = " + typeName + " => faceIdx = " + this.faceIdx); + } + + #getFontStyleIndex(fontSize) { + this.#setFaceIndex(); + + //MQZ Feb.28.2013. Adjust bold text fontsize to work around word spacing issue + this.fontSize = this.bold && fontSize > 12 ? fontSize + 1 : fontSize; + + let fsa = [ + this.faceIdx, + this.fontSize, + this.bold ? 1 : 0, + this.italic ? 
1 : 0, + ]; + let retVal = -1; + + kFontStyles.forEach(function (element, index, list) { + if (retVal === -1) { + if ( + element[0] === fsa[0] && + element[1] === fsa[1] && + element[2] === fsa[2] && + element[3] === fsa[3] + ) { + retVal = index; } - } - return retVal; - } - - // public instance methods - processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) { - const text = this.#processSymbolicFont(str); - if (!text) { - return; - } - this.fontStyleId = this.#getFontStyleIndex(fontSize); - - // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary - // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; - const TS = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; - - const clrId = PDFUnit.findColorIndex(color); - const colorObj = (clrId >= 0 && clrId < PDFUnit.colorCount()) ? {clr: clrId} : {oc: color}; - - let textRun = { - T: this.flash_encode(text), - S: this.fontStyleId, - TS: TS - }; - const rAngle = this.#textRotationAngle(matrix2D); - if (rAngle != 0) { - nodeUtil.p2jinfo(str + ": rotated " + rAngle + " degree."); - textRun = {...textRun, RA: rAngle}; - } - - const oneText = {x: PDFUnit.toFormX(p.x) - 0.25, - y: PDFUnit.toFormY(p.y) - 0.75, - w: PDFUnit.toFixedFloat(maxWidth), - ...colorObj, //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) - sw: this.spaceWidth, //font space width, use to merge adjacent text blocks - A: "left", - R: [textRun] - }; - - targetData.Texts.push(oneText); - } - - flash_encode(str) { - let retVal = encodeURIComponent(str); - retVal = retVal.replace("%C2%96", "-"); - retVal = retVal.replace("%C2%91", "%27"); - retVal = retVal.replace("%C2%92", "%27"); - retVal = retVal.replace("%C2%82", "%27"); - retVal = retVal.replace("%C2%93", "%22"); - retVal = retVal.replace("%C2%94", "%22"); - retVal = retVal.replace("%C2%84", "%22"); - retVal = retVal.replace("%C2%8B", "%C2%AB"); - retVal = 
retVal.replace("%C2%9B", "%C2%BB"); - - return retVal; - } - - clean() { - this.fontObj = null; - delete this.fontObj; - } -} \ No newline at end of file + } + }); + + return retVal; + } + + #processSymbolicFont(str) { + let retVal = str; + + if (!str || str.length !== 1) return retVal; + + if (!this.fontObj.isSymbolicFont || !this.isSymbol) { + if (retVal == 'C' || retVal == 'G') { + //prevent symbolic encoding from the client + retVal = ' ' + retVal + ' '; //sample: va_ind_760c + } + return retVal; + } + + switch (str.charCodeAt(0)) { + case 20: + retVal = '\u2713'; + break; //check mark + case 70: + retVal = this.fontObj.type === 'CIDFontType0' ? '\u26A0' : '\u007D'; + break; //exclaimation in triangle OR right curly bracket + case 71: + retVal = '\u25b6'; + break; //right triangle + case 97: + retVal = '\u25b6'; + break; //right triangle + case 99: + retVal = this.isSymbol ? '\u2022' : '\u25b2'; + break; //up triangle. set to Bullet Dot for VA SchSCR + case 100: + retVal = '\u25bc'; + break; //down triangle + case 103: + retVal = '\u27A8'; + break; //right arrow. 
sample: va_ind_760pff and pmt + case 106: + retVal = ''; + break; //VA 301: string j character by the checkbox, hide it for now + case 114: + retVal = '\u2022'; + break; //Bullet dot + case 115: + retVal = '\u25b2'; + break; //up triangle + case 116: + retVal = '\u2022'; + break; //Bullet dot + case 118: + retVal = '\u2022'; + break; //Bullet dot + default: + nodeUtil.p2jinfo( + this.fontObj.type + + ' - SymbolicFont - (' + + this.fontObj.name + + ') : ' + + str.charCodeAt(0) + + '::' + + str.charCodeAt(1) + + ' => ' + + retVal + ); + } + + return retVal; + } + + #textRotationAngle(matrix2D) { + let retVal = 0; + if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) { + if (matrix2D[0][1] != 0 && matrix2D[1][0] != 0) { + if (matrix2D[0][1] / matrix2D[1][0] + 1 < 0.0001) retVal = 90; + } + } else if (matrix2D[0][0] !== 0 && matrix2D[1][1] !== 0) { + let r1 = Math.atan(-matrix2D[0][1] / matrix2D[0][0]); + let r2 = Math.atan(matrix2D[1][0] / matrix2D[1][1]); + if (Math.abs(r1) > 0.0001 && r1 - r2 < 0.0001) { + retVal = (r1 * 180) / Math.PI; + } + } + return retVal; + } + + // public instance methods + processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) { + const text = this.#processSymbolicFont(str); + if (!text) { + return; + } + this.fontStyleId = this.#getFontStyleIndex(fontSize); + + // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary + // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; + const TS = [ + this.faceIdx, + this.fontSize, + this.bold ? 1 : 0, + this.italic ? 1 : 0, + ]; + + const clrId = PDFUnit.findColorIndex(color); + const colorObj = + clrId >= 0 && clrId < PDFUnit.colorCount() + ? 
{ clr: clrId } + : { oc: color }; + + let textRun = { + T: this.flash_encode(text), + S: this.fontStyleId, + TS: TS, + }; + const rAngle = this.#textRotationAngle(matrix2D); + if (rAngle != 0) { + nodeUtil.p2jinfo(str + ': rotated ' + rAngle + ' degree.'); + textRun = { ...textRun, RA: rAngle }; + } + + const oneText = { + x: PDFUnit.toFormX(p.x) - 0.25, + y: PDFUnit.toFormY(p.y) - 0.75, + w: PDFUnit.toFixedFloat(maxWidth), + ...colorObj, //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) + sw: this.spaceWidth, //font space width, use to merge adjacent text blocks + A: 'left', + R: [textRun], + }; + + targetData.Texts.push(oneText); + } + + flash_encode(str) { + let retVal = encodeURIComponent(str); + retVal = retVal.replace('%C2%96', '-'); + retVal = retVal.replace('%C2%91', '%27'); + retVal = retVal.replace('%C2%92', '%27'); + retVal = retVal.replace('%C2%82', '%27'); + retVal = retVal.replace('%C2%93', '%22'); + retVal = retVal.replace('%C2%94', '%22'); + retVal = retVal.replace('%C2%84', '%22'); + retVal = retVal.replace('%C2%8B', '%C2%AB'); + retVal = retVal.replace('%C2%9B', '%C2%BB'); + + return retVal; + } + + clean() { + this.fontObj = null; + delete this.fontObj; + } +} diff --git a/pdfparser.cjs b/pdfparser.cjs index 974ee4ca..fde9066b 100644 --- a/pdfparser.cjs +++ b/pdfparser.cjs @@ -296,474 +296,552 @@ class PDFFill{ } } -const _boldSubNames = ["bd", "bold", "demi", "black"]; -const _stdFonts = ["arial", "helvetica", "sans-serif ", "courier ","monospace ", "ocr "]; +const _boldSubNames = ['bd', 'bold', 'demi', 'black']; +const _stdFonts = [ + 'arial', + 'helvetica', + 'sans-serif ', + 'courier ', + 'monospace ', + 'ocr ', +]; const DISTANCE_DELTA = 0.1; class PDFFont { - #initTypeName() { - let typeName = (this.fontObj.name || this.fontObj.fallbackName); - if (!typeName) { - typeName = kFontFaces[0]; //default font family name - } - typeName = typeName.toLowerCase(); - return typeName; - } + #initTypeName() { + let 
typeName = this.fontObj.name || this.fontObj.fallbackName; + if (!typeName) { + typeName = kFontFaces[0]; //default font family name + } + typeName = typeName.toLowerCase(); + return typeName; + } - #initSubType() { - let subType = this.typeName; - let bold = false; - - let nameArray = this.typeName.split('+'); - if (Array.isArray(nameArray) && nameArray.length > 1) { - subType = nameArray[1].split("-"); - if (Array.isArray(subType) && subType.length > 1) { - let subName = subType[1].toLowerCase(); - bold = _boldSubNames.indexOf(subName) >= 0; - subType = subType[0]; - } - } - return {subType, bold}; - } + #initSubType() { + let subType = this.typeName; + let bold = false; + + let nameArray = this.typeName.split('+'); + if (Array.isArray(nameArray) && nameArray.length > 1) { + subType = nameArray[1].split('-'); + if (Array.isArray(subType) && subType.length > 1) { + let subName = subType[1].toLowerCase(); + bold = _boldSubNames.indexOf(subName) >= 0; + subType = subType[0]; + } + } + return { subType, bold }; + } - #initSymbol() { - let isSymbol = this.typeName.indexOf("symbol") > 0 || kFontFaces[2].indexOf(this.subType) >= 0; - if (this.fontObj.isSymbolicFont) { - let mFonts = _stdFonts.filter( (oneName) => (this.typeName.indexOf(oneName) >= 0) ); + #initSymbol() { + let isSymbol = + this.typeName.indexOf('symbol') > 0 || + kFontFaces[2].indexOf(this.subType) >= 0; + if (this.fontObj.isSymbolicFont) { + let mFonts = _stdFonts.filter( + (oneName) => this.typeName.indexOf(oneName) >= 0 + ); + + if (mFonts.length > 0) { + this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) 
reset the flag for now + nodeUtil.p2jinfo( + 'Reset: isSymbolicFont (false) for ' + this.fontObj.name + ); + } + } else { + if (isSymbol) { + this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c + nodeUtil.p2jinfo( + 'Reset: isSymbolicFont (true) for ' + this.fontObj.name + ); + } + } + return isSymbol; + } - if (mFonts.length > 0) { - this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) reset the flag for now - nodeUtil.p2jinfo("Reset: isSymbolicFont (false) for " + this.fontObj.name); - } - } - else { - if (isSymbol) { - this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c - nodeUtil.p2jinfo("Reset: isSymbolicFont (true) for " + this.fontObj.name); - } - } - return isSymbol; - } + #initSpaceWidth() { + let spaceWidth = this.fontObj.spaceWidth; + if (!spaceWidth) { + var spaceId = Array.isArray(this.fontObj.toFontChar) + ? this.fontObj.toFontChar.indexOf(32) + : -1; + spaceWidth = + spaceId >= 0 && Array.isArray(this.fontObj.widths) + ? this.fontObj.widths[spaceId] + : 250; + } + spaceWidth = PDFUnit.toFormX(spaceWidth) / 32; + return spaceWidth; + } - #initSpaceWidth() { - let spaceWidth = this.fontObj.spaceWidth; - if (!spaceWidth) { - var spaceId = Array.isArray(this.fontObj.toFontChar) ? this.fontObj.toFontChar.indexOf(32) : -1; - spaceWidth = (spaceId >= 0 && Array.isArray(this.fontObj.widths)) ? 
this.fontObj.widths[spaceId] : 250; - } - spaceWidth = PDFUnit.toFormX(spaceWidth) / 32; - return spaceWidth; - } + // constructor + constructor(fontObj) { + this.fontObj = fontObj; - // constructor - constructor(fontObj) { - this.fontObj = fontObj; + this.typeName = this.#initTypeName(); - this.typeName = this.#initTypeName(); + const { subType, bold } = this.#initSubType(); + this.subType = subType; + this.bold = bold; - const {subType, bold} = this.#initSubType(); - this.subType = subType; - this.bold = bold; + this.isSymbol = this.#initSymbol(); + this.spaceWidth = this.#initSpaceWidth(); - this.isSymbol = this.#initSymbol(); - this.spaceWidth = this.#initSpaceWidth(); + this.fontSize = 1; + this.faceIdx = 0; + this.italic = false; + this.fontStyleId = -1; + } - this.fontSize = 1; - this.faceIdx = 0; - this.italic = false; - this.fontStyleId = -1; - } - - /** sort text blocks by y then x */ - static compareBlockPos(t1, t2) { - if (t1.y < t2.y - DISTANCE_DELTA) { + /** sort text blocks by y then x */ + static compareBlockPos(t1, t2) { + if (t1.y < t2.y - DISTANCE_DELTA) { + return -1; + } + if (Math.abs(t1.y - t2.y) <= DISTANCE_DELTA) { + if (t1.x < t2.x - DISTANCE_DELTA) { return -1; - } - if (Math.abs(t1.y - t2.y) <= DISTANCE_DELTA) { - if (t1.x < t2.x - DISTANCE_DELTA) { - return -1; - } - if (Math.abs(t1.x - t2.x) <= DISTANCE_DELTA) { - return 0; - } - } - return 1; - } + } + if (Math.abs(t1.x - t2.x) <= DISTANCE_DELTA) { + return 0; + } + } + return 1; + } - static haveSameStyle(t1, t2) { - let retVal = t1.R[0].S === t2.R[0].S; - if (retVal && t1.R[0].S < 0) { - for (let i = 0; i < t1.R[0].TS.length; i++) { - if (t1.R[0].TS[i] !== t2.R[0].TS[i]) { - retVal = false; - break; - } + static haveSameStyle(t1, t2) { + let retVal = t1.R[0].S === t2.R[0].S; + if (retVal && t1.R[0].S < 0) { + for (let i = 0; i < t1.R[0].TS.length; i++) { + if (t1.R[0].TS[i] !== t2.R[0].TS[i]) { + retVal = false; + break; } - } - if (retVal) { // make sure both block are not rotated 
- retVal = (typeof t1.R[0].RA === 'undefined') && (typeof t2.R[0].RA === 'undefined'); - } - - return retVal; - } - - static getSpaceThreshHold(t1) { - return (PDFFont.getFontSize(t1)/12) * t1.sw; - } - - static areAdjacentBlocks(t1, t2) { - const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; - const isDistanceSmallerThanASpace = ((t2.x - t1.x - t1.w) < PDFFont.getSpaceThreshHold(t1)); - - return isInSameLine && isDistanceSmallerThanASpace; - } + } + } + if (retVal) { + // make sure both block are not rotated + retVal = + typeof t1.R[0].RA === 'undefined' && + typeof t2.R[0].RA === 'undefined'; + } - static getFontSize(textBlock) { - const sId = textBlock.R[0].S; - return (sId < 0) ? textBlock.R[0].TS[1] : kFontStyles[sId][1]; - } + return retVal; + } - static areDuplicateBlocks(t1, t2) { - return t1.x == t2.x && t1.y == t2.y && t1.R[0].T == t2.R[0].T && PDFFont.haveSameStyle(t1, t2); - } + static getSpaceThreshHold(t1) { + return (PDFFont.getFontSize(t1) / 12) * t1.sw; + } - // private - #setFaceIndex() { - const fontObj = this.fontObj; + static areAdjacentBlocks(t1, t2) { + const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; + const isDistanceSmallerThanASpace = + t2.x - t1.x - t1.w < PDFFont.getSpaceThreshHold(t1); - this.bold = fontObj.bold; - if (!this.bold) { - this.bold = this.typeName.indexOf("bold") >= 0 || this.typeName.indexOf("black") >= 0; - } - this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42 - // Extended the fix for https://github.com/modesty/pdf2json/issues/42 - if (!this.italic) { - this.italic = this.typeName.indexOf("italic") >= 0 || this.typeName.indexOf("oblique") >= 0; - } - // Added detection of hybrid dual bolditalic fonts - if (((!this.bold) || (!this.italic)) && (this.typeName.indexOf("boldobl") >= 0)) { - this.bold = true; - this.italic = true; - } + return isInSameLine && isDistanceSmallerThanASpace; + } - let typeName = this.subType; - if (fontObj.isSerifFont) { - if 
(kFontFaces[1].indexOf(typeName) >= 0) - this.faceIdx = 1; - } - else if (kFontFaces[2].indexOf(this.subType) >= 0) { - this.faceIdx = 2; - } - else if (fontObj.isMonospace) { - this.faceIdx = 3; + static getFontSize(textBlock) { + const sId = textBlock.R[0].S; + return sId < 0 ? textBlock.R[0].TS[1] : kFontStyles[sId][1]; + } - if (kFontFaces[4].indexOf(typeName) >= 0) - this.faceIdx = 4; - else if (kFontFaces[5].indexOf(typeName) >= 0) - this.faceIdx = 5; - } - else if (fontObj.isSymbolicFont) { - this.faceIdx = 2; - } + static areDuplicateBlocks(t1, t2) { + return ( + t1.x == t2.x && + t1.y == t2.y && + t1.R[0].T == t2.R[0].T && + PDFFont.haveSameStyle(t1, t2) + ); + } - if (this.faceIdx == 0) { - if (this.typeName.indexOf("narrow") > 0) - this.faceIdx = 1; - } + // private + #setFaceIndex() { + const fontObj = this.fontObj; -// nodeUtil.p2jinfo"typeName = " + typeName + " => faceIdx = " + this.faceIdx); - } + this.bold = fontObj.bold; + if (!this.bold) { + this.bold = + this.typeName.indexOf('bold') >= 0 || + this.typeName.indexOf('black') >= 0; + } + this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42 + // Extended the fix for https://github.com/modesty/pdf2json/issues/42 + if (!this.italic) { + this.italic = + this.typeName.indexOf('italic') >= 0 || + this.typeName.indexOf('oblique') >= 0; + } + // Added detection of hybrid dual bolditalic fonts + if ( + (!this.bold || !this.italic) && + this.typeName.indexOf('boldobl') >= 0 + ) { + this.bold = true; + this.italic = true; + } - #getFontStyleIndex(fontSize) { - this.#setFaceIndex(); + let typeName = this.subType; + if (fontObj.isSerifFont) { + if (kFontFaces[1].indexOf(typeName) >= 0) this.faceIdx = 1; + } else if (kFontFaces[2].indexOf(this.subType) >= 0) { + this.faceIdx = 2; + } else if (fontObj.isMonospace) { + this.faceIdx = 3; + + if (kFontFaces[4].indexOf(typeName) >= 0) this.faceIdx = 4; + else if (kFontFaces[5].indexOf(typeName) >= 0) this.faceIdx = 5; + } else if 
(fontObj.isSymbolicFont) { + this.faceIdx = 2; + } - //MQZ Feb.28.2013. Adjust bold text fontsize to work around word spacing issue - this.fontSize = (this.bold && (fontSize > 12)) ? fontSize + 1 : fontSize; + if (this.faceIdx == 0) { + if (this.typeName.indexOf('narrow') > 0) this.faceIdx = 1; + } - let fsa = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; - let retVal = -1; + // nodeUtil.p2jinfo"typeName = " + typeName + " => faceIdx = " + this.faceIdx); + } - kFontStyles.forEach(function(element, index, list){ - if (retVal === -1) { - if (element[0] === fsa[0] && element[1] === fsa[1] && - element[2] === fsa[2] && element[3] === fsa[3]) { - retVal = index; - } + #getFontStyleIndex(fontSize) { + this.#setFaceIndex(); + + //MQZ Feb.28.2013. Adjust bold text fontsize to work around word spacing issue + this.fontSize = this.bold && fontSize > 12 ? fontSize + 1 : fontSize; + + let fsa = [ + this.faceIdx, + this.fontSize, + this.bold ? 1 : 0, + this.italic ? 1 : 0, + ]; + let retVal = -1; + + kFontStyles.forEach(function (element, index, list) { + if (retVal === -1) { + if ( + element[0] === fsa[0] && + element[1] === fsa[1] && + element[2] === fsa[2] && + element[3] === fsa[3] + ) { + retVal = index; } - }); - - return retVal; - } + } + }); - #processSymbolicFont(str) { - let retVal = str; + return retVal; + } - if (!str || str.length !== 1) - return retVal; + #processSymbolicFont(str) { + let retVal = str; - if (!this.fontObj.isSymbolicFont || !this.isSymbol) { - if (retVal == "C" || retVal == "G") { //prevent symbolic encoding from the client - retVal = " " + retVal + " "; //sample: va_ind_760c - } - return retVal; - } - - switch(str.charCodeAt(0)) { - case 20: retVal = '\u2713'; break; //check mark - case 70: retVal = (this.fontObj.type === "CIDFontType0") ? 
'\u26A0' : '\u007D'; break; //exclaimation in triangle OR right curly bracket - case 71: retVal = '\u25b6'; break; //right triangle - case 97: retVal = '\u25b6'; break; //right triangle - case 99: retVal = this.isSymbol ? '\u2022' : '\u25b2'; break; //up triangle. set to Bullet Dot for VA SchSCR - case 100: retVal = '\u25bc'; break; //down triangle - case 103: retVal = '\u27A8'; break; //right arrow. sample: va_ind_760pff and pmt - case 106: retVal = ''; break; //VA 301: string j character by the checkbox, hide it for now - case 114: retVal = '\u2022'; break; //Bullet dot - case 115: retVal = '\u25b2'; break; //up triangle - case 116: retVal = '\u2022'; break; //Bullet dot - case 118: retVal = '\u2022'; break; //Bullet dot - default: - nodeUtil.p2jinfo(this.fontObj.type + " - SymbolicFont - (" + this.fontObj.name + ") : " + - str.charCodeAt(0) + "::" + str.charCodeAt(1) + " => " + retVal); - } + if (!str || str.length !== 1) return retVal; - return retVal; - } + if (!this.fontObj.isSymbolicFont || !this.isSymbol) { + if (retVal == 'C' || retVal == 'G') { + //prevent symbolic encoding from the client + retVal = ' ' + retVal + ' '; //sample: va_ind_760c + } + return retVal; + } - #textRotationAngle(matrix2D) { - let retVal = 0; - if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) { - if (matrix2D[0][1] != 0 && matrix2D[1][0] != 0) { - if ((matrix2D[0][1] / matrix2D[1][0]) + 1 < 0.0001) - retVal = 90; - } - } - else if (matrix2D[0][0] !== 0 && matrix2D[1][1] !== 0) { - let r1 = Math.atan(-matrix2D[0][1] / matrix2D[0][0]); - let r2 = Math.atan(matrix2D[1][0] / matrix2D[1][1]); - if (Math.abs(r1) > 0.0001 && (r1 - r2 < 0.0001)) { - retVal = r1 * 180 / Math.PI; - } - } - return retVal; - } + switch (str.charCodeAt(0)) { + case 20: + retVal = '\u2713'; + break; //check mark + case 70: + retVal = this.fontObj.type === 'CIDFontType0' ? 
'\u26A0' : '\u007D'; + break; //exclaimation in triangle OR right curly bracket + case 71: + retVal = '\u25b6'; + break; //right triangle + case 97: + retVal = '\u25b6'; + break; //right triangle + case 99: + retVal = this.isSymbol ? '\u2022' : '\u25b2'; + break; //up triangle. set to Bullet Dot for VA SchSCR + case 100: + retVal = '\u25bc'; + break; //down triangle + case 103: + retVal = '\u27A8'; + break; //right arrow. sample: va_ind_760pff and pmt + case 106: + retVal = ''; + break; //VA 301: string j character by the checkbox, hide it for now + case 114: + retVal = '\u2022'; + break; //Bullet dot + case 115: + retVal = '\u25b2'; + break; //up triangle + case 116: + retVal = '\u2022'; + break; //Bullet dot + case 118: + retVal = '\u2022'; + break; //Bullet dot + default: + nodeUtil.p2jinfo( + this.fontObj.type + + ' - SymbolicFont - (' + + this.fontObj.name + + ') : ' + + str.charCodeAt(0) + + '::' + + str.charCodeAt(1) + + ' => ' + + retVal + ); + } - // public instance methods - processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) { - const text = this.#processSymbolicFont(str); - if (!text) { - return; - } - this.fontStyleId = this.#getFontStyleIndex(fontSize); + return retVal; + } - // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary - // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; - const TS = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; + #textRotationAngle(matrix2D) { + let retVal = 0; + if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) { + if (matrix2D[0][1] != 0 && matrix2D[1][0] != 0) { + if (matrix2D[0][1] / matrix2D[1][0] + 1 < 0.0001) retVal = 90; + } + } else if (matrix2D[0][0] !== 0 && matrix2D[1][1] !== 0) { + let r1 = Math.atan(-matrix2D[0][1] / matrix2D[0][0]); + let r2 = Math.atan(matrix2D[1][0] / matrix2D[1][1]); + if (Math.abs(r1) > 0.0001 && r1 - r2 < 0.0001) { + retVal = (r1 * 180) / Math.PI; + } + } + return 
retVal; + } - const clrId = PDFUnit.findColorIndex(color); - const colorObj = (clrId >= 0 && clrId < PDFUnit.colorCount()) ? {clr: clrId} : {oc: color}; - - let textRun = { - T: this.flash_encode(text), - S: this.fontStyleId, - TS: TS - }; - const rAngle = this.#textRotationAngle(matrix2D); - if (rAngle != 0) { - nodeUtil.p2jinfo(str + ": rotated " + rAngle + " degree."); - textRun = {...textRun, RA: rAngle}; - } + // public instance methods + processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) { + const text = this.#processSymbolicFont(str); + if (!text) { + return; + } + this.fontStyleId = this.#getFontStyleIndex(fontSize); + + // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary + // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; + const TS = [ + this.faceIdx, + this.fontSize, + this.bold ? 1 : 0, + this.italic ? 1 : 0, + ]; + + const clrId = PDFUnit.findColorIndex(color); + const colorObj = + clrId >= 0 && clrId < PDFUnit.colorCount() + ? 
{ clr: clrId } + : { oc: color }; + + let textRun = { + T: this.flash_encode(text), + S: this.fontStyleId, + TS: TS, + }; + const rAngle = this.#textRotationAngle(matrix2D); + if (rAngle != 0) { + nodeUtil.p2jinfo(str + ': rotated ' + rAngle + ' degree.'); + textRun = { ...textRun, RA: rAngle }; + } - const oneText = {x: PDFUnit.toFormX(p.x) - 0.25, - y: PDFUnit.toFormY(p.y) - 0.75, - w: PDFUnit.toFixedFloat(maxWidth), - ...colorObj, //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) - sw: this.spaceWidth, //font space width, use to merge adjacent text blocks - A: "left", - R: [textRun] - }; + const oneText = { + x: PDFUnit.toFormX(p.x) - 0.25, + y: PDFUnit.toFormY(p.y) - 0.75, + w: PDFUnit.toFixedFloat(maxWidth), + ...colorObj, //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) + sw: this.spaceWidth, //font space width, use to merge adjacent text blocks + A: 'left', + R: [textRun], + }; - targetData.Texts.push(oneText); - } + targetData.Texts.push(oneText); + } - flash_encode(str) { - let retVal = encodeURIComponent(str); - retVal = retVal.replace("%C2%96", "-"); - retVal = retVal.replace("%C2%91", "%27"); - retVal = retVal.replace("%C2%92", "%27"); - retVal = retVal.replace("%C2%82", "%27"); - retVal = retVal.replace("%C2%93", "%22"); - retVal = retVal.replace("%C2%94", "%22"); - retVal = retVal.replace("%C2%84", "%22"); - retVal = retVal.replace("%C2%8B", "%C2%AB"); - retVal = retVal.replace("%C2%9B", "%C2%BB"); + flash_encode(str) { + let retVal = encodeURIComponent(str); + retVal = retVal.replace('%C2%96', '-'); + retVal = retVal.replace('%C2%91', '%27'); + retVal = retVal.replace('%C2%92', '%27'); + retVal = retVal.replace('%C2%82', '%27'); + retVal = retVal.replace('%C2%93', '%22'); + retVal = retVal.replace('%C2%94', '%22'); + retVal = retVal.replace('%C2%84', '%22'); + retVal = retVal.replace('%C2%8B', '%C2%AB'); + retVal = retVal.replace('%C2%9B', '%C2%BB'); - return retVal; - } + return 
retVal; + } - clean() { - this.fontObj = null; - delete this.fontObj; - } + clean() { + this.fontObj = null; + delete this.fontObj; + } } // alias some functions to make (compiled) code shorter -const {round: mr, sin: ms, cos: mc, abs, sqrt} = Math; +const { round: mr, sin: ms, cos: mc, abs, sqrt } = Math; // precompute "00" to "FF" const dec2hex = []; for (let i = 0; i < 16; i++) { - for (let j = 0; j < 16; j++) { - dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); - } + for (let j = 0; j < 16; j++) { + dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); + } } function createMatrixIdentity() { - return [ - [1, 0, 0], - [0, 1, 0], - [0, 0, 1] - ]; + return [ + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + ]; } function matrixMultiply(m1, m2) { - let result = createMatrixIdentity(); + let result = createMatrixIdentity(); - for (let x = 0; x < 3; x++) { - for (let y = 0; y < 3; y++) { - let sum = 0; + for (let x = 0; x < 3; x++) { + for (let y = 0; y < 3; y++) { + let sum = 0; - for (let z = 0; z < 3; z++) { - sum += m1[x][z] * m2[z][y]; - } + for (let z = 0; z < 3; z++) { + sum += m1[x][z] * m2[z][y]; + } - result[x][y] = sum; - } - } - return result; + result[x][y] = sum; + } + } + return result; } function copyState(o1, o2) { - o2.fillStyle = o1.fillStyle; - o2.lineCap = o1.lineCap; - o2.lineJoin = o1.lineJoin; - o2.lineWidth = o1.lineWidth; - o2.miterLimit = o1.miterLimit; - o2.shadowBlur = o1.shadowBlur; - o2.shadowColor = o1.shadowColor; - o2.shadowOffsetX = o1.shadowOffsetX; - o2.shadowOffsetY = o1.shadowOffsetY; - o2.strokeStyle = o1.strokeStyle; - o2.globalAlpha = o1.globalAlpha; - o2.arcScaleX_ = o1.arcScaleX_; - o2.arcScaleY_ = o1.arcScaleY_; - o2.lineScale_ = o1.lineScale_; - o2.dashArray = o1.dashArray; + o2.fillStyle = o1.fillStyle; + o2.lineCap = o1.lineCap; + o2.lineJoin = o1.lineJoin; + o2.lineWidth = o1.lineWidth; + o2.miterLimit = o1.miterLimit; + o2.shadowBlur = o1.shadowBlur; + o2.shadowColor = o1.shadowColor; + o2.shadowOffsetX = 
o1.shadowOffsetX; + o2.shadowOffsetY = o1.shadowOffsetY; + o2.strokeStyle = o1.strokeStyle; + o2.globalAlpha = o1.globalAlpha; + o2.arcScaleX_ = o1.arcScaleX_; + o2.arcScaleY_ = o1.arcScaleY_; + o2.lineScale_ = o1.lineScale_; + o2.dashArray = o1.dashArray; } function processStyle(styleString) { - let str, alpha = 1; - - styleString = String(styleString); - if (styleString.substring(0, 3) == 'rgb') { - let start = styleString.indexOf('(', 3); - let end = styleString.indexOf(')', start + 1); - let guts = styleString.substring(start + 1, end).split(','); - - str = '#'; - for (let i = 0; i < 3; i++) { - str += dec2hex[Number(guts[i])]; - } + let str, + alpha = 1; + + styleString = String(styleString); + if (styleString.substring(0, 3) == 'rgb') { + let start = styleString.indexOf('(', 3); + let end = styleString.indexOf(')', start + 1); + let guts = styleString.substring(start + 1, end).split(','); + + str = '#'; + for (let i = 0; i < 3; i++) { + str += dec2hex[Number(guts[i])]; + } - if (guts.length == 4 && styleString.substring(3, 4) == 'a') { - alpha = guts[3]; - } - } else { - str = styleString; - } + if (guts.length == 4 && styleString.substring(3, 4) == 'a') { + alpha = guts[3]; + } + } else { + str = styleString; + } - return {color:str, alpha:alpha}; + return { color: str, alpha: alpha }; } function processLineCap(lineCap) { - switch (lineCap) { - case 'butt': - return 'flat'; - case 'round': - return 'round'; - case 'square': - default: - return 'square'; - } + switch (lineCap) { + case 'butt': + return 'flat'; + case 'round': + return 'round'; + case 'square': + default: + return 'square'; + } } // Helper function that takes the already fixed cordinates. 
function bezierCurveToHelper(self, cp1, cp2, p) { - self.currentPath_.push({ - type:'bezierCurveTo', - cp1x:cp1.x, - cp1y:cp1.y, - cp2x:cp2.x, - cp2y:cp2.y, - x:p.x, - y:p.y - }); - self.currentX_ = p.x; - self.currentY_ = p.y; + self.currentPath_.push({ + type: 'bezierCurveTo', + cp1x: cp1.x, + cp1y: cp1.y, + cp2x: cp2.x, + cp2y: cp2.y, + x: p.x, + y: p.y, + }); + self.currentX_ = p.x; + self.currentY_ = p.y; } function matrixIsFinite(m) { - for (let j = 0; j < 3; j++) { - for (let k = 0; k < 2; k++) { - if (!isFinite(m[j][k]) || isNaN(m[j][k])) { - return false; - } - } - } - return true; + for (let j = 0; j < 3; j++) { + for (let k = 0; k < 2; k++) { + if (!isFinite(m[j][k]) || isNaN(m[j][k])) { + return false; + } + } + } + return true; } function setM(ctx, m, updateLineScale) { - if (!matrixIsFinite(m)) { - return; - } - ctx.m_ = m; - - if (updateLineScale) { - // Get the line scale. - // Determinant of this.m_ means how much the area is enlarged by the - // transformation. So its square root can be used as a scale factor - // for width. - let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; - ctx.lineScale_ = sqrt(abs(det)); - } + if (!matrixIsFinite(m)) { + return; + } + ctx.m_ = m; + + if (updateLineScale) { + // Get the line scale. + // Determinant of this.m_ means how much the area is enlarged by the + // transformation. So its square root can be used as a scale factor + // for width. 
+ let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; + ctx.lineScale_ = sqrt(abs(det)); + } } class CanvasPattern_ { - constructor() { - } + constructor() {} } // Gradient / Pattern Stubs class CanvasGradient_ { - constructor(aType) { - this.type_ = aType; - this.x0_ = 0; - this.y0_ = 0; - this.r0_ = 0; - this.x1_ = 0; - this.y1_ = 0; - this.r1_ = 0; - this.colors_ = []; - } - addColorStop(aOffset, aColor) { - aColor = processStyle(aColor); - this.colors_.push({offset:aOffset, - color:aColor.color, - alpha:aColor.alpha}); - } + constructor(aType) { + this.type_ = aType; + this.x0_ = 0; + this.y0_ = 0; + this.r0_ = 0; + this.x1_ = 0; + this.y1_ = 0; + this.r1_ = 0; + this.colors_ = []; + } + addColorStop(aOffset, aColor) { + aColor = processStyle(aColor); + this.colors_.push({ + offset: aOffset, + color: aColor.color, + alpha: aColor.alpha, + }); + } } - /** * This class implements CanvasRenderingContext2D interface as described by * the WHATWG. @@ -771,433 +849,464 @@ class CanvasGradient_ { * be associated with */ class CanvasRenderingContext2D_ { - constructor(canvasTarget, scaledWidth, scaledHeight) { - this.m_ = createMatrixIdentity(); - - this.mStack_ = []; - this.aStack_ = []; - this.currentPath_ = []; - - // Canvas context properties - this.strokeStyle = '#000'; - this.fillStyle = '#000'; - - this.lineWidth = 1; - this.lineJoin = 'miter'; - this.lineCap = 'butt'; - this.dashArray = []; - this.miterLimit = 1; - this.globalAlpha = 1; - - if (!("HLines" in canvasTarget) || !Array.isArray(canvasTarget.HLines)) - canvasTarget.HLines = []; - if (!("VLines" in canvasTarget) || !Array.isArray(canvasTarget.VLines)) - canvasTarget.VLines = []; - if (!("Fills" in canvasTarget) || !Array.isArray(canvasTarget.Fills)) - canvasTarget.Fills = []; - if (!("Texts" in canvasTarget) || !Array.isArray(canvasTarget.Texts)) - canvasTarget.Texts = []; - - this.canvas = canvasTarget; - - this.width = scaledWidth; - this.height = scaledHeight; - - this.arcScaleX_ = 1; - this.arcScaleY_ 
= 1; - this.lineScale_ = 1; - - this.currentFont = null; - } - - //private helper methods - #drawPDFLine(p1, p2, lineWidth, color) { - let dashedLine = Array.isArray(this.dashArray) && (this.dashArray.length > 1); - let pL = new PDFLine(p1.x, p1.y, p2.x, p2.y, lineWidth, color, dashedLine); - pL.processLine(this.canvas); - } + constructor(canvasTarget, scaledWidth, scaledHeight) { + this.m_ = createMatrixIdentity(); + + this.mStack_ = []; + this.aStack_ = []; + this.currentPath_ = []; + + // Canvas context properties + this.strokeStyle = '#000'; + this.fillStyle = '#000'; + + this.lineWidth = 1; + this.lineJoin = 'miter'; + this.lineCap = 'butt'; + this.dashArray = []; + this.miterLimit = 1; + this.globalAlpha = 1; + + if (!('HLines' in canvasTarget) || !Array.isArray(canvasTarget.HLines)) + canvasTarget.HLines = []; + if (!('VLines' in canvasTarget) || !Array.isArray(canvasTarget.VLines)) + canvasTarget.VLines = []; + if (!('Fills' in canvasTarget) || !Array.isArray(canvasTarget.Fills)) + canvasTarget.Fills = []; + if (!('Texts' in canvasTarget) || !Array.isArray(canvasTarget.Texts)) + canvasTarget.Texts = []; + + this.canvas = canvasTarget; + + this.width = scaledWidth; + this.height = scaledHeight; + + this.arcScaleX_ = 1; + this.arcScaleY_ = 1; + this.lineScale_ = 1; + + this.currentFont = null; + } - #drawPDFFill(cp, min, max, color) { - let width = max.x - min.x; - let height = max.y - min.y; - let pF = new PDFFill(cp.x, cp.y, width, height, color); - pF.processFill(this.canvas); - } + //private helper methods + #drawPDFLine(p1, p2, lineWidth, color) { + let dashedLine = + Array.isArray(this.dashArray) && this.dashArray.length > 1; + let pL = new PDFLine( + p1.x, + p1.y, + p2.x, + p2.y, + lineWidth, + color, + dashedLine + ); + pL.processLine(this.canvas); + } - #needRemoveRect(x, y, w, h) { - let retVal = (Math.abs(w - Math.abs(h)) < 1 && w < 13); - if (retVal) { - nodeUtil.p2jinfo("Skipped: tiny rect: w=" + w + ", h=" + h); - } - return retVal; - } + 
#drawPDFFill(cp, min, max, color) { + let width = max.x - min.x; + let height = max.y - min.y; + let pF = new PDFFill(cp.x, cp.y, width, height, color); + pF.processFill(this.canvas); + } - getContext(ctxType) { - return (ctxType === "2d") ? this : null; - } + #needRemoveRect(x, y, w, h) { + let retVal = Math.abs(w - Math.abs(h)) < 1 && w < 13; + if (retVal) { + nodeUtil.p2jinfo('Skipped: tiny rect: w=' + w + ', h=' + h); + } + return retVal; + } - setLineDash(lineDash) { - this.dashArray = lineDash; - } + getContext(ctxType) { + return ctxType === '2d' ? this : null; + } - getLineDash() { - return this.dashArray; - } + setLineDash(lineDash) { + this.dashArray = lineDash; + } - fillText(text, x, y, maxWidth, fontSize) { - if (!text || text.trim().length < 1) - return; - let p = this.getCoords_(x, y); + getLineDash() { + return this.dashArray; + } - let a = processStyle(this.fillStyle || this.strokeStyle); - let color = (!!a) ? a.color : '#000000'; + fillText(text, x, y, maxWidth, fontSize) { + if (!text || (!text.length === 1 && text.trim().length < 1)) return; + let p = this.getCoords_(x, y); + + let a = processStyle(this.fillStyle || this.strokeStyle); + let color = !!a ? a.color : '#000000'; + + this.currentFont.processText( + p, + text, + maxWidth, + color, + fontSize, + this.canvas, + this.m_ + ); + } - this.currentFont.processText(p, text, maxWidth, color, fontSize, this.canvas, this.m_); - }; + strokeText(text, x, y, maxWidth) { + //MQZ. 10/23/2012, yeah, no hollow text for now + this.fillText(text, x, y, maxWidth); + } - strokeText(text, x, y, maxWidth) { - //MQZ. 
10/23/2012, yeah, no hollow text for now - this.fillText(text, x, y, maxWidth); - } + measureText(text) { + console.warn('to be implemented: contextPrototype.measureText - ', text); + let chars = text.length || 1; + return { width: chars * (this.currentFont.spaceWidth || 5) }; + } - measureText(text) { - console.warn("to be implemented: contextPrototype.measureText - ", text); - let chars = text.length || 1; - return {width: chars * (this.currentFont.spaceWidth || 5)}; - } + setFont(fontObj) { + if (!!this.currentFont && typeof this.currentFont.clean === 'function') { + this.currentFont.clean(); + this.currentFont = null; + } - setFont(fontObj) { - if ((!!this.currentFont) && typeof(this.currentFont.clean) === "function") { - this.currentFont.clean(); - this.currentFont = null; - } + this.currentFont = new PDFFont(fontObj); + } - this.currentFont = new PDFFont(fontObj); - } + clearRect() { + console.warn('to be implemented: contextPrototype.clearRect'); + } - clearRect() { - console.warn("to be implemented: contextPrototype.clearRect"); - } + beginPath() { + // TODO: Branch current matrix so that save/restore has no effect + // as per safari docs. + this.currentPath_ = []; + } - beginPath() { - // TODO: Branch current matrix so that save/restore has no effect - // as per safari docs. 
- this.currentPath_ = []; - } + moveTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({ type: 'moveTo', x: p.x, y: p.y }); + this.currentX_ = p.x; + this.currentY_ = p.y; + } - moveTo(aX, aY) { - let p = this.getCoords_(aX, aY); - this.currentPath_.push({type:'moveTo', x:p.x, y:p.y}); - this.currentX_ = p.x; - this.currentY_ = p.y; - } + lineTo(aX, aY) { + let p = this.getCoords_(aX, aY); + this.currentPath_.push({ type: 'lineTo', x: p.x, y: p.y }); - lineTo(aX, aY) { - let p = this.getCoords_(aX, aY); - this.currentPath_.push({type:'lineTo', x:p.x, y:p.y}); + this.currentX_ = p.x; + this.currentY_ = p.y; + } - this.currentX_ = p.x; - this.currentY_ = p.y; - } + bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { + let p = this.getCoords_(aX, aY); + let cp1 = this.getCoords_(aCP1x, aCP1y); + let cp2 = this.getCoords_(aCP2x, aCP2y); + bezierCurveToHelper(this, cp1, cp2, p); + } - bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { - let p = this.getCoords_(aX, aY); - let cp1 = this.getCoords_(aCP1x, aCP1y); - let cp2 = this.getCoords_(aCP2x, aCP2y); - bezierCurveToHelper(this, cp1, cp2, p); - } + quadraticCurveTo(aCPx, aCPy, aX, aY) { + // the following is lifted almost directly from + // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes - quadraticCurveTo(aCPx, aCPy, aX, aY) { - // the following is lifted almost directly from - // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes + let cp = this.getCoords_(aCPx, aCPy); + let p = this.getCoords_(aX, aY); - let cp = this.getCoords_(aCPx, aCPy); - let p = this.getCoords_(aX, aY); + let cp1 = { + x: this.currentX_ + (2.0 / 3.0) * (cp.x - this.currentX_), + y: this.currentY_ + (2.0 / 3.0) * (cp.y - this.currentY_), + }; + let cp2 = { + x: cp1.x + (p.x - this.currentX_) / 3.0, + y: cp1.y + (p.y - this.currentY_) / 3.0, + }; - let cp1 = { - x:this.currentX_ + 2.0 / 3.0 * (cp.x - this.currentX_), - y:this.currentY_ + 2.0 / 3.0 * (cp.y - this.currentY_) - }; - let 
cp2 = { - x:cp1.x + (p.x - this.currentX_) / 3.0, - y:cp1.y + (p.y - this.currentY_) / 3.0 - }; + bezierCurveToHelper(this, cp1, cp2, p); + } - bezierCurveToHelper(this, cp1, cp2, p); - } + arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { + let arcType = aClockwise ? 'at' : 'wa'; - arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { - let arcType = aClockwise ? 'at' : 'wa'; + let xStart = aX + mc(aStartAngle) * aRadius; + let yStart = aY + ms(aStartAngle) * aRadius; - let xStart = aX + mc(aStartAngle) * aRadius; - let yStart = aY + ms(aStartAngle) * aRadius; + let xEnd = aX + mc(aEndAngle) * aRadius; + let yEnd = aY + ms(aEndAngle) * aRadius; - let xEnd = aX + mc(aEndAngle) * aRadius; - let yEnd = aY + ms(aEndAngle) * aRadius; + // IE won't render arches drawn counter clockwise if xStart == xEnd. + if (xStart == xEnd && !aClockwise) { + xStart += 0.125; // Offset xStart by 1/80 of a pixel. Use something + // that can be represented in binary + } - // IE won't render arches drawn counter clockwise if xStart == xEnd. - if (xStart == xEnd && !aClockwise) { - xStart += 0.125; // Offset xStart by 1/80 of a pixel. 
Use something - // that can be represented in binary - } + let p = this.getCoords_(aX, aY); + let pStart = this.getCoords_(xStart, yStart); + let pEnd = this.getCoords_(xEnd, yEnd); + + this.currentPath_.push({ + type: arcType, + x: p.x, + y: p.y, + radius: aRadius, + xStart: pStart.x, + yStart: pStart.y, + xEnd: pEnd.x, + yEnd: pEnd.y, + }); + } - let p = this.getCoords_(aX, aY); - let pStart = this.getCoords_(xStart, yStart); - let pEnd = this.getCoords_(xEnd, yEnd); - - this.currentPath_.push({type:arcType, - x:p.x, - y:p.y, - radius:aRadius, - xStart:pStart.x, - yStart:pStart.y, - xEnd:pEnd.x, - yEnd:pEnd.y}); - } + rect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return; //try to remove the rectangle behind radio buttons and checkboxes + } - rect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + } - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - } + strokeRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return; //try to remove the rectangle behind radio buttons and checkboxes + } - strokeRect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } + let oldPath = this.currentPath_; + this.beginPath(); - let oldPath = this.currentPath_; - this.beginPath(); + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.stroke(); - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - 
this.lineTo(aX, aY + aHeight); - this.closePath(); - this.stroke(); + this.currentPath_ = oldPath; + } - this.currentPath_ = oldPath; - } + fillRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { + return; //try to remove the rectangle behind radio buttons and checkboxes + } - fillRect(aX, aY, aWidth, aHeight) { - if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { - return;//try to remove the rectangle behind radio buttons and checkboxes - } + let oldPath = this.currentPath_; + this.beginPath(); - let oldPath = this.currentPath_; - this.beginPath(); + this.moveTo(aX, aY); + this.lineTo(aX + aWidth, aY); + this.lineTo(aX + aWidth, aY + aHeight); + this.lineTo(aX, aY + aHeight); + this.closePath(); + this.fill(); - this.moveTo(aX, aY); - this.lineTo(aX + aWidth, aY); - this.lineTo(aX + aWidth, aY + aHeight); - this.lineTo(aX, aY + aHeight); - this.closePath(); - this.fill(); + this.currentPath_ = oldPath; + } - this.currentPath_ = oldPath; - } + createLinearGradient(aX0, aY0, aX1, aY1) { + let gradient = new CanvasGradient_('gradient'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + return gradient; + } - createLinearGradient(aX0, aY0, aX1, aY1) { - let gradient = new CanvasGradient_('gradient'); - gradient.x0_ = aX0; - gradient.y0_ = aY0; - gradient.x1_ = aX1; - gradient.y1_ = aY1; - return gradient; - } + createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { + let gradient = new CanvasGradient_('gradientradial'); + gradient.x0_ = aX0; + gradient.y0_ = aY0; + gradient.r0_ = aR0; + gradient.x1_ = aX1; + gradient.y1_ = aY1; + gradient.r1_ = aR1; + return gradient; + } - createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { - let gradient = new CanvasGradient_('gradientradial'); - gradient.x0_ = aX0; - gradient.y0_ = aY0; - gradient.r0_ = aR0; - gradient.x1_ = aX1; - gradient.y1_ = aY1; - gradient.r1_ = aR1; - return gradient; - } + drawImage(image, var_args) { + //MQZ. 
no image drawing support for now + } - drawImage(image, var_args) { - //MQZ. no image drawing support for now - } + getImageData(x, y, w, h) { + //MQZ. returns empty data buffer for now + return { + width: w, + height: h, + data: new Uint8Array(w * h * 4), + }; + } - getImageData(x, y, w, h) { - //MQZ. returns empty data buffer for now - return { - width:w, - height:h, - data:new Uint8Array(w * h * 4) - }; - } + stroke(aFill) { + if (this.currentPath_.length < 2) { + return; + } - stroke(aFill) { - if (this.currentPath_.length < 2) { - return; - } + let a = processStyle(aFill ? this.fillStyle : this.strokeStyle); + let color = a.color; + // let opacity = a.alpha * this.globalAlpha; + let lineWidth = this.lineScale_ * this.lineWidth; + + let min = { x: null, y: null }; + let max = { x: null, y: null }; + + for (let i = 0; i < this.currentPath_.length; i++) { + let p = this.currentPath_[i]; + + switch (p.type) { + case 'moveTo': + break; + case 'lineTo': + if (!aFill) { + //lines + if (i > 0) { + this.#drawPDFLine( + this.currentPath_[i - 1], + p, + lineWidth, + color + ); + } + } + break; + case 'close': + if (!aFill) { + //lines + if (i > 0) { + this.#drawPDFLine( + this.currentPath_[i - 1], + this.currentPath_[0], + lineWidth, + color + ); + } + } + p = null; + break; + case 'bezierCurveTo': + break; + case 'at': + case 'wa': + break; + } - let a = processStyle(aFill ? 
this.fillStyle : this.strokeStyle); - let color = a.color; -// let opacity = a.alpha * this.globalAlpha; - let lineWidth = this.lineScale_ * this.lineWidth; - - let min = {x:null, y:null}; - let max = {x:null, y:null}; - - for (let i = 0; i < this.currentPath_.length; i++) { - let p = this.currentPath_[i]; - - switch (p.type) { - case 'moveTo': - break; - case 'lineTo': - if (!aFill) { //lines - if (i > 0) { - this.#drawPDFLine(this.currentPath_[i-1], p, lineWidth, color); - } - } - break; - case 'close': - if (!aFill) { //lines - if (i > 0) { - this.#drawPDFLine(this.currentPath_[i-1], this.currentPath_[0], lineWidth, color); - } - } - p = null; - break; - case 'bezierCurveTo': - break; - case 'at': - case 'wa': - break; + // Figure out dimensions so we can set fills' coordinates correctly + if (aFill && p) { + if (min.x == null || p.x < min.x) { + min.x = p.x; } - - // Figure out dimensions so we can set fills' coordinates correctly - if (aFill && p) { - if (min.x == null || p.x < min.x) { - min.x = p.x; - } - if (max.x == null || p.x > max.x) { - max.x = p.x; - } - if (min.y == null || p.y < min.y) { - min.y = p.y; - } - if (max.y == null || p.y > max.y) { - max.y = p.y; - } + if (max.x == null || p.x > max.x) { + max.x = p.x; } - } + if (min.y == null || p.y < min.y) { + min.y = p.y; + } + if (max.y == null || p.y > max.y) { + max.y = p.y; + } + } + } - if (aFill) { //fill - this.#drawPDFFill(min, min, max, color); - } - } + if (aFill) { + //fill + this.#drawPDFFill(min, min, max, color); + } + } - fill() { - this.stroke(true); - } + fill() { + this.stroke(true); + } - closePath() { - this.currentPath_.push({type:'close'}); - } + closePath() { + this.currentPath_.push({ type: 'close' }); + } - /** - * @private - */ - getCoords_ (aX, aY) { - let m = this.m_; - return { - x: (aX * m[0][0] + aY * m[1][0] + m[2][0]), - y: (aX * m[0][1] + aY * m[1][1] + m[2][1]) - }; - } + /** + * @private + */ + getCoords_(aX, aY) { + let m = this.m_; + return { + x: aX * m[0][0] + 
aY * m[1][0] + m[2][0], + y: aX * m[0][1] + aY * m[1][1] + m[2][1], + }; + } - save() { - let o = {}; - copyState(this, o); - this.aStack_.push(o); - this.mStack_.push(this.m_); - this.m_ = matrixMultiply(createMatrixIdentity(), this.m_); - } + save() { + let o = {}; + copyState(this, o); + this.aStack_.push(o); + this.mStack_.push(this.m_); + this.m_ = matrixMultiply(createMatrixIdentity(), this.m_); + } - restore() { - copyState(this.aStack_.pop(), this); - this.m_ = this.mStack_.pop(); - } + restore() { + copyState(this.aStack_.pop(), this); + this.m_ = this.mStack_.pop(); + } - translate(aX, aY) { - let m1 = [ - [1, 0, 0], - [0, 1, 0], - [aX, aY, 1] - ]; + translate(aX, aY) { + let m1 = [ + [1, 0, 0], + [0, 1, 0], + [aX, aY, 1], + ]; - setM(this, matrixMultiply(m1, this.m_), false); - } + setM(this, matrixMultiply(m1, this.m_), false); + } - rotate(aRot) { - let c = mc(aRot); - let s = ms(aRot); + rotate(aRot) { + let c = mc(aRot); + let s = ms(aRot); - let m1 = [ - [c, s, 0], - [-s, c, 0], - [0, 0, 1] - ]; + let m1 = [ + [c, s, 0], + [-s, c, 0], + [0, 0, 1], + ]; - setM(this, matrixMultiply(m1, this.m_), false); - } + setM(this, matrixMultiply(m1, this.m_), false); + } - scale(aX, aY) { - this.arcScaleX_ *= aX; - this.arcScaleY_ *= aY; - let m1 = [ - [aX, 0, 0], - [0, aY, 0], - [0, 0, 1] - ]; + scale(aX, aY) { + this.arcScaleX_ *= aX; + this.arcScaleY_ *= aY; + let m1 = [ + [aX, 0, 0], + [0, aY, 0], + [0, 0, 1], + ]; - setM(this, matrixMultiply(m1, this.m_), true); - } + setM(this, matrixMultiply(m1, this.m_), true); + } - transform(m11, m12, m21, m22, dx, dy) { - let m1 = [ - [m11, m12, 0], - [m21, m22, 0], - [dx, dy, 1] - ]; + transform(m11, m12, m21, m22, dx, dy) { + let m1 = [ + [m11, m12, 0], + [m21, m22, 0], + [dx, dy, 1], + ]; - setM(this, matrixMultiply(m1, this.m_), true); - } + setM(this, matrixMultiply(m1, this.m_), true); + } - setTransform(m11, m12, m21, m22, dx, dy) { - let m = [ - [m11, m12, 0], - [m21, m22, 0], - [dx, dy, 1] - ]; + 
setTransform(m11, m12, m21, m22, dx, dy) { + let m = [ + [m11, m12, 0], + [m21, m22, 0], + [dx, dy, 1], + ]; - setM(this, m, true); - } + setM(this, m, true); + } - /******** STUBS ********/ - clip() { - // TODO: Implement - } + /******** STUBS ********/ + clip() { + // TODO: Implement + } - arcTo() { - // TODO: Implement - } + arcTo() { + // TODO: Implement + } - createPattern() { - return new CanvasPattern_(); - } + createPattern() { + return new CanvasPattern_(); + } } const kFBANotOverridable = 0x00000400; // indicates the field is read only by the user