From 689caf15410e7fdb8062768acc2eb58495a87dda Mon Sep 17 00:00:00 2001 From: Slobodan Todorov Date: Tue, 24 Jan 2017 16:41:03 +0100 Subject: [PATCH 01/66] Extended the fix for issue #42 and added additional hybrid dual bolditalic font detection --- lib/pdffont.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/pdffont.js b/lib/pdffont.js index 99591048..f54e9b27 100644 --- a/lib/pdffont.js +++ b/lib/pdffont.js @@ -215,6 +215,15 @@ let PDFFont = (function PFPFontClosure() { this.bold = this.typeName.indexOf("bold") >= 0 || this.typeName.indexOf("black") >= 0; } this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42 + // Extended the fix for https://github.com/modesty/pdf2json/issues/42 + if (!this.italic) { + this.italic = this.typeName.indexOf("italic") >= 0 || this.typeName.indexOf("oblique") >= 0; + } + // Added detection of hybrid dual bolditalic fonts + if (((!this.bold) || (!this.italic)) && (this.typeName.indexOf("boldobl") >= 0)) { + this.bold = true; + this.italic = true; + } let typeName = this.subType; if (fontObj.isSerifFont) { @@ -382,4 +391,3 @@ let PDFFont = (function PFPFontClosure() { })(); module.exports = PDFFont; - From 0e0f416e5bf9c5a75647b2d27d5609ff647e733d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B7=B4=E9=87=8C=E5=88=87=E7=BD=97?= Date: Thu, 20 Apr 2017 14:44:55 +0800 Subject: [PATCH 02/66] chore(ignore) add .idea to .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6ef748fb..24aeaffc 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ *.out node_modules/ target/ - +.idea From 0e37002fbdccc30400918016c63925f0b3f6c573 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B7=B4=E9=87=8C=E5=88=87=E7=BD=97?= Date: Thu, 20 Apr 2017 14:45:04 +0800 Subject: [PATCH 03/66] fix(canvas) remove leading word spacing after render --- base/display/canvas.js | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/base/display/canvas.js b/base/display/canvas.js index 30fb0010..d9300e54 100755 --- a/base/display/canvas.js +++ b/base/display/canvas.js @@ -1095,19 +1095,22 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { this.processingType3 = null; } else { ctx.save(); - var tx = 0; //MQZ Dec.04.2013 handles leading word spacing + var tx = 0; if (wordSpacing !== 0) { var firstGlyph = _.find(glyphs, function(g) { return _.isObject(g);}); if (firstGlyph && (firstGlyph.fontChar === ' ' || firstGlyph.unicode === ' ')) { if (_.find(glyphs, function(g) { return _.isObject(g) && g.unicode !== ' ';})) { - current.x += wordSpacing * fontSize * textHScale; + tx = wordSpacing * fontSize * textHScale; } } } + current.x += tx this.applyTextTransforms(); + current.x -= tx + // MQZ-GYJ Apr.20.2017 handles leading word spacing over var lineWidth = current.lineWidth; var a1 = current.textMatrix[0], b1 = current.textMatrix[1]; From 9482c6a1ae85b164382d911a16d75657d0e5993e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B7=B4=E9=87=8C=E5=88=87=E7=BD=97?= Date: Thu, 20 Apr 2017 14:47:33 +0800 Subject: [PATCH 04/66] fix(canvas) split word when spacing is a positive number but very big --- base/display/canvas.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/base/display/canvas.js b/base/display/canvas.js index d9300e54..25dcee41 100755 --- a/base/display/canvas.js +++ b/base/display/canvas.js @@ -1289,7 +1289,8 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { } } else { - if (-e >= spaceWidth) { + //MQZ-GYJ. Apr.20.2017 split word when spacing is a positive number but very big + if (Math.abs(e) >= spaceWidth) { if (vertical) { current.y += spacingLength; } else { From 14c96267fc8335379bb5942cd32dcc90d710f0d8 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Date: Wed, 6 Dec 2017 17:08:03 +0000 Subject: [PATCH 05/66] Fixed bug on some pdfs with fields --- lib/pdfanno.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/pdfanno.js b/lib/pdfanno.js index 594561bc..6f35dfa7 100644 --- a/lib/pdfanno.js +++ b/lib/pdfanno.js @@ -112,6 +112,9 @@ let PDFAnno = (function PDFAnnoClosure() { if (item.hasOwnProperty('TName')) return; + if(!jsFuncName.split) + return; + let vParts = jsFuncName.split('('); if (vParts.length !== 2) return; From 30b4e6ec404598578ae734088d2526ab2e9c9a83 Mon Sep 17 00:00:00 2001 From: David Evans <5390145+bbcrddave@users.noreply.github.com> Date: Wed, 18 Apr 2018 14:33:16 +0100 Subject: [PATCH 06/66] Document Pages->Texts->R->TS in readme.md This field is documented in the code but was missing from the README. --- readme.md | 1 + 1 file changed, 1 insertion(+) diff --git a/readme.md b/readme.md index d68c99c6..a604a932 100644 --- a/readme.md +++ b/readme.md @@ -153,6 +153,7 @@ Each page object within 'Pages' array describes page elements and attributes wit * 'R': an array of text run, each text run object has two main fields: * 'T': actual text * 'S': style index from style dictionary. More info about 'Style Dictionary' can be found at 'Dictionary Reference' section + * 'TS': [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic] v0.4.5 added support when fields attributes information is defined in external xml file. pdf2json will always try load field attributes xml file based on file name convention (pdfFileName.pdf's field XML file must be named pdfFileName_fieldInfo.xml in the same directory). If found, fields info will be injected. From 030d85d59a256590290b89306730c53b3f46e120 Mon Sep 17 00:00:00 2001 From: Ran Halprin Date: Mon, 21 May 2018 11:31:16 -0700 Subject: [PATCH 07/66] Do not fail if url is undefined --- base/shared/annotation.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/shared/annotation.js b/base/shared/annotation.js index f7a23a6d..01830797 100755 --- a/base/shared/annotation.js +++ b/base/shared/annotation.js @@ -667,7 +667,7 @@ var LinkAnnotation = (function LinkAnnotationClosure() { // Lets URLs beginning with 'www.' default to using the 'http://' protocol. function addDefaultProtocolToUrl(url) { - if (url.indexOf('www.') === 0) { + if (url && url.indexOf('www.') === 0) { return ('http://' + url); } return url; From 008e0b66ff514653655b58a0ea6a39fd790471bc Mon Sep 17 00:00:00 2001 From: ejmin Date: Sun, 22 Jul 2018 21:56:20 -0500 Subject: [PATCH 08/66] users can get the selected value from dropdowns --- lib/pdffield.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/pdffield.js b/lib/pdffield.js index a7a56477..528f6af2 100644 --- a/lib/pdffield.js +++ b/lib/pdffield.js @@ -223,7 +223,11 @@ let PDFField = (function PDFFieldClosure() { anData.PL.D.push(ele[0]); anData.PL.V.push(ele[1]); }); - + + // add field value to the object + if (field.fieldValue) { + anData.V = field.fieldValue; + } this.Fields.push(anData); }; From e2084360979df36bdcb52847f98031b53f308ba4 Mon Sep 17 00:00:00 2001 From: Kevin Jiang Date: Sat, 13 Oct 2018 12:20:12 -0400 Subject: [PATCH 09/66] modify travisCI --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 96c921b2..9fbbb8e0 100755 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,5 @@ language: node_js node_js: - - "4.5.0" + - "8.9.0" +before_install: + - "npm install" \ No newline at end of file From b0d5b8189b37cb894cd9b4ffd1ce092451764643 Mon Sep 17 00:00:00 2001 From: Kevin Jiang Date: Sat, 13 Oct 2018 12:22:40 -0400 Subject: [PATCH 10/66] Move back to 4.5.0 --- .travis.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9fbbb8e0..96c921b2 100755 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,3 @@ language: node_js node_js: - - "8.9.0" -before_install: - - "npm install" \ No newline at end of file + - "4.5.0" From b113498260eae695fd473cb014abf7e2af53f994 Mon Sep 17 00:00:00 2001 From: Kevin Jiang Date: Sat, 13 Oct 2018 13:21:49 -0400 Subject: [PATCH 11/66] I hope this wasn't the problem --- test/p2j.forms.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/p2j.forms.sh b/test/p2j.forms.sh index d647558f..f9491c5f 100755 --- a/test/p2j.forms.sh +++ b/test/p2j.forms.sh @@ -1,6 +1,6 @@ #!/bin/bash STARTTIME=$(date +%s) -AGENCIES=("dc" "de" "ef" "fd" "nd" "or" "pa" "sc" "va") +AGENCIES=$("dc" "de" "ef" "fd" "nd" "or" "pa" "sc" "va") for i in "${AGENCIES[@]}" do sh ./p2j.one.sh $i From 29a799de925a7de30600605318e5321450a81267 Mon Sep 17 00:00:00 2001 From: Kevin Jiang Date: Sat, 13 Oct 2018 13:40:26 -0400 Subject: [PATCH 12/66] Updated Version. 8.9.0 works well, but only in gitBash in admin mode. --- .travis.yml | 2 +- package-lock.json | 45 +++++++++++++++++++++++++++++++++++++++++++++ package.json | 3 +-- test/p2j.forms.sh | 4 ++-- test/p2j.one.sh | 2 +- 5 files changed, 50 insertions(+), 6 deletions(-) create mode 100644 package-lock.json diff --git a/.travis.yml b/.travis.yml index 96c921b2..4808846d 100755 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,3 @@ language: node_js node_js: - - "4.5.0" + - "8.9.0" diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 00000000..cb6e8830 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,45 @@ +{ + "name": "pdf2json", + "version": "1.1.8", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "async": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/async/-/async-2.6.1.tgz", + "integrity": "sha512-fNEiL2+AZt6AlAw/29Cr0UDe4sRAHCpEHh54WMz+Bb7QfNcFw4h3loofyJpLeQs4Yx7yuqu/2dLgM5hKOs6HlQ==", + "requires": { + "lodash": "^4.17.10" + } + }, + "lodash": { + "version": "4.17.11", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz", + "integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg==" + }, + "minimist": { + "version": "0.0.10", + "resolved": "http://registry.npmjs.org/minimist/-/minimist-0.0.10.tgz", + "integrity": "sha1-3j+YVD2/lggr5IrRoMfNqDYwHc8=" + }, + "optimist": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/optimist/-/optimist-0.6.1.tgz", + "integrity": "sha1-2j6nRob6IaGaERwybpDrFaAZZoY=", + "requires": { + "minimist": "~0.0.1", + "wordwrap": "~0.0.2" + } + }, + "wordwrap": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-0.0.3.tgz", + "integrity": "sha1-o9XabNXAvAAI03I0u68b7WMFkQc=" + }, + "xmldom": { + "version": "0.1.27", + "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.1.27.tgz", + "integrity": "sha1-1QH5ezvbQDr4757MIFcxh6rawOk=" + } + } +} diff --git a/package.json b/package.json index 291424c6..140853f8 100644 --- a/package.json +++ b/package.json @@ -42,8 +42,7 @@ "optimist": "^0.6.1", "async": "^2.0.1" }, - "devDependencies": { - }, + "devDependencies": {}, "bundledDependencies": [ "xmldom", "lodash", diff --git a/test/p2j.forms.sh b/test/p2j.forms.sh index f9491c5f..f15e094a 100755 --- a/test/p2j.forms.sh +++ b/test/p2j.forms.sh @@ -1,6 +1,6 @@ -#!/bin/bash +#!/usr/bin/env bash STARTTIME=$(date +%s) -AGENCIES=$("dc" "de" "ef" "fd" "nd" "or" "pa" "sc" "va") +AGENCIES=("dc" "de" "ef" "fd" "nd" "or" "pa" "sc" "va") for i in "${AGENCIES[@]}" do sh ./p2j.one.sh $i diff --git a/test/p2j.one.sh b/test/p2j.one.sh index ef27706d..61a592ac 100755 --- a/test/p2j.one.sh +++ b/test/p2j.one.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash IN_DIR_BASE=./pdf/ OUT_DIR_BASE=./target/ DATA_DIR_BASE=./data/ From e8728ef7d2ec763cd5ff6b9a285b240e1fd45b31 Mon Sep 17 00:00:00 2001 From: Kevin Jiang Date: Sat, 13 Oct 2018 14:39:53 -0400 Subject: [PATCH 13/66] try removing the for loop --- test/p2j.forms.sh | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/test/p2j.forms.sh b/test/p2j.forms.sh index f15e094a..24cb92c1 100755 --- a/test/p2j.forms.sh +++ b/test/p2j.forms.sh @@ -1,9 +1,21 @@ #!/usr/bin/env bash STARTTIME=$(date +%s) -AGENCIES=("dc" "de" "ef" "fd" "nd" "or" "pa" "sc" "va") -for i in "${AGENCIES[@]}" -do - sh ./p2j.one.sh $i -done +# AGENCIES=("dc" "de" "ef" "fd" "nd" "or" "pa" "sc" "va") +# for i in "${AGENCIES[@]}" +# do +# sh ./p2j.one.sh $i +# done + +# try doing it manually. Maybe travis ci doesn't support bash arrays? +sh ./p2j.one.sh dc +sh ./p2j.one.sh de +sh ./p2j.one.sh ef +sh ./p2j.one.sh fd +sh ./p2j.one.sh nd +sh ./p2j.one.sh or +sh ./p2j.one.sh pa +sh ./p2j.one.sh sc +sh ./p2j.one.sh va + ENDTIME=$(date +%s) echo "It takes $(($ENDTIME - $STARTTIME)) seconds to process all PDFs ..." From 85d1abfa780d13d6406c49b4d7fe3159b0861e5b Mon Sep 17 00:00:00 2001 From: Kevin Jiang Date: Mon, 15 Oct 2018 09:19:58 -0400 Subject: [PATCH 14/66] Update p2j.forms.sh --- test/p2j.forms.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/p2j.forms.sh b/test/p2j.forms.sh index 24cb92c1..2c6fb90f 100755 --- a/test/p2j.forms.sh +++ b/test/p2j.forms.sh @@ -6,7 +6,8 @@ STARTTIME=$(date +%s) # sh ./p2j.one.sh $i # done -# try doing it manually. Maybe travis ci doesn't support bash arrays? +# Travis CI doesn't seem to support arrays in bash for testing. +# Reverting to a bunch of commands so that build button can be shown. sh ./p2j.one.sh dc sh ./p2j.one.sh de sh ./p2j.one.sh ef From 91efb1aad3aee7b178fc8c7e2f625c9ef329c1dd Mon Sep 17 00:00:00 2001 From: Adam Stern Date: Tue, 16 Oct 2018 13:49:00 -0400 Subject: [PATCH 15/66] Updated lodash to remove vulnerabilities --- package-lock.json | 45 +++++++++++++++++++++++++++++++++++++++++++++ package.json | 7 +++---- 2 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 package-lock.json diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 00000000..f63099cb --- /dev/null +++ b/package-lock.json @@ -0,0 +1,45 @@ +{ + "name": "pdf2json", + "version": "1.1.9", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "async": { + "version": "2.6.1", + "resolved": "https://registry.npmjs.org/async/-/async-2.6.1.tgz", + "integrity": "sha512-fNEiL2+AZt6AlAw/29Cr0UDe4sRAHCpEHh54WMz+Bb7QfNcFw4h3loofyJpLeQs4Yx7yuqu/2dLgM5hKOs6HlQ==", + "requires": { + "lodash": "^4.17.10" + } + }, + "lodash": { + "version": "4.17.11", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz", + "integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg==" + }, + "minimist": { + "version": "0.0.10", + "resolved": "http://registry.npmjs.org/minimist/-/minimist-0.0.10.tgz", + "integrity": "sha1-3j+YVD2/lggr5IrRoMfNqDYwHc8=" + }, + "optimist": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/optimist/-/optimist-0.6.1.tgz", + "integrity": "sha1-2j6nRob6IaGaERwybpDrFaAZZoY=", + "requires": { + "minimist": "~0.0.1", + "wordwrap": "~0.0.2" + } + }, + "wordwrap": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-0.0.3.tgz", + "integrity": "sha1-o9XabNXAvAAI03I0u68b7WMFkQc=" + }, + "xmldom": { + "version": "0.1.27", + "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.1.27.tgz", + "integrity": "sha1-1QH5ezvbQDr4757MIFcxh6rawOk=" + } + } +} diff --git a/package.json b/package.json index 291424c6..189c9692 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.1.8", + "version": "1.1.9", "description": "A PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", "keywords": [ "pdf", @@ -38,12 +38,11 @@ }, "dependencies": { "xmldom": "^0.1.22", - "lodash": "^4.15.0", + "lodash": "^4.17.11", "optimist": "^0.6.1", "async": "^2.0.1" }, - "devDependencies": { - }, + "devDependencies": {}, "bundledDependencies": [ "xmldom", "lodash", From c61de0151be31712c78191a566a14d359b39d9fd Mon Sep 17 00:00:00 2001 From: MasonVX Date: Mon, 28 Jan 2019 23:46:43 +0100 Subject: [PATCH 16/66] added check for newer file version --- pdfparser.js | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pdfparser.js b/pdfparser.js index e0e84b08..d5e62d57 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -43,11 +43,11 @@ let PDFParser = (function () { this.PDFJS.on("pdfjs_parseDataReady", _onPDFJSParseDataReady.bind(this)); this.PDFJS.on("pdfjs_parseDataError", _onPDFJSParserDataError.bind(this)); - this.PDFJS.parsePDFData(buffer || _binBuffer[this.pdfFilePath]); + this.PDFJS.parsePDFData(buffer || _binBuffer[this.pdfFilePath + this.pdfFileMTime]); }; let _processBinaryCache = function() { - if (_.has(_binBuffer, this.pdfFilePath)) { + if (_.has(_binBuffer, this.pdfFilePath + this.pdfFileMTime)) { _startParsingPDF.call(this); return true; } @@ -72,7 +72,7 @@ let PDFParser = (function () { this.emit("pdfParser_dataError", this); } else { - _binBuffer[this.pdfFilePath] = data; + _binBuffer[this.pdfFilePath + + this.pdfFileMTime] = data; _startParsingPDF.call(this); } }; @@ -99,7 +99,8 @@ let PDFParser = (function () { // service context object, only used in Web Service project; null in command line this.context = context; - this.pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + this.pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + this.pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache this.data = null; //if file read success, data is PDF content; if failed, data is "err" object this.PDFJS = new PDFJS(needRawText); this.processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging @@ -135,6 +136,7 @@ let PDFParser = (function () { nodeUtil.p2jinfo("about to load PDF file " + pdfFilePath); this.pdfFilePath = pdfFilePath; + this.pdfFileMTime = fs.statSync(pdfFilePath).mtimeMs if (this.processFieldInfoXML) { this.PDFJS.tryLoadFieldInfoXML(pdfFilePath); } @@ -169,6 +171,7 @@ let PDFParser = (function () { } this.pdfFilePath = null; + this.pdfFileMTime = null; this.data = null; this.chunks = null; From 9145e4d5232b4cfd9c8a998d94010d41c89a1f42 Mon Sep 17 00:00:00 2001 From: Leonardo Gatica Date: Wed, 30 Jan 2019 23:07:43 -0300 Subject: [PATCH 17/66] fix(deps): update async to v2.6.1 to remove vulnerabilities --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 291424c6..c72227b1 100644 --- a/package.json +++ b/package.json @@ -40,7 +40,7 @@ "xmldom": "^0.1.22", "lodash": "^4.15.0", "optimist": "^0.6.1", - "async": "^2.0.1" + "async": "^2.6.1" }, "devDependencies": { }, From c833ecb20ab58e12579be7975ca394fa83884469 Mon Sep 17 00:00:00 2001 From: drabinowitz Date: Fri, 30 Aug 2019 15:14:15 -0400 Subject: [PATCH 18/66] Update lodash to 4.17.15 --- package.json | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index 291424c6..ca9023cc 100644 --- a/package.json +++ b/package.json @@ -38,12 +38,11 @@ }, "dependencies": { "xmldom": "^0.1.22", - "lodash": "^4.15.0", + "lodash": "^4.17.15", "optimist": "^0.6.1", "async": "^2.0.1" }, - "devDependencies": { - }, + "devDependencies": {}, "bundledDependencies": [ "xmldom", "lodash", From 3bd6a4ba07aa69677bfa49c21d7822fcb3471217 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 2 Nov 2019 17:16:31 +0000 Subject: [PATCH 19/66] build(deps): bump lodash from 4.17.11 to 4.17.13 Bumps [lodash](https://github.com/lodash/lodash) from 4.17.11 to 4.17.13. - [Release notes](https://github.com/lodash/lodash/releases) - [Commits](https://github.com/lodash/lodash/compare/4.17.11...4.17.13) Signed-off-by: dependabot[bot] --- package-lock.json | 6 +++--- package.json | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/package-lock.json b/package-lock.json index f63099cb..0f4e0959 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,9 +13,9 @@ } }, "lodash": { - "version": "4.17.11", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz", - "integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg==" + "version": "4.17.13", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.13.tgz", + "integrity": "sha512-vm3/XWXfWtRua0FkUyEHBZy8kCPjErNBT9fJx8Zvs+U6zjqPbTUOpkaoum3O5uiA8sm+yNMHXfYkTUHFoMxFNA==" }, "minimist": { "version": "0.0.10", diff --git a/package.json b/package.json index 96fb4c40..fce076b8 100644 --- a/package.json +++ b/package.json @@ -38,7 +38,7 @@ }, "dependencies": { "xmldom": "^0.1.22", - "lodash": "^4.17.15", + "lodash": "^4.17.13", "optimist": "^0.6.1", "async": "^2.6.1" }, From 2bc80814af1d90e0f44ee8b8934970028c6b3522 Mon Sep 17 00:00:00 2001 From: Srijan R Shetty Date: Tue, 18 Feb 2020 11:50:33 +0530 Subject: [PATCH 20/66] public API to set password for an encrypted PDF --- lib/pdf.js | 4 ++-- pdfparser.js | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/pdf.js b/lib/pdf.js index d31a7626..d5856a06 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -252,10 +252,10 @@ let PDFJSClass = (function () { }; - cls.prototype.parsePDFData = function(arrayBuffer) { + cls.prototype.parsePDFData = function(arrayBuffer, password) { this.pdfDocument = null; - let parameters = {password: '', data: arrayBuffer}; + let parameters = {password: password, data: arrayBuffer}; PDFJS.getDocument(parameters).then( pdfDocument => this.load(pdfDocument, 1), error => this.raiseErrorEvent("An error occurred while parsing the PDF: " + error) diff --git a/pdfparser.js b/pdfparser.js index e0e84b08..1be396cc 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -15,6 +15,8 @@ let PDFParser = (function () { let _binBuffer = {}; let _maxBinBufferCount = 10; + let _password = ''; + //private methods, needs to invoked by [funcName].call(this, ...) let _onPDFJSParseDataReady = function(data) { if (!data) { //v1.1.2: data===null means end of parsed data @@ -43,7 +45,7 @@ let PDFParser = (function () { this.PDFJS.on("pdfjs_parseDataReady", _onPDFJSParseDataReady.bind(this)); this.PDFJS.on("pdfjs_parseDataError", _onPDFJSParserDataError.bind(this)); - this.PDFJS.parsePDFData(buffer || _binBuffer[this.pdfFilePath]); + this.PDFJS.parsePDFData(buffer || _binBuffer[this.pdfFilePath], _password); }; let _processBinaryCache = function() { @@ -130,6 +132,10 @@ let PDFParser = (function () { nodeUtil.verbosity(verbosity || 0); }; + PdfParser.prototype.setPassword = function(password) { + _password = password; + }; + PdfParser.prototype.loadPDF = function(pdfFilePath, verbosity) { this.setVerbosity(verbosity); nodeUtil.p2jinfo("about to load PDF file " + pdfFilePath); From 465eb53cdbcbf27c727df2ae09d68d422a41ac51 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sun, 15 Mar 2020 10:54:11 -0700 Subject: [PATCH 21/66] MAINT: update depdencies and rebuild with merged PRs --- package-lock.json | 15 ++++++--------- package.json | 4 ++-- readme.md | 7 +++++++ 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0f4e0959..d96195ad 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,12 +5,9 @@ "requires": true, "dependencies": { "async": { - "version": "2.6.1", - "resolved": "https://registry.npmjs.org/async/-/async-2.6.1.tgz", - "integrity": "sha512-fNEiL2+AZt6AlAw/29Cr0UDe4sRAHCpEHh54WMz+Bb7QfNcFw4h3loofyJpLeQs4Yx7yuqu/2dLgM5hKOs6HlQ==", - "requires": { - "lodash": "^4.17.10" - } + "version": "3.2.0", + "resolved": "https://artifact.devsnc.com/content/groups/npm-all/async/-/async-3.2.0.tgz", + "integrity": "sha512-TR2mEZFVOj2pLStYxLht7TyfuRzaydfpxr3k9RpHIzMgw7A64dzsdqCxH1WJyQdoe8T10nDXd9wnEigmiuHIZw==" }, "lodash": { "version": "4.17.13", @@ -37,9 +34,9 @@ "integrity": "sha1-o9XabNXAvAAI03I0u68b7WMFkQc=" }, "xmldom": { - "version": "0.1.27", - "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.1.27.tgz", - "integrity": "sha1-1QH5ezvbQDr4757MIFcxh6rawOk=" + "version": "0.3.0", + "resolved": "https://artifact.devsnc.com/content/groups/npm-all/xmldom/-/xmldom-0.3.0.tgz", + "integrity": "sha512-z9s6k3wxE+aZHgXYxSTpGDo7BYOUfJsIRyoZiX6HTjwpwfS2wpQBQKa2fD+ShLyPkqDYo5ud7KitmLZ2Cd6r0g==" } } } diff --git a/package.json b/package.json index fce076b8..8997a6ae 100644 --- a/package.json +++ b/package.json @@ -37,10 +37,10 @@ "pdf2json": "./bin/pdf2json" }, "dependencies": { - "xmldom": "^0.1.22", + "xmldom": "^0.3.0", "lodash": "^4.17.13", "optimist": "^0.6.1", - "async": "^2.6.1" + "async": "^3.2.0" }, "devDependencies": {}, "bundledDependencies": [ diff --git a/readme.md b/readme.md index a604a932..54d489b3 100644 --- a/readme.md +++ b/readme.md @@ -17,6 +17,13 @@ To update with latest version: To Run in RESTful Web Service or as Commandline Utility * More details can be found at the bottom of this document. +## Test + +>npm i +>npm run test + +Check _./test/target/_ for output JSON and test files. + ## Code Example * Parse a PDF file then write to a JSON file: From 19ceb411951f896d1d4f6ff276588f41ec1af5cc Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sun, 15 Mar 2020 11:26:17 -0700 Subject: [PATCH 22/66] MAINT: update node version --- package-lock.json | 14 +++++++------- package.json | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/package-lock.json b/package-lock.json index d96195ad..fa8e9609 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.1.9", + "version": "1.2.0", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -10,18 +10,18 @@ "integrity": "sha512-TR2mEZFVOj2pLStYxLht7TyfuRzaydfpxr3k9RpHIzMgw7A64dzsdqCxH1WJyQdoe8T10nDXd9wnEigmiuHIZw==" }, "lodash": { - "version": "4.17.13", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.13.tgz", - "integrity": "sha512-vm3/XWXfWtRua0FkUyEHBZy8kCPjErNBT9fJx8Zvs+U6zjqPbTUOpkaoum3O5uiA8sm+yNMHXfYkTUHFoMxFNA==" + "version": "4.17.15", + "resolved": "https://artifact.devsnc.com/content/groups/npm-all/lodash/-/lodash-4.17.15.tgz", + "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==" }, "minimist": { "version": "0.0.10", - "resolved": "http://registry.npmjs.org/minimist/-/minimist-0.0.10.tgz", + "resolved": "https://artifact.devsnc.com/content/groups/npm-all/minimist/-/minimist-0.0.10.tgz", "integrity": "sha1-3j+YVD2/lggr5IrRoMfNqDYwHc8=" }, "optimist": { "version": "0.6.1", - "resolved": "https://registry.npmjs.org/optimist/-/optimist-0.6.1.tgz", + "resolved": "https://artifact.devsnc.com/content/groups/npm-all/optimist/-/optimist-0.6.1.tgz", "integrity": "sha1-2j6nRob6IaGaERwybpDrFaAZZoY=", "requires": { "minimist": "~0.0.1", @@ -30,7 +30,7 @@ }, "wordwrap": { "version": "0.0.3", - "resolved": "https://registry.npmjs.org/wordwrap/-/wordwrap-0.0.3.tgz", + "resolved": "https://artifact.devsnc.com/content/groups/npm-all/wordwrap/-/wordwrap-0.0.3.tgz", "integrity": "sha1-o9XabNXAvAAI03I0u68b7WMFkQc=" }, "xmldom": { diff --git a/package.json b/package.json index 8997a6ae..a51a3444 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.1.9", + "version": "1.2.0", "description": "A PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", "keywords": [ "pdf", @@ -31,7 +31,7 @@ "test-misc": "node pdf2json.js -f ./test/pdf/misc/ -o ./test/target/misc/ -c -m" }, "engines": { - "node": ">=4.5" + "node": ">=10.15.1" }, "bin": { "pdf2json": "./bin/pdf2json" From 417ced2e65cdc1097b813d15d48b19637dd43245 Mon Sep 17 00:00:00 2001 From: Craig Date: Fri, 17 Jul 2020 14:06:35 -0400 Subject: [PATCH 23/66] Handle dropdown options with only a name --- lib/pdffield.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/pdffield.js b/lib/pdffield.js index 528f6af2..ca99981a 100644 --- a/lib/pdffield.js +++ b/lib/pdffield.js @@ -220,6 +220,9 @@ let PDFField = (function PDFFieldClosure() { anData.w -= 0.5; //adjust combobox width anData.PL = {V: [], D: []}; _.each(field.value, function(ele, idx) { + if (!Array.isArray(ele)) { + ele = [ele, ele]; + } anData.PL.D.push(ele[0]); anData.PL.V.push(ele[1]); }); From 2fc926982167687f4a59f35dff43d841cd0e48f6 Mon Sep 17 00:00:00 2001 From: Seth Westphal Date: Mon, 14 Dec 2020 19:09:33 -0600 Subject: [PATCH 24/66] Update dependencies. --- package-lock.json | 12 ++++++------ package.json | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/package-lock.json b/package-lock.json index fa8e9609..c06127c1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,9 +10,9 @@ "integrity": "sha512-TR2mEZFVOj2pLStYxLht7TyfuRzaydfpxr3k9RpHIzMgw7A64dzsdqCxH1WJyQdoe8T10nDXd9wnEigmiuHIZw==" }, "lodash": { - "version": "4.17.15", - "resolved": "https://artifact.devsnc.com/content/groups/npm-all/lodash/-/lodash-4.17.15.tgz", - "integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A==" + "version": "4.17.20", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.20.tgz", + "integrity": "sha512-PlhdFcillOINfeV7Ni6oF1TAEayyZBoZ8bcshTHqOYJYlrqzRK5hagpagky5o4HfCzzd1TRkXPMFq6cKk9rGmA==" }, "minimist": { "version": "0.0.10", @@ -34,9 +34,9 @@ "integrity": "sha1-o9XabNXAvAAI03I0u68b7WMFkQc=" }, "xmldom": { - "version": "0.3.0", - "resolved": "https://artifact.devsnc.com/content/groups/npm-all/xmldom/-/xmldom-0.3.0.tgz", - "integrity": "sha512-z9s6k3wxE+aZHgXYxSTpGDo7BYOUfJsIRyoZiX6HTjwpwfS2wpQBQKa2fD+ShLyPkqDYo5ud7KitmLZ2Cd6r0g==" + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.4.0.tgz", + "integrity": "sha512-2E93k08T30Ugs+34HBSTQLVtpi6mCddaY8uO+pMNk1pqSjV5vElzn4mmh6KLxN3hki8rNcHSYzILoh3TEWORvA==" } } } diff --git a/package.json b/package.json index a51a3444..175042bb 100644 --- a/package.json +++ b/package.json @@ -37,10 +37,10 @@ "pdf2json": "./bin/pdf2json" }, "dependencies": { - "xmldom": "^0.3.0", - "lodash": "^4.17.13", + "async": "^3.2.0", + "lodash": "^4.17.20", "optimist": "^0.6.1", - "async": "^3.2.0" + "xmldom": "^0.4.0" }, "devDependencies": {}, "bundledDependencies": [ From c3af5bbdd682c4bc13e1af6116378a6352b10cb2 Mon Sep 17 00:00:00 2001 From: qlagraula <46192621+qlagraula@users.noreply.github.com> Date: Fri, 18 Dec 2020 14:30:28 +0100 Subject: [PATCH 25/66] Fix annotation bug --- lib/pdfanno.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/pdfanno.js b/lib/pdfanno.js index 6f35dfa7..a5735edf 100644 --- a/lib/pdfanno.js +++ b/lib/pdfanno.js @@ -44,7 +44,9 @@ let PDFAnno = (function PDFAnnoClosure() { function setupPushButton(annotation, item) { //button label: PDF Spec p.640 let mk = annotation.get('MK'); - item.value = mk.get('CA') || ''; + if(mk) { + item.value = mk.get('CA') || ''; + } //button action: url when mouse up: PDF Spec:p.642 item.FL = ""; From 3b2915fc99a15020e0001c3b0c806e5c40b5ee89 Mon Sep 17 00:00:00 2001 From: Craig McNeill Date: Thu, 28 Jan 2021 10:42:56 -0500 Subject: [PATCH 26/66] Changed how dropdown option with only a name is added to PL --- lib/pdffield.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/pdffield.js b/lib/pdffield.js index ca99981a..2ca90c28 100644 --- a/lib/pdffield.js +++ b/lib/pdffield.js @@ -220,11 +220,13 @@ let PDFField = (function PDFFieldClosure() { anData.w -= 0.5; //adjust combobox width anData.PL = {V: [], D: []}; _.each(field.value, function(ele, idx) { - if (!Array.isArray(ele)) { - ele = [ele, ele]; + if (Array.isArray(ele)) { + anData.PL.D.push(ele[0]); + anData.PL.V.push(ele[1]); + } else { + anData.PL.D.push(ele); + anData.PL.V.push(ele); } - anData.PL.D.push(ele[0]); - anData.PL.V.push(ele[1]); }); // add field value to the object From b07268381f9221a9697b62633941bfd957ca667e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 13 Mar 2021 02:14:18 +0000 Subject: [PATCH 27/66] build(deps): bump xmldom from 0.4.0 to 0.5.0 Bumps [xmldom](https://github.com/xmldom/xmldom) from 0.4.0 to 0.5.0. - [Release notes](https://github.com/xmldom/xmldom/releases) - [Changelog](https://github.com/xmldom/xmldom/blob/master/CHANGELOG.md) - [Commits](https://github.com/xmldom/xmldom/compare/0.4.0...0.5.0) Signed-off-by: dependabot[bot] --- package-lock.json | 6 +++--- package.json | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/package-lock.json b/package-lock.json index c06127c1..030ae295 100644 --- a/package-lock.json +++ b/package-lock.json @@ -34,9 +34,9 @@ "integrity": "sha1-o9XabNXAvAAI03I0u68b7WMFkQc=" }, "xmldom": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.4.0.tgz", - "integrity": "sha512-2E93k08T30Ugs+34HBSTQLVtpi6mCddaY8uO+pMNk1pqSjV5vElzn4mmh6KLxN3hki8rNcHSYzILoh3TEWORvA==" + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.5.0.tgz", + "integrity": "sha512-Foaj5FXVzgn7xFzsKeNIde9g6aFBxTPi37iwsno8QvApmtg7KYrr+OPyRHcJF7dud2a5nGRBXK3n0dL62Gf7PA==" } } } diff --git a/package.json b/package.json index 175042bb..eb6c517b 100644 --- a/package.json +++ b/package.json @@ -40,7 +40,7 @@ "async": "^3.2.0", "lodash": "^4.17.20", "optimist": "^0.6.1", - "xmldom": "^0.4.0" + "xmldom": "^0.5.0" }, "devDependencies": {}, "bundledDependencies": [ From 256ef14c5d8a801c49e0c0c9e4acc9fa396421ac Mon Sep 17 00:00:00 2001 From: Alexander Wunschik Date: Tue, 6 Apr 2021 13:57:59 +0200 Subject: [PATCH 28/66] fix license type to be valid spdx identifier --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index eb6c517b..de059cf5 100644 --- a/package.json +++ b/package.json @@ -62,7 +62,7 @@ }, "licenses": [ { - "type": "Apache v2", + "type": " Apache-2.0", "url": "https://github.com/modesty/pdf2json/blob/master/license.txt" } ], From 995bc50019e8b7fbbf5641a22f2eb1fa23999d52 Mon Sep 17 00:00:00 2001 From: Simon Varney Date: Tue, 20 Apr 2021 11:22:53 +0200 Subject: [PATCH 29/66] Replace optimist with yargs --- lib/p2jcmd.js | 8 +-- package-lock.json | 122 ++++++++++++++++++++++++++++++++++++++++------ package.json | 4 +- 3 files changed, 114 insertions(+), 20 deletions(-) diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index 065c6453..703478ed 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -11,7 +11,7 @@ let nodeUtil = require("util"), const _PRO_TIMER = `${pkInfo.name}@${pkInfo.version} [${pkInfo.homepage}]`; -let optimist = require('optimist') +let yargs = require('yargs') .usage("\n" + _PRO_TIMER + "\n\nUsage: $0 -f|--file [-o|output_dir]") .alias('v', 'version') .describe('v', 'Display version.\n') @@ -32,7 +32,7 @@ let optimist = require('optimist') .alias('r', 'stream') .describe('r', '(optional) when specified, will process and parse with buffer/object transform stream rather than file system (Experimental).\n'); -const argv = optimist.argv; +const argv = yargs.argv; const VERBOSITY_LEVEL = (_.has(argv, 's') ? 0 : 5); const PROCESS_RAW_TEXT_CONTENT = _.has(argv, 'c'); @@ -265,11 +265,11 @@ let PDFProcessor = (function () { retVal = false; } else if (_.has(argv, 'h')) { - optimist.showHelp(); + yargs.showHelp(); retVal = false; } else if (!_.has(argv, 'f')) { - optimist.showHelp(); + yargs.showHelp(); console.log("-f is required to specify input directory or file."); retVal = false; } diff --git a/package-lock.json b/package-lock.json index 030ae295..563fb602 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4,39 +4,133 @@ "lockfileVersion": 1, "requires": true, "dependencies": { + "ansi-regex": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", + "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==" + }, + "ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "requires": { + "color-convert": "^2.0.1" + } + }, "async": { "version": "3.2.0", "resolved": "https://artifact.devsnc.com/content/groups/npm-all/async/-/async-3.2.0.tgz", "integrity": "sha512-TR2mEZFVOj2pLStYxLht7TyfuRzaydfpxr3k9RpHIzMgw7A64dzsdqCxH1WJyQdoe8T10nDXd9wnEigmiuHIZw==" }, + "cliui": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", + "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "requires": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.0", + "wrap-ansi": "^7.0.0" + } + }, + "color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "requires": { + "color-name": "~1.1.4" + } + }, + "color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + }, + "emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + }, + "escalade": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", + "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==" + }, + "get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==" + }, + "is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==" + }, "lodash": { "version": "4.17.20", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.20.tgz", "integrity": "sha512-PlhdFcillOINfeV7Ni6oF1TAEayyZBoZ8bcshTHqOYJYlrqzRK5hagpagky5o4HfCzzd1TRkXPMFq6cKk9rGmA==" }, - "minimist": { - "version": "0.0.10", - "resolved": "https://artifact.devsnc.com/content/groups/npm-all/minimist/-/minimist-0.0.10.tgz", - "integrity": "sha1-3j+YVD2/lggr5IrRoMfNqDYwHc8=" + "require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=" + }, + "string-width": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", + "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", + "requires": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.0" + } }, - "optimist": { - "version": "0.6.1", - "resolved": "https://artifact.devsnc.com/content/groups/npm-all/optimist/-/optimist-0.6.1.tgz", - "integrity": "sha1-2j6nRob6IaGaERwybpDrFaAZZoY=", + "strip-ansi": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", + "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", "requires": { - "minimist": "~0.0.1", - "wordwrap": "~0.0.2" + "ansi-regex": "^5.0.0" } }, - "wordwrap": { - "version": "0.0.3", - "resolved": "https://artifact.devsnc.com/content/groups/npm-all/wordwrap/-/wordwrap-0.0.3.tgz", - "integrity": "sha1-o9XabNXAvAAI03I0u68b7WMFkQc=" + "wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "requires": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + } }, "xmldom": { "version": "0.5.0", "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.5.0.tgz", "integrity": "sha512-Foaj5FXVzgn7xFzsKeNIde9g6aFBxTPi37iwsno8QvApmtg7KYrr+OPyRHcJF7dud2a5nGRBXK3n0dL62Gf7PA==" + }, + "y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==" + }, + "yargs": { + "version": "16.2.0", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", + "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "requires": { + "cliui": "^7.0.2", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.0", + "y18n": "^5.0.5", + "yargs-parser": "^20.2.2" + } + }, + "yargs-parser": { + "version": "20.2.7", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.7.tgz", + "integrity": "sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==" } } } diff --git a/package.json b/package.json index eb6c517b..b0dd430c 100644 --- a/package.json +++ b/package.json @@ -39,8 +39,8 @@ "dependencies": { "async": "^3.2.0", "lodash": "^4.17.20", - "optimist": "^0.6.1", - "xmldom": "^0.5.0" + "xmldom": "^0.5.0", + "yargs": "^16.2.0" }, "devDependencies": {}, "bundledDependencies": [ From 9c815f942c43076a144a4f84ab4e7d453e15d96e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 May 2021 11:37:36 +0000 Subject: [PATCH 30/66] build(deps): bump lodash from 4.17.20 to 4.17.21 Bumps [lodash](https://github.com/lodash/lodash) from 4.17.20 to 4.17.21. - [Release notes](https://github.com/lodash/lodash/releases) - [Commits](https://github.com/lodash/lodash/compare/4.17.20...4.17.21) Signed-off-by: dependabot[bot] --- package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 030ae295..8de8d4ee 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,9 +10,9 @@ "integrity": "sha512-TR2mEZFVOj2pLStYxLht7TyfuRzaydfpxr3k9RpHIzMgw7A64dzsdqCxH1WJyQdoe8T10nDXd9wnEigmiuHIZw==" }, "lodash": { - "version": "4.17.20", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.20.tgz", - "integrity": "sha512-PlhdFcillOINfeV7Ni6oF1TAEayyZBoZ8bcshTHqOYJYlrqzRK5hagpagky5o4HfCzzd1TRkXPMFq6cKk9rGmA==" + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" }, "minimist": { "version": "0.0.10", From 57873776ec046d53567c9afea3e97137b10b0a67 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Tue, 8 Jun 2021 12:28:53 -0700 Subject: [PATCH 31/66] update package.json, prep for release --- .gitignore | 2 ++ package.json | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 24aeaffc..0b361641 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ node_modules/ target/ .idea +.npmrc + diff --git a/package.json b/package.json index c72aed93..77326bc6 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "pdf2json", - "version": "1.2.0", - "description": "A PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", + "version": "1.2.1", + "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", "keywords": [ "pdf", "pdf parser", From b0285525d418dd9cde9625ee9c37ee6df364ab13 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Tue, 8 Jun 2021 12:38:07 -0700 Subject: [PATCH 32/66] release v1.2.2 --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 77326bc6..ec31eaa6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.2.1", + "version": "1.2.2", "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", "keywords": [ "pdf", @@ -62,7 +62,7 @@ }, "licenses": [ { - "type": " Apache-2.0", + "type": "Apache-2.0", "url": "https://github.com/modesty/pdf2json/blob/master/license.txt" } ], From 885d07365ef440f72f0a5700e1549ee0c6b36632 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 12 Jun 2021 12:03:58 -0700 Subject: [PATCH 33/66] dependency updates --- package-lock.json | 14 +++++++------- package.json | 22 +++++++++------------- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/package-lock.json b/package-lock.json index f067951b..5ac106de 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.2.0", + "version": "1.2.2", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -104,9 +104,9 @@ } }, "xmldom": { - "version": "0.5.0", - "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.5.0.tgz", - "integrity": "sha512-Foaj5FXVzgn7xFzsKeNIde9g6aFBxTPi37iwsno8QvApmtg7KYrr+OPyRHcJF7dud2a5nGRBXK3n0dL62Gf7PA==" + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.6.0.tgz", + "integrity": "sha512-iAcin401y58LckRZ0TkI4k0VSM1Qg0KGSc3i8rU+xrxe19A/BN1zHyVSJY7uoutVlaTSzYyk/v5AmkewAP7jtg==" }, "y18n": { "version": "5.0.8", @@ -114,9 +114,9 @@ "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==" }, "yargs": { - "version": "16.2.0", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz", - "integrity": "sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==", + "version": "17.0.1", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.0.1.tgz", + "integrity": "sha512-xBBulfCc8Y6gLFcrPvtqKz9hz8SO0l1Ni8GgDekvBX2ro0HRQImDGnikfc33cgzcYUSncapnNcZDjVFIH3f6KQ==", "requires": { "cliui": "^7.0.2", "escalade": "^3.1.1", diff --git a/package.json b/package.json index ec31eaa6..7a86e425 100644 --- a/package.json +++ b/package.json @@ -31,23 +31,23 @@ "test-misc": "node pdf2json.js -f ./test/pdf/misc/ -o ./test/target/misc/ -c -m" }, "engines": { - "node": ">=10.15.1" + "node": ">=12.20.0" }, "bin": { "pdf2json": "./bin/pdf2json" }, "dependencies": { "async": "^3.2.0", - "lodash": "^4.17.20", - "xmldom": "^0.5.0", - "yargs": "^16.2.0" + "lodash": "^4.17.21", + "xmldom": "^0.6.0", + "yargs": "^17.0.1" }, "devDependencies": {}, "bundledDependencies": [ - "xmldom", + "async", "lodash", - "optimist", - "async" + "xmldom", + "yargs" ], "maintainers": [ { @@ -60,11 +60,7 @@ "bugs": { "url": "http://github.com/modesty/pdf2json/issues" }, - "licenses": [ - { - "type": "Apache-2.0", - "url": "https://github.com/modesty/pdf2json/blob/master/license.txt" - } - ], + "private": false, + "license": "Apache-2.0", "readme": "https://github.com/modesty/pdf2json/blob/master/readme.md" } From 6a2d89409fce2560347d1ef6d222c1ee1624fbc8 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 12 Jun 2021 12:24:18 -0700 Subject: [PATCH 34/66] publish v1.2.3 --- package-lock.json | 2 +- package.json | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 5ac106de..86aa9907 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.2.2", + "version": "1.2.3", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/package.json b/package.json index 7a86e425..59b74069 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.2.2", + "version": "1.2.3", "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", "keywords": [ "pdf", @@ -60,7 +60,6 @@ "bugs": { "url": "http://github.com/modesty/pdf2json/issues" }, - "private": false, "license": "Apache-2.0", "readme": "https://github.com/modesty/pdf2json/blob/master/readme.md" } From 909bbb34ecb927ea9dda28d69e1a34de90f1283c Mon Sep 17 00:00:00 2001 From: dumdam <58562199+dumdam@users.noreply.github.com> Date: Thu, 15 Jul 2021 14:27:03 +0530 Subject: [PATCH 35/66] Fix "Callback must be a function" error Fixing "TypeError [ERR_INVALID_CALLBACK]: Callback must be a function. Received undefined". Issues: #169 and #191 --- readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md index 54d489b3..8c94fc21 100644 --- a/readme.md +++ b/readme.md @@ -62,7 +62,7 @@ Or, call directly with buffer: pdfParser.on("pdfParser_dataError", errData => console.error(errData.parserError) ); pdfParser.on("pdfParser_dataReady", pdfData => { - fs.writeFile("./pdf2json/test/F1040EZ.content.txt", pdfParser.getRawTextContent()); + fs.writeFile("./pdf2json/test/F1040EZ.content.txt", pdfParser.getRawTextContent(), ()=>{console.log("Done.");}); }); pdfParser.loadPDF("./pdf2json/test/pdf/fd/form/F1040EZ.pdf"); @@ -78,7 +78,7 @@ Or, call directly with buffer: pdfParser.on("pdfParser_dataError", errData => console.error(errData.parserError) ); pdfParser.on("pdfParser_dataReady", pdfData => { - fs.writeFile("./pdf2json/test/F1040EZ.fields.json", JSON.stringify(pdfParser.getAllFieldsTypes())); + fs.writeFile("./pdf2json/test/F1040EZ.fields.json", JSON.stringify(pdfParser.getAllFieldsTypes()), ()=>{console.log("Done.");}); }); pdfParser.loadPDF("./pdf2json/test/pdf/fd/form/F1040EZ.pdf"); From a857928d109f25fabbdd8b6925a716aad35698c7 Mon Sep 17 00:00:00 2001 From: Craig Date: Fri, 13 Nov 2020 18:28:59 -0500 Subject: [PATCH 36/66] Fixed infinite loop with paintFormXObjectEnd --- base/display/canvas.js | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/base/display/canvas.js b/base/display/canvas.js index 25dcee41..9089d7b4 100755 --- a/base/display/canvas.js +++ b/base/display/canvas.js @@ -365,7 +365,6 @@ var CanvasExtraState = (function CanvasExtraStateClosure() { this.fillAlpha = 1; this.strokeAlpha = 1; this.lineWidth = 1; - this.paintFormXObjectDepth = 0; this.old = old; } @@ -1516,7 +1515,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { paintFormXObjectBegin: function CanvasGraphics_paintFormXObjectBegin(matrix, bbox) { this.save(); - this.current.paintFormXObjectDepth++; this.baseTransformStack.push(this.baseTransform); if (matrix && isArray(matrix) && 6 == matrix.length) @@ -1534,12 +1532,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { }, paintFormXObjectEnd: function CanvasGraphics_paintFormXObjectEnd() { - var depth = this.current.paintFormXObjectDepth; - do { - this.restore(); - // some pdf don't close all restores inside object - // closing those for them - } while (this.current.paintFormXObjectDepth >= depth); + this.restore(); this.baseTransform = this.baseTransformStack.pop(); }, From 368f12b5c5c1068690fdeb4d5c901ef5180faf16 Mon Sep 17 00:00:00 2001 From: Craig McNeill Date: Wed, 28 Jul 2021 11:29:38 -0400 Subject: [PATCH 37/66] Warn instead of error when name token longer than spec --- base/core/parser.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/base/core/parser.js b/base/core/parser.js index bb2f1783..06f3e3e0 100755 --- a/base/core/parser.js +++ b/base/core/parser.js @@ -534,9 +534,8 @@ var Lexer = (function LexerClosure() { str += String.fromCharCode(ch); } } - if (str.length > 128) { - error('Warning: name token is longer than allowed by the spec: ' + - str.length); + if (str.length > 127) { + warn('Name token is longer than allowed by the spec: ' + str.length); } return new Name(str); }, From 7cb6ad6fc287f9b3399765dd8771e82548efdc15 Mon Sep 17 00:00:00 2001 From: Craig McNeill Date: Thu, 29 Jul 2021 09:58:16 -0400 Subject: [PATCH 38/66] Safer paintFormXObjectEnd --- base/display/canvas.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/base/display/canvas.js b/base/display/canvas.js index 9089d7b4..59e7cab3 100755 --- a/base/display/canvas.js +++ b/base/display/canvas.js @@ -365,6 +365,7 @@ var CanvasExtraState = (function CanvasExtraStateClosure() { this.fillAlpha = 1; this.strokeAlpha = 1; this.lineWidth = 1; + this.paintFormXObjectDepth = 0; this.old = old; } @@ -1515,6 +1516,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { paintFormXObjectBegin: function CanvasGraphics_paintFormXObjectBegin(matrix, bbox) { this.save(); + this.current.paintFormXObjectDepth++; this.baseTransformStack.push(this.baseTransform); if (matrix && isArray(matrix) && 6 == matrix.length) @@ -1532,7 +1534,13 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { }, paintFormXObjectEnd: function CanvasGraphics_paintFormXObjectEnd() { - this.restore(); + var depth = this.current.paintFormXObjectDepth; + do { + this.restore(); + this.current.paintFormXObjectDepth--; + // some pdf don't close all restores inside object + // closing those for them + } while (this.current.paintFormXObjectDepth >= depth); this.baseTransform = this.baseTransformStack.pop(); }, From 75a3f0eac00750426c92d87bc960f2355028e740 Mon Sep 17 00:00:00 2001 From: h3ku Date: Fri, 20 Aug 2021 13:49:48 +0200 Subject: [PATCH 39/66] Update xmldom to 0.7.1 --- lib/pdf.js | 2 +- lib/ptixmlinject.js | 2 +- package-lock.json | 10 +++++----- package.json | 4 ++-- readme.md | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/pdf.js b/lib/pdf.js index d5856a06..e213b178 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -4,7 +4,7 @@ let nodeUtil = require("util"), nodeEvents = require("events"), fs = require('fs'), _ = require('lodash'), - DOMParser = require('xmldom').DOMParser, + DOMParser = require('@xmldom/xmldom').DOMParser, PDFCanvas = require('./pdfcanvas.js'), PDFUnit = require('./pdfunit.js'), PDFField = require('./pdffield.js'), diff --git a/lib/ptixmlinject.js b/lib/ptixmlinject.js index e2977f96..6ef87fdf 100644 --- a/lib/ptixmlinject.js +++ b/lib/ptixmlinject.js @@ -4,7 +4,7 @@ var nodeUtil = require("util"), nodeEvents = require("events"), fs = require('fs'), _ = require('lodash'), -DOMParser = require('xmldom').DOMParser, +DOMParser = require('@xmldom/xmldom').DOMParser, PDFCanvas = require('./pdfcanvas.js'), PDFUnit = require('./pdfunit.js'), PDFField = require('./pdffield.js'), diff --git a/package-lock.json b/package-lock.json index 86aa9907..acb6cca9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4,6 +4,11 @@ "lockfileVersion": 1, "requires": true, "dependencies": { + "@xmldom/xmldom": { + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.7.1.tgz", + "integrity": "sha512-EOzJBMOjJ657nmlTt5RsyEwJrMTMu0aX15pI96GmpyFPj33a9J4mkcEk0KqYGplqInQ6JsPUxv/R25jR+I5ADA==" + }, "ansi-regex": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", @@ -103,11 +108,6 @@ "strip-ansi": "^6.0.0" } }, - "xmldom": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.6.0.tgz", - "integrity": "sha512-iAcin401y58LckRZ0TkI4k0VSM1Qg0KGSc3i8rU+xrxe19A/BN1zHyVSJY7uoutVlaTSzYyk/v5AmkewAP7jtg==" - }, "y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", diff --git a/package.json b/package.json index 59b74069..6ebe9942 100644 --- a/package.json +++ b/package.json @@ -39,14 +39,14 @@ "dependencies": { "async": "^3.2.0", "lodash": "^4.17.21", - "xmldom": "^0.6.0", + "@xmldom/xmldom": "^0.7.0", "yargs": "^17.0.1" }, "devDependencies": {}, "bundledDependencies": [ "async", "lodash", - "xmldom", + "@xmldom/xmldom", "yargs" ], "maintainers": [ diff --git a/readme.md b/readme.md index 8c94fc21..502f46da 100644 --- a/readme.md +++ b/readme.md @@ -610,7 +610,7 @@ In order to run pdf.js in Node.js, we have to address those dependencies and als * pdf.js' global objects (like PDFJS and globalScope) need to be wrapped in a node module's scope * API Dependencies * XHR Level 2: I don't need XMLHttpRequest to load PDF asynchronously in node.js, so replaced it with node's fs (File System) to load PDF file based on request parameters; - * DOMParser: pdf.js instantiates DOMParser to parse XML based PDF meta data, I used xmldom node module to replace this browser JS library dependency. xmldom can be found at https://github.com/jindw/xmldom; + * DOMParser: pdf.js instantiates DOMParser to parse XML based PDF meta data, I used xmldom node module to replace this browser JS library dependency. xmldom can be found at https://github.com/xmldom/xmldom; * Web Worker: pdf.js has "fake worker" code built in, not much works need to be done, only need to stay aware the parsing would occur in the same thread, not in background worker thread; * Canvas: in order to keep pdf.js code intact as much as possible, I decided to create a HTML5 Canvas API implementation in a node module. It's named as 'PDFCanvas' and has the same API as HTML5 Canvas does, so no change in pdf.js' canvas.js file, we just need to replace the browser's Canvas API with PDFCanvas. This way, when 2D context API invoked, PDFCanvas just write it to a JS object based on the json format above, rather than drawing graphics on html5 canvas; * Extend/Modify pdf.js From 08aa555d72bf90d0a690f269ce23475cf8edb88e Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 4 Sep 2021 13:11:51 -0700 Subject: [PATCH 40/66] publish @1.2.4 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 6ebe9942..f0f237c7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.2.3", + "version": "1.2.4", "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", "keywords": [ "pdf", From fa5105d124de3a81778a431109b0860155b94f5a Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sun, 19 Sep 2021 14:18:27 -0700 Subject: [PATCH 41/66] fix issue #243, also add test PDF for best error handling --- base/core/obj.js | 1 + lib/p2jcmd.js | 39 ++-- package-lock.json | 243 ++++++++++++++++++++++- package.json | 10 +- test/pdf/misc/i243_problem_file_anon.pdf | Bin 0 -> 4280 bytes 5 files changed, 256 insertions(+), 37 deletions(-) create mode 100644 test/pdf/misc/i243_problem_file_anon.pdf diff --git a/base/core/obj.js b/base/core/obj.js index 171ea100..15951e92 100755 --- a/base/core/obj.js +++ b/base/core/obj.js @@ -1001,6 +1001,7 @@ var XRef = (function XRefClosure() { throw e; } log('(while reading XRef): ' + e); + error(e); } if (recoveryMode) diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index 703478ed..a4a42992 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -168,7 +168,7 @@ let PDF2JSONUtil = (function () { // public, this instance copies this.inputDir = path.normalize(inputDir); this.inputFile = inputFile; - this.inputPath = this.inputDir + path.sep + this.inputFile; + this.inputPath = path.join(this.inputDir, this.inputFile); this.outputDir = path.normalize(argv.o || inputDir); this.outputFile = null; @@ -305,10 +305,9 @@ let PDFProcessor = (function () { } }; - cls.prototype.complete = function(err) { - let statusMsg = "\n%d input files\t%d success\t%d fail\t%d warning."; - console.log(statusMsg, this.inputCount, this.successCount, this.failedCount, this.warningCount); - + cls.prototype.complete = function(statusMsg) { + console.log(`\n${this.inputCount} input files\t${this.successCount} success\t${this.failedCount} fail\t${this.warningCount} warning.`); + console.log(statusMsg); process.nextTick( () => { console.timeEnd(_PRO_TIMER); //let exitCode = (this.inputCount === this.successCount) ? 0 : 1; @@ -326,28 +325,26 @@ let PDFProcessor = (function () { }; cls.prototype.processFiles = function(inputDir, files) { - let fId = 0; + let fId = 0; + this.processStatusMsg = []; this.p2j = new PDF2JSONUtil(inputDir, files[fId], this); this.p2j.processFile( function processPDFFile(err) { - if (err) { - this.complete(err); + this.processStatusMsg.push(err ? `✗ ${err} - ${files[fId]}` : `✓ Parse Success - ${files[fId]}`); + + fId++; + if (fId >= this.inputCount) { + this.complete(this.processStatusMsg); } else { - fId++; - if (fId >= this.inputCount) { - this.complete(null); + if (this.p2j) { + this.p2j.destroy(); + this.p2j = null; } - else { - if (this.p2j) { - this.p2j.destroy(); - this.p2j = null; - } - this.p2j = new PDF2JSONUtil(inputDir, files[fId], this); - this.p2j.processFile(processPDFFile.bind(this)); - } - } - }.bind(this)); + this.p2j = new PDF2JSONUtil(inputDir, files[fId], this); + this.p2j.processFile(processPDFFile.bind(this)); + } + }.bind(this) ); }; cls.prototype.processOneDirectory = function () { diff --git a/package-lock.json b/package-lock.json index acb6cca9..c06948ef 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,13 +1,234 @@ { "name": "pdf2json", - "version": "1.2.3", - "lockfileVersion": 1, + "version": "1.2.4", + "lockfileVersion": 2, "requires": true, + "packages": { + "": { + "name": "pdf2json", + "version": "1.2.4", + "bundleDependencies": [ + "async", + "lodash", + "@xmldom/xmldom", + "yargs" + ], + "license": "Apache-2.0", + "dependencies": { + "@xmldom/xmldom": "^0.7.5", + "async": "^3.2.1", + "lodash": "^4.17.21", + "yargs": "^17.1.1" + }, + "bin": { + "pdf2json": "bin/pdf2json" + }, + "devDependencies": {}, + "engines": { + "node": ">=12.20.0" + } + }, + "node_modules/@xmldom/xmldom": { + "version": "0.7.5", + "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.7.5.tgz", + "integrity": "sha512-V3BIhmY36fXZ1OtVcI9W+FxQqxVLsPKcNjWigIaa81dLC9IolJl5Mt4Cvhmr0flUnjSpTdrbMTSbXqYqV5dT6A==", + "inBundle": true, + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/ansi-regex": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", + "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", + "inBundle": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "inBundle": true, + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/async": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.1.tgz", + "integrity": "sha512-XdD5lRO/87udXCMC9meWdYiR+Nq6ZjUfXidViUZGu2F1MO4T3XwZ1et0hb2++BgLfhyJwy44BGB/yx80ABx8hg==", + "inBundle": true + }, + "node_modules/cliui": { + "version": "7.0.4", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", + "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", + "inBundle": true, + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.0", + "wrap-ansi": "^7.0.0" + } + }, + "node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "inBundle": true, + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", + "inBundle": true + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "inBundle": true + }, + "node_modules/escalade": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", + "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "inBundle": true, + "engines": { + "node": ">=6" + } + }, + "node_modules/get-caller-file": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", + "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", + "inBundle": true, + "engines": { + "node": "6.* || 8.* || >= 10.*" + } + }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "inBundle": true, + "engines": { + "node": ">=8" + } + }, + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", + "inBundle": true + }, + "node_modules/require-directory": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=", + "inBundle": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/string-width": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", + "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", + "inBundle": true, + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", + "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", + "inBundle": true, + "dependencies": { + "ansi-regex": "^5.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "inBundle": true, + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/y18n": { + "version": "5.0.8", + "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", + "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", + "inBundle": true, + "engines": { + "node": ">=10" + } + }, + "node_modules/yargs": { + "version": "17.1.1", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.1.1.tgz", + "integrity": "sha512-c2k48R0PwKIqKhPMWjeiF6y2xY/gPMUlro0sgxqXpbOIohWiLNXWslsootttv7E1e73QPAMQSg5FeySbVcpsPQ==", + "inBundle": true, + "dependencies": { + "cliui": "^7.0.2", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.0", + "y18n": "^5.0.5", + "yargs-parser": "^20.2.2" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/yargs-parser": { + "version": "20.2.7", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.7.tgz", + "integrity": "sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==", + "inBundle": true, + "engines": { + "node": ">=10" + } + } + }, "dependencies": { "@xmldom/xmldom": { - "version": "0.7.1", - "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.7.1.tgz", - "integrity": "sha512-EOzJBMOjJ657nmlTt5RsyEwJrMTMu0aX15pI96GmpyFPj33a9J4mkcEk0KqYGplqInQ6JsPUxv/R25jR+I5ADA==" + "version": "0.7.5", + "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.7.5.tgz", + "integrity": "sha512-V3BIhmY36fXZ1OtVcI9W+FxQqxVLsPKcNjWigIaa81dLC9IolJl5Mt4Cvhmr0flUnjSpTdrbMTSbXqYqV5dT6A==" }, "ansi-regex": { "version": "5.0.0", @@ -23,9 +244,9 @@ } }, "async": { - "version": "3.2.0", - "resolved": "https://artifact.devsnc.com/content/groups/npm-all/async/-/async-3.2.0.tgz", - "integrity": "sha512-TR2mEZFVOj2pLStYxLht7TyfuRzaydfpxr3k9RpHIzMgw7A64dzsdqCxH1WJyQdoe8T10nDXd9wnEigmiuHIZw==" + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/async/-/async-3.2.1.tgz", + "integrity": "sha512-XdD5lRO/87udXCMC9meWdYiR+Nq6ZjUfXidViUZGu2F1MO4T3XwZ1et0hb2++BgLfhyJwy44BGB/yx80ABx8hg==" }, "cliui": { "version": "7.0.4", @@ -114,9 +335,9 @@ "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==" }, "yargs": { - "version": "17.0.1", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.0.1.tgz", - "integrity": "sha512-xBBulfCc8Y6gLFcrPvtqKz9hz8SO0l1Ni8GgDekvBX2ro0HRQImDGnikfc33cgzcYUSncapnNcZDjVFIH3f6KQ==", + "version": "17.1.1", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.1.1.tgz", + "integrity": "sha512-c2k48R0PwKIqKhPMWjeiF6y2xY/gPMUlro0sgxqXpbOIohWiLNXWslsootttv7E1e73QPAMQSg5FeySbVcpsPQ==", "requires": { "cliui": "^7.0.2", "escalade": "^3.1.1", diff --git a/package.json b/package.json index f0f237c7..f9343d65 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.2.4", + "version": "1.2.5", "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", "keywords": [ "pdf", @@ -28,7 +28,7 @@ "main": "./pdfparser.js", "scripts": { "test": "cd ./test && sh p2j.forms.sh", - "test-misc": "node pdf2json.js -f ./test/pdf/misc/ -o ./test/target/misc/ -c -m" + "test-misc": "node pdf2json.js -f ./test/pdf/misc/ -o ./test/target/misc/ -c -m && echo \"\nExpected: 3 success, 2 failure \" " }, "engines": { "node": ">=12.20.0" @@ -37,10 +37,10 @@ "pdf2json": "./bin/pdf2json" }, "dependencies": { - "async": "^3.2.0", + "async": "^3.2.1", "lodash": "^4.17.21", - "@xmldom/xmldom": "^0.7.0", - "yargs": "^17.0.1" + "@xmldom/xmldom": "^0.7.5", + "yargs": "^17.1.1" }, "devDependencies": {}, "bundledDependencies": [ diff --git a/test/pdf/misc/i243_problem_file_anon.pdf b/test/pdf/misc/i243_problem_file_anon.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d4652dc303a8451ac648ccfbd757b6e516f37ae1 GIT binary patch literal 4280 zcmZ{o*A}YUl7{bj3IhfVBtcM22#Sa(A_hc2K~M~Upl6)!i+=WYt)0$ZyPFFK{M7ub zW{oM6kAlLF>ljP`SO(2R&JE~v0EA1t(>qEs!}n&L7wNGfn1OO6Hsd|MHUWNG*6j zzMkJR_Vd2}Rm>k{KtQhtc;Wd+&E%i8BQr0&ysw$&S5X0E96a9N1O~#cI8mfY-gBd~ zmqEslZ^6G^m_K~d_jkrmKyY#h&vQ0*0!kcSFn;j=5hkG9F4k6;SFUAfB`p|M|?rmvl~}-6m_%vr zLLm&0_zE~7#TYL?@RHxt`{|HNHj2>|<;Wc)?)WfBp$E|lMbWViHL_n!SVP}6&a{}K zqB{EARPYX(HxZ!#`!p$m)AP{qZ`1Xl^n}L6^7)hXB9~2wmk|jntI#+XY1xXg%WgKy zHWlKh&8un|FRQk=^?YYnOS0*(X8~I%-S)Eg)cQ=9%f9NPJc6;;>~P%~7vwI&5YM8n z^oKcpR)jnLeRZ>}XIC2w4|-4K;KO9*gm9~AdW^FImT$wU6bMFi7}dHNud*e}QJ z@YpdbDC6KC56BKp(Hm$7?XY?a8?e=SnCGHHtt;~dJbc$?JMmq&r9OA)d8?<|{*0h$ zdv@CuZh7q~O*yxCniEu`{ejs&&?&K0jj0~7^TD%E!y@l4y6SM;GpSU7#i?&~m+NCB zbPiK{<=HdWN!)x)N<`r>>|KrsjihkG?=Y_kuysc%Np(qE_@cOC zaJE?6dRkmJ1l8f|O?4mLDwVE!h3lI#*PlMT@zLb#4DytN92^#>S!@@t&1WDpO`!SA z!;|jt)xEAeF(zn3XI+eEw4<2IbXw}m&icJYs@wZ)KMUK9La)BoUv|FN30e_T$zu}| z)rFDY6+B4;#D2TvjiS-!1g`AHaOAyg`iZ^`j@#1hc6pr@M#3(bi33ikR;HVC52_gG zv=X5(bs#}v10QOKEnU7D9tVt+vLr7jQw6u4p(zic#%NU_5)wuWYho8s+&s>vhO)ji zX{)KtS>?=BSOUzdO{=%<%%sAAy*F8QbUM5NZ`B?bnF8;PC``IU{)$y+6c=B0>S*;F ze6A|kRC8t3F6E(iy52CWvr4bb4pC+s)VH=a(&LSI>c{Fh=j*9qHXHa0AQdCezwYECu=&76{eI>XT@# zUf{twK<2XaM5|A#?eI}ZT;6>2@yNA`x#hIS*IMqa{jBYeO0K0E?>jI1^@23ZaZg<1 zPpDU*k9zSfeg>@)mGD5*)s}(FpEY^YY(+Ur9bG;f2IAVwxjvPH9=Jcahsb_oQuIb= zr~Yy?PJKr3)mr{WuNQC(o&_B2$Vs>dh|6+D8vPpl+Q<@@T!-Zbr>+?fpLUWatmtF3 zH9yU$QU0mqS>re^z8!vkWzg39HT00|hfqT-TAT+C60e$q!t#Vl(TyU%M3j-*EDPz_CnODm-@ z0hCErP?+~=QH0ygc*#sIE#KHMbz@}p9vFA(*33=LzZH;9O>1=0Ab1T|JA9|OFKXVM zs@aYlGkxmyndl~9ZIeCmsPzNiD3(ULrOIt%*%%I?Wg#@0;_|pZ;QPKlB1c_)P^BZd z{yp2@I|+H9($Y80i|jC^nx8BsAS&{O-3e#Sl9Gj0!h0H8vZTuQLAN9eeog?-X)nYNSfZtG%`?_7e7X!dmf!P~m9`&&jXVs!9ZYsJ5+oJ(e5glDJMHz~dL- zxvA4Ttvzf!TmDfAxA&4*HsoL$p}P{+p>t2=0u}{P(F{wSRUxHTrmI}X@3Wy@P&NEB zQi=P!Qv}qixSijc3lg=(?b{5_o0#P+n0TIM^@)VJokAI&h>O}wn!FI%Z)1b7982>{ z8Qey!-8nuisXLPl&0bPKl4thV#X7RdIaKt*1+DfXyja=GNtPFi#YwBOv6y!YOg7J& zo>jh2sFA8j2wx;k-t1)0Ex$K5$3kRBJ9`2t`=69E1Y%g(ST0(r!IyrwTr4lNm?A!MHieo>(}SFsOMJvG5((( zP}noBv*Fl6Jn+&*j#-gzv$Qe01tG$mjmpg17iLe@9>XF*cfzAHgM(;}go<3_v*C(f z^_P1Ev8qUG*!JMJDwaX?JW@15H66g;r; zZCVjpE{cay17^3#@OYWZubw0Ts6Y5Do*vT8-3Ph+zy}A;;M9ywuU^u9H%|8;GMPir z2RmFrvt_)?b(MkJ^i2$V&-0zwy_qGRdo(L!$OFt2ePTF$qu@HM3zS+}R3EbQ+Hl`L z)pA}R<|J8;*y;YLfkoS94vPvq-rIHXkTdGLFtXn9t(4vCvoZ4oG~QhC?urQ}?KL;k z*Hqana8A22d|Y;OLh9{v_f1r@{PMjzzzX)GYiRi=D+Qwp{}GMq0_svTb}1gp4GI`* z-(4^mmF(-4x=n_nQ}|p%`cVNBFjh{pRo)9cCiMFu9aO(>8;Mi{3)$qPvFn>BqkX&U z-6z>*9k&ny=z??KB?X^|=)|g2Y9FfF(FQYqKGxc$VeM|ogV~%bITafjO3dW6sR{KD z9q`#YYTron25v57FSUp|iUF^KX%D_@Ha=LhFrtuS^{=%#u*MA`5Zv%eD0f-cw(~OY;ek4&dt)IWx4|_FZejzdX zgzrq_KX~omiRS}o1}6dZ&m+?$$p)G6Gk#-S+I zqW&584?mP@{=*Mz5yTfU|5gk7ZREEff7co~{>A9OYfY^2?Zn?TilD!(`n%R_G``UK zca0*N|Hwhn#IN(fX!4(a^mhaJ)vxuX@?W&?-0)xLhP9gi=pRnC=wEw^qXhY@AC6Mk zuYNe)Z2iaBck+FJc*)N%4Xfb4y&vlUQKRUc`MG2OL=62X^W%Gj`Wu^{lK0OQgVQvD L0foY7F$Vq@`&XXT literal 0 HcmV?d00001 From 231cfdf071326367c6c6f4ff117c888cc2ea1c91 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sun, 19 Sep 2021 14:36:19 -0700 Subject: [PATCH 42/66] fix issue #221, also add i221_tianjin_invoice.pdf to test --- base/display/metadata.js | 2 +- package.json | 2 +- test/pdf/misc/i221_tianjin_invoice.pdf | Bin 0 -> 39274 bytes 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 test/pdf/misc/i221_tianjin_invoice.pdf diff --git a/base/display/metadata.js b/base/display/metadata.js index 029ad77f..6f62537d 100755 --- a/base/display/metadata.js +++ b/base/display/metadata.js @@ -57,7 +57,7 @@ var Metadata = PDFJS.Metadata = (function MetadataClosure() { var doc = this.metaDocument; var rdf = doc.documentElement; - if (rdf.nodeName.toLowerCase() !== 'rdf:rdf') { // Wrapped in + if (rdf && rdf.nodeName.toLowerCase() !== 'rdf:rdf') { // Wrapped in rdf = rdf.firstChild; while (rdf && rdf.nodeName.toLowerCase() !== 'rdf:rdf') rdf = rdf.nextSibling; diff --git a/package.json b/package.json index f9343d65..86087bf5 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,7 @@ "main": "./pdfparser.js", "scripts": { "test": "cd ./test && sh p2j.forms.sh", - "test-misc": "node pdf2json.js -f ./test/pdf/misc/ -o ./test/target/misc/ -c -m && echo \"\nExpected: 3 success, 2 failure \" " + "test-misc": "node pdf2json.js -f ./test/pdf/misc/ -o ./test/target/misc/ -c -m && echo \"\nExpected: 4 success, 2 failure \" " }, "engines": { "node": ">=12.20.0" diff --git a/test/pdf/misc/i221_tianjin_invoice.pdf b/test/pdf/misc/i221_tianjin_invoice.pdf new file mode 100644 index 0000000000000000000000000000000000000000..adf695a2282d557d2f95f8100448c7ece2d32c15 GIT binary patch literal 39274 zcmb@t1yo!?lQxXITX1(BU~u=~?#|%u8r&ghaCdii4H7&MJPe*da0qTc$-dw2dv^EV z-A{VXncLH+Zr!K)sp_hGZx4-%q%;dVD;E;Y&hG325+^x;+{w%qNl=hY!^hczP2I+d zO~uv8*;w4e#@_sGRLt4=?OVyz!Gc~^U4lXQt+Awa{xF103bULJ2wCbWM=~K(g6T;3~bUi_U;z0Y+~k4W)`d}D)I_8?r)d# zF#E^2srlQmHy419)z!_^*v;CMolU~R)m@+c4WODKo4Aj=g_^0Ol?AyzfSiMulbj34 zN6yL1!@)&vNdAA-*d&}B-7Os5-N*$2KwfrU01u}H7ZAY5Zf?nGZVoc#PI6)SCfH&w|ARr$fFP{Yn$YI98%?aQJSXi=~g3S5&EZO;Zc!3sX z9A-dHK6aob50Hb~9LUKHGUwt0nwpxM^O57F@h$>^v4cW)|FLTwDP5 zH^O*9W~OFlyq0fP0|0LvBUw1U5oL?S{qOukN7>BQ0_6UOIQKvPBJJer@a7M0P9Clx z3%55<`G>$;&k;Zl{6ij_BL_LxpUz+35f^vzexndT&d$!s%E?F03uNWtHvFsSj{}mk zvniUoyV`h@|7HG7>`fG4$fjW7XytBA&I$Zqek5&g>TV%v0dg|85EgcGceOBeK=RIE z(g0`xAjAoLYPqZmLiNsthvm`JaG%=(A$KMec>@t*_WzccX7{r}Ab zz#Bfm8@fNU@IOG$&ih}ZSJ!k#^qi2Ln*0F;fu;>L`ybKr{BNTDCpx6n6v_Xv`Tv3j z4!-}|kbkD(3~#UcFAuN~iczqm|IwyDzVUy|!2gL8{WmN?L%cP}|A>>FtgPMtcK;C? zJls5QOPRfstGcr($bwDM!qWz1p(Z2#*83AADyFUuf8tZz#@$WD!d1e_!P&|2ZP_E| zeOvD>%xysKPOjwaTz^z}I5@if)%;hT8#(76pZ|}i{bS+&PXlr7WKGQRGdTv#!1HT4 z&rjl_I7J?bolBS06;TSGgNq~04sd@*-OKahQTb!}k5-P&Vm9ydtwBEoBTyq@Y+sX( z_StHOvaSEtU?@Dg5ggk!l(CPN?E>&u&^KP~B?3~xH|gBib~T*iFj>ffH<`B)uK`b1 zUxAE>zdx+DTcP)JrBrqsUZd)u{)j(=dPds;0h=uc=UZYS@NA?rWkwC^Bl5P2WlLA< z_DU9kfHEw3GG)Hz0KR5oG{k* zpC_RIjGYC0Gr@x^A-t@36MY@4f=8lbnprTGoW zJ)r@3wiSfwMeNDs#f*%3S%zCsJW=N_!%= zT8H%Aw%@f?OW4gKrXuEgOZC!)3U}fc`t- z8^R6k$ibC>F7}rj=|B1ey)#>F6)sySDHlr|B9|LO`Tks6h}g->h5fp4#^#j>xorCn zgJACqjzw$#$husyK^<+#zo_HWk&LwAavAZ;kRnG-`{y--{%X7!GOwby2HF#CyOx>+ z{~bz7>`IDlJ6lC7EB^uecOFh7XX2_G`PSap`??MZ``2P3pJmownRVTx2{-lZR#JuG zKi2iU=k&Cliz{>49RNE@{>xCNlg?V)>gcwA)z_};f1zn?=j20UJ~6`QQ|Q0?Uq17( zr55mr|6B6b)tC3?)-RaGSJzxcq5kv$Fvoq1>6Kp`g1zc|YJ(G{#s1^&W+ zTst`aq_O{TeDm=AYkafwz2!qUyFVH8Uz6Zlw&wn)P}gUZbu_o|wlM#2jR%HGAApdW&?TFrL>n=Hp)Lc+7 zoT%e~=fjypi|HrKknGBAi;@+JI@s+@K1aCXDNR(ohfA#Hf~VmELI~!_*py-# z+5q+ynG2wHn|NY({2S-i@0jy6qC2NAaE8bI2}}f?IidPcx#m0&uUONZk?;+i34jiI z&m5KKopEJk<44BJ<3OwKM8#_u#C};N#$vPL#L3}x4JlJixl_Z;b$na!GgNrsBxa;S z%AssCIL|BG6r6X0@r`A7->dLSnBkDd|J!v7=Vg_vqr$+``0(z#40+D4?|kpzj%9E; z1p}s9lb*lbd2Y3!A<=~mTG1*N!iOum>qz1HNU~OwFb27mlbeLHrjM@z^CC4l937OE z`pTsW@@I?{2XoW><%_=fk|0Hi}8(v)pPVztSYREF>(29rs zL`Q-qnQp(h3=^j}k1#6dM?S_5GT2CEisP_Kj`j);#(&!=q4wBExbS4W9Ny-9)qX zITei`N`KT8&3J_L#hT6iAR^s|)dSGe?r-Z_Mbf_93JXj4Rr1O_^d4!EO9i@;!)3;L zDU;;hAU`S(T1*$i4(yG6x!%8WoGlQcjpeN&8VS z>+C)#Sv@7RfIn9}PqgvJRvVkv7YZ(|ubHDllV!km#NW8>=SA0?7ks50eAz=o-F>y# z{CXEdnIGimW3qEegOdgoLd}CKpTzdF#-&oJBH}NEbajbDim)_e$1G|}?+WTcToZ8kT)SB2 z1025N|EPdV;i?jU-an#`_lq|eNiQu^y)iNb1Y&drG`3LYpj8ukZGbHc4tf{8&8$@ENJx!gG`$s5)uMT5I-R2G8|&o`zA2GgKMaM#JTlRRo>a}jGe-$S&x7!G*5;L)SY!-Ml z>cmK|AeXl@wcQF4eq@kd9yy+_=*n_?1m4>9eL%;ZO zlNoRGlj(h&3*3piY_hc=f3mquXWeVtfT5D~)%$)GJ#~)#ySe>7Va$%%@8VyljEyZ! zyczmBs3Nk?`UHO*kDeypJS*i1MF;IRd=e_g*93O|fGbpG9Gx zTEG~wNi})91%$qgcZ_Fl{=CYe3ct- z|0x~)QXu-=Hk~?3s;!Mo_kaygPr4YaF&zWWM&agec*F^}>{oP@Kz1Z7P|Vs^9N! zj#Vkxn0gN<3o-lX%YnyLADI(>-Mlevdz02KTzU8JuQXBMHZm4)NZRWA^fO$Yotgx6 zFf6#x!l7Kh*E;$4!wM;GPi8fjb?y<+DParonoG_Pbm570_mmc9EO0@DhaIp~REd9g zH!a(Y;RqVz={L1$5`nZ%6`pt{O^ARq7;otNu)eE=p35y*cqNfvckV{k7I@_T>8E%o zorZBwVVC-&cwb=s4e_J$+1B^{$F38n4(8!8zjyV6!tdCv)IE3(Ug=&T-n9o7{U^21 zKix4-OrdKNHP$uED<|u{_*omv3H@nOxQw22t@lEJyf-m8}|JyK-)y-hRkC z?ygSVqHI6Bvf0e0B_sH1gKBu03nF%N954i}1;)|wqmizJgH$&;RtY9KC(l-%G&i>v z-JbEU9n%Lkw6BKQwuWuMgjW~pJ)Ud3UBS=quBZkRjBk@Sc%nuP z49<4Ag3%%BLu0u|X+*}{1c*unR1D@!ry%sWdJ?S9rQskjo-6@H4dPdw@%7*R!z!+q zGdlqKWG`+Yx~+4~%rfmZ0z)F~ofDw}-vhPQ`ycQKMp?ZCPcrQ-L!rc8PY2I>Ns;Xr zr%U&e7X9~w1i~!jj#9<_@=+)|%QEfPQ5gG-4 zf2;^CYxg135w-6#qa;_QB@vUeDyLanD*Yi z&l5dgAE!&q8fbMh@%`i$n)1*K%$x#$V*gS70u($=mr^bMu@cjIp7?RP!UNo@(O)*| zN$v;f31wu;Pu@NNn04Jif|8?EFyGk7pod-;9=a4;PLG0F^OA3%=Jho24#G?gu1|=C zqpGe4g7LCv>_1R&O}-0bqw*xup!+p#Bi5B`QAHXDfB?2=m?meFFP$JolU{y zJ_vqYy?G&BLET4y2(nelioXC2KSUiu!F5E>-XO>cRZS;QP2y?_-tYEcW)F{-@G07> z#u#dfEfo*zqA^H*@8EjkbYyxqKABN;o{4&;$=t=wRYymOR0= z?zPH`C>p9A4c{ZfJj#tvHJ(h-dLMcgXYH&e=d z4*%lI6KIYWS#gSwGaYGXPNEKUqSAzcbKzatQq0nTf6E$m3A_kPvU)_$J&rnFz_$sx z`M8+332@+P*7ibiWhwYpb_T^c%sJ5WDC+mg8MlO`3I|X3H*+YrUi0O=Fu4f zL`}A!Lsp)yQ2l^wwMFBM^>eO*o`hyxeBrfYAP;t zQxEGxs|Rv6qO4o*JXe=b3%PrS4V2GDStZ*<@?En!kM%fhLRnjmsauS9afBno?p zuquBTQWhAW!ou}VCR2Vds-+a^cbTLR&Q0Lx7raV;FB%;2qO^`#z>{2AKb&jDKF83P zv8#&xlVq}G3G3vCm}u~>>5pvS4SnKa>FY=w_6?^#oqnLdT<@~oC7jk_wy}j5_#F~Z zv5Q;pqeWJ+q!hyyd1LEB`&CFX5}c~za0{jBQXN^MWHEDU)q5~umIzIrmS(5sNAtCC zOH*n+06yNnt)^tCJb5nYZjFh8-a+Tf?5oKr@kY%_YQ~W}ZmAezDE!5jo<)#&_1B1^ zFT7HjhDRJL4R@i5&l!U1So*Qw7-5D}Fkr7Q?p5SELps zS1mmx%}SfXx2|&=ofV6c&#}~(B>BaO|1x^!`3-yixEY+aVeond0D9sV-r>LkqJvuD zm^~Tndfdyeo(Vp+%-5cf3+57T5E{_Tpnm!pGP=K0vx1I7DY)X*b-2;6-*7E(n+H|2 z6cQpOMlp?`%2+||a>dqA^Z+j2-6TfFPo-PyU4hQvm~L0y)~7>dN$;sXDdaVn}q3IX#2? z`^ci8ypC$oks5TZ@GuzLxIy7x5_v_>AR!q~AZ;FekXb;j9;N{y?aB};Au4S^E~Xgh zZ#NTAwoE4w_0o~dn#m5eHWnJ&Hlp~D2J$$Ka6pNRj4ui<5K~E;$l$oqx-Am&!a8>? zYsl)GQ&($<)b6GsSePziLs8ob^E{0n_8<`rCRksLZP<5E8cXHpyZju4g2zWva0k=^ zRWvLG^mRf7?SuMUs)HsSC<3Y_JU_@NaMD|d+6{229T&%AiI41&a3Mprw~v zU>xh?-%CJ!@j%H#wLLL8ZE1 zxa|Ep;BzHIOew$6y%)2!rx(yP+5ab=wL*L<6dBGfk7bns4pg zNWVe$!$()9;ZGr0N+%vD8E6otKUBmo1;h8yj*;fC#5wF63|9DqgbF=Wpl%p;nTlQX z?Y54qAYtB3_>R#HP<>H=go#f$ud7rT`0diJ?05vfF-?_@FxkGV_5}PY#@JFfb1{r> zVL_=V!9!CZg|wC#v=UED3)#Qr_}y|G{QU#pYHUvBT0!v?)C54#TWku2+*#8Mg(h}Z zhXDy`vr(CXrL5x&w3h)*{f>CCl+YRdkghA#0Igz|NrV_5oVmwG4Re0V3*$fH_oFnf zBjmwrTkP0vEl$7DDGee}CbgZWM?O^(c0~t*OBT`*O61ns?K}bboinbaR9nO(?73H4 zIjW;BUc>6Wme5BZKbP5`C56KJ#n*@grdcKrBIF>}dwDzYC2~&HPd$xk2Lc@L!5@vE zh&Y-N%~lo4ce70{EnI(H@=hty&4Bw$xFv@dowa{9`IAvS{O-bjr!DT8(~4}#$o~FR z0ie7f_#8ZCiGHIOwa^k#OafZP`m~XrEBRol9egBFnvB*jw$s} zp`o|To5a7={JPuh!Ajt_v!gnX9%hZZoxC3n(#;=6oIx>=a8SGAtWz9adQ|Bak~2^zpG?Vu0v3m7V1pA+K@YR&+auBK*)PWj56@ z1$oK$2k&9g+E?K-EgY%IS9sRq=i1?i^Ug z_+f*~!@Y~kmCKGNGD&}m6(Yk~V>Wg8otNn0Ag{%oo1NP@4Y&=_0M7@{H=+gUd z(!Xp3H$b*cOo(P#pNdMoyJj~ye4xHxD{&V~cnzkO9yZb7L#9PS~1#vw%QoUPG*r-%X`SMludUFRRAftDNlz z@{>Lr9{42FEOsDYvCReM2gEPorE{0YjXG>D^Djhr=>!)qsIjpqzdU|cH{ZQiM^Aqr z`2MM|Xst86U{m!{MHzfm4iBcj*rQ1Bb?Ek4D)7GiJ<$Ez!2C;<`2MFcsj~7GJEYct z!Q2X z(mi(+J-|8L=Fub)_WRK5i?lyP)Q)XlbojlxstuM;QUk%+(kg-RciW{8O}AvwJhNK; zXIeLf$2`T~Egul>vZKzvSl$kD3E52E3-nEy|CS{)Y-0Q9?J!u`P_KDM`F_=l_bE+T zdH<+qq|i@(;ZpMWOq8`3$x3t<^#~P9nL;}dd3IR)yoPQHlBLJ|z$(&vd=&9<+OQw}V zopKK8Iy#^x?GZvjbU5QuPI`ph7=+o9=+?1N2ddXmPcEtr?0nLrZ!*wM>Z{bC?kuB3 zU{#o&rCxsersQS}nXKdI7*;w}>k0gFZ5^FLgCniX2ypnC&8$oTL()!2Ie~iLvsv6L zpSv-Wq-y1?fLDm`VA_;0<8|I4r(O>Ici{T-LiJ~*lrzH+^UJh_Hz4E& zLAiMjaYlatRSPeFYj%1#wc>j;ELt;{N#WogdzLuwnplz9Y_sqq#X2aRpyAUop}r{_ zYqJBaOJ&5k`2*=*T_ew`BJFCz%4T{5&0}G09RmfABgeAj5`#RXgl+P^*NT*9icrMO z8A!y`t6A~MKk9__dLUAl#i;@FsBzJP+2V)&YtBY@iWsg_mP^jY2-t3Ik&nr%(Ju{t zWXkm=ET^A&|Bi3-W~I~u1={Us)2^xLLyd=+dQxS3v@->DpI=pPCAK2zh` zv=_h3erf3Gg^bq*`;#{NnYViat)YFL;4`bEqg`r?U3TRQd#M_BOSx+xlV_ zO_Q(jKbDMN7!G5K$fvgOQ_vez)Zlu-y4vFU-wSN=)fa{b@NfM;Zk@Q>C(7y0tw0{sMQJ{c|h0C>o!R6fQz6iDkP zVtZvSTR@4-XRwLT$L0v||wW5)lvaZ#X@tpo2<0Ie$e4LqtuDUyp`%Aw1A*C1m`G zrYq^gg*Ux=vXbvn^z12#eBW=B3S!BEsZP@8^xUc5Xi+vI45*JMRw-{hpwcPcrq_=w#j=C!;@B zL*G?&)NSZIcinvp6qBYlgtXVJrmF%_XxPg2Q|o+~X4h|(Z+=o5l5|l@N}x-_k%jCO z8GNbotx09%iqk8xIoaJIa|!pp+wbA%LO!TapkL$>LzqHn4E*l0*W@{>Wjksu-zjls z%C#WVLZl%W1TiF* ztSq$NuT2!FDV5=J7vZ#NKhE4d>XnZXF$h6+RaVxSJ zXjYkmDD;Nwi;;CPgU7;t*3@>|MOB0uMNR2)V-QCWl;-$WuO5al)sDWHFU@Ulq?DY3 zda5!%Fov!FrXaY{>%VtUDe%P-FZR_3qr#LGtRuj1j!Pv zk{2{FK!z9`niwpaHQ$W3J;@jcLw#A|g+6xsG0{Zw<#+6^pLhpGP+5~j#DZYN0$}AZ zs9!IKMWOZ*UU}`#)P)v`@!wyHNaP(>%4p~J-so*U^tCQIBI~Bn)G*<-q4k}Y=KDX2 zyrz={AkBVQZ_0b9^#VUU&Dtjf>gz4#66fjnLMuPPA8ihKefNPQMF;!59+BxEU_aj- zrG?i895OitfUElM*;tNFMOv7jPHshBFW^0%yc#Tox`S@LMDEUO(QP+hzmg5_{@iYC z*%Ej?rdy(0Qf9W*y%0QMt5p_md{OQ=57o{n-I94%UJ>U#x^34U0!;?OiQx4%EK4a< zE}2)dzPCAFR%S-qOu7K$)RNw+97}s2Yj#sSp*&Cg5UN{jX{$+UbrOO*`yoVzTfEAU zXCch0WYai-MV4TBj=kK>e29CV@Ut0d`>83D03ZTe9(KV#ZOIxz^X*M(aFF5QqryAW^Z_t+bCA!TB z5@}}%INU1J6><__*B=;OG+&#^W3cYKJ7IC$af&2wm!t-_&I+sRYF1m3=r3=5QniWS z!Ld?=iO{RTGl2vyD2F69GZ?qBJ2kJ`!gXtyBSyAQ<&}K9xhhhcmlj8CWiUPSDcswr zj%NuhG^`FsakGDn!lZZF@JOdFA}eKsCfpTurmFC<}Dc$g4r1yrIK-jaJakr6k5 z?2vNs6EEDTAt30w%cou^r%3h2H{MS%`wN7_$?eTo?#)=6Q0L>d_gZRL%+BGIIz#Pl z`YJN5Rr-5af|fX{FVdoL@LcPS%h$dNh z7&S{j?jTpn`0`Zg$Q%s&M-RF^F4eWRk@6%3Pi9N4-67^vqOI>yoiy=7HQ9C&vLf~B z`ZAmsXy;%xYomA_ky7PVv~Avq&*XYfmHDwheEb;?>%!mxTHdA}jK0+k!{0|SeeCVt z`aCkti-MnLSP=fq3_k7ZS_HRwz+!CwdFNGYnDmt=43L#F^hzbe*08kc^qx5%8dl0?B)dXv5qn-L?XTLC)#|E`MG(xp5wW@o9C7~y1;kb@78;vL1o#S z?(n$v?pu;S9}G5$Ss||^$qC?c@Taj)ECmB*#5?L@KA77ylcG^xr-*Mf#AX);f=EnZ z{{6NaOkbUA{k<*I`lc+`I1fKxS3R698dOZP{c@D@W#M$>` z4`{L7^YS2>AMgU-V_(_gdsn3SzBpu)|5EUc;q1_k2Qk7Xb)&NGLOd!G0(ksqvcI?9 zv)xbGbxnUx`Y4M3ea^TY;i=C-qTV8`vgV7>yiJNG>4hy%P+{eh_COmbNJNvn8{Bn_S`lZBxm^9i%ZB2*^ zh9JZmBWA0IPXi)YtZ5LS!0-nc5L&mfcMRo@v#vxXv_ty7s9&IPpfQp(XfG?%x=<0I0^+- z?Hj*7GPUe$wH+`Gd^6lT!5n-4m@&p!$#QuEZPz`35tYuWq+Y_EggF<)Vx68Bity2K z$-8~d6~7>^a~HCFED#+MfvGBUlRCmHyzUJ72HfITZE0(I*_CZi*h({w??uIAbhnjV zKIpiC20+H3_6ae{GcOS}LD`ZJBBTUCth09BgpQ7JbNtro61`Vl0A6y4`p97kNjI=< z7}<=i{L-y;##532ucmPwrGz9brPxw}yoerx7L{Q2nGS2UNch}ExUEL!i5yZHA2MX~ zYgD3;Xjll5WazX33%P&(vUQ!&Hr2ZOLAQGzNL8TQqcNEP0kD}O#=(#U&&{zT$oG41EF)+U3(ML7bzF%(N!B-OK$JnPXJECie z$K0Po!&1bljWJ=+aZKrx5h2&-;M=~CcPrNLc?Tm_2{U9A@@ z0n`C_8{>{eF3D|)pGs+w$E1cJzIgKQQQsDw;@oYl3ax34ROuUv-V~#TT+ga8e|Oai zHw+Vk6&~~x)JJBO=08*78o0o!n$EM}cjHpLWZcz0P^jJYxi$oEjr(lQn*Uzpf$i|1>9^);&@@;iegqV}4mb&*u zTy>)<+Bw|u(Nyl#{m&=bVA%0a1e;!toYpK1}x zLmGaSNFOzu8mc++fIo?9gDDs{{BZ1CS32dF;!u|k#)mT@F~|9J&f*G{aJ)ImSmfP< z;k^qPqKu#zfRTAcRC1LtW>%lvd9{^NPSCIm;L<6Yaw?Wdwgv&{0L4e#2`U@yhezvq z)GvM;@z0FP*|D)~WqlvNC5Ue&L%l#E!coE`>dT>R55XL<#%HnDo{+FIorUJB4Nz#= z{n#3eE0h55?D6fC8s3sI&bE>n7NaL===d!;tQ3#bd@RgUN{`j$lBYO4*pGRE!iu8B zM>q%zpj>dLW_ngFCk@htSjwaLV(Qx^=ngG*>h9&)BSDVU_ROm+q%YRB5=_kdNk$>h z`LHPQH|!*Y1$i*FgLeq=3D)6I7|}9-pJt#050*`xMrpzTIRWpn zh06h1U|`yBxR1K`YpJ7c;1%!ASk=B;2L;9Fqr7QP0kD)d{o!4F&FrcjN!m<0HwL(O zI5Z^5w7g86716;7c{g>aLN~qJaD>;xK8h=7GmSc5O^f0jHGx4 zjJU&D0xJK$sa?tpShG)&&M~^r6L))1y(yd){cs%TyxS0tHWrHZWw$}XBIfC$W8Fe< z6e0Eka`$%SN^|ULps>g^XXY~qg39we5;p>ObK3f$(d=3)Pz(ZyA9Q%AwY1^&MfjHE z7lm#9UCu}1J5l|4p?qz#Dy|b~;d}@Vyn&zT4V>)|B*snPMeDvKn#>=Z^3y&Pb8@_6 zv^g=qY=J**3iE{*ADc5WKY$mAthKtZ2HeZNQyntSk!lLOJT%IBed;PyGWO$GJH^C8 z=*6?Y%O`-GJ1t55I3kaUcgpDYJNEml>ikq2=<*S1NH^avhErj<=OF)v^qrYZ&|O4M zF(30;A$C!H6l25ILHt^6^*aRQr7YO`3PVRqjVp(EL23w+ zAHgPEk?Ot){@2mCJ@2;u|hW^EULl?D=6ESBvEmY{LeLNpyZQxJ|gJ&%uB5 zVRFgIj_ga7tW7ni)l$hU3}EFPM>Ec+)vpGr@q$b-9y1J*>|!KBJSp`CAHm~ODfyff zoUFU%#s`xEKKL`M(TUYHmFfAzmd;0_T9ntIdXU+APPK!yUg2^9B^D!cD9q%?d;G0i zSpjF_E)vU>Rd?!>LK4PtiF_?u4@udujCJDYPtmSQ_g^kv>A+Vwdd-yito>(A9$_p_GQ^dGKc=`7qw$ab-bP-1*N{`J+_Ax%k+~?%{LG2NI5X5W{Am zi*hULI~eh{LFZ=-{u~tWIYQgZ{ zS%;s37n3vE>>7f(VL;nSndhxnqb7L0X`5SQvC~VDFaG&Eu32TD<%RD>T zZ_7&TL_=juY6uqXsPWxu@GxY_t=KLo#NeYUi}OCaFfDPIxCa0pyK>xVKM(819Op?$ z2djE>Q<}`7Hy_p;%}D+Ul3Z9>pM9J~+K(DWDZNR3F2vRun=SpK97(ul#EWddCu=*Q zM&kTrERIVJqkT)uOslLYMEen$DiFBL)sNsZ0e9Mz=@JZPd{t8s7u8vKy2ccR~^eSWI?epL|8rFkKfCxhw zXyb{nc1;o_6ho}In=Jw3oAmK>-`M-`J0ggkT&SI1lU}qW zx!(cTLFI%EoI{?fOP}Q8#wg^1r`b%_uR+Iltd(#tIR`ok)yA94Ch2{(0B4 z+=9kf@)1!eUdkeI$OA~xl?%x)(iLg70Y+Xp@a7n8>D=#s0PMT#Wyyr;pU_-g&yDcP z-RcA5n(A3s^&J{2TD-`^*bmjbh@rN6f_1ACRt?16TlBGWCdvl5#d|^yJP15Nk(?#f zx+gKAMb4Bo4Ghc?Fgondlnr>ytF3bHcaad-$*a`(7D=t-o$?3e1_IEyJdPqe{g6Ga zU^9RAT{m5iLK~GD>efu!o4F#K5#)_Pbchx~QAux2OzM^`MDSwoiIypthjQy&X^7&N z2zg)Yg=%E6Vq@MK;Cb2OQ3;$KqgE}tsBX3O=M7&n=|>jVo@~KyMHZ~*SmA^M@LHK# zRL|CGXd6{_MuU^*oubiH9q5jizOa=7zn`w)x7aI@dofA-_R`)pPU!{NC3p!&NNQZe zZ2fi~ci8(5RaMqgrU5v{Y7 z{4oVmf4TU*`}v$G)^cAkh52mMe`F3yld64*GzhWYo03Xa{oud^5_^Y09`^31W8=4x zsGCN6bB|5mk~(s34>vn5mRp8}RPByTO;uWh6*xDh0d6P$gK&kuA@S*59P2wmOs6Wr3Vq(H(;*RPD0TLR0 zoAtA`g~nCWN4z?_4~b}|3(=4z4E&`IPwj2G>7w9EjR1P{7ub2+0d-;=-mcEK|Bs0E z2404_Uo3&YHohwm`Mxbc4U^6HeK!!owlS~}$@$FUNVQ63zdCVUaF4PimW}&+2EP^A ztn;k@5u^3!&!EO02Y>8SHwS(=LM|~yf^U0PG}_Ku4OwZw5mhosx4bOnT4;vazS`!+ zz%%$esQ9<$wO*vuaO~do1j&@sz(a{-$PvgzfNKinD3#13o)}fBkHIkup-0Q?kdF8y zr46%NaaVae-=q^^7{)H^c-#OVc+A*%6Hi4=Ub3M0NVt7d{H$9tlE2gh1hbdV%XgY2 zf`8?9JY;L)wc)bpQKK+GO^B)$70aJOYWRlrRJrg>we7Q3Xzc;Y>vl`ygAwN4V=-k` zw6O`aKP*uzO)rtfYl}pgApOry$2h8))mIZJ875Y@UvEPq7w& zyj}1fd}WE%ueq`P^sRHru_gw{?)e$j5RW2f{4r_bQK8qc6s^e(N{;o;h*C}Yb*#l%dP0<%%YTUJJ@#l$nd0F)dR%GWVN+vrkX>*((Bh_7(^~t;x zvcql*vo5daj8_oey`<>$D$hQ!T)&D4FNS~gEJw@*(7E-0R3p3U$1&R`Ly6QCT?l;^ z>Mp|G)Y0-Ex%ouy8tR{&@3^~xeZdYu9XB^RN6$dH@6_=O(nWX2N#&R1EK40!v1dG# zUERwnYITvw4MI|f^0}6)G7re_KA4++&{@8b4f!ET7>sEdCZsq%Vw`6B!9)1=#R&kA zfTUACbGNM##;+}HuxNfYaB{gt0!_HMZXqS}&ihbDx-xMYIB)7n5N|Bs>MY)NaBy4) z_f-92%(wJ+wi9)JM}z}dS}ZNKms7;RWC?F>zKlqK|D66g-@U}%ZCkbo1L=o7itv(Q z2gC+09-ah^v73=LW>T=f1{siws2qmxEAPULVnF8xjF?K{wC62q_6-S#bI{(=1oLP3 zOHK6Ch?;6z5mWMg-NVl>#}h1&-{)6_g`?*%PT-hjksHWAr3+m6^mq)8 zwbwL?1^C~_Rw*Tp=AtamE3mN-Ee4r*vR4;5Ixb{*pFQRcJgws3`{6F{WPCtQd9*z;1Aak@^?Fikh<7+WV-S8c&4ZhhD|eo>?OCB!PLV_jooa1Uu_Y4uEKy*7LL|$0lid;0L4sB7Y)04Ma?M#5KKw0I zoBaeWZ4cK@oz-aeL(jK&yxBYZ3d_?ms8Ta>EP7dzPAWt!*K{8hY|7wUoLcja(x$|l zC7LRkOjz0u7UW1ANrKgi*Gp1WvK#%5i^lE?&c4}3f2K%Y-RfI7_-y~#%h5sS{C z&>}$g?IbSoA_|q4iGR*_$Qc(vn(N5+>)22W?IO?9KXmjLz75B-Fx4RPL#V)_$`eXA z3#x{kG0f7k5&6t*9_y9tSbf+CrpNYl8v4u&9sTMh6b-e-fjCJyMfN$O5W9<*1mD*W>(i{~upTE4G`wbMjb%n%>8c@DK! zQ}yN+;vBNwJ%b2a#51VF0>xc~+F{NsL6+rEyv$?8=lD3Ze2UqwR_OIx)Zxc` zk9cpm*K4ifsU8*2tY*oYps7o+n}(~(r;rT;##KT$f#iZRigQLt1Q*vi&03WWK}5~I zl)&>fDXiE~?^$3J%~D`|S^vr=gDvxsAd)>dOP}Jm)@h zQD^#OdRjLPLDjoD@V^i=qWZ)digj!nwNunFMrB%woLX+6SGB908~Rk&s0J1-bR#af z6h||cKAn>seT;o^lDH98d&e-Ds3ni(2j2ZWDIV=Rp&v@!m-2|V-FDY??&VzO<~rbf zt_G&`NRZD@7C_zc84>e_-r@7b^*q~hfD?~HjeNEnb|{GYSQJZI)F9i9awyF2O5lKo zwbx}`(2V889v=9S0^P!uzZ87_~WjNA3j-Od|E(wBcFGlD`}CX81WtuvSn z22Q1Tqh_*)jq+5DUm>=lpSpyo-yk|`slL*;!nahq@$leczq>3j_iW#HNw!$T_?>VE zC9f^ola|FhemeUxCsr;u>XR1&5y0RCbYioTi7ao0(0 zKx5JLJRCibYMG50s3TEDJYXd{LH%@(unbZ%m|^OWQc*}mc-uGsa@g6%c)-2k@ZsSj zd-@etws;adVFuWNnZJyd3m<9Bw4&?5g`;tkm|bKCq2*RwZ>0NoFUq zrJr`>nr}LwcG8`R%W+)Jwb?qMPU%Xulp2RJT<8d)u0NH0>0Wje7}#SC&qFfX>+u6-9&NG&56BbH~90bHKYmtbG$KTo}YN)Tz0WOA?wG7&J z|EeGDgAj`cWQLQR?edjv9ag&13K~i(pkmb4aPKpH7uTn>jZ4Jh(TZi3X@B=1L}mKN zBG^C|6*D|R6Z-S-`63u=+DlbL6kBEmFzBQ+W3AyW$cE!_@zvfqhTT@V2^_j(y2BrO zc5n2H`cSsxdj6E{6u5SNyTswp6RD?b0e51@c1Hz*`Cuy_MXS94OuHR^(DT7aq^B|& zhdEO`g!@}+{1jF7+B^A6CV;pPkH&PSm6Jg%IBEck_E}bBX~aB5rzzqY)`jYY8LJ(W zN}jQoklTUUC#}i99okt)D^@pM8_$EbAom*a_$XI4oiUfEst03kr5QA!8hPCUl`tic|YNX(l*>?yQuQJ66S74x&Z-fd`y!$2nQl+Hy^ z0nBH@tTlD$HMsGlZHwWkO4N5apAjOyc*81)_5(#-Vp@VYb7;%`0vFWFXV~3uU{x+# zPfJEMyDG|UTT^ej5k}>}(pM8#yRGQO?XoBm%L7zES-m$C^TXe6)wP{nv&C5bt^FvK zRDzlfm#)&cmw>{wj_X{{gzBsiA@&*MH!M(#d1;oqhL0K*r4bCH@T+dRV|&OqedPVH zhF>JRr?&K=JtD!xxY9d_+O$IsRmc>4#iYL0=*8b42Hy}X$rblBpyE;2ilB$aZC^>Y zrDGqIP-BDtKD=T1u%x06*+jE3KhS~!16fVv8aeOxvH*Eu@jPwt6y~ee+Y@NYG#UGe z#Hvvip;x-m>PMl7ArcmLkX7xEvx2O30FXiMW3mOtkS?d4UQQ++#)>5`U->G;JoSy3 zrqm9-@P{)UOao34Hp6*3h z?3W*ItK8kfm$zj%|_OyulD61jSHI4v=%J>|2%1+c@B&SY2fpcj|$P!M0;ye0gDPhv1sH~ z%@7gaFcU^x%hl$z<`JhXP(ZDzlY_++*q+gsUk{@`Z(v+jH*CK){EY36wzAy!Sor_- z{u9grwF%+{lGq%3V?M!PfMV82pWWEyZ|$rl#BF5vT3?JKK=VnYf{J7ryJ3MyO_Eg< z0{Z-<>O*-4XqtYrl&`jO5o4_S9A<4;!MwZdlmRJ-fZZMGbI%;Y4&i43=#}O9N=J;R z1;`f(S(Z~Pl^monVO|d8TJ9}p1o=>aC_YY|sBycFq6M&((8rF? ztS|vDN2jTo`Av=e(bYCxKqW*^-|KQPaka9p0`byUAGLB=dZE^DhnaAN4g5EB%mUX( zLjse_K41RdwYbwd{K2m(-7t)_l8;kcN1iC1Rf^IhTvX2+AYi5NFj)FrU>5~UkdzoA zN`tzSMW=;nN+QuH@V%n5DxbyAruURq1;$;OMjNQw+B#Ik-&1utP7r9X17dhh?5lsIxyRUoUteXAVCP9 zss8izmUUSkC#HqCES+u;_qokl0{6f#(W-nS3rL&kArUTz>hXYjQVnC0Kg~W-<|~Hn zy??8f;}B7d@s6)&r9qb+13Y38oZrf5`THBE!y4?qs1`Kh__|V!&;#UJ;=g0w8 z;Jl}33ez|J=P8*fe?y2m1~>nY?_<{=y8sVO%tdn<{Oz&e8Jc)T1X45vRq1;`d^9dZsio_Hq4#D)6Z5A+kTD)dEsy{$_-+Qig18BCcwZUPnLxf8!jNPV$VaRWWO`m0= zcJi4z=2IX!phZ=j&O-4qz87lxihObk(7P0Hk{w_>;9e^gPfDJ=l7?2TKt*N?u>$OT z&>wF*JFJYE=UCC649DQ5zr!fbZCRdQPe^-0b_q=r>QHf3?z@bM*>C5-t~yUxW4Fx+>EuD9;| z3GhIM+;wC9VgD{!Gi;2=2SPc;H5sqZ;;e2MPQ4V}XRA$%=R;YjxQ@10dzEZU$6W*` zwmNkz_a<23Lr4Vk#v}$ylN@p0huG?z-Z3t@DE+xvZ`O+RQOEZNPdB|c^vuy-H#N1j z?wFuhInWpkolzPg{sH@x8F%1SS+|WUlI@%4RlFvjgYTZ&kGrZL-(e ztdAMq))hS6Zy;B8jh;B~F>QwZ2OE&8==0)cUsT;uvjUsR=h3U3ot#tO{Pip#nqu8x z_wkmkihLntgjP*r;~di9)1j{h11?4ZJSx8H0}SJBu!@rIohjg9L1DfVyy~ZCQZM2R z&;2D=nW>7O6{JK(7{`z5Lsl+6t|HDP*OPgWg)#!KK*RMMX{k^|i#8X?C~%mNUoh(e z0LFd84x>gbH3NFgoiij*sNcP*#fYjW2Y)}TEx8R)FQA?05H5PI4{}{X)H5>-*_RjW zRG<5#@(U?$2>ZSo_)|{CX8USqA03r|z6f$6bmi=O*jm zC6{$vF;7g4ju;xOFJ0X-a$SXc8>$;?ShY?c><}|@4p|7@>Z-}ed3J;D{YM`~84 zF+-e5zQqqjB|#3Zc6%!Fli8ZnZ;$EqfU37Y?xlPNWk=E_qu~hOg0SPBBrNkST+pvr-j=9XS6u{m4u7PvpdgLwOJ|dNuB@O3nb$~lrzpaN{xVLN z$wSD6B&>j_s*(Z|DGuZxBh)#$R|_C zQyoVXQjc@?vwnZv@hKo6>0oA4ifef-~$`rmJkqc zE=B0MM`D3-ZC7Z1QE39YT_Lry_()Mo9bnB#d@Da_(2c>4#{Itjr{1l&@@q0Yzab>m zm_=4awqm&&4jyrIayaWEB@57KK+WxAuS~=iTJHQW`>WYs?1^csmSr0Rv?48c$dgnG z{0!r_GNvPq;4DiKoO=U71Jy~twujz?5jqA7S$F0)&T_>-f+%K~Sfg()a(ZVjb9>{? z#JGNaQttcSL`(;w19zEb{TqQ}l)Tul?XxMgZ^;_tFNuF%l6j#U;APBrZdjG8oE{7W zcsed=MKjfd4vt);TTWk9xXdTuuh^H3$B=D|4{$hNV84pZWMnL;H?lg{l3--UBlMP3 zEoB@qK>4jZkT(TH$9r%= zy$m3JnexA(7SvB8UY6q7ch0kD}CY?b&z}WGJ;@xXg5%6+Lg0>nah)l5`mG z9pXEzH{(#Xy`$lU>N+B|`N61J?TAlo{;USAA)oAv?1#Dd#C{CM7CD~(37T?29^D#W zC5!XJy=L(pD-JyKGKPlH)JJPG(b!k z3yxMf&cs#fRIYc!X@Gy|u^GE!?y6fGZ-;>Pw>h*w;;RZhw&;9$oUv8IJ2RnY;NA}M zb?}S5j5gnMJBxR%ygP_<4XR*^=C4;8X!m}xa``7au(*>ZPk1JlMtVV8pW(9l21?6_ z>XHf$tyDS#09$i9Z)P>@fu=qdfaGsRlgcGFbZJ|VSPvC^{ICrF&d3WDT}ch}aPmtIP188Q#<`NkQ2b$XmF=gi)@kWE4`{O0%G zh@+w-o}aV}+e^;TWjTCXkoDHT63DV-oNCcMwma;WH5xR($@=`2f4F22xe2=l9IC)I z1z~MfsZ58^A2BMYtx+lXv!g zZi>&J{c1R=2SqpdH+lJOK+}Y)oBnT97A~n zNjDAu@)y$n^SGQcS{@2D3$e3pE7eOzhrgze$WF+y$oYC1xDWA(&R3_%N};NQ8s5g| z3iR~U#92VF7|Yfj1ph_&v_FVDK4qJS;aK|k@DYZD@DXJaJyWrd6Q4*a#A+tHrbS5T zQ%qmTCwozP-9`BlRDEJ4BG`Hb;u8^tT(3V`tl|{%ZN3%tW^sphObnA+T619tE*RQc zqSt3Oa*#E&6wGOTVJL)y%zu2RX#F`~NJe{NFACWvT*f|!d8VP>Z(2df0xkF`INksN zsv-ZDHir&U5xy;fy|^LXLld`E)BVL3g~3bsq6uN|QaCE81UG~c7MwDagrQtojp6RB zXJ5BYzt`%#(a5JF{Z$~gZ5mZ_u*+}e5$R+2q-L#C@30CICtCP)tn=R-k|-v}{x-A5 z5OlM1r|_&8g)Zy-(p1NFghTrQn*BqGy4MPGv_7<(`p`vyKEh$Y3Cn=~kr+Y;+6uvff@0sG9E>fxngStb>r_(O#U7X~#b_7_&nI(4LqGf$OR54FBA z?mHc%Rf z10e#{#h#M~#_~r*&>WdxP7?y~EC7t}-(oojD6fOR8u9y~0ATA7BH3VKyqXPhjc+dc zu}7ay9@Owp?)kHS{~Uu$X|@lDjizL@tjbW^hM+<_w05!u=qy{l{ERrDda2jI`=#r; za$3urM?IAl^!oL&z+CotoQ~y0tAASyq1}HzvEAYAnDlr@sDTF_dPPrco5wQ!n%weT zIZfZL^;eM|Y)A=xl( zkO2DLWKy-iRL2rbQJxF*o#{u|Hj|`r}l{1vHMS z#h;q8Jh{ZcIy9+Y5^l|8*@<+i7EEv*(hORvj6)z6`PfE3fGO?~_+9RQemm0YuUO)u zrfZJ0}`-bENnO!SiJHQ-Yq z{fSlF)cK~wAq(y3ZCDj1@c5R0hw-K8&zz@_F7S_9@NuZJQ`V@$i1lJRemcH_opH=N zW*B3bHh$z^2j!}9{3*B#^hzRC{-*l>lA^I>kgB1tN>|Wo2YhpXQ=9_uE9-uF?Cp@_ zdZ+0e_CX^JC(6m?gHDcnnSn z&nx*+Y))01UlUE_Kg1&zI7Sp#$JS-MJ+MewjeI`ZpVM`jY&g<_P-C8&2BBoZ)|95D z%l6X*cYX}m{f~+ zxTT}k8?#>5D)syIzV$hSQOd4%uL?kORpbyX|I7L3el@^Wm@97ky*{!<7#=>J-Pf%Y zHNl<}@=hJ=kx-J|l>q9RI46OG4ogT=UrJulW}y75Y3`x#9pfO+3A5Ns>D^sBL3MFI z(Ul~;pop)xJI3AQW(J=U^`dmg&T+i}lWyZQv04jdRI;&KD~LDe^hNzx?G5j}!}UP> zXWyy6_{Wm!E6C*QV$tsNZm>3Ub}0i*^9bj0hySfHR|AP4<;VRsR4cb{C-b6I?{mlR zJN0{s1Gwb`42{)2ecXI9njB!zL|+TA**zNeL}JM4SDPXHYNea(X>0$+3wrQ#cQ4-o zd4GyTVz@z=ypNoIDosJFp{3bBLKDxtkf)`Io}l^oNKyUbNKtiS$JkWHTT%Ea3{^+< z^?l&@9#y|+{_n)ma=-moaH#srWJ>cB#nJKy-Q@Y8C#3z4@atQlb_yFGUG0?{ofVXB z?kCOUdguF&o^svf_oLr6cbmFyXz#;qhR?wsTj&$R_u`-x#7<+#3+`8CC=jA;=iui1 z4OonG@u{}|N=sQV!jKj(hE0iPm|T)ifo21`SqoG*}eUZj14wDshz*7iKxZ?N7Wa^$T_o%Q72$d|q!JhnT)xcPhj z+S&VRTomJse&2#c12?`GdU^13`QUFl2V1-Qe6?~NhR<_*?T_wyZ2WfNFRBSTEY9In z;GaykduGf|k*4o#?p_-NYjJ4Q;us)mHnI$Ifo7*8AiWl!Q9DchTHh>C)4I~ry(isQ zYN+2A29|j+Q&Vvz^Fqkd;mlTN|npcgk5izeqR>>xUmIeg;ac9^d7GnI{Qc~0p zOA8*aYNdpn2u=uL5m(d7JPVbOxQ9fa=By%?#N`OAZyb`Lb1I>i-=ZP?5_q=Jxk9>M1BwLlr(?C2IjU#cO z92K9Ab_?mVN6ohB6TgwzFyw3+jCqG|h(7!?4a;i_x8BGe3xFnf1mOFC<^aH85gc}l z%G}^3<>)E@O(a7up&th%LJI#f?AL{gqd}*XPbP`{Nb^Wo!l zow9xDA7dIvIX|b=Bnv+&!$L=vq(ajjiXLw?my`$c%U|EbykFCWOn$jm*yr*5~T>I9RdUZ)sq7C+JCk?uhX%6GN~p;P;b{?ecktf z$qapq(MbJ+OJ7Lc$KkzxMLA#JClU7;dY;21ZaXGlBX9E5MRz!`Z&&KP&Op>ywWYnv zKTrIKd&1S?dM-DGM+BeqvHdrtyKXgo&ArbaM^vwg83jR;4sJ!fPf7PVrMWXZZ(c`M z6Q#)lifYQ}47DX&pZSB3lZDBl6)S=qJqISb_n|Rl^uFuIqnoeEx=7B%1;msZ>X;u9G{13 zEV}CvU8~0gKDo4Om+8&*wFI@Tjg?gP5Jy&iV8HjH1WFeZNv=HWel#O?K9OCi$+d2A zYZFGP6vW>vgb9xr&|RG$+wP2($nFf`@a~mug)b69U1);`ii5GH0E^WnUwB<7^`OJw z!fl%V1pqK`b! z8d=Xs=>d*V7CCpc_E{3$@DLpqG$JRO;#+L$ngSIgzSvibR}swKuTaf3kD`Q%Bbv^H z=i>BcA73O*?HlElq(TKh#f}rpIVGyeO1PSh(25|8PNiUuG#ZG?fn3Vf^Oit2eTOJGKk63S2=((9&#m7Kuj)0u|*bQt+`*(J7x zy&u=ogt&IhHVjr|uu+zIAHh89az_iBwS=szsnLJN*`*TVXeE4W@|f++J9BT5e1(GE z8LMUW<9vCU_rUla2Q?2_N&&U$#L=e7-TrLWZ+$dh9$LxqV$&?kdi&tkQ+I_m=`Q%# zSgt)RzEOZ69fdcfZrcb~UiUYi*z((VPiaTs^4Lm8fcrTynzjn(9y;qco}9$+a3n@@ zS4%JW%QIJ-bcqaA<=PfqY(u=_C6p$v>WYa|Gvd z*HS!6Rvr1}QWE8DsWy}8CC&KIrEtC<49eDWQ~l`4AC@PK=091FXy$N_dFM-O{&tL< zw9Ul9NVgzW?_>ca08^eKY0>M#et}Gl!icMLnX+@)Hn7Nn;sT1Nn}lZE9h`rG7}8h| zH>#CX@~>8f6_HzznN32H*_JvE&mNm$$vM_hN(qi&X_4oX#Esxu&D?oRu_~K-D$+;^ zs^U+%Tf>-#qf~{mIz)?Ntov|kfJsx9J?BwSGM7%{m*QVb4w(t7uds*}TsGG7Z+7v5Z25} z$>Z7AxuYx|r`lsv)b?c<#nx+$f=!YYjEm&^JxS$n8{C{?^@*@!pGHiZc%{jjPSHHw z`}J}9W|gID!dgoA8P{{?3nrS9?xB;%8Q4a5iwrU~dD-gcuxC$-;8ozAhD`<&Tg99h zj~o`uV_Spf{S(-YOLU^nIshGKIuN}%aS++E0s5!ap1`l5%GgHRxj{1WxVMipt?7nr z*3Hjb>j^Q(zl7wPf3`zoBDUJ;RTBkjkLz|j01NY3$7#VWN*DPQzj3t>!Mep4qcO@t zdwt~8d{=w_l5e9iDO>TwA}w9Wgg)1eyg3&rgm18VWMIC)d-*K!x=7c|+!e1a%?30_ zT76R&JXP!tOPQnxszsHXxacLa4^!xLH~(31uP#M8ajwwnAGzgI#x`4@GRhE3urfs= z*XmeSblZP8NwP2jxeh}^C?1FcwQ1o%JO=onh^#WpupI0tp14d_o0}pxNiI9d*hoJ> zfi-WrSge%FSCE>l6D|HQ=|m2WY+yGpXK7rNdO?X%(Z#BFsIX?VFj&O5j)=SHf=Ko11zOoM8e ze`c+x*>wuz-E&Gbn+~DSYBZM#tHv`5AuDa!!mMg_=&ec$vA!1SXjm&Z@`+t(1JeiS zafed#gZ!$J{@l`6N^-+^EBtuDs!stuz^OY<*DN<+o>Wv0|6tVEjryn~Xz4bq{YYZ@ z9+n#4Dh{%Zbrzb=wE&uNT%{BN$rjQIt=TOVK3G0~#Rk}Y<&w~h76^W5W0z9){|$1g zh+G^=4$b(Q;(=uFOqWbq7tx;db4MN~YR_PsGIyyh4jQ#TL+-LwM1`AN1%}^?h4M4H z62;`2(nq_SBN^BdIbK;Zc&OvBp8DGsZlSSCv5MIRsB528S#^ zCK&$tlErWL+NG33bKr5~*`U*zavI)DFb1P)JewELj{hV+J*8ogD#-c4qtc6<4I9d2>tFG;fP*L0=M`;62dV zHGogg`FbiFUJb4?{pCkccWk2^5izd(W$ZSz%op`RG_FOwrbPkAI&Ru?AKGe8soRwL zxP6d}HN2eQIbqIf#b}X%|G-73KcpmTpwCgWQ;ChL+56O;)poh_Dv&9erI3s0Da8)U z#LACGb{%QMw>bV!qu&a*AM{KS_?=%(np;Cin){Ni*+yv5T>HY+xZl1y)DKD(u4HHN zE!Aj6_c+WBspw4_={3`-!1ZtU2Mz<>pW>vA61gj+Igru23)#yA!v& zR#d;FtzqlmYYWoEP+r@e?xA(!4>e32lNFW9#&zce9iuQe&9n{_n`7B)(VMmt@~3xv zaMUGDJDE8SP3F8#44ad?eIP!1mVHqQISdZUrtPz?7fNlJe>|^nIa{nE9N3x8HZW#O z$LSUbt$7?U8?A!^3JKS=EgD!?+h^}w6{dLJda_fwy zNbY`lb1#;>4-YxygJn*eqPrfqzmJXHUB=n1LBgUZacrcmHKM1uuOf|%H)y`Fr+F_>NdlcEtS#3Ey-~A)5&jLAbjmX4=`%*ip2usJbdfK@^K8%IG=;yoY-{+~Z(_n> z_eZxCEax);wA8~$e!b580wn3i;TEHkxx>U9D%QB)1P!0h+)n|`;n(0hm>Tl?&pr`_ z_iBU63wp}(AVey7g){F}nB5zVZbI`5&NL1EXT^nPNRpy;AW=+tTg{=rW~2;_3MyVl z#c*3SRDc2VbS;AF_1jTjzD0^lh}DzlUS31ahxnWs+>7~Tn|`5vf2vIVS}cLfK_)a{ zuhi`wDx-%9Pz~c?ua&L4n`y7@wxBus&VdYV_UUXgKNps$tE#j1Xzpkp6Y-XG9e%B2 zPYTuGfTQO*?Tx+ewU-Q$pngQwylpTWR;cSJ&Rg>;W^`)Kh%}yTv99^JvM1IPtFeS2 z{MGc(SRp|oOEDWh7r8T1p}TNnh3$Ojlf6NVzp0B85U&-|-f2}+$t!hkg*HoGyxKA# zm6M5Kq^%Ba%pH2q3wrI6we@V?*F*2Aw{X@-${Bgwj=HVXTZ8Q53Ki=?IJ@1_&-N8| zYr~Ig?cr?Ek$8&H5H7h*E?Ma6&eTvT%Vy({KayjkbM?hrUx4{O`5{6%$xlhV^Jr$OvVO-lnqDWY}*hXceV1iXNYhuG9nx@ZE&E4L)cv z10%0@Hqr}q^}Ng2Xu#T{<68ZcWjRt`k!TjoFj#sycaNt^US3;HB@EP7#->8t zje8So`Dm5vq}iYlRW8e2(r(vDL8?dd8#RW~M)JtFg8CGT;*ju53D<6hA(lFqjFzuy zO^E~5cRPO2O@k$ETUrY$Zk!G_V&BegOpCYetMptCPCk<&PF#U1%scG8fY`tLR^-yu z)?UqIaD|2A=G}&00>qqt>(O@EDf7$Xo3FZ*lhO8Ax0V-~rYH+}PrDNnUrLQO9J&ef z5jZbCp=>5#Bj&Uzn+=sAlbudqX33cMpItYBk<&HJ32xz^A;fK#=!cF-nvajpkGwty z4YO;=wOuS(m1tX=w+XQoiuT-wYL<$1J-QMmbwVhJDbGt8I6t(1c7<^8bNo-UJ%cvl z;mc8n$M+so6W>xSKQ^UHYB(T&|0}H4Y~0T z!IlLO)bHQFmGGe~@L^a6>vTH3z~;@pbVz8Hhh;G;ToPrb7hFRH&S~XLsD_|6-K&qt zdVQak&1w+<8 zYWfr{7i|@Av=%<6s!5Vr)AV_RKXe16$=A*>6V|bZNas1vI{kR?=NgaRYUkjcG|!uy z6|aDfU_1OBWG|gS&m}3hr}Jy6e3p=~MAGy45#EJ!)~ire0z>6u9zDmEwgws5SGlIB zAyvUQBZpJ@PZc6~iU~#;R5^UYYa3Y^i|@m>8c4!;0z0jcxn)(+41&4#xDhno(9|V9TYOKiJzvjw|hiDzR*^uOpga zvZK}Sn6Bw1qf_lVMd9?l5`q=7QL0D^MK3M-RU~za_9NQ8TnHPWDZO#t*Lh&j*12)t z*?AB%N@fxgCswJwXd}&%Fda=)=;&%N$6U5mqt2_q4ec*g-)}oHa z2Q%y5W1G9BKD0~ppQ+y2Y^#gEZLg+tZ1~O6J^p#ZGD*W8)vfO;Me@W*S{iC$;+w;K zh>tZZb;)Wvhtpas;Y;gR;#p1yc`*C;Wq$sF|X3>&*3o`Ynu)w63Z&|Rll4_T2-z(&fdST%qnjH;hpe^12NMI=`9Tx@)7a-kjIW4`vS$h=}xL!u= zHX;uIc+%vHZ7wn_+2yjV7PnoXq?%gvR!4qQ)27jJInJ2tLHQmesbzMW5k9F=ry&T-Vt58&F?_DY~kCpv7mM=;pN)AnH8A(L}K zhmD*=9|r*Yee}o*Bd&yUi|Cb-eBik)Mtx;jDbh7T{EO-avmN-o#scZ^^O&1Koig#cNlWyDq1q(`zDnHI>8MCW$UmzbE*{We(Xy@KlM`31Lz0A`G!14=0j5D6`6Ea! zCV+ycN)&Cl3AM8Idd?P;#m5OcZM_hRA=ua-|Kt>}z05L?R!;(k*$TPI&>8*9%^%bM z3^aj`y6sN0uW6ShHnr$(t$uhnI^ka(qt48dg7r8>{5hA=hdk?vxA(3nSi*IBDMwG? zSviegG)QaT9_Evpm=vWKw{@ETLYuL9abfrNpr@2$m#~P)B%DAE*kS`d?4j^cNEnTqS?Yv)1_7;4e%`-BqB6Gu!Lh z0C7hq|9$9t6_O>?l8)v_{}ZTZFuUfjHNvXeHs_vA?X4ZTpkoa&tce^g1=qzYTlR3O zEG^yFSqF|PZ9g48l=aIttF~&bLF4A~urIYO7%4~<(Lc%+>8^iWSZmejm3_@TCiVsk zJ~=LQr>L(%HeNDXh{Z$o)PMJVS~%n?#gXfoX`PJx9}uv-hT|bY#)oZRZc0niTOC znI+ivqiLlaVf2g9a4kRUgdutIJZL00MC|bc75G(3lT8T`ws0S~8fak|+Uepsjh|?f zFUrRm8G^e?_w<$>x;BsraAoTP}S(h~wvJ8b$G>;0&y zMOHN??>HsR>uYsk=ac0Fbur4w*5>{08u1^?;@XwU-3?}AjhRvUt7 zm(xf#U*u$UcFyKf=0vB7S>ollPK`w^@i3+JRWngM)i=<)ARSijL;qEn@?LnTteRbK zD=TBPW!_h~35YddUsf^uA$lsapKQhfmj>5h5~XJ`t#PWlKqC;s0BnQL^q;ZhR%CIG z*Y&h#{FrVtf-7xD=0E`mfYApI{>u}UvSY&OQ3eFxAj_7;xQ&=GzqVZ{v|%`3x|L{) zjg}zz$fnGDtBUKO!j%LN#WxN3cbyM-V8X#Oqdz~!Nakf?EOE;?Vuk4>gY-92vDJBl zbrbDHO7RXOc~TuZoI6PT@C0+2D?frI&ghy-wmd zfz^%MAkS!v-_vD=!5d&w|RpSZV3%{=j{?8o=MGRhN`=>_sYzX-V6wJz?3&{52R;VCF zEZ&A@=J<1M?WE5r$6u13%Qi)J3%P%juM2qBR|waTP7xOM0lfN)PCITTQDtWzKJskuE94GpV-HtNZo7(JA6Y zXdi61%PpwLK5K|5uTtH&JbtOU3ld30NhcLu2sr0CM~4NPt4BJEbe{^(@pvr8b>p_D zgsp;pgj=rB_vZSpnMDB9KkW%#FVii(J^CMZQ!`tuF)m-fQ4n!Asyc1Qf4geOiqdI0 zY8W@COnp6&YJAvZa_5*WOQ{Xd|4T=2iu@?U!<}_ii&0))a25}0%2o;tbtVZ3CoNLk zwR~U*{auZv7|-=};*CEwV^vKJG}9F3&Jno@r;^Jx>(!st$2zp~K9G*+Vcph#443zg zTBTY8-nt{tWkdz#;{_?Ac`RFqUlcoxZ|<*Y0pm$u zi9(YadT1eilyZQDj=WZk*zA+du6Z+2xCsAJOPdRW1jn8W`c}APpoloUJ&w#D1;A)W zZT_IRMFv~4D7pYJixL0Rm_HWr`GD<}j0l%px);p~4Vec>W73575o{IxccF|XlcS;z z4_`A?Q@3eR6fSk0*>Twt>6dM?>}in%UT!%L%32SM)De9D(JtY@6S2O@XhD$`+RRXg z(9D80t3ET&ODZ-tUYnT?;pP5ZE9~~{59YS3q$@n9s&krRzoevdw9YdPGpL+F;U5Lc zb++agD)Aej3Pt_dlmd{|Lez3;SyUkAyJFAEbd0~2yG9)=)jITC&@jCRR*p8$g{O^t zoVaVEw!onIuiSb*(Nns=tDD-xw?z1Vt<=SGPk~3-8m&l81b+6cf5Ye1V&+|X zSTWv)qpY`%AN)rIDcyq>xxzx|zZfy;)IdCIvqCG?(#@${m6kcV=t)QT9tcc zq|hW15BzS{5cCNfS&j*9FE4tT$C7oL^!m7U+{s_xLl=OIRrL0Oh+ zSH7~ca)~F{o_!)`{W2l|yOcNtYyGAzfc~{vD96A+|3{=2*tyQ6O8JsxpJq{RVf+#n zwO!>T{A7BrlMCr&BhBflKJ{o-B5gnktp`q2%+5Uc`>k_#IROy~ zH|pMNa-sC!M;h;Dy8LsNxJ;fH_8a|@t{wBXCe=C>L)EU8tU2eGyCJv71&=7k_GQnZ?{0YkNr1#k;4PhJ)_$+ zS$^*8C@3WVO2d};64#W$C#B|g^I3};BbG$rE=OlmDK`4VxOxks;FB0xx9$28%3uoD z)732jp(!bEn+{fHc~8?;NTX+&+JVVwH`TJZ82uEh!N4eQwFvyWK+(mezru@AKCCtN z=k#vQ^j{4o>2FA=D0}gBY})>z2r{v{vo4ElD8P`FQgZnUJU1I;-HQrU?L-9G5etUZ zt%q0E$1jwj_TlgoP-*7!seaeCf2%B9OD&yBS-E!}N6`zt_{8#ap&jnc7j_9!W`2F# zXv?)MsYT(~NHThy1Mgv|3wjDyA9?=sfvP(9vl1@IN_Ou#p(2j*8#*&liB+kardsbrKN%+N-`owv zKqxY|?AP-!_6p0mjG@Z8i73SJvK~{xXS67~I-e8^6+cO-LY67HU<6?de7n*hhUyF< zTNNn%DC|Y`kGYb}`l2{Bhzf~53_dbC^Cs_{ugGkz?ONP1Tv2vOV~W)XO%}KyRQdQC zdYm<_aYDa*{t0j890plKnn{X8Z)wW~UK?t#>G?qVC&2L;xOFZjX22^KB9m8-Akx4M zM|oVJTh=Zkn~xq^3hx}n7JZ|1+@XfywChH1bw)nLIX_EYa>66R1MajdHN9MEvee5X z(OCF7qO=dz)`PAokE^Qh+(OYTm@rTB5$G^hCd2Uohqg=QQTh|wTY3Qim0-#>6|W|P z_AYioG6mNr)1WK_GX1|%Lbx|gRa+g_BTM@zRQ5&Cc1wP<`Vx63l?$tYsUg2>}K6lP{<>KQ%K2c(WHv69^| zP=Mh-b>#Wa`O8~16u|Fv6c)kQtZKO`$YnQc=%aXit*_&;W)jb2C8nnF`YD0XbbP*z z3UdJBO?{E)L&Co|MJ_Jl20Y}sL>1x7DmJU#y^c0X9GL_(%(?w-@QXjBz<&WQ@TxotGJq8EuHRWcXL7fX~i4 zSzCHXLqw)hO_>1sAgj({R$MNN;Fox^0W{fyKff!65a{F9irN%$GLwSFLf5|Iy_{GcP>Gxwj zP~OYP!qeo`=GODEU&rqY^+FE#O>fG2&@nS2jRGgS)!9eATPEOv0YsJ zgl)Chy-kA!Hn$zUwipaPVhap{|2IRJby$m%ft)mfZ=d}>s^3ygUcUOJmUi9!<6`Pn z5c$w(MN6N;l(_#izncI!-%`C)d`al;CMI=_Ja`Ix{O>ha0@PBt(D{8gQXmb*A(UFd zG~SSJ$k7T76P+d!B~pYt@gkseR#K%diE!R1E(*%G_0L(^B*h%tvkXUU+7-CZlPc?0 zZ@8&l-c+$np^l18mu%zHI~Ru4%iqCcI>4-Z{V#D5I#FNLG;2qZ>yT+MT5hqJl!M z++r@drVZ4({2a5!98ZgG^aJr3g=M)|-E{37_K=n!WbHAg0t!bvx67(8+J()UNoQA%vy-#jHS9BJ8RwnxKQC`#-$o*&nxM?7B3QL2T%PO z7^G9zi6qZU>fP_JzP@=7zKLL1%Ih^viRnTm6{LL%evjhM>Oq_T;ctBVKh<4%G@D(w zFE!OX)L11bqEtj8s$z(0YF05*Ly(9u5<@9kv$m+zP`pZwZI!m@pyu{9R8ccgLlr44 z)u?%B<%WLmz3+GXuJ6D5$IUt`&w0+-`}ys2)^F{dwX&bH2Zu+2G#>-!<39@={afo! zL0j_9kg3ApX93K0X(u6J+}bSUaCEqBD9JhPVlr&?#exavsZes!**BBcQ4EgbayWTv z17*z|#{6b7(GWMDtb3-iy0>`U;(`;NbA(6xb!54yb^M64v>`mO*!>F!$0d-`#M75s z{;QY^tmFsX#Uyc(PxY)n{`TH;Pbuo!$R<_CI7i@KIcim}$ifnIzI3Rs;={GX$CSsm zNzh4L|1MVxTMFuVcgoPJ_@5J2`Z>YZ;#|I`Xzdjq=Jq-)J+qi`KSOe>PTE_8uCUM@XrJ}f~P#NSK-C%cl5PL*}+Lkb`8tt4Y>vENO6gK zn5K1~N2f_<${e|nGUo4{oklh6DgM`UzFw_dN<3nxbop?vWuq=vEjGy1_Ouxn@o-l; z|9y;ZeqEpUb8X)iaVb*}pTGCERSB;{*%n*&6TuJK{k(s|ltqLhcBD;oN(aEE7>@0j z;H(>FGkrMZs|u`Mr#!l-KlirnW=<`Wg^ifb#FCl0si5{!SD1~wl+P0dr+||8H?ylD z<43>cm@;~T@$*i^E1vauA}o|TK=F9$PTbK6*Vqkd(p~Xw^?CYuWYY)YT?Pqm-GGUA z6-o){rQpu3TCM7KHxwwvCRTr{?8N*1S>dnR60gM4j!hXV>Dz!L68H(u>ga)uB24wY z$dQp669fDrD@^R!v#>F4+(Q^D)r4y(g#M4oal`Py_o(GA3Gne*@tTygf{Hq_=N4rA zeSD%ZzIX29Ell6_((ua5YM;of7i*|`JU5=w@`Wy)w`Vu{G=pB@HLXlHXMQ^2N8=1?aEZJyM&#rho{6`P|J=5||9_}(xru~%oxbFYee zMZRkV)vyP1vAh60${w#*`2Y5@5N;^Qgy8)@Xe41#^{YugzHzXwKw}~{EmElbcWEO9 zPY}*PT&A#w%Q-qIiK@KgB)?FG}yI$xGdr{MA6rGnb^M6w=7j{~B6Jn=5xQzr53CXeiCL zqSYpF1N8zlTMptpnHov8Y~&6DU#J(Q<7=M%DIF8B3cbR$zk|*qP?z`vbByyw20;;H zAjUJ2dL*h-9Zdp3YFrchZ@~S|iaEvl;L11y;`{sdQ;rSNx@&qq+W&o!3DHCrXFOOi@wG1cbnyJtNZG#>NWwa+`1AX#-fv;kRVoK&g>O+0ENIBE z{{i2(UVkx3F} zU&Qa@{(|^USib*ni#V8Yc8!ot1a=l&BT3n&L zE2VKvg~+@7o{I}U4Nw8#-Mrc1@PDP>0@Prz|D1a3lx|GVBj=5y?g?tkQB+Wi=p)IX zBhgwEM2G-!xL8fJBQ)7_qmriWH1>|gO{B%PsB3b#xMjDV=hEUAm_*aJEMjj>Ro+P5 z>iWt{VF`$&`C@@r#eHIU;hysRGcHzzrmJ-ywjyd?bgA^tPUng{<97$Q=BB6THP=H< zN4!_0-d_jaD5dJ?*Iu1?F%>bl4tt2UZ|^j%&@NpHJhK}5IiA*>N{NCHG^; zdIjcaR}Mf?EN?iga#F82G5FA&Twf6b{UG`qd=0^>7oTypgk@1qTqc`KCk2}@jHC}R zKB`*$-bg8#of>sUjSL$X*R{XwzAZR5?CTNw?vd`pYf)Ai{sw|o8U1|%Uh?rJ;Aewe z@%nuH7@0J%lj@0ikA`?FzC+CO`s|1gW-M_TyeCWL&@U&7OOux@H!6FN^jkhpNxYWIwPXP&y+c2Oi4NERy0Fv2KnShOZR)n=o zS9~#&g%4s<^{Pu1)0ze#AWlDKifGao1zHWNPgluMpSBPy%bH^5yT5z3YlSx+`$U-f zG`7|?3SxcS=JkoZTjvC`N?AF!cL;>zeEmD~R5gBM9+Y0T{GiOaDJBT*feu20EQ8&G z!U8ZLJ5!VY%t^mnPI!LA(FE%J)zCqGRmpm#aIo-6K}I(#lZm}!MGEJd6nINYC7(^| z=CWriE&Zvt>$%b0?Yu24enSjoB9|VvI;S7&lFRw+%mWtWX(gR@{dSEF6)>ZShNQ)3Lt-0T$k9T#)631R1lMu7O@2xd$ zKK+5~r_3o#WenDk)BA#i|HMk!<5LsEigq{Iuqw6EN4=d!X)y|s{$nD8ab@o}UOo^% zTO;zE3eDYtbH=VW`#LfP2gqpZWXvbw{nVC_>cgg~D2wxKr7)NucAV*0c0o`AEh0z( zHUoAm47yT0bE2KUgOD#C^C;V^sH7!Rog)_V@?zYgeNcpa4>$S(r>&aWnV}lx)HM1T z3eGD14auKpH2isUb6TtW5;yO--9S&H&vF{I@@olWrU>>=IxM8t$iTawRLZWUSG~Mb zK=yikrsZ=<^LFa-^-(LHrKP$MS7PD14G7(dkcIIy0826xk1(YcO zwlA%%4MO7m_lFKN!26bE(&^MSBJUAlmfL{sQ)$t15l|-<>QN za{8YtApN309o#a=4`c=aL+Az*1A_n>Fm3I?AR-3s#~ym8(K^ys03$Lr=D+Lywms`G zxYCy5j;Rc%jyaEeL^;)0scIq}fsRk)$m?UrgUDm`V(p19I5@LA?21@CkgU=;5wS^W z20@t{OllSaynJlP7lzgPd_~gzTMbJr#Y{QiDz7KhRqd(f-Ns&HnPig=oCjaZtsI!l z9|?k3-bb-lg(M0JQ;_;r$vKE*39AZ*=m&40Zn}Od(W1V`)>&G!U!hoYk*%&zMKeFz z3}0G)+E`?RDYu&KKF%>hK^V`ZNkO0CJ3~CI&KS(;=6j zAI~FG&TH9PFxsKWSlya=&%ngW0jGRT_Qq^fYSX*S00kABgr zDh7{)jykmHlTO?xsUIH=QUto8?_B@D24D7teF-|_TO#>NAaQXzCboBdee>&Q>SytZ zOHui;o?N!D$d5Qs#ICyJc2)TJ$Gx8DjaFUqh+=)n8ve}a6hTDZr6&riDQ}z?A>=yw z&`9nncjNF*=2F0JT)uiDJGthYUmY=bCthmFHV2W4Yzy0oq{T5N_z<1kZP<^ikXmXnMP_|9h=4 zbi?jU-Me++)1Ov)BhPPaPMXT9L4OQzwt79evbG>^KWVEz|FggCZhjyxWV6Il=j+6V zG0_n3x|`6rQZ}H*16t7%eEFhje!JKue>O(FNy2XLbCo-*0EcqkywTjZFj_BVJ*Vr& z()1}8ru96Xyv38FLwi~C!{mE57^_?k& zl>Eq|;8>x|EK6(uPT2(DB|~ecvyrj49ioE*w(eL7iUSjfHTuk*(uRET#I~R8x8QDo z6Tl2hoK?eR8OL@N0~f3wOL}c`DIc7xBDH!5@!7%SM4^y!$-+n1v;vsC?~{uQF3deA zY6hc6pB>pI5m86VrPK7Rm#aw*Rvgpv%*+O6)_~t?DS69IWtZ5Wf)CIGu38hyzt~jWavRm zMbFF{d!Z;@A4$iN^`p33dT0i8XDQn|MUukt2S%J6eb=P85ZOF$4er;Ijh~JA>C3Ut z<;OKXl_v(K^?h8psq|u8JYr4gQOCXVa|y(+C>2K0JUxSqOgrQ2vb^6p9K5}h0Ig9R zvN945Kv^*^j(1F)oR#D7vJHjQtPQb4EL+>{hg7;I7%IJvzGB%hi{kpK@=6K9{;^vo z@M>HNjbXamH-|TOkM&yNOf$pt7~b#y5PP!BK!?Ln`RhsD6aFtmyr#2xE~QNwD(RoqlH zRM0T22Na?KKJ5-sgJGah7#IV_s=2$Pv8vimAX`UzNj-q7)34xRhr(d#KpvP-M-@80 z)87JzyQ}LK?e6vH>(SAKG+~nKM^^rBM%@?;Kgo4~m9Pm`iONukVdLg~%_N;IYX1np zuLOum0{e5vb+Jo_iiGFuzVuD57>v3XaGT*sp&X#_CAakJo(EnGXo9L7QrB)G`6oeF pRzMyS^@>J2q%O+i`TPC_2BC>Tp+pRp9dcR&qRK8Sd(Irm{$Ge#IZXfn literal 0 HcmV?d00001 From ebdad12b5df3e34335ca4702318396ae7c0b0c6d Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 25 Sep 2021 11:42:31 -0700 Subject: [PATCH 43/66] refactor test scripts, update README for test with exceptions --- .travis.yml | 2 +- lib/pdfimage.js | 2 +- package.json | 2 +- readme.md | 15 ++++++++++++--- test/p2j.forms.sh | 28 ++++++++++++++-------------- test/p2j.one.sh | 18 ++++++++++-------- 6 files changed, 39 insertions(+), 28 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4808846d..8949c899 100755 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,3 @@ language: node_js node_js: - - "8.9.0" + - "12.20.0" diff --git a/lib/pdfimage.js b/lib/pdfimage.js index 317866a8..a9056eff 100644 --- a/lib/pdfimage.js +++ b/lib/pdfimage.js @@ -25,7 +25,7 @@ let PDFImage = (function() { this.btoa = function(val) { if (typeof window === 'undefined') { - return (new Buffer(val, 'ascii')).toString('base64'); + return (new Buffer.from(val, 'ascii')).toString('base64'); } else if (typeof window.btoa === 'function') return window.btoa(val); diff --git a/package.json b/package.json index 86087bf5..797ba647 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,7 @@ "main": "./pdfparser.js", "scripts": { "test": "cd ./test && sh p2j.forms.sh", - "test-misc": "node pdf2json.js -f ./test/pdf/misc/ -o ./test/target/misc/ -c -m && echo \"\nExpected: 4 success, 2 failure \" " + "test-misc": "cd ./test && sh p2j.one.sh misc . \"Expected: 4 success, 2 exception with stack trace\" " }, "engines": { "node": ">=12.20.0" diff --git a/readme.md b/readme.md index 502f46da..bc27d3cd 100644 --- a/readme.md +++ b/readme.md @@ -19,10 +19,19 @@ To Run in RESTful Web Service or as Commandline Utility ## Test ->npm i ->npm run test +After install, run command line: -Check _./test/target/_ for output JSON and test files. +> npm run test + +It'll scan and parse *260* PDF AcroForm files under *_./test/pdf_*, runs with *_-s -t -c -m_* command line options, generates primary output JSON, additional text content JSON, form fields JSON and merged text JSON file for each PDF. It usually takes ~20s in my MacBook Pro to complete, check *_./test/target/_* for outputs. + +### Test Exceptions + +After install, run command line: + +> npm run test-misc + +It'll scan and parse 6 PDF files under *_./test/pdf/misc_*, also runs with *_-s -t -c -m_* command line options, generates primary output JSON, additional text content JSON, form fields JSON and merged text JSON file for 4 PDF fields, catches exceptions with stack trace, one for _unsupported encryption algorithm_, another one for _Invalid XRef stream header_. ## Code Example diff --git a/test/p2j.forms.sh b/test/p2j.forms.sh index 2c6fb90f..1a3715e8 100755 --- a/test/p2j.forms.sh +++ b/test/p2j.forms.sh @@ -1,22 +1,22 @@ #!/usr/bin/env bash STARTTIME=$(date +%s) -# AGENCIES=("dc" "de" "ef" "fd" "nd" "or" "pa" "sc" "va") -# for i in "${AGENCIES[@]}" -# do -# sh ./p2j.one.sh $i -# done +AGENCIES=("dc" "de" "ef" "fd" "nd" "or" "pa" "sc" "va") +for i in "${AGENCIES[@]}" +do + sh ./p2j.one.sh $i form "NO Exception, All Parsed OK" +done # Travis CI doesn't seem to support arrays in bash for testing. # Reverting to a bunch of commands so that build button can be shown. -sh ./p2j.one.sh dc -sh ./p2j.one.sh de -sh ./p2j.one.sh ef -sh ./p2j.one.sh fd -sh ./p2j.one.sh nd -sh ./p2j.one.sh or -sh ./p2j.one.sh pa -sh ./p2j.one.sh sc -sh ./p2j.one.sh va +# sh ./p2j.one.sh dc +# sh ./p2j.one.sh de +# sh ./p2j.one.sh ef +# sh ./p2j.one.sh fd +# sh ./p2j.one.sh nd +# sh ./p2j.one.sh or +# sh ./p2j.one.sh pa +# sh ./p2j.one.sh sc +# sh ./p2j.one.sh va ENDTIME=$(date +%s) echo "It takes $(($ENDTIME - $STARTTIME)) seconds to process all PDFs ..." diff --git a/test/p2j.one.sh b/test/p2j.one.sh index 61a592ac..33b29f7e 100755 --- a/test/p2j.one.sh +++ b/test/p2j.one.sh @@ -1,22 +1,24 @@ #!/usr/bin/env bash -IN_DIR_BASE=./pdf/ -OUT_DIR_BASE=./target/ -DATA_DIR_BASE=./data/ +IN_DIR_BASE=./pdf +OUT_DIR_BASE=./target +DATA_DIR_BASE=./data PDF2JSON=../pdf2json.js AGENCY_NAME=$1 +FORM_BASE=$2 +EXPECTED_RESULT=$3 echo "-----------------------------------------------------" echo "Clean up existing $AGENCY_NAME JSON" echo "-----------------------------------------------------" -rm -rfv $OUT_DIR_BASE$AGENCY_NAME/ +rm -rfv $OUT_DIR_BASE/$AGENCY_NAME echo "-----------------------------------------------------" echo "Update $AGENCY_NAME PDF" echo "-----------------------------------------------------" -mkdir -p $OUT_DIR_BASE$AGENCY_NAME/form/ -node $PDF2JSON -f $IN_DIR_BASE$AGENCY_NAME/form/ -o $OUT_DIR_BASE$AGENCY_NAME/form/ -s -t -c -diff -rq $OUT_DIR_BASE$AGENCY_NAME/form/ $DATA_DIR_BASE$AGENCY_NAME/form/ +mkdir -p $OUT_DIR_BASE/$AGENCY_NAME/$FORM_BASE +node --trace-deprecation $PDF2JSON -f $IN_DIR_BASE/$AGENCY_NAME/$FORM_BASE -o $OUT_DIR_BASE/$AGENCY_NAME/$FORM_BASE -s -t -c -m +# diff -rq $OUT_DIR_BASE$AGENCY_NAME/$FORM_BASE/ $DATA_DIR_BASE$AGENCY_NAME/$FORM_BASE/ echo "-----------------------------------------------------" -echo "All JSON and PDF are updated for $AGENCY_NAME" +echo "Expected for $AGENCY_NAME : $EXPECTED_RESULT" echo "-----------------------------------------------------" From 82192e327af7d1ac3bf54da372e47b266c1d4fca Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 2 Oct 2021 09:59:53 -0700 Subject: [PATCH 44/66] bump dependency versions --- package-lock.json | 74 +++++++++++++++++++++++------------------------ package.json | 2 +- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/package-lock.json b/package-lock.json index c06948ef..6b1f542b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "pdf2json", - "version": "1.2.4", + "version": "1.2.5", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "pdf2json", - "version": "1.2.4", + "version": "1.2.5", "bundleDependencies": [ "async", "lodash", @@ -18,7 +18,7 @@ "@xmldom/xmldom": "^0.7.5", "async": "^3.2.1", "lodash": "^4.17.21", - "yargs": "^17.1.1" + "yargs": "^17.2.1" }, "bin": { "pdf2json": "bin/pdf2json" @@ -38,9 +38,9 @@ } }, "node_modules/ansi-regex": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", - "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", "inBundle": true, "engines": { "node": ">=8" @@ -145,26 +145,26 @@ } }, "node_modules/string-width": { - "version": "4.2.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", - "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", "inBundle": true, "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.0" + "strip-ansi": "^6.0.1" }, "engines": { "node": ">=8" } }, "node_modules/strip-ansi": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", - "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", "inBundle": true, "dependencies": { - "ansi-regex": "^5.0.0" + "ansi-regex": "^5.0.1" }, "engines": { "node": ">=8" @@ -197,9 +197,9 @@ } }, "node_modules/yargs": { - "version": "17.1.1", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.1.1.tgz", - "integrity": "sha512-c2k48R0PwKIqKhPMWjeiF6y2xY/gPMUlro0sgxqXpbOIohWiLNXWslsootttv7E1e73QPAMQSg5FeySbVcpsPQ==", + "version": "17.2.1", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.2.1.tgz", + "integrity": "sha512-XfR8du6ua4K6uLGm5S6fA+FIJom/MdJcFNVY8geLlp2v8GYbOXD4EB1tPNZsRn4vBzKGMgb5DRZMeWuFc2GO8Q==", "inBundle": true, "dependencies": { "cliui": "^7.0.2", @@ -215,9 +215,9 @@ } }, "node_modules/yargs-parser": { - "version": "20.2.7", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.7.tgz", - "integrity": "sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==", + "version": "20.2.9", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", + "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", "inBundle": true, "engines": { "node": ">=10" @@ -231,9 +231,9 @@ "integrity": "sha512-V3BIhmY36fXZ1OtVcI9W+FxQqxVLsPKcNjWigIaa81dLC9IolJl5Mt4Cvhmr0flUnjSpTdrbMTSbXqYqV5dT6A==" }, "ansi-regex": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", - "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==" + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==" }, "ansi-styles": { "version": "4.3.0", @@ -302,21 +302,21 @@ "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=" }, "string-width": { - "version": "4.2.2", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.2.tgz", - "integrity": "sha512-XBJbT3N4JhVumXE0eoLU9DCjcaF92KLNqTmFCnG1pf8duUxFGwtP6AD6nkjw9a3IdiRtL3E2w3JDiE/xi3vOeA==", + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", "requires": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.0" + "strip-ansi": "^6.0.1" } }, "strip-ansi": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.0.tgz", - "integrity": "sha512-AuvKTrTfQNYNIctbR1K/YGTR1756GycPsg7b9bdV9Duqur4gv6aKqHXah67Z8ImS7WEz5QVcOtlfW2rZEugt6w==", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", "requires": { - "ansi-regex": "^5.0.0" + "ansi-regex": "^5.0.1" } }, "wrap-ansi": { @@ -335,9 +335,9 @@ "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==" }, "yargs": { - "version": "17.1.1", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.1.1.tgz", - "integrity": "sha512-c2k48R0PwKIqKhPMWjeiF6y2xY/gPMUlro0sgxqXpbOIohWiLNXWslsootttv7E1e73QPAMQSg5FeySbVcpsPQ==", + "version": "17.2.1", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.2.1.tgz", + "integrity": "sha512-XfR8du6ua4K6uLGm5S6fA+FIJom/MdJcFNVY8geLlp2v8GYbOXD4EB1tPNZsRn4vBzKGMgb5DRZMeWuFc2GO8Q==", "requires": { "cliui": "^7.0.2", "escalade": "^3.1.1", @@ -349,9 +349,9 @@ } }, "yargs-parser": { - "version": "20.2.7", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.7.tgz", - "integrity": "sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==" + "version": "20.2.9", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", + "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==" } } } diff --git a/package.json b/package.json index 797ba647..b8443544 100644 --- a/package.json +++ b/package.json @@ -40,7 +40,7 @@ "async": "^3.2.1", "lodash": "^4.17.21", "@xmldom/xmldom": "^0.7.5", - "yargs": "^17.1.1" + "yargs": "^17.2.1" }, "devDependencies": {}, "bundledDependencies": [ From 1f544df9952389000597d26ad9df9a99fbd8e748 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 2 Oct 2021 18:29:06 -0700 Subject: [PATCH 45/66] refactor pdfparser.js with es6 class and stream --- .travis.yml | 2 +- lib/p2jcmd.js | 13 +-- package.json | 13 ++- pdf2json.js | 2 - pdfparser.js | 272 ++++++++++++++++++++++++++------------------------ 5 files changed, 162 insertions(+), 140 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8949c899..ca5f442d 100755 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,3 @@ language: node_js node_js: - - "12.20.0" + - "14.18.0" diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index a4a42992..1d5aff08 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -136,13 +136,13 @@ let PDF2JSONUtil = (function () { this.pdfParser = new PDFParser(null, PROCESS_RAW_TEXT_CONTENT); this.pdfParser.on("pdfParser_dataError", evtData => _onPdfParserError.call(this, evtData, callback)); - let outputStream = fs.createWriteStream(this.outputPath); + const outputStream = fs.createWriteStream(this.outputPath); outputStream.on('finish', () => _onPrimarySuccess.call(this, callback)); outputStream.on('error', err => _onPrimaryError.call(this, callback)); - console.log("Transcoding " + this.inputFile + " to - " + this.outputPath); + console.log("Transcoding Stream " + this.inputFile + " to - " + this.outputPath); let inputStream = fs.createReadStream(this.inputPath, {bufferSize: 64 * 1024}); - inputStream.pipe(this.pdfParser).pipe(new StringifyStream()).pipe(outputStream); + inputStream.pipe(this.pdfParser.createParserStream()).pipe(new StringifyStream()).pipe(outputStream); }; let _parseOnePDF = function(callback) { @@ -159,7 +159,7 @@ let PDF2JSONUtil = (function () { }); }); - console.log("Transcoding " + this.inputFile + " to - " + this.outputPath); + console.log("Transcoding File " + this.inputFile + " to - " + this.outputPath); this.pdfParser.loadPDF(this.inputPath, VERBOSITY_LEVEL); }; @@ -306,8 +306,9 @@ let PDFProcessor = (function () { }; cls.prototype.complete = function(statusMsg) { - console.log(`\n${this.inputCount} input files\t${this.successCount} success\t${this.failedCount} fail\t${this.warningCount} warning.`); - console.log(statusMsg); + console.info(`\n${this.inputCount} input files\t${this.successCount} success\t${this.failedCount} fail\t${this.warningCount} warning.`); + if (statusMsg) + console.log(statusMsg); process.nextTick( () => { console.timeEnd(_PRO_TIMER); //let exitCode = (this.inputCount === this.successCount) ? 0 : 1; diff --git a/package.json b/package.json index b8443544..ed3e1af2 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.2.5", + "version": "1.3.0", "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", "keywords": [ "pdf", @@ -28,10 +28,17 @@ "main": "./pdfparser.js", "scripts": { "test": "cd ./test && sh p2j.forms.sh", - "test-misc": "cd ./test && sh p2j.one.sh misc . \"Expected: 4 success, 2 exception with stack trace\" " + "test-misc": "cd ./test && sh p2j.one.sh misc . \"Expected: 4 success, 2 exception with stack trace\" ", + "parse": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form", + "parse-s": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s", + "parse-t": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", + "parse-c": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", + "parse-m": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", + "parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m -r" }, "engines": { - "node": ">=12.20.0" + "node": ">=14.18.0", + "npm": "~6.14.15" }, "bin": { "pdf2json": "./bin/pdf2json" diff --git a/pdf2json.js b/pdf2json.js index 540608ca..eedf5f24 100644 --- a/pdf2json.js +++ b/pdf2json.js @@ -1,4 +1,2 @@ -'use strict'; - var P2JCMD = require('./lib/p2jcmd'); new P2JCMD().start(); diff --git a/pdfparser.js b/pdfparser.js index 0704c457..552e5d00 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -1,192 +1,208 @@ -'use strict'; - -let fs = require('fs'), - stream = require('stream'), +const fs = require("fs"), + {EventEmitter} = require("events"), + {Transform, Readable} = require("stream"), nodeUtil = require("util"), _ = require("lodash"), async = require("async"), PDFJS = require("./lib/pdf.js"); -let PDFParser = (function () { - // private static - let _nextId = 1; - let _name = 'PDFParser'; +class ParserStream extends Transform { + static createContentStream(jsonObj) { + const rStream = new Readable({objectMode: true}); + rStream.push(jsonObj); + rStream.push(null); + return rStream; + } + + #pdfParser = null; + #chunks = []; + + constructor(pdfParser, options) { + super(options); + this.#pdfParser = pdfParser; - let _binBuffer = {}; - let _maxBinBufferCount = 10; + this.#chunks = []; + } - let _password = ''; + //implements transform stream + _transform(chunk, enc, callback) { + this.#chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, enc)); + callback(); + } + + _flush(callback) { + this.#pdfParser.on("pdfParser_dataReady", evtData => { + this.push(evtData); + callback(); + this.emit('end', null); + }); + this.#pdfParser.parseBuffer(Buffer.concat(this.#chunks)); + } + + _destroy() { + super.removeAllListeners(); + this.#pdfParser = null; + this.#chunks = []; + } +} + +class PDFParser extends EventEmitter { // inherit from event emitter + //private static + static #_nextId = 0; + static #_maxBinBufferCount = 10; + static #_binBuffer = {}; + + //private + #_id = 0; + #password = ""; + + #context = null; // service context object, only used in Web Service project; null in command line + + #pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + #pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache + #data = null; //if file read success, data is PDF content; if failed, data is "err" object + #PDFJS = null; //will be initialized in constructor + #processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging + + // constructor + constructor(context, needRawText, password) { + //call constructor for super class + super({objectMode: true, bufferSize: 64 * 1024}); + + // private + this.#_id = PDFParser.#_nextId++; + + // service context object, only used in Web Service project; null in command line + this.#context = context; + + this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started + this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache + this.#data = null; //if file read success, data is PDF content; if failed, data is "err" object + this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging + + this.#PDFJS = new PDFJS(needRawText); + this.#password = password; + } + + get id() { return this.#_id; } + get name() { return `${PDFParser.name}_${this.#_id}`; } + get data() { return this.#data; } + get binBufferKey() { return this.#pdfFilePath + this.#pdfFileMTime; } //private methods, needs to invoked by [funcName].call(this, ...) - let _onPDFJSParseDataReady = function(data) { + #onPDFJSParseDataReady(data) { if (!data) { //v1.1.2: data===null means end of parsed data nodeUtil.p2jinfo("PDF parsing completed."); - let output = {"formImage": this.data}; + const output = {"formImage": this.#data}; this.emit("pdfParser_dataReady", output); - if (typeof this.flushCallback === 'function') { - this.push(output); - this.flushCallback(); - this.flushCallback = null; - } } else { - Object.assign(this.data, data); + this.#data = {...this.#data, data}; } - }; + } - let _onPDFJSParserDataError = function(data) { - this.data = null; + #onPDFJSParserDataError(data) { + this.#data = null; this.emit("pdfParser_dataError", {"parserError": data}); - }; + } - let _startParsingPDF = function(buffer) { - this.data = {}; + #startParsingPDF(buffer) { + this.#data = {}; - this.PDFJS.on("pdfjs_parseDataReady", _onPDFJSParseDataReady.bind(this)); - this.PDFJS.on("pdfjs_parseDataError", _onPDFJSParserDataError.bind(this)); + this.#PDFJS.on("pdfjs_parseDataReady", this.#onPDFJSParseDataReady.bind(this)); + this.#PDFJS.on("pdfjs_parseDataError", this.#onPDFJSParserDataError.bind(this)); - this.PDFJS.parsePDFData(buffer || _binBuffer[this.pdfFilePath + this.pdfFileMTime], _password); - }; + this.#PDFJS.parsePDFData(buffer || PDFParser.#_binBuffer[this.binBufferKey], this.#password); + } - let _processBinaryCache = function() { - if (_.has(_binBuffer, this.pdfFilePath + this.pdfFileMTime)) { - _startParsingPDF.call(this); + #processBinaryCache() { + if (_.has(PDFParser.#_binBuffer, this.binBufferKey)) { + this.#startParsingPDF(); return true; } - let allKeys = _.keys(_binBuffer); - if (allKeys.length > _maxBinBufferCount) { - let idx = this.get_id() % _maxBinBufferCount; - let key = allKeys[idx]; - _binBuffer[key] = null; - delete _binBuffer[key]; + const allKeys = _.keys(PDFParser.#_binBuffer); + if (allKeys.length > PDFParser.#_maxBinBufferCount) { + const idx = this.id % PDFParser.#_maxBinBufferCount; + const key = allKeys[idx]; + PDFParser.#_binBuffer[key] = null; + delete PDFParser.#_binBuffer[key]; nodeUtil.p2jinfo("re-cycled cache for " + key); } return false; - }; + } - let _processPDFContent = function(err, data) { + #processPDFContent(err, data) { nodeUtil.p2jinfo("Load PDF file status:" + (!!err ? "Error!" : "Success!") ); if (err) { - this.data = err; - this.emit("pdfParser_dataError", this); + this.#data = null; + this.emit("pdfParser_dataError", err); } else { - _binBuffer[this.pdfFilePath + + this.pdfFileMTime] = data; - _startParsingPDF.call(this); + PDFParser.#_binBuffer[this.binBufferKey] = data; + this.#startParsingPDF(); } }; - let _createContentStream = function(jsonObj) { - let rStream = new stream.Readable({objectMode: true}); - rStream.push(jsonObj); - rStream.push(null); - return rStream; - }; - - // constructor - function PdfParser(context, needRawText) { - //call constructor for super class - stream.Transform.call(this, {objectMode: true, bufferSize: 64 * 1024}); - - // private - let _id = _nextId++; - - // public (every instance will have their own copy of these methods, needs to be lightweight) - this.get_id = () => _id; - this.get_name = () => _name + _id; - - // service context object, only used in Web Service project; null in command line - this.context = context; - - this.pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started - this.pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache - this.data = null; //if file read success, data is PDF content; if failed, data is "err" object - this.PDFJS = new PDFJS(needRawText); - this.processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging - - this.chunks = []; - this.flushCallback = null; - } - // inherit from event emitter - nodeUtil.inherits(PdfParser, stream.Transform); - - //implements transform stream - PdfParser.prototype._transform = function (chunk, enc, callback) { - this.chunks.push(Buffer.isBuffer(chunk) ? chunk : new Buffer(chunk, enc)); - callback(); - }; - - PdfParser.prototype._flush = function(callback) { - this.flushCallback = callback; - this.parseBuffer(Buffer.concat(this.chunks)); - }; - - PdfParser.prototype.fq = async.queue( (task, callback) => { + fq = async.queue( (task, callback) => { fs.readFile(task.path, callback); }, 100); //public APIs - PdfParser.prototype.setVerbosity = function(verbosity) { - nodeUtil.verbosity(verbosity || 0); - }; + createParserStream() { + return new ParserStream(this, {objectMode: true, bufferSize: 64 * 1024}); + } - PdfParser.prototype.setPassword = function(password) { - _password = password; - }; - - PdfParser.prototype.loadPDF = function(pdfFilePath, verbosity) { - this.setVerbosity(verbosity); + loadPDF(pdfFilePath, verbosity) { + nodeUtil.verbosity(verbosity || 0); nodeUtil.p2jinfo("about to load PDF file " + pdfFilePath); - this.pdfFilePath = pdfFilePath; - this.pdfFileMTime = fs.statSync(pdfFilePath).mtimeMs - if (this.processFieldInfoXML) { - this.PDFJS.tryLoadFieldInfoXML(pdfFilePath); + this.#pdfFilePath = pdfFilePath; + this.#pdfFileMTime = fs.statSync(pdfFilePath).mtimeMs; + if (this.#processFieldInfoXML) { + this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath); } - if (_processBinaryCache.call(this)) + if (this.#processBinaryCache()) return; - this.fq.push({path: pdfFilePath}, _processPDFContent.bind(this)); + this.fq.push({path: pdfFilePath}, this.#processPDFContent.bind(this)); }; // Introduce a way to directly process buffers without the need to write it to a temporary file - PdfParser.prototype.parseBuffer = function(pdfBuffer) { - _startParsingPDF.call(this, pdfBuffer); + parseBuffer(pdfBuffer) { + this.#startParsingPDF(pdfBuffer); }; - PdfParser.prototype.getRawTextContent = function() { return this.PDFJS.getRawTextContent(); }; - PdfParser.prototype.getRawTextContentStream = function() { return _createContentStream(this.getRawTextContent()); }; + getRawTextContent() { return this.#PDFJS.getRawTextContent(); } + getRawTextContentStream() { return ParserStream.createContentStream(this.getRawTextContent()); } - PdfParser.prototype.getAllFieldsTypes = function() { return this.PDFJS.getAllFieldsTypes(); }; - PdfParser.prototype.getAllFieldsTypesStream = function() { return _createContentStream(this.getAllFieldsTypes()); }; + getAllFieldsTypes() { return this.#PDFJS.getAllFieldsTypes(); }; + getAllFieldsTypesStream() { return ParserStream.createContentStream(this.getAllFieldsTypes()); } - PdfParser.prototype.getMergedTextBlocksIfNeeded = function() { return {"formImage": this.PDFJS.getMergedTextBlocksIfNeeded()}; }; - PdfParser.prototype.getMergedTextBlocksStream = function() { return _createContentStream(this.getMergedTextBlocksIfNeeded()); }; + getMergedTextBlocksIfNeeded() { return {"formImage": this.#PDFJS.getMergedTextBlocksIfNeeded()}; } + getMergedTextBlocksStream() { return ParserStream.createContentStream(this.getMergedTextBlocksIfNeeded()) } - PdfParser.prototype.destroy = function() { - this.removeAllListeners(); + destroy() { // invoked with stream transform process + super.removeAllListeners(); //context object will be set in Web Service project, but not in command line utility - if (this.context) { - this.context.destroy(); - this.context = null; + if (this.#context) { + this.#context.destroy(); + this.#context = null; } - this.pdfFilePath = null; - this.pdfFileMTime = null; - this.data = null; - this.chunks = null; + this.#pdfFilePath = null; + this.#pdfFileMTime = null; + this.#data = null; + this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging - this.PDFJS.destroy(); - this.PDFJS = null; - }; - - return PdfParser; -})(); + this.#PDFJS.destroy(); + this.#PDFJS = null; + } +} module.exports = PDFParser; From ac42179ef554e0574d201439f2d2a57e7c25af09 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sun, 3 Oct 2021 12:55:12 -0700 Subject: [PATCH 46/66] add lib/parserstream to centralize stream utils --- base/shared/util.js | 16 +++++----- lib/p2jcmd.js | 68 +++++++++++++++---------------------------- lib/parserstream.js | 71 +++++++++++++++++++++++++++++++++++++++++++++ package.json | 2 +- pdf2json.js | 2 +- pdfparser.js | 55 +++++------------------------------ test/p2j.forms.sh | 2 +- test/p2j.one.sh | 2 +- 8 files changed, 115 insertions(+), 103 deletions(-) create mode 100644 lib/parserstream.js diff --git a/base/shared/util.js b/base/shared/util.js index d7b3213d..55d61e19 100755 --- a/base/shared/util.js +++ b/base/shared/util.js @@ -1241,18 +1241,18 @@ nodeUtil.p2jinfo = info; nodeUtil.p2jwarn = warn; nodeUtil.p2jerror = error; nodeUtil.verbosity = function(verbo) { - if (!isNaN(verbo)) { + if (isNaN(verbo)) { + verbosity = WARNINGS; + } + else { if (verbo <= ERRORS) { - verbo = ERRORS; + verbosity = ERRORS; } else if (verbo >= INFOS) { - verbo = INFOS; + verbosity = INFOS; } - - verbosity = verbo; - } - else { - verbosity = ERRORS; + else + verbosity = verbo; } }; nodeUtil.verbosity(); diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index 1d5aff08..1044895a 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -1,13 +1,13 @@ -'use strict'; -let nodeUtil = require("util"), - stream = require('stream'), +const nodeUtil = require("util"), fs = require('fs'), path = require('path'), _ = require('lodash'), - PDFParser = require("../pdfparser"), + async = require("async"), + + {ParserStream, StringifyStream} = require('./parserstream'), pkInfo = require('../package.json'), - async = require("async"); + PDFParser = require("../pdfparser"); const _PRO_TIMER = `${pkInfo.name}@${pkInfo.version} [${pkInfo.homepage}]`; @@ -33,7 +33,10 @@ let yargs = require('yargs') .describe('r', '(optional) when specified, will process and parse with buffer/object transform stream rather than file system (Experimental).\n'); const argv = yargs.argv; +const ONLY_SHOW_VERSION = _.has(argv, 'v'); +const ONLY_SHOW_HELP = _.has(argv, 'h'); const VERBOSITY_LEVEL = (_.has(argv, 's') ? 0 : 5); +const HAS_INPUT_DIR_OR_FILE = _.has(argv, 'f'); const PROCESS_RAW_TEXT_CONTENT = _.has(argv, 'c'); const PROCESS_FIELDS_CONTENT = _.has(argv, 't'); @@ -42,19 +45,6 @@ const PROCESS_WITH_STREAM = _.has(argv, 'r'); let PDF2JSONUtil = (function () { - function StringifyStream(){ - stream.Transform.call(this); - - this._readableState.objectMode = false; - this._writableState.objectMode = true; - } - nodeUtil.inherits(StringifyStream, stream.Transform); - - StringifyStream.prototype._transform = function(obj, encoding, callback){ - this.push(JSON.stringify(obj)); - callback(); - }; - let _continue = function(callback, err) { if (err) console.error(err); @@ -67,30 +57,19 @@ let PDF2JSONUtil = (function () { _continue.call(this, callback, "Parse Exception: " + evtData.parserError); }; - let _createOutputStream = function(outputPath, callback) { - let outputStream = fs.createWriteStream(outputPath); - outputStream.on('finish', () => { - callback(null, outputPath); - }); - outputStream.on('error', err => { - callback({"streamError": err}, outputPath); - }); - - return outputStream; - }; - + let _generateMergedTextBlocksStream = function(callback) { - let outputStream = _createOutputStream.call(this, this.outputPath.replace(".json", ".merged.json"), callback); + const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".merged.json"), callback); this.pdfParser.getMergedTextBlocksStream().pipe(new StringifyStream()).pipe(outputStream); }; let _generateRawTextContentStream = function(callback) { - let outputStream = _createOutputStream.call(this, this.outputPath.replace(".json", ".content.txt"), callback); + const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".content.txt"), callback); this.pdfParser.getRawTextContentStream().pipe(outputStream); }; let _generateFieldsTypesStream = function(callback) { - let outputStream = _createOutputStream.call(this, this.outputPath.replace(".json", ".fields.json"), callback); + const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".fields.json"), callback); this.pdfParser.getAllFieldsTypesStream().pipe(new StringifyStream()).pipe(outputStream); }; @@ -127,7 +106,7 @@ let PDF2JSONUtil = (function () { }; let _onPrimaryError = function(err, callback) { - console.error("Output Exception: [" + this.inputPath + "] => [" + this.outputPath + "]: " + err); + nodeUtil.p2jerror("Output Exception: [" + this.inputPath + "] => [" + this.outputPath + "]: " + err); this.curProcessor.failedCount++; callback(err); }; @@ -140,7 +119,7 @@ let PDF2JSONUtil = (function () { outputStream.on('finish', () => _onPrimarySuccess.call(this, callback)); outputStream.on('error', err => _onPrimaryError.call(this, callback)); - console.log("Transcoding Stream " + this.inputFile + " to - " + this.outputPath); + nodeUtil.p2jinfo("Transcoding Stream " + this.inputFile + " to - " + this.outputPath); let inputStream = fs.createReadStream(this.inputPath, {bufferSize: 64 * 1024}); inputStream.pipe(this.pdfParser.createParserStream()).pipe(new StringifyStream()).pipe(outputStream); }; @@ -159,7 +138,7 @@ let PDF2JSONUtil = (function () { }); }); - console.log("Transcoding File " + this.inputFile + " to - " + this.outputPath); + nodeUtil.p2jinfo("Transcoding File " + this.inputFile + " to - " + this.outputPath); this.pdfParser.loadPDF(this.inputPath, VERBOSITY_LEVEL); }; @@ -200,7 +179,7 @@ let PDF2JSONUtil = (function () { this.outputFile = path.basename(this.inputPath, inExtName) + ".json"; this.outputPath = path.normalize(this.outputDir + "/" + this.outputFile); if (fs.existsSync(this.outputPath)) - console.log("\nOutput file will be replaced - " + this.outputPath); + nodeUtil.p2jwarn("Output file will be replaced - " + this.outputPath); else { let fod = fs.openSync(this.outputPath, "wx"); if (!fod) @@ -258,24 +237,25 @@ let PDFProcessor = (function () { cls.prototype.initialize = function(){ console.time(_PRO_TIMER); + nodeUtil.verbosity(VERBOSITY_LEVEL); let retVal = true; try { - if (_.has(argv, 'v')) { + if (ONLY_SHOW_VERSION) { console.log(pkInfo.version); retVal = false; } - else if (_.has(argv, 'h')) { + else if (ONLY_SHOW_HELP) { yargs.showHelp(); retVal = false; } - else if (!_.has(argv, 'f')) { + else if (!HAS_INPUT_DIR_OR_FILE) { yargs.showHelp(); - console.log("-f is required to specify input directory or file."); + console.error("-f is required to specify input directory or file."); retVal = false; } } catch(e) { - console.log("Exception: " + e.message); + console.error("Exception: " + e.message); retVal = false; } return retVal; @@ -305,10 +285,10 @@ let PDFProcessor = (function () { } }; - cls.prototype.complete = function(statusMsg) { - console.info(`\n${this.inputCount} input files\t${this.successCount} success\t${this.failedCount} fail\t${this.warningCount} warning.`); + cls.prototype.complete = function(statusMsg) { if (statusMsg) console.log(statusMsg); + console.log(`\n${this.inputCount} input files\t${this.successCount} success\t${this.failedCount} fail\t${this.warningCount} warning`); process.nextTick( () => { console.timeEnd(_PRO_TIMER); //let exitCode = (this.inputCount === this.successCount) ? 0 : 1; diff --git a/lib/parserstream.js b/lib/parserstream.js new file mode 100644 index 00000000..eb6f589b --- /dev/null +++ b/lib/parserstream.js @@ -0,0 +1,71 @@ +const {Transform, Readable} = require("stream"), + fs = require('fs'); + +class ParserStream extends Transform { + static createContentStream(jsonObj) { + const rStream = new Readable({objectMode: true}); + rStream.push(jsonObj); + rStream.push(null); + return rStream; + } + + static createOutputStream(outputPath, callback) { + const outputStream = fs.createWriteStream(outputPath); + outputStream.on('finish', () => { + callback(null, outputPath); + }); + outputStream.on('error', err => { + callback({"streamError": err}, outputPath); + }); + + return outputStream; + } + + #pdfParser = null; + #chunks = []; + + constructor(pdfParser, options) { + super(options); + this.#pdfParser = pdfParser; + + this.#chunks = []; + } + + //implements transform stream + _transform(chunk, enc, callback) { + this.#chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, enc)); + callback(); + } + + _flush(callback) { + this.#pdfParser.on("pdfParser_dataReady", evtData => { + this.push(evtData); + callback(); + this.emit('end', null); + }); + this.#pdfParser.parseBuffer(Buffer.concat(this.#chunks)); + } + + _destroy() { + super.removeAllListeners(); + this.#pdfParser = null; + this.#chunks = []; + } +} + + +class StringifyStream extends Transform { + constructor(options) { + super(options); + + this._readableState.objectMode = false; + this._writableState.objectMode = true; + } + + _transform(obj, encoding, callback){ + this.push(JSON.stringify(obj)); + callback(); + } +} + +module.exports = {ParserStream, StringifyStream}; \ No newline at end of file diff --git a/package.json b/package.json index ed3e1af2..b4057c59 100644 --- a/package.json +++ b/package.json @@ -34,7 +34,7 @@ "parse-t": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", "parse-c": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", "parse-m": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", - "parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m -r" + "parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -s -t -c -m -r" }, "engines": { "node": ">=14.18.0", diff --git a/pdf2json.js b/pdf2json.js index eedf5f24..19939fbf 100644 --- a/pdf2json.js +++ b/pdf2json.js @@ -1,2 +1,2 @@ -var P2JCMD = require('./lib/p2jcmd'); +const P2JCMD = require('./lib/p2jcmd'); new P2JCMD().start(); diff --git a/pdfparser.js b/pdfparser.js index 552e5d00..5f4e8d7d 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -1,50 +1,11 @@ const fs = require("fs"), {EventEmitter} = require("events"), - {Transform, Readable} = require("stream"), nodeUtil = require("util"), _ = require("lodash"), async = require("async"), - PDFJS = require("./lib/pdf.js"); - -class ParserStream extends Transform { - static createContentStream(jsonObj) { - const rStream = new Readable({objectMode: true}); - rStream.push(jsonObj); - rStream.push(null); - return rStream; - } - - #pdfParser = null; - #chunks = []; - - constructor(pdfParser, options) { - super(options); - this.#pdfParser = pdfParser; - - this.#chunks = []; - } - - //implements transform stream - _transform(chunk, enc, callback) { - this.#chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, enc)); - callback(); - } - - _flush(callback) { - this.#pdfParser.on("pdfParser_dataReady", evtData => { - this.push(evtData); - callback(); - this.emit('end', null); - }); - this.#pdfParser.parseBuffer(Buffer.concat(this.#chunks)); - } + PDFJS = require("./lib/pdf.js"), + {ParserStream} = require("./lib/parserstream"); - _destroy() { - super.removeAllListeners(); - this.#pdfParser = null; - this.#chunks = []; - } -} class PDFParser extends EventEmitter { // inherit from event emitter //private static @@ -57,6 +18,7 @@ class PDFParser extends EventEmitter { // inherit from event emitter #password = ""; #context = null; // service context object, only used in Web Service project; null in command line + #fq = null; //async queue for reading files #pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started #pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache @@ -67,13 +29,16 @@ class PDFParser extends EventEmitter { // inherit from event emitter // constructor constructor(context, needRawText, password) { //call constructor for super class - super({objectMode: true, bufferSize: 64 * 1024}); + super(); // private this.#_id = PDFParser.#_nextId++; // service context object, only used in Web Service project; null in command line this.#context = context; + this.#fq = async.queue( (task, callback) => { + fs.readFile(task.path, callback); + }, 1); this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache @@ -146,10 +111,6 @@ class PDFParser extends EventEmitter { // inherit from event emitter } }; - fq = async.queue( (task, callback) => { - fs.readFile(task.path, callback); - }, 100); - //public APIs createParserStream() { return new ParserStream(this, {objectMode: true, bufferSize: 64 * 1024}); @@ -168,7 +129,7 @@ class PDFParser extends EventEmitter { // inherit from event emitter if (this.#processBinaryCache()) return; - this.fq.push({path: pdfFilePath}, this.#processPDFContent.bind(this)); + this.#fq.push({path: pdfFilePath}, this.#processPDFContent.bind(this)); }; // Introduce a way to directly process buffers without the need to write it to a temporary file diff --git a/test/p2j.forms.sh b/test/p2j.forms.sh index 1a3715e8..286fa0c8 100755 --- a/test/p2j.forms.sh +++ b/test/p2j.forms.sh @@ -3,7 +3,7 @@ STARTTIME=$(date +%s) AGENCIES=("dc" "de" "ef" "fd" "nd" "or" "pa" "sc" "va") for i in "${AGENCIES[@]}" do - sh ./p2j.one.sh $i form "NO Exception, All Parsed OK" + sh ./p2j.one.sh $i form "Expected: NO Exception, All Parsed OK" done # Travis CI doesn't seem to support arrays in bash for testing. diff --git a/test/p2j.one.sh b/test/p2j.one.sh index 33b29f7e..7d1b759f 100755 --- a/test/p2j.one.sh +++ b/test/p2j.one.sh @@ -20,5 +20,5 @@ node --trace-deprecation $PDF2JSON -f $IN_DIR_BASE/$AGENCY_NAME/$FORM_BASE -o $O # diff -rq $OUT_DIR_BASE$AGENCY_NAME/$FORM_BASE/ $DATA_DIR_BASE$AGENCY_NAME/$FORM_BASE/ echo "-----------------------------------------------------" -echo "Expected for $AGENCY_NAME : $EXPECTED_RESULT" +echo "$IN_DIR_BASE/$AGENCY_NAME/$FORM_BASE : $EXPECTED_RESULT" echo "-----------------------------------------------------" From b507b2343bc28588ae7db35bd1bd074da1f1e26f Mon Sep 17 00:00:00 2001 From: Alberto Villa Date: Mon, 4 Oct 2021 00:10:14 +0200 Subject: [PATCH 47/66] Prevent `Cannot read property 'vertical' of undefined` On some documents `CanvasGraphics.showText` can be run prior to `CanvasGraphics.setFont`, thus causing a `TypeError`. This quick hack allows such documents to work. Fixes modesty/pdf2json#244. --- base/display/canvas.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/display/canvas.js b/base/display/canvas.js index 59e7cab3..f0cf817b 100755 --- a/base/display/canvas.js +++ b/base/display/canvas.js @@ -1036,7 +1036,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { showText: function CanvasGraphics_showText(glyphs, skipTextSelection) { var ctx = this.ctx; var current = this.current; - var font = current.font; + var font = current.font || {}; var fontSize = current.fontSize; var fontSizeScale = current.fontSizeScale; var charSpacing = current.charSpacing; From 129904bb4878fe738ecc01a3d4e9e0773a0529e2 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sun, 3 Oct 2021 16:47:14 -0700 Subject: [PATCH 48/66] refactor lib/p2jcmd.js with es6 class --- lib/p2jcmd.js | 271 ++++++++++++++++++++++++++++---------------------- pdfparser.js | 30 +++--- 2 files changed, 167 insertions(+), 134 deletions(-) diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index 1044895a..4d344feb 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -11,7 +11,7 @@ const nodeUtil = require("util"), const _PRO_TIMER = `${pkInfo.name}@${pkInfo.version} [${pkInfo.homepage}]`; -let yargs = require('yargs') +const yargs = require('yargs') .usage("\n" + _PRO_TIMER + "\n\nUsage: $0 -f|--file [-o|output_dir]") .alias('v', 'version') .describe('v', 'Display version.\n') @@ -43,121 +43,133 @@ const PROCESS_FIELDS_CONTENT = _.has(argv, 't'); const PROCESS_MERGE_BROKEN_TEXT_BLOCKS = _.has(argv, 'm'); const PROCESS_WITH_STREAM = _.has(argv, 'r'); -let PDF2JSONUtil = (function () { +const INPUT_DIR_OR_FILE = argv.f; - let _continue = function(callback, err) { +class PDFProcessor { + inputDir = null; + inputFile = null; + inputPath = null; + + outputDir = null; + outputFile = null; + outputPath = null; + + pdfParser = null; + curCLI = null; + + // constructor + constructor(inputDir, inputFile, curCLI) { + // public, this instance copies + this.inputDir = path.normalize(inputDir); + this.inputFile = inputFile; + this.inputPath = path.join(this.inputDir, this.inputFile); + + this.outputDir = path.normalize(argv.o || inputDir); + this.outputFile = null; + this.outputPath = null; + + this.pdfParser = null; + this.curCLI = curCLI; + } + + //private methods + #continue(callback, err) { if (err) console.error(err); - if (nodeUtil.isFunction(callback)) + if (typeof callback === "function") callback(err); - }; - - let _onPdfParserError = function(evtData, callback) { - this.curProcessor.failedCount++; - _continue.call(this, callback, "Parse Exception: " + evtData.parserError); - }; + } + #onPdfParserError(evtData, callback) { + this.curCLI.addResultCount(evtData.parserError); + this.#continue(callback, evtData.parserError); + } - let _generateMergedTextBlocksStream = function(callback) { + #generateMergedTextBlocksStream(callback) { const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".merged.json"), callback); this.pdfParser.getMergedTextBlocksStream().pipe(new StringifyStream()).pipe(outputStream); - }; + } - let _generateRawTextContentStream = function(callback) { + #generateRawTextContentStream(callback) { const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".content.txt"), callback); this.pdfParser.getRawTextContentStream().pipe(outputStream); - }; + } - let _generateFieldsTypesStream = function(callback) { + #generateFieldsTypesStream(callback) { const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".fields.json"), callback); this.pdfParser.getAllFieldsTypesStream().pipe(new StringifyStream()).pipe(outputStream); - }; + } - let _processAdditionalStreams = function(outputTasks, callback) { + #processAdditionalStreams(outputTasks, callback) { if (PROCESS_FIELDS_CONTENT) {//needs to generate fields.json file - outputTasks.push(cbFunc => _generateFieldsTypesStream.call(this, cbFunc)); + outputTasks.push(cbFunc => this.#generateFieldsTypesStream(cbFunc)); } if (PROCESS_RAW_TEXT_CONTENT) {//needs to generate content.txt file - outputTasks.push(cbFunc => _generateRawTextContentStream.call(this, cbFunc)); + outputTasks.push(cbFunc => this.#generateRawTextContentStream(cbFunc)); } if (PROCESS_MERGE_BROKEN_TEXT_BLOCKS) {//needs to generate json file with merged broken text blocks - outputTasks.push(cbFunc => _generateMergedTextBlocksStream.call(this, cbFunc)); + outputTasks.push(cbFunc => this.#generateMergedTextBlocksStream(cbFunc)); } if (outputTasks.length > 0) { - async.series(outputTasks, function (err, results) { + async.series(outputTasks, (err, results) => { if (err) { - console.error("Additional streams Error: " + err); + console.error("Additional streams Error: \n", err); } else { - console.log("Additional streams OK: \n", results); + console.log("Additional streams OK: \n", results.map(r => `✓ Success - ${r}`)); } - _continue.call(this, callback); + this.#continue(callback); }); } else { - _continue.call(this, callback); + this.#continue(callback); } - }; + } - let _onPrimarySuccess = function(callback) { - console.log("SUCCESS: [" + this.inputPath + "] => [" + this.outputPath + "]"); - this.curProcessor.successCount++; - _processAdditionalStreams.call(this, [], callback); - }; + #onPrimarySuccess(callback) { + this.curCLI.addResultCount(); + this.#processAdditionalStreams([], callback); + } - let _onPrimaryError = function(err, callback) { - nodeUtil.p2jerror("Output Exception: [" + this.inputPath + "] => [" + this.outputPath + "]: " + err); - this.curProcessor.failedCount++; + #onPrimaryError(err, callback) { + // nodeUtil.p2jerror("Output Exception: [" + this.inputPath + "] => [" + this.outputPath + "]: " + err); + this.curCLI.addResultCount(err); callback(err); - }; + } - let _parseOnePDFStream = function(callback) { + #parseOnePDFStream(callback) { this.pdfParser = new PDFParser(null, PROCESS_RAW_TEXT_CONTENT); - this.pdfParser.on("pdfParser_dataError", evtData => _onPdfParserError.call(this, evtData, callback)); + this.pdfParser.on("pdfParser_dataError", evtData => this.#onPdfParserError(evtData, callback)); const outputStream = fs.createWriteStream(this.outputPath); - outputStream.on('finish', () => _onPrimarySuccess.call(this, callback)); - outputStream.on('error', err => _onPrimaryError.call(this, callback)); + outputStream.on('finish', () => this.#onPrimarySuccess(callback)); + outputStream.on('error', err => this.#onPrimaryError(err, callback)); nodeUtil.p2jinfo("Transcoding Stream " + this.inputFile + " to - " + this.outputPath); let inputStream = fs.createReadStream(this.inputPath, {bufferSize: 64 * 1024}); inputStream.pipe(this.pdfParser.createParserStream()).pipe(new StringifyStream()).pipe(outputStream); }; - let _parseOnePDF = function(callback) { + #parseOnePDF(callback) { this.pdfParser = new PDFParser(null, PROCESS_RAW_TEXT_CONTENT); - this.pdfParser.on("pdfParser_dataError", evtData => _onPdfParserError.call(this, evtData, callback)); + this.pdfParser.on("pdfParser_dataError", evtData => this.#onPdfParserError(evtData, callback)); this.pdfParser.on("pdfParser_dataReady", evtData => { fs.writeFile(this.outputPath, JSON.stringify(evtData), err => { if(err) { - _onPrimaryError.call(this, callback); + this.#onPrimaryError(err, callback); } else { - _onPrimarySuccess.call(this, callback); + this.#onPrimarySuccess(callback); } }); }); nodeUtil.p2jinfo("Transcoding File " + this.inputFile + " to - " + this.outputPath); this.pdfParser.loadPDF(this.inputPath, VERBOSITY_LEVEL); - }; - - // constructor - let cls = function (inputDir, inputFile, curProcessor) { - // public, this instance copies - this.inputDir = path.normalize(inputDir); - this.inputFile = inputFile; - this.inputPath = path.join(this.inputDir, this.inputFile); - - this.outputDir = path.normalize(argv.o || inputDir); - this.outputFile = null; - this.outputPath = null; - - this.pdfParser = null; - this.curProcessor = curProcessor; - }; + } - cls.prototype.validateParams = function() { + //public methods + validateParams() { let retVal = null; if (!fs.existsSync(this.inputDir)) @@ -168,11 +180,11 @@ let PDF2JSONUtil = (function () { retVal = "Input error: output directory doesn't exist - " + this.outputDir + "."; if (retVal != null) { - this.curProcessor.failedCount += 1; + this.curCLI.addResultCount(retVal); return retVal; } - let inExtName = path.extname(this.inputFile).toLowerCase(); + const inExtName = path.extname(this.inputFile).toLowerCase(); if (inExtName !== '.pdf') retVal = "Input error: input file name doesn't have pdf extention - " + this.inputFile + "."; else { @@ -192,9 +204,9 @@ let PDF2JSONUtil = (function () { } return retVal; - }; + } - cls.prototype.destroy = function() { + destroy() { this.inputDir = null; this.inputFile = null; this.inputPath = null; @@ -205,37 +217,46 @@ let PDF2JSONUtil = (function () { this.pdfParser.destroy(); } this.pdfParser = null; - this.curProcessor = null; - }; + this.curCLI = null; + } - cls.prototype.processFile = function(callback) { + processFile(callback) { let validateMsg = this.validateParams(); if (!!validateMsg) { - _continue.call(this, callback, validateMsg); + this.#continue(callback, validateMsg); } else if (PROCESS_WITH_STREAM) { - _parseOnePDFStream.call(this, callback); + this.#parseOnePDFStream(callback); } else { - _parseOnePDF.call(this, callback); + this.#parseOnePDF(callback); } - }; + } - return cls; -})(); + getOutputFile = function() { + return path.join(this.outputDir, this.outputFile); + } +} + +class PDFCLI { + inputCount = 0; + successCount = 0; + failedCount = 0; + warningCount = 0; + statusMsgs = []; -let PDFProcessor = (function () { // constructor - let cls = function () { + constructor() { this.inputCount = 0; this.successCount = 0; this.failedCount = 0; this.warningCount = 0; + this.statusMsgs = []; this.p2j = null; - }; + } - cls.prototype.initialize = function(){ + initialize() { console.time(_PRO_TIMER); nodeUtil.verbosity(VERBOSITY_LEVEL); let retVal = true; @@ -259,9 +280,9 @@ let PDFProcessor = (function () { retVal = false; } return retVal; - }; + } - cls.prototype.start = function(){ + start() { if (!this.initialize()) { console.timeEnd(_PRO_TIMER); return; @@ -270,8 +291,7 @@ let PDFProcessor = (function () { try { console.log("\n" + _PRO_TIMER); - let inputStatus = fs.statSync(argv.f); - + const inputStatus = fs.statSync(INPUT_DIR_OR_FILE); if (inputStatus.isFile()) { this.processOneFile(); } @@ -283,38 +303,39 @@ let PDFProcessor = (function () { console.error("Exception: " + e.message); console.timeEnd(_PRO_TIMER); } - }; + } - cls.prototype.complete = function(statusMsg) { - if (statusMsg) - console.log(statusMsg); - console.log(`\n${this.inputCount} input files\t${this.successCount} success\t${this.failedCount} fail\t${this.warningCount} warning`); + complete() { + if (this.statusMsgs.length > 0) + console.log(this.statusMsgs); + console.log(`${this.inputCount} input files\t${this.successCount} success\t${this.failedCount} fail\t${this.warningCount} warning`); process.nextTick( () => { - console.timeEnd(_PRO_TIMER); - //let exitCode = (this.inputCount === this.successCount) ? 0 : 1; - process.exit(0); + console.timeEnd(_PRO_TIMER); + process.exit((this.inputCount === this.successCount) ? 0 : 1); }); - }; + } - cls.prototype.processOneFile = function () { - let inputDir = path.dirname(argv.f); - let inputFile = path.basename(argv.f); + processOneFile() { + const inputDir = path.dirname(INPUT_DIR_OR_FILE); + const inputFile = path.basename(INPUT_DIR_OR_FILE); this.inputCount = 1; - this.p2j = new PDF2JSONUtil(inputDir, inputFile, this); - this.p2j.processFile( data => this.complete(data) ); - }; - - cls.prototype.processFiles = function(inputDir, files) { - let fId = 0; - this.processStatusMsg = []; - this.p2j = new PDF2JSONUtil(inputDir, files[fId], this); + this.p2j = new PDFProcessor(inputDir, inputFile, this); + this.p2j.processFile( err => { + this.addStatusMsg(err, `${path.join(inputDir, inputFile)} => ${err ?? this.p2j.getOutputFile()}`); + this.complete(); + }); + } + + processFiles(inputDir, files) { + let fId = 0; + this.p2j = new PDFProcessor(inputDir, files[fId], this); this.p2j.processFile( function processPDFFile(err) { - this.processStatusMsg.push(err ? `✗ ${err} - ${files[fId]}` : `✓ Parse Success - ${files[fId]}`); + this.addStatusMsg(err, `${path.join(inputDir, files[fId])} => ${err ?? this.p2j.getOutputFile()}`); fId++; if (fId >= this.inputCount) { - this.complete(this.processStatusMsg); + this.complete(); } else { if (this.p2j) { @@ -322,31 +343,43 @@ let PDFProcessor = (function () { this.p2j = null; } - this.p2j = new PDF2JSONUtil(inputDir, files[fId], this); + this.p2j = new PDFProcessor(inputDir, files[fId], this); this.p2j.processFile(processPDFFile.bind(this)); } }.bind(this) ); - }; + } - cls.prototype.processOneDirectory = function () { - let inputDir = path.normalize(argv.f); + processOneDirectory() { + let inputDir = path.normalize(INPUT_DIR_OR_FILE); fs.readdir(inputDir, (err, files) => { - let _iChars = "!@#$%^&*()+=[]\\\';,/{}|\":<>?~`.-_ "; - let pdfFiles = files.filter( file => file.substr(-4).toLowerCase() === '.pdf' && _iChars.indexOf(file.substr(0,1)) < 0 ); - - this.inputCount = pdfFiles.length; - if (this.inputCount > 0) { - this.processFiles(inputDir, pdfFiles); + if (err) { + this.addStatusMsg(true, `[${inputDir}] - ${err.toString()}`); + this.complete(); } else { - console.log("No PDF files found. [" + inputDir + "]."); - this.complete(null); + const _iChars = "!@#$%^&*()+=[]\\\';,/{}|\":<>?~`.-_ "; + const pdfFiles = files.filter( file => file.substr(-4).toLowerCase() === '.pdf' && _iChars.indexOf(file.substr(0,1)) < 0 ); + + this.inputCount = pdfFiles.length; + if (this.inputCount > 0) { + this.processFiles(inputDir, pdfFiles); + } + else { + this.addStatusMsg(true, `[${inputDir}] - No PDF files found`); + this.complete(); + } } }); - }; + } + + addStatusMsg(error, oneMsg) { + this.statusMsgs.push(error ? `✗ Error - ${oneMsg}` : `✓ Success - ${oneMsg}`); + } - return cls; -})(); + addResultCount(error) { + (error ? this.failedCount++ : this.successCount++); + } +} -module.exports = PDFProcessor; +module.exports = PDFCLI; diff --git a/pdfparser.js b/pdfparser.js index 5f4e8d7d..7463abe6 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -9,12 +9,12 @@ const fs = require("fs"), class PDFParser extends EventEmitter { // inherit from event emitter //private static - static #_nextId = 0; - static #_maxBinBufferCount = 10; - static #_binBuffer = {}; + static #nextId = 0; + static #maxBinBufferCount = 10; + static #binBuffer = {}; //private - #_id = 0; + #id = 0; #password = ""; #context = null; // service context object, only used in Web Service project; null in command line @@ -32,7 +32,7 @@ class PDFParser extends EventEmitter { // inherit from event emitter super(); // private - this.#_id = PDFParser.#_nextId++; + this.#id = PDFParser.#nextId++; // service context object, only used in Web Service project; null in command line this.#context = context; @@ -49,8 +49,8 @@ class PDFParser extends EventEmitter { // inherit from event emitter this.#password = password; } - get id() { return this.#_id; } - get name() { return `${PDFParser.name}_${this.#_id}`; } + get id() { return this.#id; } + get name() { return `${PDFParser.name}_${this.#id}`; } get data() { return this.#data; } get binBufferKey() { return this.#pdfFilePath + this.#pdfFileMTime; } @@ -77,21 +77,21 @@ class PDFParser extends EventEmitter { // inherit from event emitter this.#PDFJS.on("pdfjs_parseDataReady", this.#onPDFJSParseDataReady.bind(this)); this.#PDFJS.on("pdfjs_parseDataError", this.#onPDFJSParserDataError.bind(this)); - this.#PDFJS.parsePDFData(buffer || PDFParser.#_binBuffer[this.binBufferKey], this.#password); + this.#PDFJS.parsePDFData(buffer || PDFParser.#binBuffer[this.binBufferKey], this.#password); } #processBinaryCache() { - if (_.has(PDFParser.#_binBuffer, this.binBufferKey)) { + if (_.has(PDFParser.#binBuffer, this.binBufferKey)) { this.#startParsingPDF(); return true; } - const allKeys = _.keys(PDFParser.#_binBuffer); - if (allKeys.length > PDFParser.#_maxBinBufferCount) { - const idx = this.id % PDFParser.#_maxBinBufferCount; + const allKeys = _.keys(PDFParser.#binBuffer); + if (allKeys.length > PDFParser.#maxBinBufferCount) { + const idx = this.id % PDFParser.#maxBinBufferCount; const key = allKeys[idx]; - PDFParser.#_binBuffer[key] = null; - delete PDFParser.#_binBuffer[key]; + PDFParser.#binBuffer[key] = null; + delete PDFParser.#binBuffer[key]; nodeUtil.p2jinfo("re-cycled cache for " + key); } @@ -106,7 +106,7 @@ class PDFParser extends EventEmitter { // inherit from event emitter this.emit("pdfParser_dataError", err); } else { - PDFParser.#_binBuffer[this.binBufferKey] = data; + PDFParser.#binBuffer[this.binBufferKey] = data; this.#startParsingPDF(); } }; From 3247e747f762c41a4d7709d81cc7db2640681004 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 9 Oct 2021 10:03:03 -0700 Subject: [PATCH 49/66] refactor pdf.js with es6 class --- lib/p2jcmd.js | 9 +- lib/pdf.js | 252 ++++++++++++++++++++++------------------------ package-lock.json | 7 +- package.json | 2 +- 4 files changed, 127 insertions(+), 143 deletions(-) diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index 4d344feb..855962fe 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -74,8 +74,6 @@ class PDFProcessor { //private methods #continue(callback, err) { - if (err) - console.error(err); if (typeof callback === "function") callback(err); } @@ -112,11 +110,11 @@ class PDFProcessor { } if (outputTasks.length > 0) { - async.series(outputTasks, (err, results) => { + async.series(outputTasks, (err, results) => {//additional streams process complete if (err) { - console.error("Additional streams Error: \n", err); + this.curCLI.addStatusMsg(err, `[+]=> ${err}`); } else { - console.log("Additional streams OK: \n", results.map(r => `✓ Success - ${r}`)); + results.forEach( r => this.curCLI.addStatusMsg(null, `[+]=> ${r}`)); } this.#continue(callback); }); @@ -132,7 +130,6 @@ class PDFProcessor { } #onPrimaryError(err, callback) { - // nodeUtil.p2jerror("Output Exception: [" + this.inputPath + "] => [" + this.outputPath + "]: " + err); this.curCLI.addResultCount(err); callback(err); } diff --git a/lib/pdf.js b/lib/pdf.js index e213b178..ccb4697e 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -1,7 +1,5 @@ -'use strict'; - -let nodeUtil = require("util"), - nodeEvents = require("events"), +const nodeUtil = require("util"), + {EventEmitter} = require("events"), fs = require('fs'), _ = require('lodash'), DOMParser = require('@xmldom/xmldom').DOMParser, @@ -64,36 +62,41 @@ _pdfjsFiles.forEach( (fieldName, idx, arr) => _fileContent += fs.readFileSync(_b eval(_fileContent); ////////////////////////////////start of helper classes -let PDFPageParser = (function () { - // private static - let _nextId = 1; - let _name = 'PDFPageParser'; - - let RenderingStates = { - INITIAL: 0, - RUNNING: 1, - PAUSED: 2, - FINISHED: 3 +class PDFPageParser{ + //static + static RenderingStates = { + INITIAL: 0, + RUNNING: 1, + PAUSED: 2, + FINISHED: 3 }; - - let _addField = function(field) { - if (!PDFField.isFormElement(field)) + + // private + #width = 0; + #height = 0; + + //public + id = -1; + pdfPage = null; + ptiParser = null; + scale = 0; + viewport = null; + renderingState = -1; + Fields = null; + Boxsets = null; + + #_addField (field) { + if (!PDFField.isFormElement(field)) { + nodeUtil.p2jwarn("NOT valid form element", field); return; + } - let oneField = new PDFField(field, this.viewport, this.Fields, this.Boxsets); + const oneField = new PDFField(field, this.viewport, this.Fields, this.Boxsets); oneField.processField(); - }; + } // constructor - let cls = function (pdfPage, id, scale, ptiParser) { - nodeEvents.EventEmitter.call(this); - // private - let _id = _nextId++; - - // public (every instance will have their own copy of these methods, needs to be lightweight) - this.get_id = () => _id; - this.get_name = () => _name + _id; - + constructor(pdfPage, id, scale, ptiParser) { // public, this instance copies this.id = id; this.pdfPage = pdfPage; @@ -104,66 +107,58 @@ let PDFPageParser = (function () { //leave out the 2nd parameter in order to use page's default rotation (for both portrait and landscape form) this.viewport = this.pdfPage.getViewport(this.scale); - this.renderingState = RenderingStates.INITIAL; + this.renderingState = PDFPageParser.RenderingStates.INITIAL; //form elements other than radio buttons and check boxes this.Fields = []; //form elements: radio buttons and check boxes this.Boxsets = []; + } + + get width() { + return PDFUnit.toFormX(this.viewport.width); + } - //public properties - Object.defineProperty(this, 'width', { - get:function () { - return PDFUnit.toFormX(this.viewport.width); - }, - enumerable:true - }); - - Object.defineProperty(this, 'height', { - get:function () { - return PDFUnit.toFormY(this.viewport.height); - }, - enumerable:true - }); - }; - // inherit from event emitter - nodeUtil.inherits(cls, nodeEvents.EventEmitter); + get height() { + return PDFUnit.toFormY(this.viewport.height); + } - cls.prototype.destroy = function() { + destroy() { this.pdfPage.destroy(); this.pdfPage = null; this.ptiParser = null; this.Fields = null; this.Boxsets = null; - }; + } - cls.prototype.getPagePoint = function(x, y) { + getPagePoint(x, y) { return this.viewport.convertToPdfPoint(x, y); - }; + } - cls.prototype.parsePage = function(callback, errorCallBack) { - if (this.renderingState !== RenderingStates.INITIAL) - error('Must be in new state before drawing'); + parsePage(callback, errorCallBack) { + if (this.renderingState !== PDFPageParser.RenderingStates.INITIAL) { + errorCallBack('Must be in new state before drawing'); + return; + } - this.renderingState = RenderingStates.RUNNING; + this.renderingState = PDFPageParser.RenderingStates.RUNNING; - let canvas = createScratchCanvas(1, 1); - let ctx = canvas.getContext('2d'); + const canvas = createScratchCanvas(1, 1); + const ctx = canvas.getContext('2d'); function pageViewDrawCallback(error) { - this.renderingState = RenderingStates.FINISHED; + this.renderingState = PDFPageParser.RenderingStates.FINISHED; if (error) { - let errMsg = 'An error occurred while rendering the page ' + (this.id + 1) + + errorCallBack('An error occurred while rendering the page ' + (this.id + 1) + ':\n' + error.message + - ':\n' + error.stack; - errorCallBack(errMsg); + ':\n' + error.stack ); } else { if (this.ptiParser) { - let extraFields = this.ptiParser.getFields(parseInt(this.id) + 1); - _.each(extraFields, _.bind(_addField, this)); + const extraFields = this.ptiParser.getFields(parseInt(this.id) + 1); + _.each(extraFields, _.bind(this.#_addField, this)); } _.extend(this, ctx.canvas); @@ -174,7 +169,7 @@ let PDFPageParser = (function () { } } - let renderContext = { + const renderContext = { canvasContext:ctx, viewport:this.viewport }; @@ -183,51 +178,44 @@ let PDFPageParser = (function () { data => { this.pdfPage.getAnnotations().then( fields => { - _.each(fields, _.bind(_addField, this)); + _.each(fields, _.bind(this.#_addField, this)); pageViewDrawCallback.call(this, null); }, - err => console.error("pdfPage.getAnnotations error:" + err)); + err => errorCallBack("pdfPage.getAnnotations error:" + err)); }, err => pageViewDrawCallback.call(this, err) ); - }; - - return cls; - -})(); + } +} ////////////////////////////////Start of Node.js Module -let PDFJSClass = (function () { - // private static - let _nextId = 1; - let _name = 'PDFJSClass'; - let _sufInfo = "_fieldInfo.xml"; - - let _getMetaDataString = function(metadata, key){ +class PDFJSClass extends EventEmitter { + static _getMetaDataString(metadata, key){ let retVal = "unknown"; if (metadata && metadata.has(key)) { retVal = encodeURIComponent(metadata.get(key)); } return retVal; - }; + } - let _getMetaDataInt = function(metadata, key){ - let retVal = _getMetaDataString(metadata, key); + static _getMetaDataInt(metadata, key){ + let retVal = PDFJSClass._getMetaDataString(metadata, key); retVal = parseInt(retVal); if (retVal == null || isNaN(retVal)) retVal = -1; return retVal; - }; + } - // constructor - let cls = function (needRawText) { - nodeEvents.EventEmitter.call(this); - // private - let _id = _nextId++; + pdfDocument = null; + pages = null; + pageWidth = 0; + rawTextContents = null; - // public (every instance will have their own copy of these methods, needs to be lightweight) - this.get_id = () => _id; - this.get_name = () => _name + _id; + needRawText = null; + + // constructor + constructor(needRawText) { + super(); // public, this instance copies this.pdfDocument = null; @@ -236,34 +224,33 @@ let PDFJSClass = (function () { this.rawTextContents = []; this.needRawText = needRawText; - }; - // inherit from event emitter - nodeUtil.inherits(cls, nodeEvents.EventEmitter); + } - cls.prototype.raiseErrorEvent = function(errMsg) { + raiseErrorEvent(errMsg) { console.error(errMsg); process.nextTick( () => this.emit("pdfjs_parseDataError", errMsg)); return errMsg; - }; + } - cls.prototype.raiseReadyEvent = function(data) { + raiseReadyEvent(data) { process.nextTick( () => this.emit("pdfjs_parseDataReady", data) ); return data; - }; + } - cls.prototype.parsePDFData = function(arrayBuffer, password) { + parsePDFData(arrayBuffer, password) { this.pdfDocument = null; - let parameters = {password: password, data: arrayBuffer}; + const parameters = {password: password, data: arrayBuffer}; PDFJS.getDocument(parameters).then( pdfDocument => this.load(pdfDocument, 1), error => this.raiseErrorEvent("An error occurred while parsing the PDF: " + error) ); }; - cls.prototype.tryLoadFieldInfoXML = function(pdfFilePath) { - let fieldInfoXMLPath = pdfFilePath.replace(".pdf", _sufInfo); + tryLoadFieldInfoXML(pdfFilePath) { + const _sufInfo = "_fieldInfo.xml"; + const fieldInfoXMLPath = pdfFilePath.replace(".pdf", _sufInfo); if ((fieldInfoXMLPath.indexOf(_sufInfo) < 1) || (!fs.existsSync(fieldInfoXMLPath))) { return; } @@ -280,9 +267,9 @@ let PDFJSClass = (function () { nodeUtil.p2jinfo("fieldInfo XML loaded."); } }); - }; + } - cls.prototype.load = function(pdfDocument, scale) { + load(pdfDocument, scale) { this.pdfDocument = pdfDocument; return this.loadMetaData().then( @@ -291,7 +278,7 @@ let PDFJSClass = (function () { ); }; - cls.prototype.loadMetaData = function() { + loadMetaData() { return this.pdfDocument.getMetadata().then( data => { this.documentInfo = data.info; @@ -300,11 +287,11 @@ let PDFJSClass = (function () { }, error => this.raiseErrorEvent("pdfDocument.getMetadata error: " + error) ); - }; + } - cls.prototype.parseMetaData = function() { - let info = this.documentInfo; - let metadata = this.metadata; + parseMetaData() { + const info = this.documentInfo; + const metadata = this.metadata; let pdfTile = ""; if (metadata && metadata.has('dc:title')) { @@ -313,28 +300,28 @@ let PDFJSClass = (function () { else if (info && info['Title']) pdfTile = info['Title']; - let formAttr = {AgencyId:"", Name: "", MC: false, Max: 1, Parent:""}; + const formAttr = {AgencyId:"", Name: "", MC: false, Max: 1, Parent:""}; if (metadata) { - formAttr.AgencyId = _getMetaDataString(metadata, 'pdfx:agencyid'); + formAttr.AgencyId = PDFJSClass._getMetaDataString(metadata, 'pdfx:agencyid'); if (formAttr.AgencyId != "unknown") pdfTile = formAttr.AgencyId; - formAttr.Name = _getMetaDataString(metadata, 'pdfx:name'); - formAttr.MC = _getMetaDataString(metadata, 'pdfx:mc') === 'true'; - formAttr.Max = _getMetaDataInt(metadata, 'pdfx:max'); - formAttr.Parent = _getMetaDataInt(metadata, 'pdfx:parent'); + formAttr.Name = PDFJSClass._getMetaDataString(metadata, 'pdfx:name'); + formAttr.MC = PDFJSClass._getMetaDataString(metadata, 'pdfx:mc') === 'true'; + formAttr.Max = PDFJSClass._getMetaDataInt(metadata, 'pdfx:max'); + formAttr.Parent = PDFJSClass._getMetaDataInt(metadata, 'pdfx:parent'); } this.raiseReadyEvent({Transcoder: _PARSER_SIG, Agency:pdfTile, Id: formAttr}); - }; + } - cls.prototype.loadPages = function() { - let pagesCount = this.pdfDocument.numPages; - let pagePromises = []; + loadPages() { + const pagesCount = this.pdfDocument.numPages; + const pagePromises = []; for (let i = 1; i <= pagesCount; i++) pagePromises.push(this.pdfDocument.getPage(i)); - let pagesPromise = PDFJS.Promise.all(pagePromises); + const pagesPromise = PDFJS.Promise.all(pagePromises); nodeUtil.p2jinfo("PDF loaded. pagesCount = " + pagesCount); @@ -344,11 +331,11 @@ let PDFJSClass = (function () { ); }; - cls.prototype.parsePage = function(promisedPages, id, scale) { + parsePage(promisedPages, id, scale) { nodeUtil.p2jinfo("start to parse page:" + (id+1)); - let pdfPage = promisedPages[id]; - let pageParser = new PDFPageParser(pdfPage, id, scale, this.ptiParser); + const pdfPage = promisedPages[id]; + const pageParser = new PDFPageParser(pdfPage, id, scale, this.ptiParser); function continueOnNextPage() { nodeUtil.p2jinfo("complete parsing page:" + (id+1)); @@ -368,7 +355,7 @@ let PDFJSClass = (function () { if (!this.pageWidth) //get PDF width this.pageWidth = pageParser.width; - let page = {Height: pageParser.height, + const page = {Height: pageParser.height, HLines: pageParser.HLines, VLines: pageParser.VLines, Fills:pageParser.Fills, @@ -397,9 +384,9 @@ let PDFJSClass = (function () { }, errMsg => this.raiseErrorEvent("parsePage error:" + errMsg) ); - }; + } - cls.prototype.getRawTextContent = function() { + getRawTextContent() { let retVal = ""; if (!this.needRawText) return retVal; @@ -428,13 +415,13 @@ let PDFJSClass = (function () { }); return retVal; - }; + } - cls.prototype.getAllFieldsTypes = function() { + getAllFieldsTypes() { return PDFField.getAllFieldsTypes({Pages:this.pages || [], Width: this.pageWidth}); - }; + } - cls.prototype.getMergedTextBlocksIfNeeded = function() { + getMergedTextBlocksIfNeeded() { for (let p = 0; p < this.pages.length; p++) { let prevText = null; let page = this.pages[p]; @@ -477,9 +464,9 @@ let PDFJSClass = (function () { } return {Pages:this.pages, Width: this.pageWidth}; - }; + } - cls.prototype.destroy = function() { + destroy() { this.removeAllListeners(); if (this.pdfDocument) @@ -488,10 +475,9 @@ let PDFJSClass = (function () { this.pages = null; this.rawTextContents = null; - }; + } - return cls; -})(); +} module.exports = PDFJSClass; diff --git a/package-lock.json b/package-lock.json index 6b1f542b..f1b8e81a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "pdf2json", - "version": "1.2.5", + "version": "1.3.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "pdf2json", - "version": "1.2.5", + "version": "1.3.0", "bundleDependencies": [ "async", "lodash", @@ -25,7 +25,8 @@ }, "devDependencies": {}, "engines": { - "node": ">=12.20.0" + "node": ">=14.18.0", + "npm": "~6.14.15" } }, "node_modules/@xmldom/xmldom": { diff --git a/package.json b/package.json index b4057c59..8572f532 100644 --- a/package.json +++ b/package.json @@ -34,7 +34,7 @@ "parse-t": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", "parse-c": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", "parse-m": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", - "parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -s -t -c -m -r" + "parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r" }, "engines": { "node": ">=14.18.0", From 84893cfe24e05ce22c79f43685936627a2623a24 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 9 Oct 2021 16:06:48 -0700 Subject: [PATCH 50/66] include full metadata in parsed data --- lib/pdf.js | 42 ++---------------------------------------- pdfparser.js | 8 ++++---- 2 files changed, 6 insertions(+), 44 deletions(-) diff --git a/lib/pdf.js b/lib/pdf.js index ccb4697e..9ec66985 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -190,22 +190,6 @@ class PDFPageParser{ ////////////////////////////////Start of Node.js Module class PDFJSClass extends EventEmitter { - static _getMetaDataString(metadata, key){ - let retVal = "unknown"; - if (metadata && metadata.has(key)) { - retVal = encodeURIComponent(metadata.get(key)); - } - return retVal; - } - - static _getMetaDataInt(metadata, key){ - let retVal = PDFJSClass._getMetaDataString(metadata, key); - retVal = parseInt(retVal); - if (retVal == null || isNaN(retVal)) - retVal = -1; - return retVal; - } - pdfDocument = null; pages = null; pageWidth = 0; @@ -282,7 +266,7 @@ class PDFJSClass extends EventEmitter { return this.pdfDocument.getMetadata().then( data => { this.documentInfo = data.info; - this.metadata = data.metadata; + this.metadata = data.metadata.metadata; this.parseMetaData(); }, error => this.raiseErrorEvent("pdfDocument.getMetadata error: " + error) @@ -290,29 +274,7 @@ class PDFJSClass extends EventEmitter { } parseMetaData() { - const info = this.documentInfo; - const metadata = this.metadata; - - let pdfTile = ""; - if (metadata && metadata.has('dc:title')) { - pdfTile = metadata.get('dc:title'); - } - else if (info && info['Title']) - pdfTile = info['Title']; - - const formAttr = {AgencyId:"", Name: "", MC: false, Max: 1, Parent:""}; - if (metadata) { - formAttr.AgencyId = PDFJSClass._getMetaDataString(metadata, 'pdfx:agencyid'); - if (formAttr.AgencyId != "unknown") - pdfTile = formAttr.AgencyId; - - formAttr.Name = PDFJSClass._getMetaDataString(metadata, 'pdfx:name'); - formAttr.MC = PDFJSClass._getMetaDataString(metadata, 'pdfx:mc') === 'true'; - formAttr.Max = PDFJSClass._getMetaDataInt(metadata, 'pdfx:max'); - formAttr.Parent = PDFJSClass._getMetaDataInt(metadata, 'pdfx:parent'); - } - - this.raiseReadyEvent({Transcoder: _PARSER_SIG, Agency:pdfTile, Id: formAttr}); + this.raiseReadyEvent({Transcoder: _PARSER_SIG, Meta: {...this.documentInfo, Metadata: this.metadata}}); } loadPages() { diff --git a/pdfparser.js b/pdfparser.js index 7463abe6..db426196 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -24,7 +24,7 @@ class PDFParser extends EventEmitter { // inherit from event emitter #pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache #data = null; //if file read success, data is PDF content; if failed, data is "err" object #PDFJS = null; //will be initialized in constructor - #processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging + #processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) // constructor constructor(context, needRawText, password) { @@ -43,7 +43,7 @@ class PDFParser extends EventEmitter { // inherit from event emitter this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache this.#data = null; //if file read success, data is PDF content; if failed, data is "err" object - this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging + this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) this.#PDFJS = new PDFJS(needRawText); this.#password = password; @@ -62,7 +62,7 @@ class PDFParser extends EventEmitter { // inherit from event emitter this.emit("pdfParser_dataReady", output); } else { - this.#data = {...this.#data, data}; + this.#data = {...this.#data, ...data}; } } @@ -158,7 +158,7 @@ class PDFParser extends EventEmitter { // inherit from event emitter this.#pdfFilePath = null; this.#pdfFileMTime = null; this.#data = null; - this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging + this.#processFieldInfoXML = false;//disable additional _fieldInfo.xml parsing and merging (do NOT set to true) this.#PDFJS.destroy(); this.#PDFJS = null; From 18d2d7a8741e36711abebb297274d1d7781df763 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 9 Oct 2021 19:48:05 -0700 Subject: [PATCH 51/66] add Readable stream like events, like readable, data, error to PDF.js --- lib/pdf.js | 16 +++++++++++++--- pdfparser.js | 8 ++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/lib/pdf.js b/lib/pdf.js index 9ec66985..22fd8623 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -62,7 +62,7 @@ _pdfjsFiles.forEach( (fieldName, idx, arr) => _fileContent += fs.readFileSync(_b eval(_fileContent); ////////////////////////////////start of helper classes -class PDFPageParser{ +class PDFPageParser { //static static RenderingStates = { INITIAL: 0, @@ -213,6 +213,8 @@ class PDFJSClass extends EventEmitter { raiseErrorEvent(errMsg) { console.error(errMsg); process.nextTick( () => this.emit("pdfjs_parseDataError", errMsg)); + this.emit("error", errMsg); + this.emit('close'); return errMsg; } @@ -274,7 +276,9 @@ class PDFJSClass extends EventEmitter { } parseMetaData() { - this.raiseReadyEvent({Transcoder: _PARSER_SIG, Meta: {...this.documentInfo, Metadata: this.metadata}}); + const meta = {Transcoder: _PARSER_SIG, Meta: {...this.documentInfo, Metadata: this.metadata}}; + this.raiseReadyEvent(meta); + this.emit("readable", meta); } loadPages() { @@ -303,6 +307,9 @@ class PDFJSClass extends EventEmitter { nodeUtil.p2jinfo("complete parsing page:" + (id+1)); if (id === (this.pdfDocument.numPages - 1) ) { this.raiseReadyEvent({Pages:this.pages, Width: this.pageWidth}); + this.emit("data", {Width: this.pageWidth}); + this.emit("data", null); + this.emit("end", true); //v1.1.2: signal end of parsed data with null process.nextTick(() => this.raiseReadyEvent(null)); @@ -317,7 +324,9 @@ class PDFJSClass extends EventEmitter { if (!this.pageWidth) //get PDF width this.pageWidth = pageParser.width; - const page = {Height: pageParser.height, + const page = { + Width: pageParser.width, + Height: pageParser.height, HLines: pageParser.HLines, VLines: pageParser.VLines, Fills:pageParser.Fills, @@ -329,6 +338,7 @@ class PDFJSClass extends EventEmitter { }; this.pages.push(page); + this.emit("data", page); if (this.needRawText) { pdfPage.getTextContent().then( diff --git a/pdfparser.js b/pdfparser.js index db426196..7b2a63d2 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -77,6 +77,10 @@ class PDFParser extends EventEmitter { // inherit from event emitter this.#PDFJS.on("pdfjs_parseDataReady", this.#onPDFJSParseDataReady.bind(this)); this.#PDFJS.on("pdfjs_parseDataError", this.#onPDFJSParserDataError.bind(this)); + // this.#PDFJS.on("readable", meta => console.log("readable", meta)); + // this.#PDFJS.on("data", data => console.log("data", data)); + // this.#PDFJS.on("error", err => console.log("error", err)); + this.#PDFJS.parsePDFData(buffer || PDFParser.#binBuffer[this.binBufferKey], this.#password); } @@ -130,12 +134,12 @@ class PDFParser extends EventEmitter { // inherit from event emitter return; this.#fq.push({path: pdfFilePath}, this.#processPDFContent.bind(this)); - }; + } // Introduce a way to directly process buffers without the need to write it to a temporary file parseBuffer(pdfBuffer) { this.#startParsingPDF(pdfBuffer); - }; + } getRawTextContent() { return this.#PDFJS.getRawTextContent(); } getRawTextContentStream() { return ParserStream.createContentStream(this.getRawTextContent()); } From fd29bb3616fe797d9c78341538fe820221e07e29 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 9 Oct 2021 20:15:29 -0700 Subject: [PATCH 52/66] update README for major refactoring v1.3.0 --- readme.md | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md index bc27d3cd..f2d263e9 100644 --- a/readme.md +++ b/readme.md @@ -136,9 +136,9 @@ returns an array of field objects. Current parsed data has four main sub objects to describe the PDF document. -* 'Agency': the main text identifier for the PDF document. If Id.AgencyId present, it'll be same, otherwise it'll be set as document title; * 'Transcoder': pdf2json version number -* 'Id': the XML meta data that embedded in PDF document +* 'Agency': the main text identifier for the PDF document. If Id.AgencyId present, it'll be same, otherwise it'll be set as document title; (_deprecated since v1.3.0, see notes below_) +* 'Id': the XML meta data that embedded in PDF document (_deprecated since v1.3.0, see notes below_) * all forms attributes metadata are defined in "Custom" tab of "Document Properties" dialog in Acrobat Pro; * v0.1.22 added support for the following custom properties: * AgencyId: default "unknown"; @@ -146,9 +146,38 @@ Current parsed data has four main sub objects to describe the PDF document. * MC: default false; * Max: default -1; * Parent: parent name, default "unknown"; + * *_v1.3.0_*: 'Agency' and 'Id' are replaced with full metadata, example: for `./test/pdf/fd/form/F1040.pdf`, full metadata is: + ````json + Meta: { + PDFFormatVersion: '1.7', + IsAcroFormPresent: true, + IsXFAPresent: false, + Author: 'SE:W:CAR:MP', + Subject: 'U.S. Individual Income Tax Return', + Creator: 'Adobe Acrobat Pro 10.1.8', + Producer: 'Adobe Acrobat Pro 10.1.8', + CreationDate: "D:20131203133943-08'00'", + ModDate: "D:20140131180702-08'00'", + Metadata: { + 'xmp:modifydate': '2014-01-31T18:07:02-08:00', + 'xmp:createdate': '2013-12-03T13:39:43-08:00', + 'xmp:metadatadate': '2014-01-31T18:07:02-08:00', + 'xmp:creatortool': 'Adobe Acrobat Pro 10.1.8', + 'dc:format': 'application/pdf', + 'dc:description': 'U.S. Individual Income Tax Return', + 'dc:creator': 'SE:W:CAR:MP', + 'xmpmm:documentid': 'uuid:4d81e082-7ef2-4df7-b07b-8190e5d3eadf', + 'xmpmm:instanceid': 'uuid:7ea96d1c-3d2f-284a-a469-f0f284a093de', + 'pdf:producer': 'Adobe Acrobat Pro 10.1.8', + 'adhocwf:state': '1', + 'adhocwf:version': '1.1' + } + } + ```` * 'Pages': array of 'Page' object that describes each page in the PDF, including sizes, lines, fills and texts within the page. More info about 'Page' object can be found at 'Page Object Reference' section * 'Width': the PDF page width in page unit + ### Page object Reference Each page object within 'Pages' array describes page elements and attributes with 5 main fields: @@ -789,6 +818,17 @@ In order to support this auto merging capability, text block objects have an add * event "pdfParser_dataReady": {"formImage": parseOutput} * v1.0.8 fixed [issue 27](https://github.com/modesty/pdf2json/issues/27), it converts x coordinate with the same ratio as y, which is 24 (96/4), rather than 8.7 (96/11), please adjust client renderer accordingly when position all elements' x coordinate. +* v1.3.0: output data field, `Agency` and `Id` are replaced with `Meta`, JSON of the PDF's full metadata. (See above for details) + +**Major Refactoring** +* v1.3.0 has the major refactoring since 2015. Primary updates including: + * Full PDF metadata support (see page format and breaking changes for details) + * Better Stream support with test _`npm run parse-r`_, plus Readable Stream like events are added to PDF.js, including _`readable`_, _`data`_, _`end`_, `error` that can be optional replacement for customed events (_`pdfjs_parseDataReady`_, and _`pdfjs_parseDataError`_) for granular data chunk flow control, like _`readable`_ with Meta, _`data`_ sequence for each PDF page result, rather than _`pdfjs_parseDataReady`_ combines all pages in one shot + * Better performace, near ~20% improvements with PDFs under _test_ directory + * Better exception handling, fixes a few uncaught exception errors + * More test coverage, 4 more test scripts added, see _package.json_ for details + * Refactor to ES6 class for major entry modules + * Upgrade to Node v14.18.0 LTSs ### Install on Ubuntu From 5c08c38f622fb5ea5f5461ef43a46142ee779a59 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sun, 10 Oct 2021 11:42:38 -0700 Subject: [PATCH 53/66] 1. remove formImage and Width; 2. add the use new stream events, readable/data 3. add page level stream control in ParserStream --- lib/parserstream.js | 23 ++++++++++++++++++----- lib/pdf.js | 31 +++++++++---------------------- pdfparser.js | 17 +++++++++-------- readme.md | 11 ++++++----- 4 files changed, 42 insertions(+), 40 deletions(-) diff --git a/lib/parserstream.js b/lib/parserstream.js index eb6f589b..f54ef621 100644 --- a/lib/parserstream.js +++ b/lib/parserstream.js @@ -23,12 +23,29 @@ class ParserStream extends Transform { #pdfParser = null; #chunks = []; + #parsedData = {Pages:[]}; + #_flush_callback = null; constructor(pdfParser, options) { super(options); this.#pdfParser = pdfParser; this.#chunks = []; + + // this.#pdfParser.on("pdfParser_dataReady", evtData => { + // this.push(evtData); + // this.#_flush_callback(); + // this.emit('end', null); + // }); + this.#pdfParser.on("readable", meta => this.#parsedData = {...meta, Pages:[]}); + this.#pdfParser.on("data", page => { + if (!page) { + this.push(this.#parsedData); + this.#_flush_callback(); + } + else + this.#parsedData.Pages.push(page); + }); } //implements transform stream @@ -38,11 +55,7 @@ class ParserStream extends Transform { } _flush(callback) { - this.#pdfParser.on("pdfParser_dataReady", evtData => { - this.push(evtData); - callback(); - this.emit('end', null); - }); + this.#_flush_callback = callback; this.#pdfParser.parseBuffer(Buffer.concat(this.#chunks)); } diff --git a/lib/pdf.js b/lib/pdf.js index 22fd8623..409a513c 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -51,10 +51,10 @@ const _PARSER_SIG = `${pkInfo.name}@${pkInfo.version} [${pkInfo.homepage}]`; //////replacing HTML5 canvas with PDFCanvas (in-memory canvas) function createScratchCanvas(width, height) { return new PDFCanvas({}, width, height); } -let PDFJS = {}; -let globalScope = {console: console}; +const PDFJS = {}; +const globalScope = {console}; -let _basePath = __dirname + "/../base/"; +const _basePath = __dirname + "/../base/"; let _fileContent = ''; _pdfjsFiles.forEach( (fieldName, idx, arr) => _fileContent += fs.readFileSync(_basePath + fieldName, 'utf8') ); @@ -71,10 +71,6 @@ class PDFPageParser { FINISHED: 3 }; - // private - #width = 0; - #height = 0; - //public id = -1; pdfPage = null; @@ -192,7 +188,6 @@ class PDFPageParser { class PDFJSClass extends EventEmitter { pdfDocument = null; pages = null; - pageWidth = 0; rawTextContents = null; needRawText = null; @@ -204,7 +199,6 @@ class PDFJSClass extends EventEmitter { // public, this instance copies this.pdfDocument = null; this.pages = []; - this.pageWidth = 0; this.rawTextContents = []; this.needRawText = needRawText; @@ -214,7 +208,6 @@ class PDFJSClass extends EventEmitter { console.error(errMsg); process.nextTick( () => this.emit("pdfjs_parseDataError", errMsg)); this.emit("error", errMsg); - this.emit('close'); return errMsg; } @@ -262,7 +255,7 @@ class PDFJSClass extends EventEmitter { () => this.loadPages(), error => this.raiseErrorEvent("loadMetaData error: " + error) ); - }; + } loadMetaData() { return this.pdfDocument.getMetadata().then( @@ -295,7 +288,7 @@ class PDFJSClass extends EventEmitter { promisedPages => this.parsePage(promisedPages, 0, 1.5), error => this.raiseErrorEvent("pagesPromise error: " + error) ); - }; + } parsePage(promisedPages, id, scale) { nodeUtil.p2jinfo("start to parse page:" + (id+1)); @@ -306,13 +299,10 @@ class PDFJSClass extends EventEmitter { function continueOnNextPage() { nodeUtil.p2jinfo("complete parsing page:" + (id+1)); if (id === (this.pdfDocument.numPages - 1) ) { - this.raiseReadyEvent({Pages:this.pages, Width: this.pageWidth}); - this.emit("data", {Width: this.pageWidth}); - this.emit("data", null); - this.emit("end", true); - + this.raiseReadyEvent({Pages:this.pages}); //v1.1.2: signal end of parsed data with null process.nextTick(() => this.raiseReadyEvent(null)); + this.emit("data", null); } else { process.nextTick(() => this.parsePage(promisedPages, ++id, scale)); @@ -321,9 +311,6 @@ class PDFJSClass extends EventEmitter { pageParser.parsePage( data => { - if (!this.pageWidth) //get PDF width - this.pageWidth = pageParser.width; - const page = { Width: pageParser.width, Height: pageParser.height, @@ -390,7 +377,7 @@ class PDFJSClass extends EventEmitter { } getAllFieldsTypes() { - return PDFField.getAllFieldsTypes({Pages:this.pages || [], Width: this.pageWidth}); + return PDFField.getAllFieldsTypes({Pages:this.pages || []}); } getMergedTextBlocksIfNeeded() { @@ -435,7 +422,7 @@ class PDFJSClass extends EventEmitter { page.Texts = page.Texts.filter( t => !t.merged); } - return {Pages:this.pages, Width: this.pageWidth}; + return {Pages:this.pages}; } destroy() { diff --git a/pdfparser.js b/pdfparser.js index 7b2a63d2..c10a4335 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -58,17 +58,17 @@ class PDFParser extends EventEmitter { // inherit from event emitter #onPDFJSParseDataReady(data) { if (!data) { //v1.1.2: data===null means end of parsed data nodeUtil.p2jinfo("PDF parsing completed."); - const output = {"formImage": this.#data}; - this.emit("pdfParser_dataReady", output); + this.emit("pdfParser_dataReady", this.#data); } else { this.#data = {...this.#data, ...data}; } } - #onPDFJSParserDataError(data) { + #onPDFJSParserDataError(err) { this.#data = null; - this.emit("pdfParser_dataError", {"parserError": data}); + this.emit("pdfParser_dataError", {"parserError": err}); + this.emit("error", err); } #startParsingPDF(buffer) { @@ -77,9 +77,10 @@ class PDFParser extends EventEmitter { // inherit from event emitter this.#PDFJS.on("pdfjs_parseDataReady", this.#onPDFJSParseDataReady.bind(this)); this.#PDFJS.on("pdfjs_parseDataError", this.#onPDFJSParserDataError.bind(this)); - // this.#PDFJS.on("readable", meta => console.log("readable", meta)); - // this.#PDFJS.on("data", data => console.log("data", data)); - // this.#PDFJS.on("error", err => console.log("error", err)); + //v1.3.0 the following Readable Stream-like events are replacement for the top two custom events + this.#PDFJS.on("readable", meta => this.emit("readable", meta)); + this.#PDFJS.on("data", data => this.emit("data", data)); + this.#PDFJS.on("error", this.#onPDFJSParserDataError.bind(this)); this.#PDFJS.parsePDFData(buffer || PDFParser.#binBuffer[this.binBufferKey], this.#password); } @@ -147,7 +148,7 @@ class PDFParser extends EventEmitter { // inherit from event emitter getAllFieldsTypes() { return this.#PDFJS.getAllFieldsTypes(); }; getAllFieldsTypesStream() { return ParserStream.createContentStream(this.getAllFieldsTypes()); } - getMergedTextBlocksIfNeeded() { return {"formImage": this.#PDFJS.getMergedTextBlocksIfNeeded()}; } + getMergedTextBlocksIfNeeded() { return this.#PDFJS.getMergedTextBlocksIfNeeded(); } getMergedTextBlocksStream() { return ParserStream.createContentStream(this.getMergedTextBlocksIfNeeded()) } destroy() { // invoked with stream transform process diff --git a/readme.md b/readme.md index f2d263e9..25009435 100644 --- a/readme.md +++ b/readme.md @@ -118,7 +118,7 @@ See [p2jcmd.js](https://github.com/modesty/pdf2json/blob/master/lib/p2jcmd.js) f function loadPDF(pdfFilePath); ```` If failed, event "pdfParser_dataError" will be raised with error object: {"parserError": errObj}; -If success, event "pdfParser_dataReady" will be raised with output data object: {"formImage": parseOutput}, which can be saved as json file (in command line) or serialized to json when running in web service. +If success, event "pdfParser_dataReady" will be raised with output data object: {"formImage": parseOutput}, which can be saved as json file (in command line) or serialized to json when running in web service. __note__: "formImage" is removed from v1.3.0, see breaking changes for details. * Get all textual content from "pdfParser_dataReady" event handler: ````javascript @@ -815,16 +815,17 @@ In order to support this auto merging capability, text block objects have an add * v1.1.4 unified event data structure: **only when you handle these top level events, no change if you use commandline** * event "pdfParser_dataError": {"parserError": errObj} - * event "pdfParser_dataReady": {"formImage": parseOutput} + * event "pdfParser_dataReady": {"formImage": parseOutput} __note__: "formImage" is removed from v1.3.0, see breaking changes for details. * v1.0.8 fixed [issue 27](https://github.com/modesty/pdf2json/issues/27), it converts x coordinate with the same ratio as y, which is 24 (96/4), rather than 8.7 (96/11), please adjust client renderer accordingly when position all elements' x coordinate. -* v1.3.0: output data field, `Agency` and `Id` are replaced with `Meta`, JSON of the PDF's full metadata. (See above for details) +* v1.3.0 output data field, `Agency` and `Id` are replaced with `Meta`, JSON of the PDF's full metadata. (See above for details). Each page object also added `Width` property besides `Height`. **Major Refactoring** * v1.3.0 has the major refactoring since 2015. Primary updates including: * Full PDF metadata support (see page format and breaking changes for details) - * Better Stream support with test _`npm run parse-r`_, plus Readable Stream like events are added to PDF.js, including _`readable`_, _`data`_, _`end`_, `error` that can be optional replacement for customed events (_`pdfjs_parseDataReady`_, and _`pdfjs_parseDataError`_) for granular data chunk flow control, like _`readable`_ with Meta, _`data`_ sequence for each PDF page result, rather than _`pdfjs_parseDataReady`_ combines all pages in one shot - * Better performace, near ~20% improvements with PDFs under _test_ directory + * Simplify root properties, besides the addition of `Meta` as root property, unnecessary "formImage" is removed from v1.3.0, also `Width` is move from root to each page object under `Pages`. + * Improved Stream support with test _`npm run parse-r`_, plus new events are added to PDF.js, including _`readable`_, _`data`_, _`end`_, _`error`_. These new Readable Stream like events can be optional replacement for customed events (_`pdfjs_parseDataReady`_, and _`pdfjs_parseDataError`_). It offers more granular data chunk flow control, like _`readable`_ with Meta, _`data`_ sequence for each PDF page result, instead of _`pdfjs_parseDataReady`_ combines all pages in one shot. See `./lib/parserstream.js` for more details + * Greater performance, near ~20% improvements with PDFs under _test_ directory * Better exception handling, fixes a few uncaught exception errors * More test coverage, 4 more test scripts added, see _package.json_ for details * Refactor to ES6 class for major entry modules From c0c6b464d30195c286041a874f486c42e59499be Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Mon, 11 Oct 2021 18:49:29 -0700 Subject: [PATCH 54/66] update README for v1.3.o --- readme.md | 78 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/readme.md b/readme.md index 25009435..93db278c 100644 --- a/readme.md +++ b/readme.md @@ -25,23 +25,32 @@ After install, run command line: It'll scan and parse *260* PDF AcroForm files under *_./test/pdf_*, runs with *_-s -t -c -m_* command line options, generates primary output JSON, additional text content JSON, form fields JSON and merged text JSON file for each PDF. It usually takes ~20s in my MacBook Pro to complete, check *_./test/target/_* for outputs. -### Test Exceptions +### Test Exception Handlings After install, run command line: > npm run test-misc -It'll scan and parse 6 PDF files under *_./test/pdf/misc_*, also runs with *_-s -t -c -m_* command line options, generates primary output JSON, additional text content JSON, form fields JSON and merged text JSON file for 4 PDF fields, catches exceptions with stack trace, one for _unsupported encryption algorithm_, another one for _Invalid XRef stream header_. +It'll scan and parse 6 PDF files under *_./test/pdf/misc_*, also runs with *_-s -t -c -m_* command line options, generates primary output JSON, additional text content JSON, form fields JSON and merged text JSON file for 4 PDF fields, while catches exceptions with stack trace, one for _unsupported encryption algorithm_, another one for _Invalid XRef stream header_. + +### Test Streams +After install, run command line: + +> npm run parse-r + +It scans 165 PDF files under *../test/pdf/fd/form_*, parses with [Stream API](https://nodejs.org/dist/latest-v14.x/docs/api/stream.html), then generates output to *_./test/target/fd/form_*. + +More test scripts with different commandline options can be found at *_package.json_*. ## Code Example * Parse a PDF file then write to a JSON file: ````javascript - let fs = require('fs'), + const fs = require('fs'), PDFParser = require("pdf2json"); - let pdfParser = new PDFParser(); + const pdfParser = new PDFParser(); pdfParser.on("pdfParser_dataError", errData => console.error(errData.parserError) ); pdfParser.on("pdfParser_dataReady", pdfData => { @@ -61,13 +70,21 @@ Or, call directly with buffer: }) ```` +Or, use more granular page level parsing events (v1.3.0) + +````javascript + pdfParser.on("readable", meta => console.log("PDF Metadata", meta) ); + pdfParser.on("data", page => console.log(page ? "One page paged" : "All pages parsed", page)); + pdfParser.on("error", err => console.erro("Parser Error", err); +```` + * Parse a PDF then write a .txt file (which only contains textual content of the PDF) ````javascript - let fs = require('fs'), + const fs = require('fs'), PDFParser = require("pdf2json"); - let pdfParser = new PDFParser(this,1); + const pdfParser = new PDFParser(this,1); pdfParser.on("pdfParser_dataError", errData => console.error(errData.parserError) ); pdfParser.on("pdfParser_dataReady", pdfData => { @@ -80,10 +97,10 @@ Or, call directly with buffer: * Parse a PDF then write a fields.json file that only contains interactive forms' fields information: ````javascript - let fs = require('fs'), + const fs = require('fs'), PDFParser = require("pdf2json"); - let pdfParser = new PDFParser(); + const pdfParser = new PDFParser(); pdfParser.on("pdfParser_dataError", errData => console.error(errData.parserError) ); pdfParser.on("pdfParser_dataReady", pdfData => { @@ -96,14 +113,37 @@ Or, call directly with buffer: Alternatively, you can pipe input and output streams: (requires v1.1.4) ````javascript - let fs = require('fs'), + const fs = require('fs'), PDFParser = require("pdf2json"); - let inputStream = fs.createReadStream("./pdf2json/test/pdf/fd/form/F1040EZ.pdf", {bufferSize: 64 * 1024}); - let outputStream = fs.createWriteStream("./pdf2json/test/target/fd/form/F1040EZ.json"); + const inputStream = fs.createReadStream("./pdf2json/test/pdf/fd/form/F1040EZ.pdf", {bufferSize: 64 * 1024}); + const outputStream = fs.createWriteStream("./pdf2json/test/target/fd/form/F1040EZ.json"); inputStream.pipe(new PDFParser()).pipe(new StringifyStream()).pipe(outputStream); ```` + +With v1.3.0, last line above changes to +````javascript + inputStream.pipe(this.pdfParser.createParserStream()).pipe(new StringifyStream()).pipe(outputStream); +```` + +For additional output streams support: +````javascript + #generateMergedTextBlocksStream(callback) { + const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".merged.json"), callback); + this.pdfParser.getMergedTextBlocksStream().pipe(new StringifyStream()).pipe(outputStream); + } + + #generateRawTextContentStream(callback) { + const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".content.txt"), callback); + this.pdfParser.getRawTextContentStream().pipe(outputStream); + } + + #generateFieldsTypesStream(callback) { + const outputStream = ParserStream.createOutputStream(this.outputPath.replace(".json", ".fields.json"), callback); + this.pdfParser.getAllFieldsTypesStream().pipe(new StringifyStream()).pipe(outputStream); + } +```` See [p2jcmd.js](https://github.com/modesty/pdf2json/blob/master/lib/p2jcmd.js) for more details. @@ -113,6 +153,11 @@ See [p2jcmd.js](https://github.com/modesty/pdf2json/blob/master/lib/p2jcmd.js) f * pdfParser_dataError: will be raised when parsing failed * pdfParser_dataReady: when parsing succeeded +* alternative events: (v1.3.0) + * readable: first event dispatched after PDF file metadata is parsed and before processing any page + * data: one parsed page succeeded, null means last page has been processed, signle end of data stream + * error: exception or error occured + * start to parse PDF file from specified file path asynchronously: ````javascript function loadPDF(pdfFilePath); @@ -183,6 +228,7 @@ Current parsed data has four main sub objects to describe the PDF document. Each page object within 'Pages' array describes page elements and attributes with 5 main fields: * 'Height': height of the page in page unit +* 'Width': width of the page in page unit, moved from root to page object in v1.3.0 * 'HLines': horizontal line array, each line has 'x', 'y' in relative coordinates for positioning, and 'w' for width, plus 'l' for length. Both width and length are in page unit * 'Vline': vertical line array, each line has 'x', 'y' in relative coordinates for positioning, and 'w' for width, plus 'l' for length. Both width and length are in page unit; * v0.4.3 added Line color support. Default is 'black', other wise set in 'clr' if found in color dictionary, or 'oc' field if not found in dictionary; @@ -818,6 +864,7 @@ In order to support this auto merging capability, text block objects have an add * event "pdfParser_dataReady": {"formImage": parseOutput} __note__: "formImage" is removed from v1.3.0, see breaking changes for details. * v1.0.8 fixed [issue 27](https://github.com/modesty/pdf2json/issues/27), it converts x coordinate with the same ratio as y, which is 24 (96/4), rather than 8.7 (96/11), please adjust client renderer accordingly when position all elements' x coordinate. + * v1.3.0 output data field, `Agency` and `Id` are replaced with `Meta`, JSON of the PDF's full metadata. (See above for details). Each page object also added `Width` property besides `Height`. **Major Refactoring** @@ -888,12 +935,3 @@ Licensed under the [Apache License Version 2.0](https://github.com/modesty/pdf2j ## Support I'm currently running this project in my spare time. Thanks all for your [stars](https://github.com/modesty/pdf2json/stargazers) and [supports](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=modestyZ%40gmail%2ecom&lc=GB&item_name=modesty%20zhang&item_number=git%40github%2ecom%3amodesty%2fpdf2json%2egit¤cy_code=USD&bn=PP%2dDonationsBF%3abtn_donate_SM%2egif%3aNonHosted). - - - - - - - - - From 5b5a18d55d32c52d0a09945b0c0e12175dd4e73b Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Wed, 13 Oct 2021 17:07:18 -0700 Subject: [PATCH 55/66] refactor anno, fill, image, line and unit with es6 class --- lib/pdf.js | 1 + lib/pdfanno.js | 284 ++++++++++++++++++++++-------------------------- lib/pdffill.js | 46 +++----- lib/pdfimage.js | 41 ++++--- lib/pdfline.js | 62 ++++------- lib/pdfunit.js | 86 ++++++--------- 6 files changed, 218 insertions(+), 302 deletions(-) diff --git a/lib/pdf.js b/lib/pdf.js index 409a513c..3f71ff48 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -1,5 +1,6 @@ const nodeUtil = require("util"), {EventEmitter} = require("events"), + {Blob} = require("buffer"), fs = require('fs'), _ = require('lodash'), DOMParser = require('@xmldom/xmldom').DOMParser, diff --git a/lib/pdfanno.js b/lib/pdfanno.js index a5735edf..b610278d 100644 --- a/lib/pdfanno.js +++ b/lib/pdfanno.js @@ -1,184 +1,157 @@ -'use strict'; - -let nodeUtil = require("util"), - _ = require("lodash"), - PDFUnit = require('./pdfunit.js'); - -let PDFAnno = (function PDFAnnoClosure() { - //BEGIN - MQZ 9/19/2012. Helper functions to parse acroForm elements - function setupRadioButton(annotation, item) { - let asName = ''; - //PDF Spec p.689: parent item's DV holds the item's value that is selected by default - let po = annotation.get('Parent'); - if (po) { - po.forEach(function(key, val){ - if (key === 'DV') { - asName = val.name || ''; - } - else if (key === 'TU') { - //radio buttons use the alternative text from the parent - item.alternativeText = val; - } else if( key == 'TM') { - item.alternativeID = val; - } - }); - } - - //PDF Spec p.606: get appearance dictionary - let ap = annotation.get('AP'); - //PDF Spec p.614 get normal appearance - let nVal = ap.get('N'); - //PDF Spec p.689 - nVal.forEach(function (key, value) { - if (key.toLowerCase() != "off") { - //value if selected - item.value = key; //export value - item.checked = (key === asName); //initial selection state +const nodeUtil = require("util"); + +//BEGIN - MQZ 9/19/2012. Helper functions to parse acroForm elements +function setupRadioButton(annotation, item) { + let asName = ''; + //PDF Spec p.689: parent item's DV holds the item's value that is selected by default + let po = annotation.get('Parent'); + if (po) { + po.forEach(function(key, val){ + if (key === 'DV') { + asName = val.name || ''; + } + else if (key === 'TU') { + //radio buttons use the alternative text from the parent + item.alternativeText = val; + } else if( key == 'TM') { + item.alternativeID = val; } }); - - if (!item.value) - item.value = "off"; } - function setupPushButton(annotation, item) { - //button label: PDF Spec p.640 - let mk = annotation.get('MK'); - if(mk) { - item.value = mk.get('CA') || ''; + //PDF Spec p.606: get appearance dictionary + let ap = annotation.get('AP'); + //PDF Spec p.614 get normal appearance + let nVal = ap.get('N'); + //PDF Spec p.689 + nVal.forEach(function (key, value) { + if (key.toLowerCase() != "off") { + //value if selected + item.value = key; //export value + item.checked = (key === asName); //initial selection state } + }); - //button action: url when mouse up: PDF Spec:p.642 - item.FL = ""; - let ap = annotation.get('A'); - if (ap) { - let sp = ap.get('S'); - item.FL = ap.get(sp.name); - } - } + if (!item.value) + item.value = "off"; +} - function setupCheckBox(annotation, item) { - //PDF Spec p.606: get appearance dictionary - let ap = annotation.get('AP'); - //PDF Spec p.614 get normal appearance - let nVal = ap.get('N'); - - //PDF Spec p.689 - let i = 0; - nVal.forEach(function (key, value) { - i++; - if (i == 1) //initial selection state - item.value = key; - }); +function setupPushButton(annotation, item) { + //button label: PDF Spec p.640 + let mk = annotation.get('MK'); + if(mk) { + item.value = mk.get('CA') || ''; } - function setupDropDown(annotation, item) { - //PDF Spec p.688 - item.value = annotation.get('Opt') || []; + //button action: url when mouse up: PDF Spec:p.642 + item.FL = ""; + let ap = annotation.get('A'); + if (ap) { + let sp = ap.get('S'); + item.FL = ap.get(sp.name); + } +} + +function setupCheckBox(annotation, item) { + //PDF Spec p.606: get appearance dictionary + let ap = annotation.get('AP'); + //PDF Spec p.614 get normal appearance + let nVal = ap.get('N'); + + //PDF Spec p.689 + let i = 0; + nVal.forEach(function (key, value) { + i++; + if (i == 1) //initial selection state + item.value = key; + }); +} + +function setupDropDown(annotation, item) { + //PDF Spec p.688 + item.value = annotation.get('Opt') || []; +} + +function setupFieldAttributes(annotation, item) { + //MQZ. Jan.03.2013. additional-actions dictionary + //PDF Spec P.648. 8.5.2. Trigger Events + let aa = annotation.get('AA'); + if (!aa) { + return; } - function setupFieldAttributes(annotation, item) { - //MQZ. Jan.03.2013. additional-actions dictionary - //PDF Spec P.648. 8.5.2. Trigger Events - let aa = annotation.get('AA'); - if (!aa) { + //PDF Spec p.651 get format dictionary + let nVal = aa.get('F'); + if (!nVal) { + nVal = aa.get('K'); + if (!nVal) return; - } + } - //PDF Spec p.651 get format dictionary - let nVal = aa.get('F'); - if (!nVal) { - nVal = aa.get('K'); - if (!nVal) - return; + nVal.forEach(function (key, value) { + if (key === "JS") { + processFieldAttribute(value, item); } + }); +} - nVal.forEach(function (key, value) { - if (key === "JS") { - processFieldAttribute(value, item); - } - }); - } - - let AFSpecial_Format = ['zip', 'zip', 'phone', 'ssn', '']; +const AFSpecial_Format = ['zip', 'zip', 'phone', 'ssn', '']; // let AFNumber_Format = ['nDec', 'sepStyle', 'negStyle', 'currStyle', 'strCurrency', 'bCurrencyPrepend']; - //– nDec is the number of places after the decimal point; - //– sepStyle is an integer denoting whether to use a separator or not. If sepStyle=0, use commas. If sepStyle=1, do not separate. - //– negStyle is the formatting used for negative numbers: 0 = MinusBlack, 1 = Red, 2 = ParensBlack, 3 = ParensRed - //– currStyle is the currency style - not used - //- strCurrency is the currency symbol - //– bCurrencyPrepend +//– nDec is the number of places after the decimal point; +//– sepStyle is an integer denoting whether to use a separator or not. If sepStyle=0, use commas. If sepStyle=1, do not separate. +//– negStyle is the formatting used for negative numbers: 0 = MinusBlack, 1 = Red, 2 = ParensBlack, 3 = ParensRed +//– currStyle is the currency style - not used +//- strCurrency is the currency symbol +//– bCurrencyPrepend // let AFDate_FormatEx = ["m/d", "m/d/yy", "mm/dd/yy", "mm/yy", "d-mmm", "d-mmm-yy", "dd-mmm-yy", "yymm-dd", "mmm-yy", "mmmm-yy", "mmm d, yyyy", "mmmm d, yyyy", "m/d/yy h:MM tt", "m/d/yy HH:MM"]; - function processFieldAttribute(jsFuncName, item) { - if (item.hasOwnProperty('TName')) - return; +function processFieldAttribute(jsFuncName, item) { + if (item.hasOwnProperty('TName')) + return; - if(!jsFuncName.split) - return; + if(!jsFuncName.split) + return; - let vParts = jsFuncName.split('('); - if (vParts.length !== 2) - return; + let vParts = jsFuncName.split('('); + if (vParts.length !== 2) + return; - let funcName = vParts[0]; - let funcParam = vParts[1].split(')')[0]; + let funcName = vParts[0]; + let funcParam = vParts[1].split(')')[0]; - switch (funcName) { - case 'AFSpecial_Format': - item.TName = AFSpecial_Format[Number(funcParam)]; - break; - case 'AFNumber_Format': + switch (funcName) { + case 'AFSpecial_Format': + item.TName = AFSpecial_Format[Number(funcParam)]; + break; + case 'AFNumber_Format': // nfs = funcParam.split(','); //set the Money fields to use the Number type with no decimal places after, no commas, and bCurrencyPrepend is set as true; (o use a negative sign (fits the PDF layout and our print formatting as well). // if (nfs[0] === '0' && nfs[1] === '1' && nfs[5]) // item.TName = 'money'; // else - item.TName = 'number'; - break; - case 'AFDate_FormatEx': - item.TName = 'date'; - item.MV = funcParam.replace(/^'+|^"+|'+$|"+$/g,''); //mask value - break; - case 'AFSpecial_KeystrokeEx': //special format: "arbitrary mask" - let maskValue = funcParam.replace(/^'+|^"+|'+$|"+$/g,''); //mask value - if ((!!maskValue) && maskValue.length > 0 && maskValue.length < 64) { - item.TName = 'mask'; //fixed length input - item.MV = maskValue; - } - break; - case 'AFPercent_Format': - item.TName = 'percent'; //funcParam => 2, 0, will specified how many decimal places - break; - } + item.TName = 'number'; + break; + case 'AFDate_FormatEx': + item.TName = 'date'; + item.MV = funcParam.replace(/^'+|^"+|'+$|"+$/g,''); //mask value + break; + case 'AFSpecial_KeystrokeEx': //special format: "arbitrary mask" + let maskValue = funcParam.replace(/^'+|^"+|'+$|"+$/g,''); //mask value + if ((!!maskValue) && maskValue.length > 0 && maskValue.length < 64) { + item.TName = 'mask'; //fixed length input + item.MV = maskValue; + } + break; + case 'AFPercent_Format': + item.TName = 'percent'; //funcParam => 2, 0, will specified how many decimal places + break; } +} - //END - MQZ 9/19/2012. Helper functions to parse acroForm elements +//END - MQZ 9/19/2012. Helper functions to parse acroForm elements - // private static - let _nextId = 1; - let _name = 'PDFAnno'; - - // constructor - let cls = function (field, viewport, Fields, Boxsets) { - // private - let _id = _nextId++; - - // public (every instance will have their own copy of these methods, needs to be lightweight) - this.get_id = function () { - return _id; - }; - this.get_name = function () { - return _name + _id; - }; - }; - - cls.prototype.clean = function () { - delete this.get_id; - delete this.get_name; - }; - - cls.processAnnotation = function (annotation, item) { +class PDFAnno { + static processAnnotation(annotation, item) { if (item.fieldType == 'Btn') { //PDF Spec p.675 if (item.fieldFlags & 32768) { setupRadioButton(annotation, item); @@ -196,10 +169,11 @@ let PDFAnno = (function PDFAnnoClosure() { else if (item.fieldType == 'Tx') { setupFieldAttributes(annotation, item); } - }; - - return cls; -})(); + else { + nodeUtil.p2jwarn("Unknown fieldType: ", item); + } + } +} module.exports = PDFAnno; diff --git a/lib/pdffill.js b/lib/pdffill.js index b6e19ca1..caf47c68 100644 --- a/lib/pdffill.js +++ b/lib/pdffill.js @@ -1,51 +1,37 @@ -'use strict'; -let nodeUtil = require("util"), - _ = require("lodash"), +const nodeUtil = require("util"), PDFUnit = require('./pdfunit.js'); -let PDFFill = (function PFPLineClosure() { - 'use strict'; - // private static - let _nextId = 1; - let _name = 'PDFFill'; - +class PDFFill{ // constructor - let cls = function (x, y, width, height, color) { - // private - let _id = _nextId++; - - // public (every instance will have their own copy of these methods, needs to be lightweight) - this.get_id = function() { return _id; }; - this.get_name = function() { return _name + _id; }; - + constructor(x, y, width, height, color) { this.x = x; this.y = y; this.width = width; this.height = height; this.color = color; - }; + } - // public (every instance will share the same method, but has no access to private fields defined in constructor) - cls.prototype.processFill = function (targetData) { - let clrId = PDFUnit.findColorIndex(this.color); + processFill(targetData) { + //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) + const clrId = PDFUnit.findColorIndex(this.color); + const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? {clr: clrId} : {oc: this.color}; - let oneFill = {x:PDFUnit.toFormX(this.x), + const oneFill = {x:PDFUnit.toFormX(this.x), y:PDFUnit.toFormY(this.y), w:PDFUnit.toFormX(this.width), h:PDFUnit.toFormY(this.height), - clr: clrId}; + ...colorObj}; - //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) - if (clrId < 0) { - oneFill = _.extend({oc: this.color}, oneFill); + + if (oneFill.w < 2 && oneFill.h < 2) { + nodeUtil.p2jinfo("Skipped: tiny fill: " + oneFill.w + " x " + oneFill.h); + return; //skip short thick lines, like PA SPP lines behinds checkbox } targetData.Fills.push(oneFill); - }; - - return cls; -})(); + } +} module.exports = PDFFill; diff --git a/lib/pdfimage.js b/lib/pdfimage.js index a9056eff..56b9a549 100644 --- a/lib/pdfimage.js +++ b/lib/pdfimage.js @@ -1,29 +1,26 @@ -'use strict'; -////////////////////////////////start of fake image -let PDFImage = (function() { - 'use strict'; - let _src = ''; - let _onload = null; +class PDFImage { + #_src = ''; + #_onload = null; - this.__defineSetter__("onload", function(val) { - _onload = val; - }); + set onload(val) { + this.#_onload = typeof val === 'function' ? val : null; + } - this.__defineGetter__("onload", function() { - return _onload; - }); + get onload() { + return this.#_onload; + } - this.__defineSetter__("src", function(val) { - _src = val; - if (_onload) _onload(); - }); + set src(val) { + this.#_src = val; + if (this.#_onload) this.#_onload(); + } - this.__defineGetter__("src", function() { - return _src; - }); + get src() { + return this.#_src; + } - this.btoa = function(val) { + btoa(val) { if (typeof window === 'undefined') { return (new Buffer.from(val, 'ascii')).toString('base64'); } @@ -31,8 +28,8 @@ let PDFImage = (function() { return window.btoa(val); return ""; - }; + } -}); +} module.exports = PDFImage; diff --git a/lib/pdfline.js b/lib/pdfline.js index c119bd6e..4d610810 100644 --- a/lib/pdfline.js +++ b/lib/pdfline.js @@ -1,23 +1,8 @@ -'use strict'; -let nodeUtil = require("util"), - _ = require("lodash"), +const nodeUtil = require("util"), PDFUnit = require('./pdfunit.js'); -let PDFLine = (function PFPLineClosure() { - 'use strict'; - // private static - let _nextId = 1; - let _name = 'PDFLine'; - - // constructor - let cls = function (x1, y1, x2, y2, lineWidth, color, dashed) { - // private - let _id = _nextId++; - - // public (every instance will have their own copy of these methods, needs to be lightweight) - this.get_id = function() { return _id; }; - this.get_name = function() { return _name + _id; }; - +class PDFLine { + constructor(x1, y1, x2, y2, lineWidth, color, dashed) { this.x1 = x1; this.y1 = y1; this.x2 = x2; @@ -25,33 +10,28 @@ let PDFLine = (function PFPLineClosure() { this.lineWidth = lineWidth || 1.0; this.color = color; this.dashed = dashed; - }; + } - let _setStartPoint = function(oneLine, x, y) { + #setStartPoint(oneLine, x, y) { oneLine.x = PDFUnit.toFormX(x); oneLine.y = PDFUnit.toFormY(y); - }; + } - // public (every instance will share the same method, but has no access to private fields defined in constructor) - cls.prototype.processLine = function (targetData) { - let xDelta = Math.abs(this.x2 - this.x1); - let yDelta = Math.abs(this.y2 - this.y1); - let minDelta = this.lineWidth; + processLine(targetData) { + const xDelta = Math.abs(this.x2 - this.x1); + const yDelta = Math.abs(this.y2 - this.y1); + const minDelta = this.lineWidth; let oneLine = {x:0, y:0, w: PDFUnit.toFixedFloat(this.lineWidth), l:0}; //MQZ Aug.28.2013, adding color support, using color dictionary and default to black - let clrId = PDFUnit.findColorIndex(this.color); - if (clrId < 0) { - oneLine = _.extend({oc: this.color}, oneLine); - } - else if (clrId > 0 && clrId < (PDFUnit.colorCount() - 1)) { - oneLine = _.extend({clr: clrId}, oneLine); - } + const clrId = PDFUnit.findColorIndex(this.color); + const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? {clr: clrId} : {oc: this.color}; + oneLine = {...oneLine, ...colorObj}; //MQZ Aug.29 dashed line support if (this.dashed) { - oneLine = _.extend({dsh: 1}, oneLine); + oneLine = oneLine = {...oneLine, dsh: 1}; } if ((yDelta < this.lineWidth) && (xDelta > minDelta)) { //HLine @@ -62,9 +42,9 @@ let PDFLine = (function PFPLineClosure() { oneLine.l = PDFUnit.toFormX(xDelta); if (this.x1 > this.x2) - _setStartPoint.call(this, oneLine, this.x2, this.y2); + this.#setStartPoint(oneLine, this.x2, this.y2); else - _setStartPoint.call(this, oneLine, this.x1, this.y1); + this.#setStartPoint(oneLine, this.x1, this.y1); targetData.HLines.push(oneLine); } else if ((xDelta < this.lineWidth) && (yDelta > minDelta)) {//VLine @@ -75,15 +55,13 @@ let PDFLine = (function PFPLineClosure() { oneLine.l = PDFUnit.toFormY(yDelta); if (this.y1 > this.y2) - _setStartPoint.call(this, oneLine, this.x2, this.y2); + this.#setStartPoint(oneLine, this.x2, this.y2); else - _setStartPoint.call(this, oneLine, this.x1, this.y1); + this.#setStartPoint(oneLine, this.x1, this.y1); targetData.VLines.push(oneLine); } - }; - - return cls; -})(); + } +} module.exports = PDFLine; diff --git a/lib/pdfunit.js b/lib/pdfunit.js index 10890455..9bc56432 100644 --- a/lib/pdfunit.js +++ b/lib/pdfunit.js @@ -1,21 +1,13 @@ -'use strict'; -let nodeUtil = require("util"); -let PDFUnit = (function PFPUnitClosure() { - 'use strict'; - // private static - let _nextId = 1; - let _name = 'PDFUnit'; + const dpi = 96.0; + const gridXPerInch = 4.0; + const gridYPerInch = 4.0; - let dpi = 96.0; - let gridXPerInch = 4.0; - let gridYPerInch = 4.0; - - let _pixelXPerGrid = dpi/gridXPerInch; - let _pixelYPerGrid = dpi/gridYPerInch; - let _pixelPerPoint = dpi/72; - - let kColors = [ + const _pixelXPerGrid = dpi/gridXPerInch; + const _pixelYPerGrid = dpi/gridYPerInch; + const _pixelPerPoint = dpi/72; + + const kColors = [ '#000000', // 0 '#ffffff', // 1 '#4c4c4c', // 2 @@ -52,65 +44,53 @@ let PDFUnit = (function PFPUnitClosure() { '#800080', // Last + 3 '#ff0000', // Last + 4 '#0000ff', // Last + 5 - '#008000', // Last + 6 - '#000000' // Last + 7 + '#008000' // Last + 6 ]; - // constructor - let cls = function () { - // private - let _id = _nextId++; - - // public (every instance will have their own copy of these methods, needs to be lightweight) - this.get_id = function() { return _id; }; - this.get_name = function() { return _name + _id; }; - }; - - cls.toFixedFloat = function(fNum) { +class PDFUnit { + static toFixedFloat(fNum) { return parseFloat(fNum.toFixed(3)); - }; + } - cls.colorCount = function() { + static colorCount() { return kColors.length; - }; + } - cls.toPixelX = function(formX) { + static toPixelX(formX) { return Math.round(formX * _pixelXPerGrid); - }; + } - cls.toPixelY = function(formY) { + static toPixelY(formY) { return Math.round(formY * _pixelYPerGrid); - }; + } - cls.pointToPixel = function(point) {// Point unit (1/72 an inch) to pixel units + static pointToPixel(point) {// Point unit (1/72 an inch) to pixel units return point * _pixelPerPoint; - }; + } - cls.getColorByIndex = function(clrId) { + static getColorByIndex(clrId) { return kColors[clrId]; - }; + } - cls.toFormPoint = function(viewportX, viewportY) { + static toFormPoint(viewportX, viewportY) { return [(viewportX / _pixelXPerGrid), (viewportY / _pixelYPerGrid)]; - }; + } - cls.toFormX = function(viewportX) { - return cls.toFixedFloat(viewportX / _pixelXPerGrid); - }; + static toFormX(viewportX) { + return PDFUnit.toFixedFloat(viewportX / _pixelXPerGrid); + } - cls.toFormY = function(viewportY) { - return cls.toFixedFloat(viewportY / _pixelYPerGrid); - }; + static toFormY(viewportY) { + return PDFUnit.toFixedFloat(viewportY / _pixelYPerGrid); + } - cls.findColorIndex = function(color) { + static findColorIndex(color) { if (color.length === 4) color += "000"; //MQZ. 07/29/2013: if color is not in dictionary, just return -1. The caller (pdffont, pdffill) will set the actual color return kColors.indexOf(color); - }; - - return cls; -})(); + } +} module.exports = PDFUnit; From 972964f51b6efaa3502c7fafa8cc0631df5ce412 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Fri, 15 Oct 2021 16:16:45 -0700 Subject: [PATCH 56/66] refactor pdffields and pdfcanvas with es6 class --- lib/p2jcmd.js | 13 +- lib/pdf.js | 20 +- lib/pdfcanvas.js | 486 +++++++++++++++++++++----------------------- lib/pdfconst.js | 117 +++++++++++ lib/pdffield.js | 189 ++++++++--------- lib/pdffill.js | 2 +- lib/pdffont.js | 268 +++++++++--------------- lib/pdfline.js | 2 +- lib/pdfunit.js | 53 +---- lib/ptixmlinject.js | 52 ++--- pdfparser.js | 10 +- readme.md | 25 ++- 12 files changed, 601 insertions(+), 636 deletions(-) create mode 100644 lib/pdfconst.js diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index 855962fe..1b5b6e8b 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -1,12 +1,11 @@ const nodeUtil = require("util"), - fs = require('fs'), - path = require('path'), - _ = require('lodash'), - async = require("async"), - - {ParserStream, StringifyStream} = require('./parserstream'), - pkInfo = require('../package.json'), + fs = require("fs"), + path = require("path"), + _ = require("lodash"), + async = require("async"), + {ParserStream, StringifyStream} = require("./parserstream"), + pkInfo = require("../package.json"), PDFParser = require("../pdfparser"); const _PRO_TIMER = `${pkInfo.name}@${pkInfo.version} [${pkInfo.homepage}]`; diff --git a/lib/pdf.js b/lib/pdf.js index 3f71ff48..5aeef26d 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -1,16 +1,16 @@ const nodeUtil = require("util"), {EventEmitter} = require("events"), {Blob} = require("buffer"), - fs = require('fs'), - _ = require('lodash'), - DOMParser = require('@xmldom/xmldom').DOMParser, - PDFCanvas = require('./pdfcanvas.js'), - PDFUnit = require('./pdfunit.js'), - PDFField = require('./pdffield.js'), - PDFAnno = require('./pdfanno.js'), - Image = require('./pdfimage.js'), - pkInfo = require('../package.json'), - PDFFont = require('./pdffont'); + fs = require("fs"), + _ = require("lodash"), + DOMParser = require("@xmldom/xmldom").DOMParser, + PDFCanvas = require("./pdfcanvas"), + PDFUnit = require("./pdfunit"), + PDFField = require("./pdffield"), + PDFAnno = require("./pdfanno"), + Image = require("./pdfimage"), + pkInfo = require("../package.json"), + PDFFont = require("./pdffont"); const _pdfjsFiles = [ 'shared/util.js', diff --git a/lib/pdfcanvas.js b/lib/pdfcanvas.js index 6b94725e..de2c2ab8 100644 --- a/lib/pdfcanvas.js +++ b/lib/pdfcanvas.js @@ -1,124 +1,175 @@ -'use strict'; -let nodeUtil = require("util"), +const nodeUtil = require("util"), _ = require('lodash'), PDFLine = require('./pdfline'), PDFFill = require('./pdffill'), PDFFont = require('./pdffont'); -(function () { - // private static - let _nextId = 1; - let _name = 'PDFCanvas'; - - // alias some functions to make (compiled) code shorter - let m = Math; - let mr = m.round; - let ms = m.sin; - let mc = m.cos; - let abs = m.abs; - let sqrt = m.sqrt; - - // precompute "00" to "FF" - let dec2hex = []; - for (let i = 0; i < 16; i++) { - for (let j = 0; j < 16; j++) { - dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); - } - } +// alias some functions to make (compiled) code shorter +const {round: mr, sin: ms, cos: mc, abs, sqrt} = Math; - function createMatrixIdentity() { - return [ - [1, 0, 0], - [0, 1, 0], - [0, 0, 1] - ]; +// precompute "00" to "FF" +const dec2hex = []; +for (let i = 0; i < 16; i++) { + for (let j = 0; j < 16; j++) { + dec2hex[i * 16 + j] = i.toString(16) + j.toString(16); } +} - function matrixMultiply(m1, m2) { - let result = createMatrixIdentity(); +function createMatrixIdentity() { + return [ + [1, 0, 0], + [0, 1, 0], + [0, 0, 1] + ]; +} - for (let x = 0; x < 3; x++) { - for (let y = 0; y < 3; y++) { - let sum = 0; +function matrixMultiply(m1, m2) { + let result = createMatrixIdentity(); - for (let z = 0; z < 3; z++) { - sum += m1[x][z] * m2[z][y]; - } + for (let x = 0; x < 3; x++) { + for (let y = 0; y < 3; y++) { + let sum = 0; - result[x][y] = sum; - } - } - return result; - } - - function copyState(o1, o2) { - o2.fillStyle = o1.fillStyle; - o2.lineCap = o1.lineCap; - o2.lineJoin = o1.lineJoin; - o2.lineWidth = o1.lineWidth; - o2.miterLimit = o1.miterLimit; - o2.shadowBlur = o1.shadowBlur; - o2.shadowColor = o1.shadowColor; - o2.shadowOffsetX = o1.shadowOffsetX; - o2.shadowOffsetY = o1.shadowOffsetY; - o2.strokeStyle = o1.strokeStyle; - o2.globalAlpha = o1.globalAlpha; - o2.arcScaleX_ = o1.arcScaleX_; - o2.arcScaleY_ = o1.arcScaleY_; - o2.lineScale_ = o1.lineScale_; - o2.dashArray = o1.dashArray; - } - - function processStyle(styleString) { - let str, alpha = 1; - - styleString = String(styleString); - if (styleString.substring(0, 3) == 'rgb') { - let start = styleString.indexOf('(', 3); - let end = styleString.indexOf(')', start + 1); - let guts = styleString.substring(start + 1, end).split(','); - - str = '#'; - for (let i = 0; i < 3; i++) { - str += dec2hex[Number(guts[i])]; + for (let z = 0; z < 3; z++) { + sum += m1[x][z] * m2[z][y]; } - if (guts.length == 4 && styleString.substr(3, 1) == 'a') { - alpha = guts[3]; - } - } else { - str = styleString; + result[x][y] = sum; + } + } + return result; +} + +function copyState(o1, o2) { + o2.fillStyle = o1.fillStyle; + o2.lineCap = o1.lineCap; + o2.lineJoin = o1.lineJoin; + o2.lineWidth = o1.lineWidth; + o2.miterLimit = o1.miterLimit; + o2.shadowBlur = o1.shadowBlur; + o2.shadowColor = o1.shadowColor; + o2.shadowOffsetX = o1.shadowOffsetX; + o2.shadowOffsetY = o1.shadowOffsetY; + o2.strokeStyle = o1.strokeStyle; + o2.globalAlpha = o1.globalAlpha; + o2.arcScaleX_ = o1.arcScaleX_; + o2.arcScaleY_ = o1.arcScaleY_; + o2.lineScale_ = o1.lineScale_; + o2.dashArray = o1.dashArray; +} + +function processStyle(styleString) { + let str, alpha = 1; + + styleString = String(styleString); + if (styleString.substring(0, 3) == 'rgb') { + let start = styleString.indexOf('(', 3); + let end = styleString.indexOf(')', start + 1); + let guts = styleString.substring(start + 1, end).split(','); + + str = '#'; + for (let i = 0; i < 3; i++) { + str += dec2hex[Number(guts[i])]; } - return {color:str, alpha:alpha}; + if (guts.length == 4 && styleString.substr(3, 1) == 'a') { + alpha = guts[3]; + } + } else { + str = styleString; } - function processLineCap(lineCap) { - switch (lineCap) { - case 'butt': - return 'flat'; - case 'round': - return 'round'; - case 'square': - default: - return 'square'; + return {color:str, alpha:alpha}; +} + +function processLineCap(lineCap) { + switch (lineCap) { + case 'butt': + return 'flat'; + case 'round': + return 'round'; + case 'square': + default: + return 'square'; + } +} + +// Helper function that takes the already fixed cordinates. +function bezierCurveToHelper(self, cp1, cp2, p) { + self.currentPath_.push({ + type:'bezierCurveTo', + cp1x:cp1.x, + cp1y:cp1.y, + cp2x:cp2.x, + cp2y:cp2.y, + x:p.x, + y:p.y + }); + self.currentX_ = p.x; + self.currentY_ = p.y; +} + +function matrixIsFinite(m) { + for (let j = 0; j < 3; j++) { + for (let k = 0; k < 2; k++) { + if (!isFinite(m[j][k]) || isNaN(m[j][k])) { + return false; + } } } + return true; +} - /** - * This class implements CanvasRenderingContext2D interface as described by - * the WHATWG. - * @param {HTMLElement} surfaceElement The element that the 2D context should - * be associated with - */ - function CanvasRenderingContext2D_(canvasTarget, scaledWidth, scaledHeight) { - // private - let _id = _nextId++; +function setM(ctx, m, updateLineScale) { + if (!matrixIsFinite(m)) { + return; + } + ctx.m_ = m; + + if (updateLineScale) { + // Get the line scale. + // Determinant of this.m_ means how much the area is enlarged by the + // transformation. So its square root can be used as a scale factor + // for width. + let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; + ctx.lineScale_ = sqrt(abs(det)); + } +} - // public (every instance will have their own copy of these methods, needs to be lightweight) - this.get_id = function() { return _id; }; - this.get_name = function() { return _name + _id; }; +class CanvasPattern_ { + constructor() { + } +} +// Gradient / Pattern Stubs +class CanvasGradient_ { + constructor(aType) { + this.type_ = aType; + this.x0_ = 0; + this.y0_ = 0; + this.r0_ = 0; + this.x1_ = 0; + this.y1_ = 0; + this.r1_ = 0; + this.colors_ = []; + } + addColorStop(aOffset, aColor) { + aColor = processStyle(aColor); + this.colors_.push({offset:aOffset, + color:aColor.color, + alpha:aColor.alpha}); + } +} + + +/** + * This class implements CanvasRenderingContext2D interface as described by + * the WHATWG. + * @param {HTMLElement} surfaceElement The element that the 2D context should + * be associated with + */ +class CanvasRenderingContext2D_ { + constructor(canvasTarget, scaledWidth, scaledHeight) { this.m_ = createMatrixIdentity(); this.mStack_ = []; @@ -158,42 +209,40 @@ let nodeUtil = require("util"), } //private helper methods - let _drawPDFLine = function(p1, p2, lineWidth, color) { + #drawPDFLine(p1, p2, lineWidth, color) { let dashedLine = _.isArray(this.dashArray) && (this.dashArray.length > 1); let pL = new PDFLine(p1.x, p1.y, p2.x, p2.y, lineWidth, color, dashedLine); pL.processLine(this.canvas); - }; + } - let _drawPDFFill = function(cp, min, max, color) { + #drawPDFFill(cp, min, max, color) { let width = max.x - min.x; let height = max.y - min.y; let pF = new PDFFill(cp.x, cp.y, width, height, color); pF.processFill(this.canvas); - }; + } - let _needRemoveRect = function(x, y, w, h) { + #needRemoveRect(x, y, w, h) { let retVal = (Math.abs(w - Math.abs(h)) < 1 && w < 13); if (retVal) { nodeUtil.p2jinfo("Skipped: tiny rect: w=" + w + ", h=" + h); } return retVal; - }; - - let contextPrototype = CanvasRenderingContext2D_.prototype; + } - contextPrototype.getContext = function(ctxType) { + getContext(ctxType) { return (ctxType === "2d") ? this : null; - }; + } - contextPrototype.setLineDash = function(lineDash) { + setLineDash(lineDash) { this.dashArray = lineDash; - }; + } - contextPrototype.getLineDash= function() { + getLineDash() { return this.dashArray; - }; + } - contextPrototype.fillText = function(text, x, y, maxWidth, fontSize) { + fillText(text, x, y, maxWidth, fontSize) { if (!text || text.trim().length < 1) return; let p = this.getCoords_(x, y); @@ -204,73 +253,59 @@ let nodeUtil = require("util"), this.currentFont.processText(p, text, maxWidth, color, fontSize, this.canvas, this.m_); }; - contextPrototype.strokeText = function(text, x, y, maxWidth) { + strokeText(text, x, y, maxWidth) { //MQZ. 10/23/2012, yeah, no hollow text for now this.fillText(text, x, y, maxWidth); - }; + } - contextPrototype.measureText = function(text) { + measureText(text) { console.warn("to be implemented: contextPrototype.measureText - ", text); let chars = text.length || 1; return {width: chars * (this.currentFont.spaceWidth || 5)}; - }; + } - contextPrototype.setFont = function(fontObj) { + setFont(fontObj) { if ((!!this.currentFont) && _.isFunction(this.currentFont.clean)) { this.currentFont.clean(); this.currentFont = null; } this.currentFont = new PDFFont(fontObj); - }; + } - contextPrototype.clearRect = function () { - }; + clearRect() { + console.warn("to be implemented: contextPrototype.clearRect"); + } - contextPrototype.beginPath = function () { + beginPath() { // TODO: Branch current matrix so that save/restore has no effect // as per safari docs. this.currentPath_ = []; - }; + } - contextPrototype.moveTo = function (aX, aY) { + moveTo(aX, aY) { let p = this.getCoords_(aX, aY); this.currentPath_.push({type:'moveTo', x:p.x, y:p.y}); this.currentX_ = p.x; this.currentY_ = p.y; - }; + } - contextPrototype.lineTo = function (aX, aY) { + lineTo(aX, aY) { let p = this.getCoords_(aX, aY); this.currentPath_.push({type:'lineTo', x:p.x, y:p.y}); this.currentX_ = p.x; this.currentY_ = p.y; - }; + } - contextPrototype.bezierCurveTo = function (aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { + bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) { let p = this.getCoords_(aX, aY); let cp1 = this.getCoords_(aCP1x, aCP1y); let cp2 = this.getCoords_(aCP2x, aCP2y); - bezierCurveTo(this, cp1, cp2, p); - }; - - // Helper function that takes the already fixed cordinates. - function bezierCurveTo(self, cp1, cp2, p) { - self.currentPath_.push({ - type:'bezierCurveTo', - cp1x:cp1.x, - cp1y:cp1.y, - cp2x:cp2.x, - cp2y:cp2.y, - x:p.x, - y:p.y - }); - self.currentX_ = p.x; - self.currentY_ = p.y; + bezierCurveToHelper(this, cp1, cp2, p); } - contextPrototype.quadraticCurveTo = function (aCPx, aCPy, aX, aY) { + quadraticCurveTo(aCPx, aCPy, aX, aY) { // the following is lifted almost directly from // http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes @@ -286,10 +321,10 @@ let nodeUtil = require("util"), y:cp1.y + (p.y - this.currentY_) / 3.0 }; - bezierCurveTo(this, cp1, cp2, p); - }; + bezierCurveToHelper(this, cp1, cp2, p); + } - contextPrototype.arc = function (aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { + arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) { let arcType = aClockwise ? 'at' : 'wa'; let xStart = aX + mc(aStartAngle) * aRadius; @@ -316,11 +351,10 @@ let nodeUtil = require("util"), yStart:pStart.y, xEnd:pEnd.x, yEnd:pEnd.y}); + } - }; - - contextPrototype.rect = function (aX, aY, aWidth, aHeight) { - if (_needRemoveRect.call(this, aX, aY, aWidth, aHeight)) { + rect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { return;//try to remove the rectangle behind radio buttons and checkboxes } @@ -329,10 +363,10 @@ let nodeUtil = require("util"), this.lineTo(aX + aWidth, aY + aHeight); this.lineTo(aX, aY + aHeight); this.closePath(); - }; + } - contextPrototype.strokeRect = function (aX, aY, aWidth, aHeight) { - if (_needRemoveRect.call(this, aX, aY, aWidth, aHeight)) { + strokeRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { return;//try to remove the rectangle behind radio buttons and checkboxes } @@ -347,10 +381,10 @@ let nodeUtil = require("util"), this.stroke(); this.currentPath_ = oldPath; - }; + } - contextPrototype.fillRect = function (aX, aY, aWidth, aHeight) { - if (_needRemoveRect.call(this, aX, aY, aWidth, aHeight)) { + fillRect(aX, aY, aWidth, aHeight) { + if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) { return;//try to remove the rectangle behind radio buttons and checkboxes } @@ -365,18 +399,18 @@ let nodeUtil = require("util"), this.fill(); this.currentPath_ = oldPath; - }; + } - contextPrototype.createLinearGradient = function (aX0, aY0, aX1, aY1) { + createLinearGradient(aX0, aY0, aX1, aY1) { let gradient = new CanvasGradient_('gradient'); gradient.x0_ = aX0; gradient.y0_ = aY0; gradient.x1_ = aX1; gradient.y1_ = aY1; return gradient; - }; + } - contextPrototype.createRadialGradient = function (aX0, aY0, aR0, aX1, aY1, aR1) { + createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) { let gradient = new CanvasGradient_('gradientradial'); gradient.x0_ = aX0; gradient.y0_ = aY0; @@ -385,22 +419,22 @@ let nodeUtil = require("util"), gradient.y1_ = aY1; gradient.r1_ = aR1; return gradient; - }; + } - contextPrototype.drawImage = function (image, var_args) { + drawImage(image, var_args) { //MQZ. no image drawing support for now - }; + } - contextPrototype.getImageData = function (x, y, w, h) { + getImageData(x, y, w, h) { //MQZ. returns empty data buffer for now return { width:w, height:h, data:new Uint8Array(w * h * 4) }; - }; + } - contextPrototype.stroke = function (aFill) { + stroke(aFill) { if (this.currentPath_.length < 2) { return; } @@ -422,14 +456,14 @@ let nodeUtil = require("util"), case 'lineTo': if (!aFill) { //lines if (i > 0) { - _drawPDFLine.call(this, this.currentPath_[i-1], p, lineWidth, color); + this.#drawPDFLine(this.currentPath_[i-1], p, lineWidth, color); } } break; case 'close': if (!aFill) { //lines if (i > 0) { - _drawPDFLine.call(this, this.currentPath_[i-1], this.currentPath_[0], lineWidth, color); + this.#drawPDFLine(this.currentPath_[i-1], this.currentPath_[0], lineWidth, color); } } p = null; @@ -459,70 +493,43 @@ let nodeUtil = require("util"), } if (aFill) { //fill - _drawPDFFill.call(this, min, min, max, color); + this.#drawPDFFill(min, min, max, color); } - }; + } - contextPrototype.fill = function () { + fill() { this.stroke(true); - }; + } - contextPrototype.closePath = function () { + closePath() { this.currentPath_.push({type:'close'}); - }; + } /** * @private */ - contextPrototype.getCoords_ = function (aX, aY) { + getCoords_ (aX, aY) { let m = this.m_; return { x: (aX * m[0][0] + aY * m[1][0] + m[2][0]), y: (aX * m[0][1] + aY * m[1][1] + m[2][1]) }; - }; + } - contextPrototype.save = function () { + save() { let o = {}; copyState(this, o); this.aStack_.push(o); this.mStack_.push(this.m_); this.m_ = matrixMultiply(createMatrixIdentity(), this.m_); - }; + } - contextPrototype.restore = function () { + restore() { copyState(this.aStack_.pop(), this); this.m_ = this.mStack_.pop(); - }; - - function matrixIsFinite(m) { - for (let j = 0; j < 3; j++) { - for (let k = 0; k < 2; k++) { - if (!isFinite(m[j][k]) || isNaN(m[j][k])) { - return false; - } - } - } - return true; - } - - function setM(ctx, m, updateLineScale) { - if (!matrixIsFinite(m)) { - return; - } - ctx.m_ = m; - - if (updateLineScale) { - // Get the line scale. - // Determinant of this.m_ means how much the area is enlarged by the - // transformation. So its square root can be used as a scale factor - // for width. - let det = m[0][0] * m[1][1] - m[0][1] * m[1][0]; - ctx.lineScale_ = sqrt(abs(det)); - } } - contextPrototype.translate = function (aX, aY) { + translate(aX, aY) { let m1 = [ [1, 0, 0], [0, 1, 0], @@ -530,9 +537,9 @@ let nodeUtil = require("util"), ]; setM(this, matrixMultiply(m1, this.m_), false); - }; + } - contextPrototype.rotate = function (aRot) { + rotate(aRot) { let c = mc(aRot); let s = ms(aRot); @@ -543,9 +550,9 @@ let nodeUtil = require("util"), ]; setM(this, matrixMultiply(m1, this.m_), false); - }; + } - contextPrototype.scale = function (aX, aY) { + scale(aX, aY) { this.arcScaleX_ *= aX; this.arcScaleY_ *= aY; let m1 = [ @@ -555,9 +562,9 @@ let nodeUtil = require("util"), ]; setM(this, matrixMultiply(m1, this.m_), true); - }; + } - contextPrototype.transform = function (m11, m12, m21, m22, dx, dy) { + transform(m11, m12, m21, m22, dx, dy) { let m1 = [ [m11, m12, 0], [m21, m22, 0], @@ -565,9 +572,9 @@ let nodeUtil = require("util"), ]; setM(this, matrixMultiply(m1, this.m_), true); - }; + } - contextPrototype.setTransform = function (m11, m12, m21, m22, dx, dy) { + setTransform(m11, m12, m21, m22, dx, dy) { let m = [ [m11, m12, 0], [m21, m22, 0], @@ -575,47 +582,24 @@ let nodeUtil = require("util"), ]; setM(this, m, true); - }; + } /******** STUBS ********/ - contextPrototype.clip = function () { + clip() { // TODO: Implement - }; + } - contextPrototype.arcTo = function () { + arcTo() { // TODO: Implement - }; - - contextPrototype.createPattern = function () { - return new CanvasPattern_; - }; - - // Gradient / Pattern Stubs - function CanvasGradient_(aType) { - this.type_ = aType; - this.x0_ = 0; - this.y0_ = 0; - this.r0_ = 0; - this.x1_ = 0; - this.y1_ = 0; - this.r1_ = 0; - this.colors_ = []; } - CanvasGradient_.prototype.addColorStop = function (aOffset, aColor) { - aColor = processStyle(aColor); - this.colors_.push({offset:aOffset, - color:aColor.color, - alpha:aColor.alpha}); - }; - - function CanvasPattern_() { + createPattern() { + return new CanvasPattern_(); } +} - // set up externs - module.exports = CanvasRenderingContext2D_; +// set up externs +module.exports = CanvasRenderingContext2D_; // CanvasRenderingContext2D = CanvasRenderingContext2D_; // CanvasGradient = CanvasGradient_; -// CanvasPattern = CanvasPattern_; - -})(); +// CanvasPattern = CanvasPattern_; \ No newline at end of file diff --git a/lib/pdfconst.js b/lib/pdfconst.js new file mode 100644 index 00000000..e3cfef39 --- /dev/null +++ b/lib/pdfconst.js @@ -0,0 +1,117 @@ +const kColors = [ + '#000000', // 0 + '#ffffff', // 1 + '#4c4c4c', // 2 + '#808080', // 3 + '#999999', // 4 + '#c0c0c0', // 5 + '#cccccc', // 6 + '#e5e5e5', // 7 + '#f2f2f2', // 8 + '#008000', // 9 + '#00ff00', // 10 + '#bfffa0', // 11 + '#ffd629', // 12 + '#ff99cc', // 13 + '#004080', // 14 + '#9fc0e1', // 15 + '#5580ff', // 16 + '#a9c9fa', // 17 + '#ff0080', // 18 + '#800080', // 19 + '#ffbfff', // 20 + '#e45b21', // 21 + '#ffbfaa', // 22 + '#008080', // 23 + '#ff0000', // 24 + '#fdc59f', // 25 + '#808000', // 26 + '#bfbf00', // 27 + '#824100', // 28 + '#007256', // 29 + '#008000', // 30 + '#000080', // Last + 1 + '#008080', // Last + 2 + '#800080', // Last + 3 + '#ff0000', // Last + 4 + '#0000ff', // Last + 5 + '#008000' // Last + 6 +]; + +const kFontFaces = [ + "quicktype,arial,helvetica,sans-serif", // 00 - QuickType - sans-serif variable font + "quicktype condensed,arial narrow,arial,helvetica,sans-serif", // 01 - QuickType Condensed - thin sans-serif variable font + "quicktypepi,quicktypeiipi", // 02 - QuickType Pi + "quicktype mono,courier new,courier,monospace", // 03 - QuickType Mono - san-serif fixed font + "ocr-a,courier new,courier,monospace", // 04 - OCR-A - OCR readable san-serif fixed font + "ocr b mt,courier new,courier,monospace" // 05 - OCR-B MT - OCR readable san-serif fixed font + ]; + + const kFontStyles = [ + // Face Size Bold Italic StyleID(Comment) + // ----- ---- ---- ----- ----------------- + [0, 6, 0, 0], //00 + [0, 8, 0, 0], //01 + [0, 10, 0, 0], //02 + [0, 12, 0, 0], //03 + [0, 14, 0, 0], //04 + [0, 18, 0, 0], //05 + [0, 6, 1, 0], //06 + [0, 8, 1, 0], //07 + [0, 10, 1, 0], //08 + [0, 12, 1, 0], //09 + [0, 14, 1, 0], //10 + [0, 18, 1, 0], //11 + [0, 6, 0, 1], //12 + [0, 8, 0, 1], //13 + [0, 10, 0, 1], //14 + [0, 12, 0, 1], //15 + [0, 14, 0, 1], //16 + [0, 18, 0, 1], //17 + [0, 6, 1, 1], //18 + [0, 8, 1, 1], //19 + [0, 10, 1, 1], //20 + [0, 12, 1, 1], //21 + [0, 14, 1, 1], //22 + [0, 18, 1, 1], //23 + [1, 6, 0, 0], //24 + [1, 8, 0, 0], //25 + [1, 10, 0, 0], //26 + [1, 12, 0, 0], //27 + [1, 14, 0, 0], //28 + [1, 18, 0, 0], //29 + [1, 6, 1, 0], //30 + [1, 8, 1, 0], //31 + [1, 10, 1, 0], //32 + [1, 12, 1, 0], //33 + [1, 14, 1, 0], //34 + [1, 18, 1, 0], //35 + [1, 6, 0, 1], //36 + [1, 8, 0, 1], //37 + [1, 10, 0, 1], //38 + [1, 12, 0, 1], //39 + [1, 14, 0, 1], //40 + [1, 18, 0, 1], //41 + [2, 8, 0, 0], //42 + [2, 10, 0, 0], //43 + [2, 12, 0, 0], //44 + [2, 14, 0, 0], //45 + [2, 18, 0, 0], //46 + [3, 8, 0, 0], //47 + [3, 10, 0, 0], //48 + [3, 12, 0, 0], //49 + [4, 12, 0, 0], //50 + [0, 9, 0, 0], //51 + [0, 9, 1, 0], //52 + [0, 9, 0, 1], //53 + [0, 9, 1, 1], //54 + [1, 9, 0, 0], //55 + [1, 9, 1, 0], //56 + [1, 9, 1, 1], //57 + [4, 10, 0, 0], //58 + [5, 10, 0, 0], //59 + [5, 12, 0, 0] //60 +]; + + +module.exports = {kColors, kFontFaces, kFontStyles}; \ No newline at end of file diff --git a/lib/pdffield.js b/lib/pdffield.js index 2ca90c28..9ff0d0af 100644 --- a/lib/pdffield.js +++ b/lib/pdffield.js @@ -1,53 +1,15 @@ -'use strict'; - -let nodeUtil = require("util"), +const nodeUtil = require("util"), _ = require("lodash"), - PDFUnit = require('./pdfunit.js'); - -let PDFField = (function PDFFieldClosure() { - 'use strict'; - // private static - let _nextId = 1; - let _name = 'PDFField'; - let _tabIndex = 0; - - let kFBANotOverridable = 0x00000400; // indicates the field is read only by the user - let kFBARequired = 0x00000010; // indicates the field is required - let kMinHeight = 20; - - // constructor - let cls = function (field, viewport, Fields, Boxsets) { - // private - let _id = _nextId++; + PDFUnit = require("./pdfunit"); - // public (every instance will have their own copy of these methods, needs to be lightweight) - this.get_id = function() { return _id; }; - this.get_name = function() { return _name + _id; }; +const kFBANotOverridable = 0x00000400; // indicates the field is read only by the user +const kFBARequired = 0x00000010; // indicates the field is required +const kMinHeight = 20; - this.field = field; - this.viewport = viewport; - this.Fields = Fields; - this.Boxsets = Boxsets; - }; - - // Normalize rectangle rect=[x1, y1, x2, y2] so that (x1,y1) < (x2,y2) - // For coordinate systems whose origin lies in the bottom-left, this - // means normalization to (BL,TR) ordering. For systems with origin in the - // top-left, this means (TL,BR) ordering. - let _normalizeRect = function(rect) { - let r = rect.slice(0); // clone rect - if (rect[0] > rect[2]) { - r[0] = rect[2]; - r[2] = rect[0]; - } - if (rect[1] > rect[3]) { - r[1] = rect[3]; - r[3] = rect[1]; - } - return r; - }; +class PDFField { + static tabIndex = 0; - cls.isWidgetSupported = function(field) { + static isWidgetSupported(field) { let retVal = false; switch(field.fieldType) { @@ -71,23 +33,48 @@ let PDFField = (function PDFFieldClosure() { } return retVal; - }; + } - cls.isFormElement = function(field) { + static isFormElement(field) { let retVal = false; switch(field.subtype) { - case 'Widget': retVal = cls.isWidgetSupported(field); break; + case 'Widget': retVal = PDFField.isWidgetSupported(field); break; default: nodeUtil.p2jwarn("Unsupported: field.type of " + field.subtype); break; } return retVal; - }; + } + + // constructor + constructor(field, viewport, Fields, Boxsets) { + this.field = field; + this.viewport = viewport; + this.Fields = Fields; + this.Boxsets = Boxsets; + } + + // Normalize rectangle rect=[x1, y1, x2, y2] so that (x1,y1) < (x2,y2) + // For coordinate systems whose origin lies in the bottom-left, this + // means normalization to (BL,TR) ordering. For systems with origin in the + // top-left, this means (TL,BR) ordering. + static #normalizeRect(rect) { + const r = rect.slice(0); // clone rect + if (rect[0] > rect[2]) { + r[0] = rect[2]; + r[2] = rect[0]; + } + if (rect[1] > rect[3]) { + r[1] = rect[3]; + r[3] = rect[1]; + } + return r; + } - let _getFieldPosition = function(field) { + #getFieldPosition(field) { let viewPort = this.viewport; let fieldRect = viewPort.convertToViewportRectangle(field.rect); - let rect = _normalizeRect(fieldRect); + let rect = PDFField.#normalizeRect(fieldRect); let height = rect[3] - rect[1]; if (field.fieldType === 'Tx') { @@ -108,9 +95,9 @@ let PDFField = (function PDFFieldClosure() { w: PDFUnit.toFormX(rect[2] - rect[0]), h: PDFUnit.toFormY(height) }; - }; + } - let _getFieldBaseData = function(field) { + #getFieldBaseData(field) { let attributeMask = 0; //PDF Spec p.676 TABLE 8.70 Field flags common to all field types if (field.fieldFlags & 0x00000001) { @@ -134,17 +121,17 @@ let PDFField = (function PDFFieldClosure() { anData.TM = field.alternativeID; } - return _.extend(anData, _getFieldPosition.call(this, field)); - }; + return _.extend(anData, this.#getFieldPosition(field)); + } - let _addAlpha = function(field) { + #addAlpha(field) { let anData = _.extend({ style: 48, T: { Name: field.TName || "alpha", TypeInfo: {} } - }, _getFieldBaseData.call(this, field)); + }, this.#getFieldBaseData(field)); if (field.MV) { //field attributes: arbitrary mask value anData.MV = field.MV; @@ -154,47 +141,44 @@ let PDFField = (function PDFFieldClosure() { } this.Fields.push(anData); - }; + } - let _addCheckBox = function(box) { + #addCheckBox(box) { let anData = _.extend({ style: 48, T: { Name: "box", TypeInfo: {} } - }, _getFieldBaseData.call(this, box)); + }, this.#getFieldBaseData(box)); this.Boxsets.push({boxes:[anData]}); - }; + } - let _addRadioButton = function(box) { + #addRadioButton(box) { let anData = _.extend({ style: 48, T: { Name: "box", TypeInfo: {} } - }, _getFieldBaseData.call(this, box)); + }, this.#getFieldBaseData(box)); anData.id.Id = box.value; if (_.has(box, 'checked')) { anData.checked = box.checked; } - let rdGroup = _.find(this.Boxsets, function(boxset) { - return _.has(boxset, 'id') && _.has(boxset.id, 'Id') && (boxset.id.Id === box.fullName); - }); - + const rdGroup = _.find(this.Boxsets, boxset => _.has(boxset, 'id') && _.has(boxset.id, 'Id') && (boxset.id.Id === box.fullName)); if ((!!rdGroup) && (_.has(rdGroup, 'boxes'))) { rdGroup.boxes.push(anData); } else { this.Boxsets.push({boxes:[anData], id: { Id: box.fullName, EN: 0}}); } - }; + } - let _addLinkButton = function(field) { + #addLinkButton(field) { let anData = _.extend({ style: 48, T: { @@ -203,23 +187,23 @@ let PDFField = (function PDFFieldClosure() { FL: { form: {Id: field.FL} } - }, _getFieldBaseData.call(this, field)); + }, this.#getFieldBaseData(field)); this.Fields.push(anData); - }; + } - let _addSelect = function(field) { + #addSelect(field) { let anData = _.extend({ style: 48, T: { Name: "alpha", TypeInfo: {} } - }, _getFieldBaseData.call(this, field)); + }, this.#getFieldBaseData(field)); anData.w -= 0.5; //adjust combobox width anData.PL = {V: [], D: []}; - _.each(field.value, function(ele, idx) { + _.each(field.value, (ele, idx) => { if (Array.isArray(ele)) { anData.PL.D.push(ele[0]); anData.PL.V.push(ele[1]); @@ -236,49 +220,43 @@ let PDFField = (function PDFFieldClosure() { this.Fields.push(anData); }; - // public (every instance will share the same method, but has no access to private fields defined in constructor) - cls.prototype.processField = function () { - - this.field.TI = _tabIndex++; + // public instance methods + processField() { + this.field.TI = PDFField.tabIndex++; switch(this.field.fieldType) { - case 'Tx': _addAlpha.call(this, this.field); break; - case 'Cb': _addCheckBox.call(this, this.field); break; - case 'Rd': _addRadioButton.call(this, this.field);break; - case 'Btn':_addLinkButton.call(this, this.field); break; - case 'Ch': _addSelect.call(this, this.field); break; + case 'Tx': this.#addAlpha(this.field); break; + case 'Cb': this.#addCheckBox(this.field); break; + case 'Rd': this.#addRadioButton(this.field);break; + case 'Btn':this.#addLinkButton(this.field); break; + case 'Ch': this.#addSelect(this.field); break; } this.clean(); - }; - - cls.prototype.clean = function() { - delete this.get_id; - delete this.get_name; + } + clean() { delete this.field; delete this.viewport; delete this.Fields; delete this.Boxsets; - }; + } //static public method to generate fieldsType object based on parser result - cls.getAllFieldsTypes = function(data) { - - function isFieldReadOnly(field) { + static getAllFieldsTypes(data) { + const isFieldReadOnly = field => { return (field.AM & kFBANotOverridable) ? true : false; - } + }; - function getFieldBase(field) { + const getFieldBase = field => { return {id: field.id.Id, type: field.T.Name, calc: isFieldReadOnly(field), value: field.V || ""}; - } + }; let retVal = []; - - _.each(data.Pages, function(page) { - _.each(page.Boxsets, function(boxsets) { + _.each(data.Pages, page => { + _.each(page.Boxsets, boxsets => { if (boxsets.boxes.length > 1) { //radio button - _.each(boxsets.boxes, function(box) { + _.each(boxsets.boxes, box => { retVal.push({id: boxsets.id.Id, type: "radio", calc: isFieldReadOnly(box), value: box.id.Id}); }); } @@ -287,15 +265,12 @@ let PDFField = (function PDFFieldClosure() { } }); - _.each(page.Fields, function(field){ - retVal.push(getFieldBase(field)); - }); + _.each(page.Fields, field => retVal.push(getFieldBase(field))); + }); return retVal; - }; - - return cls; -})(); + } +} module.exports = PDFField; diff --git a/lib/pdffill.js b/lib/pdffill.js index caf47c68..f144aa26 100644 --- a/lib/pdffill.js +++ b/lib/pdffill.js @@ -1,6 +1,6 @@ const nodeUtil = require("util"), - PDFUnit = require('./pdfunit.js'); + PDFUnit = require("./pdfunit"); class PDFFill{ // constructor diff --git a/lib/pdffont.js b/lib/pdffont.js index f54e9b27..5b2168de 100644 --- a/lib/pdffont.js +++ b/lib/pdffont.js @@ -1,127 +1,42 @@ -'use strict'; - -let nodeUtil = require("util"), +const nodeUtil = require("util"), _ = require("lodash"), - PDFUnit = require('./pdfunit.js'); - -let PDFFont = (function PFPFontClosure() { - // private static - let _nextId = 1; - let _name = 'PDFFont'; - - let _boldSubNames = ["bd", "bold", "demi", "black"]; - let _stdFonts = ["arial", "helvetica", "sans-serif ", "courier ","monospace ", "ocr "]; - - let _kFontFaces = [ - "quicktype,arial,helvetica,sans-serif", // 00 - QuickType - sans-serif variable font - "quicktype condensed,arial narrow,arial,helvetica,sans-serif", // 01 - QuickType Condensed - thin sans-serif variable font - "quicktypepi,quicktypeiipi", // 02 - QuickType Pi - "quicktype mono,courier new,courier,monospace", // 03 - QuickType Mono - san-serif fixed font - "ocr-a,courier new,courier,monospace", // 04 - OCR-A - OCR readable san-serif fixed font - "ocr b mt,courier new,courier,monospace" // 05 - OCR-B MT - OCR readable san-serif fixed font - ]; - - let _kFontStyles = [ - // Face Size Bold Italic StyleID(Comment) - // ----- ---- ---- ----- ----------------- - [0, 6, 0, 0], //00 - [0, 8, 0, 0], //01 - [0, 10, 0, 0], //02 - [0, 12, 0, 0], //03 - [0, 14, 0, 0], //04 - [0, 18, 0, 0], //05 - [0, 6, 1, 0], //06 - [0, 8, 1, 0], //07 - [0, 10, 1, 0], //08 - [0, 12, 1, 0], //09 - [0, 14, 1, 0], //10 - [0, 18, 1, 0], //11 - [0, 6, 0, 1], //12 - [0, 8, 0, 1], //13 - [0, 10, 0, 1], //14 - [0, 12, 0, 1], //15 - [0, 14, 0, 1], //16 - [0, 18, 0, 1], //17 - [0, 6, 1, 1], //18 - [0, 8, 1, 1], //19 - [0, 10, 1, 1], //20 - [0, 12, 1, 1], //21 - [0, 14, 1, 1], //22 - [0, 18, 1, 1], //23 - [1, 6, 0, 0], //24 - [1, 8, 0, 0], //25 - [1, 10, 0, 0], //26 - [1, 12, 0, 0], //27 - [1, 14, 0, 0], //28 - [1, 18, 0, 0], //29 - [1, 6, 1, 0], //30 - [1, 8, 1, 0], //31 - [1, 10, 1, 0], //32 - [1, 12, 1, 0], //33 - [1, 14, 1, 0], //34 - [1, 18, 1, 0], //35 - [1, 6, 0, 1], //36 - [1, 8, 0, 1], //37 - [1, 10, 0, 1], //38 - [1, 12, 0, 1], //39 - [1, 14, 0, 1], //40 - [1, 18, 0, 1], //41 - [2, 8, 0, 0], //42 - [2, 10, 0, 0], //43 - [2, 12, 0, 0], //44 - [2, 14, 0, 0], //45 - [2, 18, 0, 0], //46 - [3, 8, 0, 0], //47 - [3, 10, 0, 0], //48 - [3, 12, 0, 0], //49 - [4, 12, 0, 0], //50 - [0, 9, 0, 0], //51 - [0, 9, 1, 0], //52 - [0, 9, 0, 1], //53 - [0, 9, 1, 1], //54 - [1, 9, 0, 0], //55 - [1, 9, 1, 0], //56 - [1, 9, 1, 1], //57 - [4, 10, 0, 0], //58 - [5, 10, 0, 0], //59 - [5, 12, 0, 0] //60 - ]; - - - // constructor - let cls = function (fontObj) { - // private - let _id = _nextId++; + PDFUnit = require("./pdfunit"), + {kFontFaces, kFontStyles} = require("./pdfconst"); - // public (every instance will have their own copy of these methods, needs to be lightweight) - this.get_id = function() { return _id; }; - this.get_name = function() { return _name + _id; }; +const _boldSubNames = ["bd", "bold", "demi", "black"]; +const _stdFonts = ["arial", "helvetica", "sans-serif ", "courier ","monospace ", "ocr "]; +const DISTANCE_DELTA = 0.1; - this.fontObj = fontObj; - let typeName = (fontObj.name || fontObj.fallbackName); +class PDFFont { + #initTypeName() { + let typeName = (this.fontObj.name || this.fontObj.fallbackName); if (!typeName) { - typeName = _kFontFaces[0]; //default font family name + typeName = kFontFaces[0]; //default font family name } typeName = typeName.toLowerCase(); - this.typeName = typeName; + return typeName; + } + + #initSubType() { + let subType = this.typeName; + let bold = false; - let subType = typeName; - let nameArray = typeName.split('+'); + let nameArray = this.typeName.split('+'); if (_.isArray(nameArray) && nameArray.length > 1) { subType = nameArray[1].split("-"); if (_.isArray(subType) && subType.length > 1) { - if (!this.bold) { - let subName = subType[1].toLowerCase(); - this.bold = _boldSubNames.indexOf(subName) >= 0; - } + let subName = subType[1].toLowerCase(); + bold = _boldSubNames.indexOf(subName) >= 0; subType = subType[0]; } } - this.subType = subType; + return {subType, bold}; + } - this.isSymbol = typeName.indexOf("symbol") > 0 || _kFontFaces[2].indexOf(this.subType) >= 0; + #initSymbol() { + let isSymbol = this.typeName.indexOf("symbol") > 0 || kFontFaces[2].indexOf(this.subType) >= 0; if (this.fontObj.isSymbolicFont) { - let mFonts = _stdFonts.filter( (oneName) => (typeName.indexOf(oneName) >= 0) ); + let mFonts = _stdFonts.filter( (oneName) => (this.typeName.indexOf(oneName) >= 0) ); if (mFonts.length > 0) { this.fontObj.isSymbolicFont = false; //lots of Arial-based font is detected as symbol in VA forms (301, 76-c, etc.) reset the flag for now @@ -129,32 +44,45 @@ let PDFFont = (function PFPFontClosure() { } } else { - if (this.isSymbol) { + if (isSymbol) { this.fontObj.isSymbolicFont = true; //text pdf: va_ind_760c nodeUtil.p2jinfo("Reset: isSymbolicFont (true) for " + this.fontObj.name); } - } + } + return isSymbol; + } + + #initSpaceWidth() { + let spaceWidth = this.fontObj.spaceWidth; + if (!spaceWidth) { + var spaceId = Array.isArray(this.fontObj.toFontChar) ? this.fontObj.toFontChar.indexOf(32) : -1; + spaceWidth = (spaceId >= 0 && Array.isArray(this.fontObj.widths)) ? this.fontObj.widths[spaceId] : 250; + } + spaceWidth = PDFUnit.toFormX(spaceWidth) / 32; + return spaceWidth; + } - this.fontSize = 1; + // constructor + constructor(fontObj) { + this.fontObj = fontObj; - this.faceIdx = 0; - this.bold = false; - this.italic = false; + this.typeName = this.#initTypeName(); - this.fontStyleId = -1; + const {subType, bold} = this.#initSubType(); + this.subType = subType; + this.bold = bold; - this.spaceWidth = fontObj.spaceWidth; - if (!this.spaceWidth) { - var spaceId = Array.isArray(fontObj.toFontChar) ? fontObj.toFontChar.indexOf(32) : -1; - this.spaceWidth = (spaceId >= 0 && Array.isArray(fontObj.widths)) ? fontObj.widths[spaceId] : 250; - } - this.spaceWidth = PDFUnit.toFormX(this.spaceWidth) / 32; - }; + this.isSymbol = this.#initSymbol(); + this.spaceWidth = this.#initSpaceWidth(); - // public static - /** sort text blocks by y then x */ - const DISTANCE_DELTA = 0.1; - cls.compareBlockPos = function(t1, t2) { + this.fontSize = 1; + this.faceIdx = 0; + this.italic = false; + this.fontStyleId = -1; + } + + /** sort text blocks by y then x */ + static compareBlockPos(t1, t2) { if (t1.y < t2.y - DISTANCE_DELTA) { return -1; } @@ -167,9 +95,9 @@ let PDFFont = (function PFPFontClosure() { } } return 1; - }; + } - cls.haveSameStyle = function(t1, t2) { + static haveSameStyle(t1, t2) { let retVal = t1.R[0].S === t2.R[0].S; if (retVal && t1.R[0].S < 0) { for (let i = 0; i < t1.R[0].TS.length; i++) { @@ -184,31 +112,31 @@ let PDFFont = (function PFPFontClosure() { } return retVal; - }; + } - cls.getSpaceThreshHold = function(t1) { + static getSpaceThreshHold(t1) { return (PDFFont.getFontSize(t1)/12) * t1.sw; - }; + } - cls.areAdjacentBlocks = function(t1, t2) { - let isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; - let isDistanceSmallerThanASpace = ((t2.x - t1.x - t1.w) < cls.getSpaceThreshHold(t1)); + static areAdjacentBlocks(t1, t2) { + const isInSameLine = Math.abs(t1.y - t2.y) <= DISTANCE_DELTA; + const isDistanceSmallerThanASpace = ((t2.x - t1.x - t1.w) < PDFFont.getSpaceThreshHold(t1)); return isInSameLine && isDistanceSmallerThanASpace; - }; + } - cls.getFontSize = function(textBlock) { - let sId = textBlock.R[0].S; - return (sId < 0) ? textBlock.R[0].TS[1] : _kFontStyles[sId][1]; - }; + static getFontSize(textBlock) { + const sId = textBlock.R[0].S; + return (sId < 0) ? textBlock.R[0].TS[1] : kFontStyles[sId][1]; + } - cls.areDuplicateBlocks = function(t1, t2) { - return t1.x == t2.x && t1.y == t2.y && t1.R[0].T == t2.R[0].T && cls.haveSameStyle(t1, t2); - }; + static areDuplicateBlocks(t1, t2) { + return t1.x == t2.x && t1.y == t2.y && t1.R[0].T == t2.R[0].T && PDFFont.haveSameStyle(t1, t2); + } // private - let _setFaceIndex = function() { - let fontObj = this.fontObj; + #setFaceIndex() { + const fontObj = this.fontObj; this.bold = fontObj.bold; if (!this.bold) { @@ -227,18 +155,18 @@ let PDFFont = (function PFPFontClosure() { let typeName = this.subType; if (fontObj.isSerifFont) { - if (_kFontFaces[1].indexOf(typeName) >= 0) + if (kFontFaces[1].indexOf(typeName) >= 0) this.faceIdx = 1; } - else if (_kFontFaces[2].indexOf(this.subType) >= 0) { + else if (kFontFaces[2].indexOf(this.subType) >= 0) { this.faceIdx = 2; } else if (fontObj.isMonospace) { this.faceIdx = 3; - if (_kFontFaces[4].indexOf(typeName) >= 0) + if (kFontFaces[4].indexOf(typeName) >= 0) this.faceIdx = 4; - else if (_kFontFaces[5].indexOf(typeName) >= 0) + else if (kFontFaces[5].indexOf(typeName) >= 0) this.faceIdx = 5; } else if (fontObj.isSymbolicFont) { @@ -251,10 +179,10 @@ let PDFFont = (function PFPFontClosure() { } // nodeUtil.p2jinfo"typeName = " + typeName + " => faceIdx = " + this.faceIdx); - }; + } - let _getFontStyleIndex = function(fontSize) { - _setFaceIndex.call(this); + #getFontStyleIndex(fontSize) { + this.#setFaceIndex(); //MQZ Feb.28.2013. Adjust bold text fontsize to work around word spacing issue this.fontSize = (this.bold && (fontSize > 12)) ? fontSize + 1 : fontSize; @@ -262,7 +190,7 @@ let PDFFont = (function PFPFontClosure() { let fsa = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; let retVal = -1; - _kFontStyles.forEach(function(element, index, list){ + kFontStyles.forEach(function(element, index, list){ if (retVal === -1) { if (element[0] === fsa[0] && element[1] === fsa[1] && element[2] === fsa[2] && element[3] === fsa[3]) { @@ -272,9 +200,9 @@ let PDFFont = (function PFPFontClosure() { }); return retVal; - }; + } - let _processSymbolicFont = function(str) { + #processSymbolicFont(str) { let retVal = str; if (!str || str.length !== 1) @@ -306,9 +234,9 @@ let PDFFont = (function PFPFontClosure() { } return retVal; - }; + } - let _textRotationAngle = function (matrix2D) { + #textRotationAngle(matrix2D) { let retVal = 0; if (matrix2D[0][0] === 0 && matrix2D[1][1] === 0) { if (matrix2D[0][1] != 0 && matrix2D[1][0] != 0) { @@ -324,15 +252,15 @@ let PDFFont = (function PFPFontClosure() { } } return retVal; - }; + } - // public (every instance will share the same method, but has no access to private fields defined in constructor) - cls.prototype.processText = function (p, str, maxWidth, color, fontSize, targetData, matrix2D) { - let text = _processSymbolicFont.call(this, str); + // public instance methods + processText(p, str, maxWidth, color, fontSize, targetData, matrix2D) { + let text = this.#processSymbolicFont(str); if (!text) { return; } - this.fontStyleId = _getFontStyleIndex.call(this, fontSize); + this.fontStyleId = this.#getFontStyleIndex(fontSize); // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; @@ -358,16 +286,16 @@ let PDFFont = (function PFPFontClosure() { oneText = _.extend({oc: color}, oneText); } - let rAngle = _textRotationAngle.call(this, matrix2D); + let rAngle = this.#textRotationAngle(matrix2D); if (rAngle != 0) { nodeUtil.p2jinfo(str + ": rotated " + rAngle + " degree."); _.extend(oneText.R[0], {RA: rAngle}); } targetData.Texts.push(oneText); - }; + } - cls.prototype.flash_encode = function(str) { + flash_encode(str) { let retVal = encodeURIComponent(str); retVal = retVal.replace("%C2%96", "-"); retVal = retVal.replace("%C2%91", "%27"); @@ -380,14 +308,12 @@ let PDFFont = (function PFPFontClosure() { retVal = retVal.replace("%C2%9B", "%C2%BB"); return retVal; - }; + } - cls.prototype.clean = function() { + clean() { this.fontObj = null; delete this.fontObj; - }; - - return cls; -})(); + } +} module.exports = PDFFont; diff --git a/lib/pdfline.js b/lib/pdfline.js index 4d610810..7278d7e2 100644 --- a/lib/pdfline.js +++ b/lib/pdfline.js @@ -1,5 +1,5 @@ const nodeUtil = require("util"), - PDFUnit = require('./pdfunit.js'); + PDFUnit = require("./pdfunit"); class PDFLine { constructor(x1, y1, x2, y2, lineWidth, color, dashed) { diff --git a/lib/pdfunit.js b/lib/pdfunit.js index 9bc56432..2b198f60 100644 --- a/lib/pdfunit.js +++ b/lib/pdfunit.js @@ -1,52 +1,13 @@ +const {kColors} = require("./pdfconst"); - const dpi = 96.0; - const gridXPerInch = 4.0; - const gridYPerInch = 4.0; +const dpi = 96.0; +const gridXPerInch = 4.0; +const gridYPerInch = 4.0; - const _pixelXPerGrid = dpi/gridXPerInch; - const _pixelYPerGrid = dpi/gridYPerInch; - const _pixelPerPoint = dpi/72; +const _pixelXPerGrid = dpi/gridXPerInch; +const _pixelYPerGrid = dpi/gridYPerInch; +const _pixelPerPoint = dpi/72; - const kColors = [ - '#000000', // 0 - '#ffffff', // 1 - '#4c4c4c', // 2 - '#808080', // 3 - '#999999', // 4 - '#c0c0c0', // 5 - '#cccccc', // 6 - '#e5e5e5', // 7 - '#f2f2f2', // 8 - '#008000', // 9 - '#00ff00', // 10 - '#bfffa0', // 11 - '#ffd629', // 12 - '#ff99cc', // 13 - '#004080', // 14 - '#9fc0e1', // 15 - '#5580ff', // 16 - '#a9c9fa', // 17 - '#ff0080', // 18 - '#800080', // 19 - '#ffbfff', // 20 - '#e45b21', // 21 - '#ffbfaa', // 22 - '#008080', // 23 - '#ff0000', // 24 - '#fdc59f', // 25 - '#808000', // 26 - '#bfbf00', // 27 - '#824100', // 28 - '#007256', // 29 - '#008000', // 30 - '#000080', // Last + 1 - '#008080', // Last + 2 - '#800080', // Last + 3 - '#ff0000', // Last + 4 - '#0000ff', // Last + 5 - '#008000' // Last + 6 - ]; - class PDFUnit { static toFixedFloat(fNum) { return parseFloat(fNum.toFixed(3)); diff --git a/lib/ptixmlinject.js b/lib/ptixmlinject.js index 6ef87fdf..3e49a289 100644 --- a/lib/ptixmlinject.js +++ b/lib/ptixmlinject.js @@ -1,39 +1,26 @@ -'use strict'; +const fs = require("fs"), + DOMParser = require("@xmldom/xmldom").DOMParser; -var nodeUtil = require("util"), -nodeEvents = require("events"), -fs = require('fs'), -_ = require('lodash'), -DOMParser = require('@xmldom/xmldom').DOMParser, -PDFCanvas = require('./pdfcanvas.js'), -PDFUnit = require('./pdfunit.js'), -PDFField = require('./pdffield.js'), -PDFAnno = require('./pdfanno.js'), -Image = require('./pdfimage.js'), -pkInfo = require('../package.json'); - -var xmlData; - -var PTIXmlParser = (function () { - 'use strict'; - - var ptiPageArray = []; +class PTIXmlParser { + xmlData = null; + ptiPageArray = []; // constructor - var cls = function () { - }; + constructor() { + this.xmlData = null; + this.ptiPageArray = []; + } - cls.prototype.parseXml = function (filePath,callback) { - - fs.readFile(filePath, 'utf8', function (err,data) { + parseXml(filePath, callback) { + fs.readFile(filePath, 'utf8', (err, data) => { if (err) { callback(err); } else { - xmlData = data; + this.xmlData = data; var parser = new DOMParser(); - var dom = parser.parseFromString(xmlData); + var dom = parser.parseFromString(this.xmlData); var root = dom.documentElement; var xmlFields = root.getElementsByTagName("field"); @@ -73,19 +60,18 @@ var PTIXmlParser = (function () { fields.push(item); - ptiPageArray[parseInt(page)]=fields; + this.ptiPageArray[parseInt(page)]=fields; } } callback(); }); - }; + } - cls.prototype.getFields = function(pageNum) { - return ptiPageArray[pageNum]; - }; - return cls; -})(); + getFields(pageNum) { + return this.ptiPageArray[pageNum]; + } +} module.exports = PTIXmlParser; diff --git a/pdfparser.js b/pdfparser.js index c10a4335..30fc00cd 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -3,8 +3,9 @@ const fs = require("fs"), nodeUtil = require("util"), _ = require("lodash"), async = require("async"), - PDFJS = require("./lib/pdf.js"), - {ParserStream} = require("./lib/parserstream"); + PDFJS = require("./lib/pdf"), + {ParserStream} = require("./lib/parserstream"), + {kColors, kFontFaces, kFontStyles} = require("./lib/pdfconst"); class PDFParser extends EventEmitter { // inherit from event emitter @@ -54,6 +55,11 @@ class PDFParser extends EventEmitter { // inherit from event emitter get data() { return this.#data; } get binBufferKey() { return this.#pdfFilePath + this.#pdfFileMTime; } + + get colorDict() {return kColors}; + get fontFaceDict() { return kFontFaces; } + get fontStyleDict() { return kFontStyles; } + //private methods, needs to invoked by [funcName].call(this, ...) #onPDFJSParseDataReady(data) { if (!data) { //v1.1.2: data===null means end of parsed data diff --git a/readme.md b/readme.md index 93db278c..310258fc 100644 --- a/readme.md +++ b/readme.md @@ -255,8 +255,8 @@ This dictionary data contract design will allow the output just reference a dict It does require the client of the payload to have the same dictionary definition to make sense out of it when render the parser output on to screen. * Color Dictionary - - var kColors = [ +````javascript + const kColors = [ '#000000', // 0 '#ffffff', // 1 '#4c4c4c', // 2 @@ -296,11 +296,11 @@ It does require the client of the payload to have the same dictionary definition '#008000', // Last + 6 '#000000' // Last + 7 ]; - +```` * Style Dictionary: - - var _kFontFaces = [ +````javascript + const kFontFaces = [ "QuickType,Arial,Helvetica,sans-serif", // 00 - QuickType - sans-serif variable font "QuickType Condensed,Arial Narrow,Arial,Helvetica,sans-serif", // 01 - QuickType Condensed - thin sans-serif variable font "QuickTypePi", // 02 - QuickType Pi @@ -309,7 +309,7 @@ It does require the client of the payload to have the same dictionary definition "OCR B MT,Courier New,Courier,monospace" // 05 - OCR-B MT - OCR readable san-serif fixed font ]; - var _kFontStyles = [ + const kFontStyles = [ // Face Size Bold Italic StyleID(Comment) // ----- ---- ---- ----- ----------------- [0, 6, 0, 0], //00 @@ -374,7 +374,17 @@ It does require the client of the payload to have the same dictionary definition [5, 10, 0, 0], //59 [5, 12, 0, 0] //60 ]; - +```` +v1.3.0: to access these dictionary programactically, do either +````javascript + const {kColors, kFontFaces, kFontStyles} = require("./lib/pdfconst"); +```` +or via getters of your instanace of PDFParser: +````javascript + console.dir(this.pdfParser.colorDict); + console.dir(this.pdfParser.fontFaceDict); + console.dir(this.pdfParser.fontStyleDict); +```` ## Interactive Forms Elements @@ -875,6 +885,7 @@ In order to support this auto merging capability, text block objects have an add * Greater performance, near ~20% improvements with PDFs under _test_ directory * Better exception handling, fixes a few uncaught exception errors * More test coverage, 4 more test scripts added, see _package.json_ for details + * Easier access to dictionaries, including color, font face and font style, see Dictionary reference section for details * Refactor to ES6 class for major entry modules * Upgrade to Node v14.18.0 LTSs From ceb73d2757feccd06df1e2916ab1f1e90ead37f8 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 16 Oct 2021 14:14:43 -0700 Subject: [PATCH 57/66] add testingWithTable.pdf for issue 242 --- base/display/canvas.js | 4 ++++ package.json | 5 +++-- readme.md | 2 +- test/pdf/misc/i242_testingWithTable.pdf | Bin 0 -> 48727 bytes 4 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 test/pdf/misc/i242_testingWithTable.pdf diff --git a/base/display/canvas.js b/base/display/canvas.js index f0cf817b..09d31890 100755 --- a/base/display/canvas.js +++ b/base/display/canvas.js @@ -1621,6 +1621,10 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { }, endGroup: function CanvasGraphics_endGroup(group) { + //MQZ. make sure endGroup is always invoked after beginGroup + if (this.groupLevel == 0) + this.beginGroup(group); + this.groupLevel--; var groupCtx = this.ctx; this.ctx = this.groupStack.pop(); diff --git a/package.json b/package.json index 8572f532..c2490b1a 100644 --- a/package.json +++ b/package.json @@ -28,13 +28,14 @@ "main": "./pdfparser.js", "scripts": { "test": "cd ./test && sh p2j.forms.sh", - "test-misc": "cd ./test && sh p2j.one.sh misc . \"Expected: 4 success, 2 exception with stack trace\" ", + "test-misc": "cd ./test && sh p2j.one.sh misc . \"Expected: 5 success, 2 exception with stack trace\" ", "parse": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form", "parse-s": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s", "parse-t": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", "parse-c": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", "parse-m": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", - "parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r" + "parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r", + "parse-242": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc" }, "engines": { "node": ">=14.18.0", diff --git a/readme.md b/readme.md index 310258fc..bae722f4 100644 --- a/readme.md +++ b/readme.md @@ -31,7 +31,7 @@ After install, run command line: > npm run test-misc -It'll scan and parse 6 PDF files under *_./test/pdf/misc_*, also runs with *_-s -t -c -m_* command line options, generates primary output JSON, additional text content JSON, form fields JSON and merged text JSON file for 4 PDF fields, while catches exceptions with stack trace, one for _unsupported encryption algorithm_, another one for _Invalid XRef stream header_. +It'll scan and parse 7 PDF files under *_./test/pdf/misc_*, also runs with *_-s -t -c -m_* command line options, generates primary output JSON, additional text content JSON, form fields JSON and merged text JSON file for 5 PDF fields, while catches exceptions with stack trace, one for _unsupported encryption algorithm_, another one for _Invalid XRef stream header_. ### Test Streams After install, run command line: diff --git a/test/pdf/misc/i242_testingWithTable.pdf b/test/pdf/misc/i242_testingWithTable.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b41278bf84f4b861b050b2a86488d55885e3fe70 GIT binary patch literal 48727 zcmcG!1yE(pvMz`;&^R>iE*sytyE_|qcXxMdoCX@V#@(TDXx!bkaks{O=<}Z&_rCYe zM9f6Y?5J3)Rz+52=9gdQl3f&vqT=*G1`b4u?VZV4L^vQ5keG?s-pC4(mzPo2($3V- z$CT3;hA_j7? z=<@R;n%bHCU5EI8`~B6y@!vW`O^xkMOcf2CY@I(eirCw@+S)l2vob0=nVMJ{yVyGs z1DW~x8O1GaTuhx9#cd2-O#hj}C?%@R%OWZw#KHmuvM>vaak8?qbBeNXaImogxrEt; zS%t-b{JcO8pa?6gFsm2`Gba-lD;F1=kcbegI46^kI4iRdD+j+Wqm-SAsfYGo&w;Fe zugvT}Tx9w%5yU8KYG>|ZLCgmHYmeK%{A6JS+1tB(NHG6PLiqz8Vjz%>QN`22lu-j@ zYR0Ip4J2kJ27X}R?BZlnd;Y(XbapXxa`A97HA94BLWHBB5K|CGg!?aa zKVSv2{x_^5#9Une5ws6DKVtchTRU@r(`f%mJfpb?S9bF#mUw5gB5b7E`}zCE`}e`_*XC#4b4rRKRo{@G9N<5E-Fr@rvC)& zFX=%4gh}LMj*Y!Jqnf3uyQ$MhXw6KWOzn(6<^q{G|BC%LeLu+lG4OArMC|QcOzm9$ zin0A`H4!I!2Vr{;?Z4{Tfy~4lT+F(R5>EE64u8q4%qVK=W@&5+k`QK8W>j%9v~zYa z{Hw_GA2a?X_%An1EDisUWj=D^L)U+K12T2CcXj$O@nc~z4;Kk#myb;N=u0Sbd|303 z#}dk%A5{Do6n|6Z!`Kf2@ef)9|N8LZ)CY_0jg?JZv>89ro>9fr!-Y}G_5(BFe@-I* zoTNUO`$6)*3W%8)mH!3fKgKwJT&oxwIsf(MFWvv*{Xe1l9|Zgl2`g^t^$}YD5lZiwN+_9{D1Obq_|zxFND7kw#`AL#q0X@6gJ zvh|qAboF(o5yg1ED{g!1=#M{s#5A%6)AMTzaV9f5ETJe1C07TDm-Y~gTF4^}s+~CE zt-*l&b=z}u3D4a%CGFxnY-Z8rq=hFLvL}vQE~e7Tl3F(5RB3hH6GrMJ31*1 z27cYefZ#=g%T=i(#}|@rFtxY#oBx=Pe@Xst>i>s${=14W1OG<>@yM)<=YfX+qJ%sP zzV#+W7%*h;Fk+#=Pr|?j8FdTO(cLsC`n>o%@QF5nrUZK?m`{B)$(0*chYRp;4R{D# zyeTF?8pan?@xfvf)yPOGG3Zgyu^^Xx^x*(O6KaVyX`$14Vh?j0weD2|tq>1`O^0(O zT~Ep~EevF~B%{(i`FVMm#>KSOKmGNbj_#EiX=N^nnJX6CMe4a-_d+?)`B7iK>{4^syfc)%9V31Eo{W4@w_N$i&RT`oAD`>I>tcqKbRRbuTYNnV2R;>6$PWA|;F?dH6|`IUWKW5QsoI z7lIA}{kh{aE_^O3`r-zPE{3im6{K`cUX*Talqr4l@iEO3rda&gd-n4Fw0=IS2;+)M zzU}0B1)IrXX1BK9X-_{^(D%4v7)V3xrSa_wE1{z8|&(J|WK3g|wE71>zxNMWJW#PYrL;JvM=bE4>lV|DS? zRF#VN;&aB6%rW3v(@g?TTcc?UI@#7M%k%`A=sITvLB>zBqtR=ki3@6P(>-hcTad$Sb=HA@dJKvwZI`ZtD?fH*4u#u>3 z_vd-L_}i7p zSpKIa>ODL58TzX6@Hkw<7Q8`FGsOtx2HoAj?6GjDJ<8 z6XonH?rX${5g!s_bVmQO=dW-l{U2wu|9$jd0!tTz5AS34!xH~3{XZstbO&WEjUWST zh!48%qP^G=v1ui2~M^dqQVBNp?HsP=P z`gRnJ^A!4iu6n;CPU?3;aZfcU=?^KPDp9t0sgnJx?b6ZbOU}sR72?{cb6TK(@%M-G z-L9h0hA(fc+HgFLSH_c^doCC1$R+x3QBCpD89 z=gW!*>lM>vx-#A+skA_HsexE^eWLK|)3&erQY-(cRHW zWVI0Y-TRbcvYzZhgl#>3#{*YuKB@OW8B$W|2E=W41qb6j@k+Frqte^dpIaehj zQ>6GeL~o-;5CAK+>s7O$8X ztNk7ZxISt~H8=0V_CmHNwZ3ks(Abw*MK#o+YgQ=yY(o_=q@NnWnrJi)G+S2S5{3b) z^C8wyeK&c7 zl&7Nizm%t*BGova!zpAH&jj=B;;EAiQ0{AKZz`ej=8sUST9gxPQ0!3^Pz&UMz>_0v z(iGupVPx9nTPn1?nv>5ZV>tnBDrVlIU0PaJ+G*+ZO?CC(t%dbHq)ZJee$Jq!Zrz$h z`rRFFSQ!D+_EbYT%PE@Ac`4=ze~#^GCve074P1Z!e=pw0vLkU=wNFw%PVOOWH|G913f!G)%2A|IoO# zUO`N?M_$4eOE!xMH?R`ICaP_;%noYQCzsFSYqb`|u#B50@K0E+p#NDu6yjP5_W7D% zwc?VrlUV3_nKra2Ti6yNlM!!>#)Zrz)|W||Sn)jq_D<}?mr_58QM{Hq6|@D{a}72k z?$|^hrrlhRVH>-+X4~GvHgd35+Q`bUQ-?zlTHVqco*iX0$QE)i-TgW!{yRsdmOku< z?-?3J7;G0oNNCm|q?0UM`#ePo zaZekq;wZNU=BIUVN^T;<)yB%^Cd%o%#XX>*5V+=w_$7F$tLrExX0_XV92WBH7vUm> zH!qS(tMsMi`bZo5{Ek z(_kTPYg;&1H9Oxd4~$43weLYUo9A2gRDsCSMT&X`yj%vPMBOSS=LP`lOey3c@F zEj>A;W*V(-T;(__iM&B9-2?_ffH6oltE&x={xQ1K&~|DLf3*V_vDmQS5p~9zw!MW; zZzf}~A2A#jYZ3t#^y^e7l7tD}anY!H%ncg1mcF+=Qzs{8Gpk_FH~8LB*7rW zDp8v}e)>MHdq0AW8>waGT(7ZeVZ@y^41 z1|OT$`ojL%FH;k?uyFa0`1oxgp$EG^US%hupLH2Y54kJtU|Y+_V=cxtPk(7v1#4%> z$l_OZ;%w&^iG4a$E^kkT^_J0z@-~34`d#YbeduIGskWs`u_0+^TUwqaY4}B2tn3S- z(ytO5rTEx@!*P836BXk-VB8(wq)e!sR{7!bLu5CnrYMhs@Yp?cQh|?2Lp* z;F)4$w{5p|Kz3GWGABzuzd6#LquQrAh^PQsaJ~`#p!U@%Qgge#o-?IkA9Bna2*5em1aNCLGYSqiWz`XvFf}7hNMS&j6i@{&2OSE8@NL@# z04?K~jvicqO)b`m4q0%>C?)`SlAe}`+yLDqS}X`139w!GHL-^wR1U5j zy%wZV@by~{omd095-7ERN!l=8m@!lyP8CiJjsXfDo-iR#H!>geLfwxFK$5u%Mh`D| z(2Kk%;H2&sD0ondv@dwjimWbp07XKAPE)qw0sJZ37a$+X{$kLJT%Byxd=e&l_Dhk9g}Fzfro0OdVz;#WJ-aDQY0U! zow)xR#7os*58@^3p9S$!^e2LNN&4*oZ*=`TAYRIT1_0srZBzhZ@-_*8Fliec;3hpy z*^dM`Pub=NG|AM;MdqScf<6^At4DelG;2pzf^5ERD}dH0`m;bb$=gT(yOeEqz#Dac zC1{PbpAuk~Y?P1AA?5gSkaX-oUzBpJL|>G2Y(!U=ax6qwm%M@jFr*j-qo+tYmZF~& zYEbo4fEpwnYtgZ!YSkjyKo!ZtVSGCO@0(8`I`iAs|G({Yi}|0nvc>#=(?VB6ER-1# z%n%$&%#h5;115^+Jby(C<(f%n$+*ZfkwG^^f@mWqD7AzdcvJb&8|YH?K5I!ggjfLg z==0`;^PqM!iDZeOtw_@)SW_a$NUX@YX;ak@(&bpAN&+n?m3)~hKdVTTYUSbP;fT(Q zs*27=6e)F-CZ!9rrdfPZqELdGP7_6ocoE$bMf*B{_8m2b3|EFV%z||qM>PMdICNHF zE+`LG=sxOjZ^4;YqK=|&BlW17!#FY~B#}ROS(I3n z(D%~5MvI0SjBv*Dr-GnoP)m1JsQV*Q#d$^NsxY=B0Jmh*sCm6f_mG81sJ;p}1k=9^ zoVX5&e(l2M+5>J8rx!)_l{Qk6xCI+>4&g_hGY>CzU!o6{^SV%dg~j-b<|LAyXbbtt zS0rx0_c|1A2)Q;Xp5Ui>MD3No{iMF7oZc{)Nz3I3l0p-zM(YrtP>;|f@k%?Sp!q{O zP3Y7qc*8sGEIJALX8tvs@<3Ts;R~NoL&o8x${$B*KJkXAL!}Y#kV8W!e$gA^X-H9? zh?SCEB1ZAJabTfZM-=E;e2C>!IIe$V)7SJ004wTh}W_E4+L0kN=(vNiHhdZaB#>k)E# zFX?CCA#!Xuo0RsBE6~r zMinJXYCTI5`HwQ~zbOetz(3WTE5d7NGey#LpPIV^F?E=m4|9k>!yIX0G8NbykOH#$ zH0|J&{E6ToATl_%F*h%FM@P|cY8n9-Gny70k>U?31_~nVz}D-8Lh;=1+>S3UxYN10 zx;8O^qK`t)4pRZS9T0N@7CE1eB|Lp2o<5ze3w4y5ROjjI_QKDNv5INBI(ihB+$OG8}>!>SW6MO@D17f2gb8p4^XwM7s zrL}PSHl1SdmgW-fW)!pfgyOfD0rDUHfD6x2{6*)fNnruV*9Oz36Tm3lVq!A%B3g1< z)q2UZdBZ*DCH8Olvz~+eZnExP;I%GtKb->~cKy_?;27zmd@kY=`1@tK72>rc-@>vGTU)=`i6i#Y*p0X%`@-2~ld`-D}i-Tt5K#Im%zGsetY z?7}ua`=U9J-5^?=cr~~Zth8cwS_hN|mIu%V(gq0RZtPjui`-x@nob+*rif3vRK|6# zH!^tDTQ{bkwJMIg>vmu39z18f9@tvm<(%m637C)4ba`TMR6AASHXfg)f!*r^&bmhUeiZ;P8)4F1J##| zR*mkB*v!892l{U%L-{ostOe~u1wysXLcsHD*qkL5ylm2q^fUi z2XvAKJwp^Jzc41-w0WRJbhYzX^Pex}%cg2CQ2VuJ{pJkRbXRHKH*}B1j&^7}1ytTx za=n8Mp;7HC9H8$$wUS<;>OPD*Aq@{&6@_W{N#gy2vsgWtQU`g;i%5MG!fc=QmUpRvLCa`A9`is7iiV6e;gF0b5vQ(uxf{HrYC zL;SlcfptQ4f_Fl9LUevY0(%Z*444e;g3JNWf$)dq2j_?8hj@eY`Fsk=54Hib0l5M7 z2GI`Lj@S;>4tol<0j39|2dM|82d)RL2ch@r4vZ0O5^@rJ5@Hfe0a5{60YU++El>b5 z4l)kBEkGhrA|O04JOCyTCLl`?uLiQ2s2O<)$p$h=kQN2(9O@kW9J&cYJ3s*39o!uv zBakOxHE=aR0OBXaPq2)DjKG*~x$c;5#O~W}*Y3t{w(j|E)$Z@z7~Mnxqk)Lsrrm|z zUIqjXuu~8^2s)oCpccT&HP5EZVy`D1-?%0O1|JVN-|-J}c3zw_{r~R=A0fcp!3GJs z9bp{S!14b-?f#eU`boz-t_l9Z0Q|C zFJ8Pe{7uhBZ=E%wTZ3^lc#VJb{&+oe2eW-z+Tm3v#;A5Cx(>1V0ZH|U)S!=A*Z31> zTcbg3F|Qrs;^&LSA(LKd1Uo?oboTIw&B)m{6c zwM1b*Y#*AQVs3aN zXI8yF!vVtnS8=N954%uG*ZQKkd;3%BHYu6+Z|ehggIYEVNhW{Rz&p~`BkEtNnnG>+ z7lFR*q@CgGm6bXHjdjGU15L&h7v?+~8T$gN8w!s`qBg;EjPE=o zWQT8gv!1S3K2pDOKC{f^p%-SGC~Sp!CG!ntn={r?#joW1^2f}{oMPP3td7zvGS*LC zdM0%gynpSl`|)t~4nB4rV}qM2v9LvT z*D@&WgcY^mJA`lJeaPbq*E~A?^A_>!XIQ6+*IGLKyt>0^>DWghnzlh`qpVrczCx)d zmXl*hzjgpIb-K4m@+NG2#Pg~}#)ML1FQw#2)r?dWbMoc8=R zeII*1%=5JI)*R^h?77kQys>Kj>=oy0pqJxYeReWGd*oc8<;I$4B`_H~;8$kon{UO4 za_g%U1I6Fp+=&`Pj^&|z?{UBDdG9eD{nNZ!+HSJJ+M$m|Jiqt%QBwKMN#<;qLe7Rr zlAo_LZ`dDXCN6IWyH+JOT$PUDCDZO2`Fz|co(|iFVk+{Y!8BRNqX^g|x1DMCo7(WM=|Muk z92;Q^h%J-7kXWXJeWlm%@&C3-#U)q2PPm=818mf zb3J`jn|-B({Aa_%lj3?!?NyYn+xi*aLoeGs=Pr35$^JSzF3xJF<0sp|Nn*dhsl6yr z_R7vaFjX_1DmatMR`_g76ylw_(qiMG0lXz^OuQp6eT-5$l4^0jw0PCb$)t$fJu+^a zB0q!bHnMKT*yt4C7?DbDojISa&91U;5e+7S`L2kGg#*f}{_;l=Q?@jlI69xC_dD{q#a&| z%*$7;K;)gH$5(7L`(}bSJc#>C)NB;d{QgoywPa$=_`=~1!<$V+YTK8Sy@b65x_Wp^ zT52Sk190Z6;w;=sM$&7jwi51~*qfDb8q4A4&fe^8Mj>%k=+!xpc|0POX6!mJSYRZ7 z>ji9MiPN!+W-sG5ZUD#o(~9&?uwG{bYUA|hRqi0m*N)_(;pYRlHb5j$yEj7Qt~INL z=1K5P{b|C3_71CT!z$u|h_J=R6tdWjq6hi+!YJtMhweSeq}^BR9o?qFl>V<$>zf$| zo?Q$3m$22gKbQQ3L|Un9JCtaHxUTP#R~fFCZcIk<(*YCbdb&ydbAA-Iz;H#(M7=oV zTj4|s>Ecy_95W7rGh&POb>6CpiV}LQZP+`ymB823M6$E@JPpbd z#Cl&D80DC&EAFhwO@;J&{wF%qMFQ0H2Hf_WC!~uk+n6O|!j%RODze*FA<+9^6es=Y#2Y<_J zR%!b-VBEm9vkj^!x;uzzn)C3E9AENln@-M)K@h<53H|1|#A$2d)rPI9lfHavVk!TM z?B<a^mr=8xNsFE(NFeAW98RMWLqvKTFZqu{O#@&c86HxfuAZH4m)}sJYUwCxM#O%S6A3KSeSRQM~Pb@<;%{(r+m`^bP?ps zJsJ0@kh<~J77t_7e4#`>F|t+u<2(Jz3!{AVaEwF$Ry-zj8_Q0(xZbB3CNl(fX@jjQ0;HXcYrlfYW)c6=l#T-@L;Yzdu8Q3;_23o1qGuzC2 zmp-M?ZeZwX>xgmD#mUVqETED3tph&ZmHkSyDO}4(dw%;Uy}NjRbsy>6Xy{pOo$t5k z&-35z;k@c_e1Qnx?7EaHJ!hjZa9u|-_71!A>*n4Gl9hMlk$n-#@6F0gNHdT=PK}{j zg^{ZCo2{L+evku&OAJXEhQQL%;s+a;OgC}c(rIV|!RSK?bEXz!!3>j5Ws(lTpJt{Z zZ?z!-kno|Gt(;@Uhw8LkUl(v#sJYzE-ml4TFuHIiFM6H`z4?RExj)goYTd4=^#)Eq zZIz;J$q0lnHl)3|UOG8jz1?h;^0gJ^vqV~bWlqPRsF;gpkeW`dnp1Iak2sLe~91q$h# zN4rxMHt$4BKT~ikVD)7xSv|W(UcFDL5>S9!@G(QOdyv$_AsxFiYN3Rqh+;_oey znV}>r9N-_%3aBkqfd%>uRQ@;ZAcOpi&uz=9b{E^$ayb%xff>XaBu}w<>@7V9zYd)n zi=7`F%h00+jm9}$v@I8nz2(_e8TUoUx4=uCqwu+B)zSWJN4(FmtsM8f+3p9@KNjwk z>xUIAZO>)be#_z&qB}!Y8OXS}Cilo8y$oq%<6|}KSyp2hPx;+5sgZI#SGmFZUS7o@ zaZ7Mm%63$Cp$3&a<95YHyh>CSIG=pJ?Ozy>47w!{vxgTe?=*4!ZnJZR?>D|`b)Nla z2PvF-yFBP3QJm!X_7fCZ=QJ+hCfUxmQ^4W={ETQBblIrT;8izG%we7PmZFHAqTpM{ zKEGe5ELg&6Yxw4FMSP*BRUbWgAJn-tRoPJ(6Kso*MmH3}vBq#}zIEPUeMDQuyH+gV z^|)DkFBpO?K{(@Z)N3=@CtaJ+53|@pn%qNbm3(A%rmHXTZ3BKRK!9o)-fax&w6_BH zvM}lz@%L#NF=HTit~l7&nWM6m#R`?#Mt6+HHiEmwS#;;w388d|r-0s=(~gFhH1w)y z8uB~6#SMiCbo(@m1Hq@Dz@-4n4lBQ#hupyal@kJ$1AQmv3f5i3aFs?K6_FeUjQh>V^FTu8VI6aOVtt!u z{^|i{o|t}Xaun4HbdwO{0o0Q!Gk7J{gCM8 z-+@>YG{@gD24pu?f`{{)PCctmdV zDL((yv-|D(qOWr1FEKt&ZmploxEapUklzQfp&#_93R|Q`F){K3tx)X7y$&j3u=4W= zDHEg!w_zDrx?2zWuL>fZGY`zWBb~lzC-@5wRDdHz#YyOw$fPL{U@=2WG*HHaDk#3+97*k z7aBu)mLByBX8VLE-+4gc$-~X%k0YD2%gqZO$|iYLZ@1{CI`L$7#q7=)Zy6p!O`_*E z1%%Uc__3-pzNsA(%>mM29Q3;T<{U$uvdmbPs$hZwn5YAKThcgrkz{LOFeWZDWy z<2Q(4IJPj%UI|NaeepDZTvf#by|6|+$`qMZQy?sO9%L%W#Aatonyg)coQ0w5wIm$g1A=rvtM?jys~vv zXN{HX^U>~j%Q*+>YA!Uz&JZxL1^rZ2v}0HuE@@e*FtxWr*eZeqq^>qFH3V?{j7F=v zHj*q>JztX9kVjNxz@`xKUJKv4a7*l~nnPv=HVoo;(cN*_xNX|aopyGEi`@zR-hOQT z@{9g4#-z!roC0mm?s~yXiOgG5JeYiJ4|O;J>5>D6880q9ltI&4>eHgfuQ+h_MR1Y< zeCYJfAPh3zl(-e=v$uF-=osnKISb@bmyEF`*fC44X{(9GoB_hWl4$f@}PB8d3 z0fJelbJvz5?l^%vZHPmmGr<6xw9trc@23K&k{K)ebtE9tH z6^uk=pJ!zK7`yg`R@!E4+ecZ5E)%ML8bl}vu8*uXEbk`DIRel=vyX7#+evi zh+2QGU6g=DO^vpzL#WRlDuLIB-#^DyH4`?6bj3$9==o_jIr4I&vayZyo@?YYk;*xc zS%ugI1MHuCu58D>(ISo>Yk!{l9~nGpibvwYo<#3IZMvzW&n z)V@miZNqzXs5=tpaw7;G{8vc&=G}CRRQ>lxZ|{h%6q^s!2;$;C=a3z44D|pN+Yv-JP)D~ zq3$ZHEvaibaydqv{J-ZE{Q^7k>L?YWk==Dwg4;l08afP7%?hS1Mr2GK6DNsxh`aT{ zdQ!~R7I&0t!JQl$I_pj2|Mdwhw1a%A9FGqN%6#?}!nVjED6w4-(;NA`6?5BqYlG?i zTZ|N~EttnD1rg%2?MkhadB9CV;{ zh)?qn0q>Q)<4974EHzScRo_dUr)6QoFtQRuCx0w1Z4#j^f%j>t_)ae+2S=Zzfo)g0 z&as3Z8Ay3#^kZI0J7cG-DB|2&9iFc7Xv@=G0(eyOo4Q}d&cpVh_GQJwGG7NQiSm=a}v1lzCH`m33)n7F%BUzczOj(h~0 z19T^)@2g~Sr`#=gPv^lbERDl*zkjl{$)C{(vE*6X*kV0!r|OCNGd@E|AQSO&{qfj& z`>~#Z#2y>F#tsw!a&0@PQ{|xr^dwBS4nF+Yf(AyHB!r~BMrH~9I4;0aOWAVIPeX~)?)MtzCY=O6-p?@%Uew;@_DW)UH0NUx6MVtM7?MEU}| z5JfN4;uWAH79}*!p;{X|PKbw$Yg@x$z{2gEAG+cTDM(^4oDnpVP}vYEsA)3^4$pSE zaPX$W!@>E(wv4&BI{C3s-7ZglbHEYJ)}c{B{mtelSQ^U|;01pQz*8qwo|{D|nmu?f zC@Tr}5QsSYWsdTP&X8=JEPHEsp=`bl#Ryp~Y>L$j7NbK-pVTR* z%isWh=kUoRDs9_hOdAi)mSt7xW^`a}d$6Pc*K&UD@YUX)&PzTq;5t_4NX zJY(7{8>S8;;C=j=pUla_4b-V%)O+JA@N?x02OrGI3I2F6#Jw!HXzyyc~T>my{DiVi^v z!E;0pT+i4E9DX~Vzer%O`^~Gy5q4tJiw5}5si<97dbYhz#97KL^H4D*TKm3jaz)tb zAKY6QzU2WojVfyXR{Xdna9WY{#pP^cdXfD*m^$wv0#-l$I8SXey8O%PMva|sk~fb9 zniGplgK8F(Tv{lZ`BsgmA3KNh zPwE?h%{O-o+J&*xeWx3Orn@su6WnGlrirjWer~Fl2obdr91bQDbDRh*kP@k02eZFx z9U`MdLE!{>CUI?vEVxED%`ZG5Zs|3%(D1M$Ax(x*W!j^rFEyNgXT%gja@9lZvI1Xd zFEht<<105A?5U}jrQL$(h;{5!9CMcDoEbNrwb~a~^B5Y3|N7(w_nUNTWqxUiMK{axp(7gUP0<+xM(a?PqWrh^EM7yHzwdlI z>eF*QXjtdi@)w58Dy4FP}M!FiI23QY9b~Z0Wm}IoMRC#?w*5%2dI}S3oPd z>Z*2~!6CLO$o9&aA%2B zhP~z$no~o9x9XOhJ>c`?)t5N!2GqoL0o6Gm?A2`%Zm%bntA+-L4h2E)bGE-{Y4cfN zX;0CwAPeSrg#LmPJRT6FZFY0JTWif6XV zX@#Q_kj$w=|1DsBT#0bl7(OMx7DS37y~ytN^8$OkhCR@@k`zYW^ee+OA%n-x87v^_ zSP%Ji5bTbW5{-~N%7xUw@(+ykVTixkXV`Gm`18pTe(Rega?g+*5EA|FE`IW88gKCz z`0om7NIT&-D;yQw5MQ@=_pKmMzklqVoZ_OwSC7X}d3HrIcbpQ~;wmJ^l>M#%IL7vZ z)yZ+y&|F7t-HH~mN#n1`cIY;V6F7{Wa5ozm4eV>4>Ds`j9>`nQ?3%WzcR4%9Nv&=r z(n~pvTwY;L^ZN7*HOjm%%nC3&E?qNFL+}Ra)yw_v)$@+pJfc8rB;(Cvp6L)A&c(BK=Z8ydsS=%5tdSH=DXO<%PS zs_D!bt_?T9%D2$!;jIX)TZYIL%5z$J-`(ZjwzRcyL;JY$8ow+kOk5+g>_g`8OLV2etos~;Z(W{UD*ETeDFa;aU)C0ltk&*tszBkrgfxSW9;^8 z0zrX1NhGeK{Yx&$muo}9?E5~I>ik&?Q`=@ z?PXL3wK*z+L?TEfq2RZ_b90h-e@Md1KP5ptajPHo9?$cS&U*~qB9_nRYDNv7JFKR} zt|s_a)_Q4AnJ{Y!A1y?>9IN=8-Ua?xcYT*Y42IkS^~K_#{G{DT{DMPk?e49MnM^64 ziK~?=O%c}g{qcvRMBZ*VK2wk{vTy7Ssi{dmE~E{6A1Om#hABS=JdS{FGuaK!6-z1P zhDb7`6`!uIu-lIZw^dJieLDcQKqdOQWGH`rf#_vrqe}Tvxmr@F(YSD;M@NvcCk2dn z4&=vn-C3mYIpOtwt+6q_g8o@jFhd2PU`{c9rLk?SS|BCUc9*sd<=B`Ps1vDp@j#~x zavCr9W!j$0SI@kqu4>)3AZ8yk-m++F)6n}f)=Z2nv39)vFZWkU<`^Vm*uJo;j}LCd z+}AeNxISWhe3!^hibK2Chm?K66%rgH5hYtDw}M7b1#}e{OoqG7Y!X|h5n`HZ32`Y{ z1#*W)J9Qz_4#2{1IN#32kB=S)f{Mrj5Lx{=H%pGoFBm%i;8v#)HZSU_E+RCjAyQ#D zKpH+31bxPDh8m5U{PX7nZdHc{EWof2sVtdUj$>GGlct9Y1qC;#(@p$!Cfu z8Ghgb1BKV?eml?F9!mMqi{!M(mkCC6ns&ww5Vd3H(2T90fZ1hVe~pQ&^l>EnsrM3$ z6c0#np|mNpMhMmm6}qGz+F6N0~J zJlWBpmX50g7O#MxqLaOq>=V@$b&x)xJ;dDysE1HUa7dyem0jG{?}=qwY5nFeZZ^67 z;z4jPqsVx=$m?yYYILdHbBvj;K-XLu!lg0;r&*U-8uuUtv2itf#v4p~7M zDlqA>tgaQH#9N7yexRaVY%g3bCNxDZbaa%L`_20F`qxtvp*!Vw2Ub&I4N-<&hV#SZ z1MamPR+tsi>2Eeh)TjVx&KLJycScOfX(xLPcoD9?q^;5{#VJT=XbXo3;#F$Qw`QMv`17cJJ~Z^!T$w-K!3lAu75_#NEz0(h;ER52%9?o^VVOt zVy)#9t|}2-Fdct^f0@U4@dOm-%8H-)E0_Gp(bm*)MQSa8p3(9NMeWO*j;mA@#C@I+ zPv|n%rmEG2Kl9gTo@GFm3@dfhCQQIf0Yn;+`fR(@l?$*WPEZDuFHw$WYpL33JtmS&#^X^AX%RMV>OueFpMW13}#&nN$o> zf#=pSu`Rg|;hD)t`sD>WnC050ae2W^-Cehp8p#a0Xjwbm^m4oGZlehnujX z|E>AkQtOrHI1a|LFv!hGLjrjFZZ(!Um_^Ke2H{y|5$!wU-2#-NUcYofU0Mq3Ev)Q? zSI#QxCCIql7p|ziP$a1Z`ASru(#1%VAUQ-yVEzDCBj6-MqbUQ+J8f{msEcMHmW&CX zJ;@l%ekbqZS@N40fuRs#_>4Od1b@^}L~6)kP47i)-??HGd?D3E?4h%aBvn0&XjP^= zco8RaTe<2Y+IgCsN8rRaWJMqw0&1bJ^oLdgdgRnDjMD5w!cI>spqCTQazlm#x~;7F z*9+j_t*%~XiMHbaA@;eSgX-$B1UW;8%^lS5*J)yhmf+_bRZh$CRhOzxH~gr{;GH}}zR)za_m0uL8|u@Y;LV-{`qZhhD`;=GpUa2A z7t#*@wGBqhlORS{&!o0;?W-(Jqq(ci3^Yu^md_}lDcOF7fvsLca;0^}u2~PcVq8xW zG%&7qi(sX2la)oCV{@(Cj$J*w#@oyULlY*kK6j`-`;pm1vYg8V6eGqQ?1yTY@V8lP06vTshEsgJd4or~?q<`?0C=pJ`GjYKQ;f_TGif zomL}>Tg-I(%z=Tv-2+&DaR}66wi8_}fk^M|ijYPd?yxMM&OW%ey5EzM`W?>-e1%XK4IgPE9Ry>fc}5LA+BMe6|r zQ2W#RjH&t940t@8K<9Mqjbm6UmDmqbSHG929w#S zls45v_EcZKxOp&3fz%WN%2fcMQ};|K{p}Ok?(KaMG&OSfx-92#!*!J{lFQ|Hx$}LA z)L_m2!zOgvDjI3Y^*M_qK%lch=fA^k+>tk?NZTD1QQIUIti3 z4q4NTG`45`JP0z3-L{Cz=fZoE{BLz#gIw+W1-QBo(lxsrhAm{10@{%LZVFN zGsPRmnG8WQdiXj>6t~ zfJUtW2QLC2{I|FPhlxD(-XV%}UA~O$0dG|T_C(?@({S(BPM}LUpi3nFeR`Rh5)R-G zirQNTF8 znAB$@llp9A5-yQq;1SrP#qWfjoDBTQ@Ng{x=mL%x&ww#`IAO`DpPT1jfJ1=euVota z_#%(;(1q2{gx4jxMU-k93G`y~+I->#xSYPZ+rPCipGMN?{PKTI7gv8q0Yi{*j1_yi zo|F5FlWF`%r%W1|=AfG}7*M#O^N)`~GOcrI)mQwBDid?EjHdl)(ucBKiN{ z^;|{T|F~&i)#hzqTT-_$!7XDLVRUxQ-rl9To`(-yz=}6_J&BOf<`=9YXAvS{r|Np{ zd+ez@Ro4UAJMcXxpzlG4HQzG_fc8S5=g?LofU*_NiMOg~w?edA7qNEP1fA2A(~eF8 z!MrZVK$95Um9y-xj9hazUG3?S)XgE9IRIyteFiZy+MiH43+{zjbhWqWmSlnAwgcf= zy5M@7E*TDz??P3x(9J_O!IOW2OH8MMoj~EHFQ%Kb@v4VR!-axXZA}Rjwnt{IT_}V9 zsSfj&ZS#K-pZRBI9e^^}J2xDVS7#mQowJYatq8t=MNK=rQWW^i4Q-Xp@+zM>TS>qy zKB1-^RuH7@KvO{4q5Dj#eeLalb%e3+p?49##3%y^EcJ8X%sBRa>{jA`f-{tUrWZSp z&8laN@Qlh$3g}(fXtQbtxC#1|vnn-NMD1t1_Y(K%RWpsA=v6a2NLJvb<*FHRvux!q zqs;>=?9?fX%|u`XO+AM*K;v6nX2T-LO8_!7g&kf?VJ29jFq?=2r~zd128sYrIDWZ| z-HJ^hWn@fy!WQ)ZJd+J(0h?OH`eYOB_)+2rTly#bD57s((zuAqy3Gm4j}&kjFa47O z8aSIfmIyd|d_^8|!&F_wZe=Y?DTZeV+-NpE(bf|($z9Q|tOR#(;G`|pHIk^`QVW=K zn?`R#r&#M_KEHt#c}}$R%%=-ea@m>hZV_8ZtAi6gHkV+LI;OJW!3`f6*rU!U_g+q7 zv)~PNanM+osB70`9{+yo4b1=_0}K=M;K19nXZvo zZNA4hHJKPJd4-P6-4e_56k~PRM6a!d@2>dr5=UBqhiqZVzOG!^?y&z~_PzwLsVe*X zy_bD!(x&@DU-y=#?dx93*3uojk(RPK&@@TgK$^rP-JFWGA{0SI2Lb(Nlto97kwx6s zDhMbl4k$9C4yfY}f;tMy_MLm*dudu0(V6-GXTC4*$je>MJ->6#J@?%A`d(salqx_a zQ-%ckMdnnbR+==FASx4m13MG$y>F)Of_N_A<9Tb590&B-$QwmBjfiGQQ4r2&t+?Zg ze*yNy^378Jh#+l*KaOP~2{+Ww3T3JwO;DIxp{pxJrKBs8qv(6e|?BBxH?I<)z2wCWS+85D7$nNx8*w1q}re z{wY=Y^T;SaRc>S$L^a&o1*m1$$K|B!!jiHyS~M~Kfq|O90A)m0X-drGlA5%79Mx$T zzM(W=>onp{KB{*VsVZyYD+}W*E8`0V=;_!yMKqzz%}voTB=c}dbPAcWF@NNSdUZf__;Fzfe;#HD_E@ zPD+$m2upz!h~)8kC5Z+4{0MDa_L$^}DZx*tP0fjt`T1+Y!XpCIaOV*g;~x?qt`aE& z{nT28FgqP)L6GTN|wfJ$)1Uk{xcHn{GD6o1~T1%4(vhR9LEEasqR#eQr~AX--aQ zwaMLQl|;_GHZ*dov#Yji%JmD&7iP`3<=Mh#&mA*2K;IPHL={dFPm+&K_e<|uXq($K zDLs8s(_GuaE=gi*OKc*M@kYiQ{_Gb7GVq_6-^jZ1i9{m;e#r$_3)PBJc(dSSf!gV3FkJ7_6j##<}{a7Q-lq{$Xk&_k1~oef=T#F|j5pKpUYEqxb)VlVd`J zQi)2X@R!OX1O0+j644=%1idIH5uJN8bK1CQsaz?N2Zkp`NJS#0G$aZCDiUV{F`j6~ zI;Mr8|G`ll`jM!aj}mdYpwZ*$Aqv5mWK~&KY^po*7858CpaR}gu!;^C;5Vuse{#;U_Tl8K1oV6q0p+9jM0{d z&|wBC_r>Ff;co2y2Kld%A3J6z= zMM{4ac!HZfB9{MM4>6jpP=9g2t|+i*yVAw~uE#m_HK51EQ31QGe9wqq!W;2AoYxO~ zP*urArS(_v(!GqN(iU}1n8BK&1B*i61jKt;K~8NB$l6t zUp#@k@?fe!r8r+MSNkcpJQ=K!EB8sm!(xJ0DO98}EHE%EJcw#;R|ZCh1W6W(L~{AL zLi7uS3FJ8H8euo&=ja_8L9F&EYTpiPtRT%BJOh8_V;FtJ#y|W6pZvy8U8C|lALg(1 z3w$8AJjH+NHE|O&<77b+KUs2NMRIZTwJ8<~CJ~j~I7N zP*_lwHdHAI3R1_8t4>X-oxPxBDQKBU#!yv4Cs9RAcrf4U|X&Bh$2@+WO*(T5Wi!HaICiHasmhFgPqYAUq^kE)5R!YcnL3IkPR;&q7NfaB! zA%Rf=eNx5IApd~CpaAm6CPhGKV1T$?D3nUiJsNxk#65J3l#P9(v7dPqzFiyA{6 zepE*3*b2p|keZF+1h$8(3zLxG^9%gc3jd|Laj||ACymKYj*|L|6_QwHd`w<_ZkQ%@ zYGD^SG5E8wn(**Q_0qJ837Nqe6a9nyQ~U#E;sAeT1XC0rQ&cr`oD*xh8SXt65G6$R zqebDd5F1Ze!^1@xg=1g9S{71=2@w%U?R(U!h9rH6n4F!8RmQ&n^|PUocj?H$Z?aLr z31uylV#jBUf!v^$M5Y!d)J~geD3z&I^5XKDsz@Cjou3MwQZO%$nGj=Yt(mO<4cVQ3 zOWdP06_6#7Ip5Tn7gSXr*Gf$(vC(Wu_k`or!;;ty}ml z5k@Ep|LscZS!y@frlf`lwBrQj#$*MtKhr1tSS^z!&S*C0EL|VwA1+YS5&i^uVWa3a zp+@vAp&Y|pOM@_F_P~I z0U_N^Py|Xyf|#I~a;odxt&;CsaaMcrf1b#^OVH_mjwtItMW0+93354Fe=!LO;R>Ea zB&NJkRE%lm#UzfrDq4g{;?L1z!z2~_Q}oU+BiSOB6%9X&ps>iJCJea1@!wd>y@RB+*Jbkd7Qh7W!<=<0C2d z#<;@xx=}PU-Vi_kvh-|%F5$rCC^05+byC$R(kIPHGWqELB}(4@E9hxVFO8sAedHq_ z`N&5;^3iBYiNLgW1fBGek9_1KANj~fKKiex^FH#Ck9_1KANj~fKKfVD*l}Z*rz(FD zRi)mi3%H0fG1dCWM?UhAk9_1KANj~fKJt-|eB`5FK=XX$qyK*if3GZ!iiNWP6)6pF z5#VoU`{5cYQ-q&z8&?((@xm9lve1i1M1%@I;>u#Lb_vlX{D~_|iLs)ETvLpO6}DG?N$!Ifo%CiGLTEGO$d{0bs9^arl2B!a?XxUxzj z2+QNjenbNB2nbRr1LXol4zA4lnJu$^X3MOf*|N}!hxIdC7JId`erC(8pV>0&XSU4x znJu$^X3MOf*|N$nlpe>GSwC+gXd;VXh)kjYN;QOqFcMC}PPpJ-E8&K=5-2$d2R<91 z#R4T8kq$k@gcXP;^w8Euv_qc@*Ua!~2JS96n}AbAR6?l*TFgW@aMXb`GstKpdU1&+ zszGuu$nC_4Rw%V$O=$RM2cBLS%VW?Uof#qfP-Whi3J?* zRH(H>E9&h8{ahY<8ll~SZLnTepH}P#nwSFh7U)5(2JD5AHnMW;oTW5I>4aV*wwL#A zH;i@SmQLU@VXx89-j3U9h$_$tdB=js*|1;7;6t){q1Fv0 zH=YL<*w+GOE7r-0eSz$?z`2d{6f4z@ZDKKP*dim=)P_-9c%G{8th7Q4ipNguK^K;5 z=H|kJZDM`l!cpM@sRk}y(Tq8`b{?ezn;;y=OBCOY6WAda)lmxmD`zZ$A2LTqGm=*?Do|Xy9YijOQ9fuES#ut799-!Ol>ZXO;{&M$iiA7-1Kmd5oZm0plAv{B)dM+VEVVo{RHU;YC(AagoDEejZ54 zg3NLGNOxmQ6OKfLvcNM3!z=URJlV#@wZp@WBA1L;`0c66yMI$>dik@^X<}#q%i({w_M-iHvUT8Id?yQe_ zsjN&Z)&XhIinV3)94qTmG3vr`?!dOQUf{=}S)GM3*i6%lJO453Enu@~H};beucu4-tedmcf>)XquPJurU+jA{hTU0` zfj?zL1YcS@tHZDLt@rw1cXyiGpWS%oj3f5R(H0Evilg-!?-c{c64n;>YJty{PS2Hx z3Gcl&oJS0owUmuL!-%M2^MRc^vsSU?PP|^&{A9xWn1#zNtQ^E?#o7PzG0J9f8#gn< z()kr)@w%#L$NA2}`59$(70#Dt&IbMpgZJ{tsMg_`G2pU^;IFSn=ef}`qs?8_3I3Mg>u8leI8~z#mh$~uR8?q^>Eelh~c{$p8L|-O*bmL z@xC8D+KTTAETin^^%}kg8(uZTI}tyJI)cADLw8|(-Ru=14t)P*#qrkewZpPnw_sh& z+)mZ$nM?0n!p>m^H{&k6N~|6o_;o!pmag8b-a9;NrT0!VGNy+;(T)As@hj(%-witP z-6iW8vzG=Yd`6hVewhy(Mz6iYeT96?W_}a4fZy>7M`UsKYOD*FFLl*z!+Qk3%Xr_P z@%zN3a>~e2E}SdaxoY9;zVx1JxXg??J=VK$l-jU#b`@QG=lg3TfZyLLi89<a%c;D<^1N zN40B!zZPVnv1PhOH{bKB&an4EwJH zKI=hqB)eX>`smFGVMxB8pu?%w+V~ZO+5mJt$ zm(`w+P8OriOA{6k&8@dCd{om{SR%dA%6>1nJe_LB5=1Seu*ZEGDyn zuCW-Mc9*@?O_$i64!hIfw%Bdybg|V+>n&~VZWpaLyUflmvngFwX>M_vyXiWI+1A+W zFw@nBUVEpTw%Xe)M%rk1^g7W98cAld=|uFAucP$_tD~K+G}w%G;{s@%YHzpEm7OLR z!f9-`xM-`loL0M&o?>aST8suO%^?Dh9YAT9z0+wl(@5QJgVRiR+Dv9A?QS>IHC2st zwZ&+*xy<8fm)T64J6g;pli5UD*;d+Qb{U-(2Qm+1n9Od2#p+5gF<31vP7A^?&>eOV z2sj3t3#2$Lt#qrQ!(#2FyDjc^+SS?Owwh_D9dIqSHc$#!-R2G$X)}QXoHn!5l}=Z= z=~lDB-RU&DXr~#hu($!k=+e=y4g>hrXmCIo4e98#x-AY6Y3uATJAu<>c4Gl9+Tpar zY#?nxy47m$rrW`}w57vgFuG}rjdmkngJv)UY_tKEy_IgUv|+(4klWnjhQXEv=5(4f zGs#7F7;L??u@k10)f>6b22OSwz!s;)g*<9DbkLm+1OpP=pv7gm9@yP>FsKWeWT0UJ zI#?_eJjQl|6BIK$)Ai=IPOHJ`iK{{$x)8-fZWFi&O+j9IR`v+jyPXD;xx?UGfb76i z>WR!Y@VEmt8|`4e&0=b@t_TCIb3&a^>1x{MTA0J9V7a9W_(2rOp!G{7{PogEgp8^pErVypOY1Xn@a*qwZ#6(Q&@ zwjJWY)M<3!)?waVZl}e_1|$!Fg$y_X$ZRy>X1r~?dWHF@MooyMYmI0jexSWy#(~H9buK^_H(n;`A$?E{e zQH?kbA~(;sgH6aau$HWl*s!}trb*<=5s6Zzsz;OK!ixv&1{auN6a+SS#-yWLosjg< z5`l%-1~wxfgRfvlVK{AXfn;YxPBGv_&Bxi*TY(fZxLkIl#em`i))}NMo7=$VQ;QWm zlZ+&eu$FG%GWEd}tdR+4Lv|i6$%peSYW4~@9T#p$d%oLhfjDKckt`>>Qvm{AM94xN z%H@_;^l8Qpadd(~u6Dd&L3B$eTD&gQ$VCZQm;r{n%qT_M9TqmDURG6h!2=|_XgE(} zCA!<~9apdsErU*{4U{os0VX@-P^`;*v(e25*l-*}@S7}n;T5tW4r#2*?6u+A>~6GV z*ko?umN6SCT$igI?FTL95i8f=WvCOuyW9{=7MLQ>&T}OPp!HT+MmN-zH_j;5m(f)X zbiKZ=sj9TBlujycfO?XSo>A3USvS3r1`d63ZR0GuuADBeokd?$Ra>f~%VyT=%NiQ! zIz3%gQ(s+G25nWfCDqeQt76q`J7O zMn{(x*A!Qj;Zb!UMUUAy?PgS#;U>TQvKr7}Z{~EB@3DaI zvA{3zt?#kGzwfbt9RKq@8u;&gG{DZD@9}`|@qq8~fba3Z=xpwLL@@FZ0q-5(V*=k} z0^ehT|G38lur}H68W6;VP+~E0se|I)$S25T__X3T`>%8q3S*Q?5_rg!SIe(bA$|;W zRs3o-=C@ugzea=k4___6zdzy^lwK{rRtx-vf*%R=4Fw_Q7s9_F*dPd)78RuAaFQaD z$Z#Ty979Y7PfjK4i5cW-bz-j<217O(zs}Hc20@ft(>&4_NO5))#>F9Uz z+W_l1z}g2`Zv)mxfOQhE&I?YHQo)ZT`d@HGj>Hl~dts>oD;coz0jmVC8USk^VA%m{ zAzX_QKKtRw`hP2dqlK zngv*`fYl9HO8{#PU_As_PXg91!1^;_eFRuv0oED82~q)A5rCBhSlNIzc_fx-nHQEn zx+w&#$$(V@Sn~j@1F&uctQCN@9R76T8QdEQrZg&ix z?(grB3PBQ6bg~E?z$6y;tXR?9?-)FZNA&k^+lz)sNm458C;IUr#bQzO$^L$^fD{Xl z;&!nRhCBL)7|l_ZA0I@&KtM`GYuB#Dx;3xGs-4Ca$DmZIp^H2fhg3S~5K>@Y(NIy5 z5LtPYE;`B&IPOwWULRstHVBk%CHaE3ig4BBIMWyMIr$q7Y@@kSAFN+f+?rK2AhdhkFn=c!$b(7{^Bd88llN^#Gwr=Thk zk`m4-xC~zDZw77UBqbMl+|n-;l5+71IIy!h$Iz#74SNQ;##7^cqY$#b5fTc~QQ$bq zm6}05vOwc;j$A>?RYT1~&EWgBchYy#w*oB!l1m7=)Z-ov=LBINI7A^P705rAa*zT6 zfrC6Al0e@=VC(7c8yM)rL;HV?i_rY^Ks-vNClw^6;9bP|NP&GM7g2I4?Qs&G4$xVM z1L&-$+5@UD>X8TtrI6#52ni*)M+0GWk}splz}9Gny`@l*3iVLvQ0Us^waF_gS5%@1 zzeReBbU><*5DG7UX{2IO$~p`@q?C|K83jjS`2ZZ{g}Aq%6uGRlKq&&mf?=N(DJ0PA zaoRv19?p7g03ULaln=YDAMwlBKI}NDkd%r!$MtjT27(J>LrGFfasNo)DJ95vAVVQz z>Q`{;pU3r{MWC|~d2hH5?xQH|KGm@MNR`;*KDLb9hYS;kdX6;?r5~x(9F060d2+&A zy2IAP*1grQzrJhvp5;50JC#Z)p_E-1W)GD~NSUl}w^+Pz_wF~lR8mqU7r-&TZ6`X~ z#>=G8KcRK^?(;)KEfZ8?0G`m?+>4`S6RBL@qoOF4*fVqpq=-NZ>FekKK}vZ+ z&tcx03%H?gH&`GRD?^#);Rf*pF|_XPgV6I6IkqwA06+SPk3yk&=p z9;HlHTN_tScd_LrR4#TJT6A==v%{vNOM0DF9bI9zFTiIfoSkMUqo z1Bvy3e^K|tXCSLm)M{oR>UUz<*u|BLPpL==wRRwC5j6EvB$=sTWMWaOpMVM%5sX1B zPZg6wav+}~g=-s_X^hURC35|k{zzg1raF9|ZpVK)Lx0zqgougp5+)4XAh==kJB^Pu zor!*N)%dLs8K*VH->`NdbUHI2+{p|GHm?;>Bt>cAYJ}L`)1OCnhFhFi;oXeNqb(@{ zy}GfQg6TrBmYUv>sb&08MXHt0Fu2++wl=rjmZ@RKI#z#erT$C~YXC%NlA6%X6~|(z5gO^V157^Ybzjm^jv= z$V*z(z`oGI43M$j9wtSEV1QH;&@QJ2;I82jMO?(j*9MaV^FH3${u}Y49!IUa+5iVoloJKSab0{xEIFXa8qL)47^`>vNy} z#IUa|kP0b1y)>v|ZJPWJV(Y#oLsLz!7QFD`^3*SPF3x@;btrt>k4g868Arj9=d`=} z_fKtJbKS=u?zBI-qOjs4jbgKN@Y;p(C4Pqb~EFERMS^?BJg3 zc0c&TwtC(DvtOURjohA8Nh%z6uy%0jAEfL*$>)hM&m&@*dq4lV(&%YSZ98*!sF_B@5G|}>u3@UyR;(>sT*AErVsu(Zy*T7pU_q@J+$%V)49_&8 zS;UhFQJD@b%uKNq7N$rfAxU91GnJ|2>kPGc0te9D-F+zlv-1j)-HaCLn;=v&a$clB zI%+`*P?W5hL%e@tL*=r+*NJD5` zuD|7P2`>u^rKf8DPVV@dt;Ae=bYgm$Bc<~|ouw|gXUChropd7XPc_@ObY|(}MXOgF zuKergrDs+cf@jTrV|(iKyYJUu`_d4TB>DPKbyDw+ou{Ves={hEWWMtL!SLAIl4LnW z`ETA=8F_2xttEdroYMH%#(ZnweS3SXPli3bq-R6E=>_txV;>aVIL}|xxKcFxuQ%>U zz9!(loPlK-$<6tiZ`#7&9&o*PBvzdX5( z+WO?dYGOhOv_!YhsK`eNzn>u;+6;sQ=%yS%-ofHb!7 z-;yknnM8KcM|=01%yfgL4ZnZ`6N}!w%*5F&pDD=9%wm9Y*ladjW!(Q3{kVR?W%_?M z8x7w2RQyiK9ryP42Axf8KI+gEHkv^Di$zWoic zx3YSJB=?h9s&jq6-}~w8k8k<@(7o#D+yD6YMIkR8dhPxwa?iQShqQNPua2qAI{k9| zdg9R+8eY5EmNMt269u;Z@18xTReZ%~qkiy6ADeCxP`dH#%+*rTvr+|K>1yAhmPPxU z$9;LB?d59^?0t6glRLF*7(MFt7eX$2pn@qIz0>3{S*R+~reljfq~&B~7o-(r=Vhgt3UV`B4Ov;aIjzPKDX`LJ`lMcTaNyyP{QTI*J2t-7N!@+< z6nM!DWp}u67KCttIEE;O07mgU51rDO{4}Njr$B>O3Y^Zs)rwaNEc;miJ_}v}!p$g= z;#!itAfy;#)N&RKP$VG^j(%^(%k_KX>ef%|dH49~b8kHV&d@hMMl>C7*lVc}y|ZiI zv5(H)Gw1Gk{sqZHqB8B#dwU0;Yu)_bvtLou<}ADaj+yy8Ya_SDzNUHO@PKAh?#V5&d+&(<<4wzsBt`zcHEQXk z^b0ctH80t2UYqsRV>>eHn|>qS9<+RKl<`TI^5a9-C#uJ;DtjpF=1Hq2&8X^*TY7%G zX3wpkNQ0)mk~%wc&baxjHa;-8U{$jHn_XMJe7-E?^_JS39&ZeqyH?{?7VZ+Yj!&Zk!A$<9vb>zadyJ4OvR~;Nu~$$?alR*pDbNbw^p(2g@rSJ*wePe@ZNoEcI{oh z&tCE2Q2NT_+ke=`bR3&swegEpU3;IG?m9pI`z@|~@uN-q!`^=O`<1UPiTt*IK3Vrf z#7(Xp2j|32E}R*9Wbo^@T~!Zdyq~aa;@r26<&@qT_1vAxu7OD>b{$SzC!}t#{PDyG z)PBKw*aRf737lXz0l6W#JqPFh$kCUu^Kd$q%kD~C`nzv+CNeBo0MVEk#)OP$lzD;` z!Z4N1oAJYWQ*XCJ@`TW}v|5Y?x0x>Pbhq1`7I!a7?o2+D!(?Y>o?_X!#e&oH%yT^?KL73W$CgUer~HI#PA+{tdS7<@l0SU=kG4qN+24LP z81?06wHwyG6xZs#BV2-pAkN9*s}-c5eLykXnow?0WOPfz;O;=1X2 z<=+LmGxJxhU~Egio5lR$r-g@B@Ay1s^}^E!wBP;TV$M7ss z)sUZqP?$io$9w^u7q!}Ql8F~Epw$dX8`RxXvMa0*!|0%*yYhxL;(sN3fp3a(Rek=D zR}FeBWoozfk+ro&0{Vx2FQbo0DZsN~aaaLn`e~Cr|DkuuFyEp)EG%^SO9Xh?IDTh> z0Elsg3Bc!kwaxep0H22A=PoB4*ykWmoREcmRv3ZUW;>4;k-dw&{&w*s@M5nCl1)g( z*UXMOEwRIpzJS$NC>R5D4w3XoI*Hmp?-y@WGcxfRrhFX;cAJ1;1Q-DNn}DG9SA1f| zd+2@b^(Xyd`dIT4Q(E1a^;HL{H?BnwJjPm^gQ}1bM_fZVVP%gSj-9DR_VQg!4LMlr z2!3cG0kg^-jM+EpSXbe6USvcRlvrMeSvcMORs%FU+HeNK+L2~7I&Uj5XjzsyKA9Hy z9M}9NYk^JqBs2A_f{fJRmG_^=F}X;t5A36d>xC@x(}N)4*|lW#3x|~2thuKTx@imM zoR-iYWfw+&>_VADp)_}dLp!DqYp$Jyz=m5OUg`5MYq!lCQHV9}HdnVk^X=rmPs#<_NPEGc z2e3=40D(OQAg~_%0|nd}J?ki7{&N_BKlQhmVl)7JRY=6oDCD<@07b`!8X8bh{|XJ& z^&$#j6#wyvg#iAZ9bg7Kz;t%db*^mwQ#h{(@>*Z$RRXX1t~(32oFof%w<`ouU97Iw zvZ)EP7~hF&@cODRCrTRrpp>5>&lqbu!SUsWW*SPoHLo!E}`7b^US={4B z7F;s8^IFZPJk&=8Q$q6QQF^xT*X6MxcCP1c*kWVEBv%6o7Tu7gIkz&l!-ZGe+yQC{%E^#%Qm!mL?U zU6HAFq9-4x_|C%NtfY!;t9qzUob89ND^Jk|!$lx3!o`f@PAbD&9pa(8>Lz3HSnnTINETrX6se|-OVb8C3a>+ngL;YNep zwz(D&$AS2?x8}w`X<7R4Ti3#h&x4hIqs>J8G1?27>7+5D6b3ITjXi{WlX4K_S*@J# z((!^@V>BH88!e<2en#=kzTKA1BPaEf?{S#5^%m9r4BLI9Mnza3=)zVz{dmH zf7}iX65RR!!Ysz_IQp9fgm{Hxy$>Hn`bUHS`!+fN1G~h~5~56IOpG6LF><{;*K@tT z<_2QvrK7)RE&6WcZXzW_e-wxvT1YI;d2NRkMn3C{I3$$^J-2nlJ2O}JTx<`PGo#tp z6RD`R+#3EgB>qc-_7tR3qtWonrNw{&??x$=VxqfmLPqRq11tMJ&dlRI!lojNdv#CS zbXTqhjcKtX5qWPkw-@)`6phYOAEo*{(AUJoNH4--OEMzj)0Q5}fejQ|Qh4i%uCQ|E z(fmL8BeO{g+6qBV#@^x_0iiBA=f>lgn$9d5>=^p2(S28CE>!mVM1?%9dvKAvA{S1~ zG3VBVE^JADA>N7>8l7)bdhA+o!&n`1AJTIF+VzR*R|5j4toJ*qp^wN5$5k!KFApj1 zkqF4Cc24#W4J)mQXx+!kRsvFhYZ0_z=6=v7iut>dGjSqe0g;D zD(bCDOdRfCTwV|)pO+tbR7~uib@$d8b7jAfq{SA^_LQwEQk=iR%j@R+zwU2mX&BaN z=Z4P?`6|C9z9+k#>tlLBHn{7&NG#?sHR8?1iFY$0+N$!3wf9Ghsod|mc(E`hM(UGM zws_f!p)~GY!E$3zjVW<-CK4n3c6xU%R>*Yi1x4CF@=e93&!=agxak0miq8Oz#Vmbz zI5Nchtme~v2MfzaoTC&OvlT5FGp7To(q6gTRpj1GK9%Qq#KFR7f0OQmyeL-)&M0UV zTiARzBqZ>G%~2RvjMXC)fu#~4u#|&AAON5JPlM~n(}CX>M@aa44CC{=5OXl2I5)4n z(4XR435wzdHrEOO(%%}gpy;M;HA}xVec`tkc)nr8-H?ogTQkB#fX}8ToG1stj--Ht z|8Pr*-S>a_BFo`q{;Ou&ZR~E9_!FC12p}d~!}OB)TLob*@@xYr4;$q=iZ#17id!`1 zdbFXPTjTCsJiB?H+4{-Yv-R7Y6`0C7FBlN+s(76;jzsXvavMNa0+KQfgYRehm=3h0 zunhapp%PyX-RQh_mX=mx9T$eV0%ECOt*^P=L8Yy>B{B6+*5&(L>d|NqZueZFuG|sm zCaML~cCanX876JTbc?PzYCIZs5_6bpKedIgr6f4_!o*4w!gqP^UgmP68&WzklBM-; z_`5RnR$RB!ETe>UuC87&;z`l4uMKReFF_A_^ERkDrL!WnMKatkrcF)>Cr@SNJi@-$ znh^;ia0h}q>mB6$i@3yxWiG zHp!$rAhcN|q2E>vLG+t%)Ucv>)+dftQ7UK@<8b*-BK4>V8U0z~aEkj?5y?Cd7ytNW2MRr!b=3eX)ORVfJ`gR-W_}`Vij?=V<@)Gz-~4 zxl6<6aUQu!J*pnQ{8wI$Rh$qEnbz6X9Wl5THpfBMEm$x)5~pC}QXoDHrYIR_=}Y#` zeuT1n&Ddj4Y{8x&9)@|6d|lzJ;(qPb9e(+}vtC0wQHC|EL;Vx0gfA1`&QI=)RTH@a z?)4nW{_S0q{_WTIuus1{E~C;{mLsiYu1^d?O~dVnBENlZc&g%7EMi1a3cQUnV{ zM2bpB>7Ymv5Tz?v5m6C%Cy3>{a((xE@BY_%YdymX=VWHjo;|^1}Q>13K`@}q6pSSii0TACYY6ZO359`B7?>?sn?&#?Dk_4pW?&7xkDC+T zR9hWv1~w<*yvRfx3Ge0P4>tEF;=d9qD}xR3&ThD0*cekuDIg4Hy(d#Vncz)w!jl2U zntl{*GYXD^2hO$4P{3yIUum?>&>$$3DvK$=RSOUmAkYFfgNMR5%8W?_Co??78f>hg z1vba~QNX&MxFdM=uLtbcgRU*m+LS=;00aS>QAplSl&`{&K~S(c&XG(V4Rrvp{#@Jzf3iw^sDQ;v&u znOZu%mWVs0*;UNWmU4zWnm*Q)a*4Y&Sk+lldqhud{szeJh{vJ2VF#;FKZh~mD-&CP z@ggf8cKJld!iz?aj2LbXq*cFbzJWgGdo3fah?0=`*mXSL=sYPP44r(jn*%O&GAA$5 zVA|fz{XT=hlT$qx66@OU&IT>FZ6`Jk)@l`D%!tx zQ65qn94($7x>4f`4)SlVp6MQnwg3+W&6wS*`7|IZI+t1zl&)JwxX@Lgl2sDaRj_ku z=HX~}2vdES%M$xcLdmh+J34fp6rM}-8b9T*6f|6ksulR~siw14dF-yhs4tC0eS!X($&Y*%WX0F)9a`Bp1{pyR#+ zFgz-gDTMSXaXHSa7}T}478h;Q>EnB+?k{r@0$#Ja$=a38AHU9g+btRH8UcG!F1>G= z+2onwn42<7&Q)_=LgG?*IWM^C7WFJK@-cZz~@jKFfPgwp!CEkpR zzZZ37t2_Ur{JUK#$it5oZ;zW^DxVZFKVRG-WnF$NtQ=`liVTEM?5(C^PB5Npw|rRS zArZ$kDmmUcdSL8i;E9iK-ZC7=)z-)o%?h(vQo_A&3yE9bSSca^>tRm#h)cRy)A<{Q zp~gFeyyG-pua4UJ&qO1$)btv?MH&N7%r>x)u_(?Hs5hnTaEed8oW_{lwiCNPgn0;U!p`2h$ne4ZPu_~i-?6~+GH0_9OmvE&!`^7zb7y9|`Y^Ndg(74`V zPet91K6c))S#`n`TJjv?GT1yHrFmdsF`7sRg_epAOh@639@ZQo(jSseqB-QFr(smc zO=lEUk)q9Aw#z9-GU?IQiyR%Uhpc9EJ7Nx*Fo@H+)KwZHold9amRUJ)h3x9oo4yRz z;40+t7BzLC?-;OyZWF#3EhSooJ|IksN>AdYks@C@*jK6@CE!>|A{AS#{~%8}zH} z)Lu!%UhSM6o~L7x%rU{c+;}?v3Xg-jiSC@yJ@r)MO8lu97~D#8x4rS`;6+V{d7JVX z7JVJ&UHzhru&291O5a1afg01dXwqK*-uCq2EQO+MyWZ%9wC>dB0*O6{b3oW7fD}tI zGZ5@op?vJwTY4;nf)`Y!>8s;U|792f#q24W$Yv&_rW)bEQYQ?6ls|t_J}s|E9pim; z1|wV_UVuQ(2F0r$>X( z59IK%0odGAM-N6Y(8bm<&WojRqMt>^egfT5U}vP&ew@dvE=kTk{76_mYOfc!oL_8e zD(BI&ai?fc3t!WNi*Z-?NE}w~O|gE`5D}V>V8xT!LQgqofY% zR6ZYHeDuQYZ0>;_XLbVGu@le^;4z4FR*@Ac1$1QsWzo2tG8+fF5P?p;?b3-=z0xmq za_U(5!k&9Y@^MSI(;0y(d#^gj@?)DqtxU9;=)CIq=6T=*5uIIFb<(jcPDU@pKG{g) zX{{@l_l9zzM)(YZyxv1W=6m+sW8jAKhq)+9QZ5?y_Nx8G31Pk#QibBoJ^c(+h3=-O zm9R<@;TQp~ru#V^XAYTYvjLb;4161# zCXElv#ccy-T8!MsCxYoatw53_Zp(KnPTd;;6_1!4SNwZBF|SIdwAGPRN77|Q#aKZ} z7X)+XBu8tYe_+}<0o%TlsX>RD1;OuaJGKKN*+sV9SwYW!Y0L?-Eok4SaBrY3HoZLr z8=!b^I8BH3nV`_vGkbMYUtqVi`@2snVj&WHT!QOQbOt|pwzR8f|ChTg^K3!VQ3CyM z^+HUz^|`VIfUE1!K`i>xJ>QQhWbzkWHb{%9&rY3@B~VZFK%G_lDqK@7m@8^;h<+WX znpj9MmI;l$LK_f%yQ!YxLSPKnbMo|3$oU(&ac9-SMh-Xm(jI||aW^@r*XwhSvtS-kjG}xW+z#!XMhis*(Tg z7qpfeiA(`OG2g5{w7fjn2n7FXH6Cz0YDV$=@x|@>#3pH!G>+s8FV0@dKE}?=)^lNM zEZCRN$LDdHS91RF9>kPtuf})OOaaVdwFiU?cxhmHq;ct_smTgcQ%3)tmRa8%~~hbkEbbV(CLF9x;Q_B`;%i zgbU-Pwcbg~x)zBuNXCceb0vsOn70&4nDf17Gj1t8&yZj`l^Ai%4%f54&XO^mJ2eKc zC$=ABlwQ`{)e zt>JAglYOassca$UNb!Oe!5n}^a84N%BR|c^Lw^U`)YbCTJWYU}-ce2XbK6Ks&o^pb zv*X*%@N9R?LwFO&+|uOwLYQh|-1{z*%N=wDZ8Rq85B6*4z;khsT!K{u(u-0;>bF|9k4N+MZg(1Uw=m+&0}!*hQFH#V-i7P#kQwR zpDkkMvnD(``VumI=y}?1@`I^>hi~Yn2OU?Ml#bP3l$|UYB z;CbYyxem7H47_Y|hP*bsbK1yf*otK)XwHAyA-VZkLpPXF8UA!`3;x-4{n-Jv1zQ430p4ldO9eUvp{Z_>GVlVM zc{@_54klHOuZzY8>5o-oy$+;7VBTb$sQ-g;ZVU&8`bXpW*%dd10G_X!t{KP$=RwAU zv1$Oea18K5d>z7fTCfxDdll8yhW@Rcx*^!m+k@gp^wHEK1c>Vf-5xI8a)9Z!1Zc~dLRS)O8 zRWA7EzPr1SMlf9LTrnvzO*^jadiBP~FQ4br176CmuB{C1o$9q$DhBZmRUQGhW?qzX z$~~-UqGfp8v#~_frEEwv;pkJ?M^ z*WW!87F4=k{XkFGDLQW|=>w5h-*MM@2RHBh^fP&nk`9r1^-J2~ORt`<&bgZQA%Ufm_0sx| zpM)QLlJ0IDKFB{O_)>azwenK7Uc|LZy`a?$J-@6iyhC+`atS)>SveQEJk09b9bIhr zS{vziTrUIoVima<4aF=A!c*?eS1NKvvWnZ_Tm3?N z9_;MC*+jgq6WbU%ft$#K>dT$cP*-EpI!NM=6cKD~65nwh+xqmJR6RCr(4paAa^_5g z!<}4kO~8RmQ^GY}M|!ThNkKz%}WTiXR)WOJgLqb{)a)nnnTQmEERMnCUH1S7}an;Ix^1hPYTe zXwHnNwFC3ZfOxkRL5kQdHi*|p5Vs+!&-RpwCiiYR15nJb6T{N4FyQUV3p@B#Zbv6FgaLcr?B@ClEzU*6O2cG)C#J%OQ ze)OIYdNq%ydqNZ?qvW73EW@tQbk^1irxcEsclEYmU3K9%+RraOc<>kZa~)`$QSKSB zOOdvsm!h^MK$|!%4EC_U8C+IvG3ikaV75zO;^qNy9E&x$n5$}<=poXd7%zd$O~O02 zq;u58_X~0|x;K%}_EfgsZ)g&XM_@zFaYg9xVyr@M@Vl2d=gh?Mi{9LRTIRi?4=!g$ ziP+P=%W9U6UGV)ydbR-0404ZtTf!+jQgUW}#kre};XHe)ZNg~^UIR7)+p=azFiSQ4~O0ujM&D` zJ=0&4^>q)IxlqeG>iAfBly_tbZ6qNPrxX^pz`^RnmK;0dcy70-yksy(llvVulN z)cj|=7Jw%jx(%K-UcG(%o&W4>yR0vF4i_UE$8)-uTTyr4-x@x4fLv-f%KGkD)X|X` z722il%9Q|ktbNVHw;!H!E=v3QG$kbL+u1g4TliuzX>C-$s)Pl^*Xp>_X$2FRxPVbW*$Q$5( z9FzO3I{LVDrB|=*{8I0|#W$9ZC+78z_>CW`lp+o@&usO*Mzns~Syi)@{Tk8!X(`Vh zm?BbQ$M)NpH1nsOt+S5q2AzIy56Vw8TaLznyIOo!96db-z)!Ru#(<|=rj`50y~umr zOK(*rc)vv<<}$_!uKlRyv7V9h`5y(+haYkWVaY-1K=kxLSD6D{bBi8r;GDev2l=`NPP|IDt<5;ALiGbpPii0$nQ(1W7q zk_zE(6dKNbUh}oNuRHe%Kao^Do4nE)fJO{-ZXc`*UWCeBa$|-u?ZYRuU8Es$qNP`mQuy z?YgWG@InJ)YSE&1p4XeP@3qfLTR&m^T=lz}t8pg&)H*V-jqg%MSW;C6}g+u15_akfz)_td#@LDO`#vZHLe-MWOGtrZpr z)=4{r-wMW`$CBVW?*c6CmtV};oG`pnY`=K3Xf}P?|LDnApfneYm<;SYvJQ?-s!KR; zLT8N$LX%@l@Y!V1TfZZ+vFd%Z@-S%hcO%o%(BSL(W{9WYoN*M~4r9Z zj`$;PUP>abYidP6Zq7;~7V?G=Ln0RM>ZTJw!kYycnL7ozJ7JtfRFs(&{S^E>iJkyA zke{cA7g@niNdyPnBEc&F=hR}b2xy&z;;tm3My&u@8XAMJ1QH%3FNc(Mf(6bylZVK7;MLYD0BMZx*W0*d;T$u~Y4c(N18jYx4Lc!8*VagGFU zijs&3wWIH^jd6Jrzjx$CmRldAoD;zl?1v+Q0UrSTZyFgI{=S)~=cX;4oxV3CdXqfX z2j%Pp#(UsBsrTf79--jxV*x}}1ZF`23)~$5ZbAWXWdKL0ECekJ#h63o6(9hVC|L*s zI8g-u3+0couJbS?IJ>#{Z^i=(OvleWexm$W9vjN_A94Aa_V0WQ4HYy9PTtf3>uM-_ zd%HO+0JrzxST(qYthy!~CW}C#5wd6vI7${JkAPy-k?II_P3Zav|K9c&zPesy3eL+3 z|BbIB4(@_LprNuD7YIZa0R_|#?TmoP!Z3Ik3gZk#!ksYR`2N)PC%(>33NC<4f}<$o zh(r%JCmdC&z(i*kMevWR4SGc|RoQ-0zOS0SUJ9r-a8-f7Usmf?-*xeS>g2~j{-=5X z`2OkSAA$P^u0L@7BLw~t@gME_1J^%7;2#nH(XRg*T+BZVb0GOhNyHa0yEn7^q5na= zL;hQ|LmmmlI)DVH%|jh!<>nCMlG0w^E^y~Q6G;D3mw{9)5&@wdhToooUP*jxcaI&|qhGXm2LrPbo`Zv$ufs zFHdUb8J6JT?de4ZA;88YAZg8sLLh;lFd*)_Ny3+|hP5JG129bCfPo5A*F*!(4An0E z*(_C7grcBW1VSC5iGraa7z73*uZC4aXrUo$S_qgL0;OyV*7b5B06A{!d2QcdLBoLz zH3D#p5%M(=&&(|VxLE<&m_18VybIXE8VY3TL4jnpZy7?aPaGX-wu-kTzhp6BiFpB1 z7u12IZDslprW%6Zr!!9%8A)@DeJfLcv1-W7P8|>FI0s8w1PH;_>#xLr zOp{+R`Ax|mxPAl9uT_8G`W2Jkl>C9~H{kqQ^#`tBG5Jl&AGm%4&aYL!gKJYS6!1qk z{EdyD#Q<^hAC~_ne$u}+hIcez0SXi_)})szZLPjqm05p ze zk%=p%dn~A^gS`yTALUv#Id|>&C;q9}S99rvJ)%3#IfTi;rA8Uv-bcAFWhK7K`IP_K zFGhxj#j?bq!gr>g~>nwn4-IBzlXN&D! z()40@rEU54E`)Wl4K)awl@30)^_OWI&B08Q@1BW&OZilxQk%6m&Hv+vqin;glU*-` zyAKPWmfSb86sgx-{(yrN9hs{_l%9K>h#*|N# zAyudK0arp)TPhbqoB7lvM|8X!%h8;)INW;vW(O&G8gWQcd^`W%Ff|g8nVH#|(V`_X z)h0!FOv{pVrnL}yfXei1`lez|0|--RJd~QrzYt1gR(b=9J~(}^PE|%yqp0I-Sjz^* z;f4_@gCu{^La}`kGg9W$r_AudlES)OW|un{*0VR`GYgE<*o~-ZnxYuce8)_Fmjg>? zHyxcuQoZG677j7dR{x6)@$pRZ^7J~URZAH~TM9RlHxorcN%Xxv{33cW@wB;*LHq$& zjVP1&$R0uQ_7EP_TyP@@qguokUwuC{%jmqQmTo}cKRh2G@ODAH(vBX)Y9!^e#nOS1>)$xA2@c>MwA?g@n zaH-;Ov$ijgyjgI;P>fE$`$HF!;DdYo(9GgyXYZ4j%N4ceLCkBF3vcIYgAfG;N36Y9 zrkcx%J;f2)S58j!FUDUOCC^{ZT3B`=2$4FO57w>0-z)V*`A`>t73`hw6UYhPQ!+~*V^P!O%%V87W#_fwkU z=3y52e=W>{me-I+V$~s Date: Mon, 18 Oct 2021 11:37:29 -0700 Subject: [PATCH 58/66] make sure only one error event raised when exception thrown, and process exits with code 0 --- lib/p2jcmd.js | 2 +- lib/pdf.js | 2 +- package.json | 3 ++- pdfparser.js | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index 1b5b6e8b..e9824010 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -307,7 +307,7 @@ class PDFCLI { console.log(`${this.inputCount} input files\t${this.successCount} success\t${this.failedCount} fail\t${this.warningCount} warning`); process.nextTick( () => { console.timeEnd(_PRO_TIMER); - process.exit((this.inputCount === this.successCount) ? 0 : 1); + // process.exit((this.inputCount === this.successCount) ? 0 : 1); }); } diff --git a/lib/pdf.js b/lib/pdf.js index 5aeef26d..defd4fc1 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -208,7 +208,7 @@ class PDFJSClass extends EventEmitter { raiseErrorEvent(errMsg) { console.error(errMsg); process.nextTick( () => this.emit("pdfjs_parseDataError", errMsg)); - this.emit("error", errMsg); + // this.emit("error", errMsg); return errMsg; } diff --git a/package.json b/package.json index c2490b1a..4794313a 100644 --- a/package.json +++ b/package.json @@ -35,7 +35,8 @@ "parse-c": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", "parse-m": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", "parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r", - "parse-242": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc" + "parse-242": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", + "parse-e": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc" }, "engines": { "node": ">=14.18.0", diff --git a/pdfparser.js b/pdfparser.js index 30fc00cd..26dd513f 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -74,7 +74,7 @@ class PDFParser extends EventEmitter { // inherit from event emitter #onPDFJSParserDataError(err) { this.#data = null; this.emit("pdfParser_dataError", {"parserError": err}); - this.emit("error", err); + // this.emit("error", err); } #startParsingPDF(buffer) { From 1ffba32236b6052f782645be6e0e33667a5996f3 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Mon, 18 Oct 2021 13:23:00 -0700 Subject: [PATCH 59/66] catch and handle canvas exceptions --- base/core/crypto.js | 6 +++--- base/display/api.js | 9 +++++++-- lib/pdf.js | 11 +++++------ package.json | 3 ++- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/base/core/crypto.js b/base/core/crypto.js index 11f4902f..c46e9281 100755 --- a/base/core/crypto.js +++ b/base/core/crypto.js @@ -553,17 +553,17 @@ var CipherTransformFactory = (function CipherTransformFactoryClosure() { function CipherTransformFactory(dict, fileId, password) { var filter = dict.get('Filter'); if (!isName(filter) || filter.name != 'Standard') - error('unknown encryption method'); + error('Error: unknown encryption method'); this.dict = dict; var algorithm = dict.get('V'); if (!isInt(algorithm) || (algorithm != 1 && algorithm != 2 && algorithm != 4)) - error('unsupported encryption algorithm'); + error('Error: unsupported encryption algorithm'); this.algorithm = algorithm; var keyLength = dict.get('Length') || 40; if (!isInt(keyLength) || keyLength < 40 || (keyLength % 8) !== 0) - error('invalid key length'); + error('Error: invalid key length'); // prepare keys var ownerPassword = stringToBytes(dict.get('O')).subarray(0, 32); var userPassword = stringToBytes(dict.get('U')).subarray(0, 32); diff --git a/base/display/api.js b/base/display/api.js index 344cb20f..c47a758e 100755 --- a/base/display/api.js +++ b/base/display/api.js @@ -412,8 +412,13 @@ var PDFPageProxy = (function PDFPageProxyClosure() { return; } stats.time('Rendering'); - internalRenderTask.initalizeGraphics(transparency); - internalRenderTask.operatorListChanged(); + try {//MQZ. catch canvas drawing exceptions + internalRenderTask.initalizeGraphics(transparency); + internalRenderTask.operatorListChanged(); + } + catch(err) { + complete(err); + } }, function pageDisplayReadPromiseError(reason) { complete(reason); diff --git a/lib/pdf.js b/lib/pdf.js index defd4fc1..59f1bfff 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -148,9 +148,8 @@ class PDFPageParser { this.renderingState = PDFPageParser.RenderingStates.FINISHED; if (error) { - errorCallBack('An error occurred while rendering the page ' + (this.id + 1) + - ':\n' + error.message + - ':\n' + error.stack ); + console.error(error); + errorCallBack(`Error: Page ${this.id + 1}: ${error.message}`); } else { if (this.ptiParser) { @@ -161,7 +160,7 @@ class PDFPageParser { _.extend(this, ctx.canvas); this.stats = this.pdfPage.stats; - nodeUtil.p2jinfo('page ' + (this.id + 1) + ' is rendered successfully.'); + nodeUtil.p2jinfo(`Success: Page ${this.id + 1}`); callback(); } } @@ -224,7 +223,7 @@ class PDFJSClass extends EventEmitter { const parameters = {password: password, data: arrayBuffer}; PDFJS.getDocument(parameters).then( pdfDocument => this.load(pdfDocument, 1), - error => this.raiseErrorEvent("An error occurred while parsing the PDF: " + error) + error => this.raiseErrorEvent(error) ); }; @@ -342,7 +341,7 @@ class PDFJSClass extends EventEmitter { continueOnNextPage.call(this); } }, - errMsg => this.raiseErrorEvent("parsePage error:" + errMsg) + errMsg => this.raiseErrorEvent(errMsg) ); } diff --git a/package.json b/package.json index 4794313a..0a351649 100644 --- a/package.json +++ b/package.json @@ -36,7 +36,8 @@ "parse-m": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", "parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r", "parse-242": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", - "parse-e": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc" + "parse-e": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc", + "parse-e2": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc" }, "engines": { "node": ">=14.18.0", From 75871bb03385c019b7c1e775b6c54ecef3ca8a50 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sun, 24 Oct 2021 09:27:14 -0700 Subject: [PATCH 60/66] accommodate higher version of npm --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 0a351649..2186359e 100644 --- a/package.json +++ b/package.json @@ -41,7 +41,7 @@ }, "engines": { "node": ">=14.18.0", - "npm": "~6.14.15" + "npm": ">=6.14.15" }, "bin": { "pdf2json": "./bin/pdf2json" From 3369e1bc4a79980d91185adc1a4e4c3dc9f21644 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sun, 24 Oct 2021 09:52:55 -0700 Subject: [PATCH 61/66] optional chainning and nullish coalescing for metadata null case --- lib/pdf.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pdf.js b/lib/pdf.js index 59f1bfff..7b6db72f 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -261,7 +261,7 @@ class PDFJSClass extends EventEmitter { return this.pdfDocument.getMetadata().then( data => { this.documentInfo = data.info; - this.metadata = data.metadata.metadata; + this.metadata = data.metadata?.metadata ?? {}; this.parseMetaData(); }, error => this.raiseErrorEvent("pdfDocument.getMetadata error: " + error) From 0eaa7a23f71428b52fdb1b1262e8827ccf7c21fe Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sun, 24 Oct 2021 14:35:20 -0700 Subject: [PATCH 62/66] prep for v1.3.1 release --- package.json | 2 +- pdfparser.js | 12 +++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/package.json b/package.json index 2186359e..c0085144 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.3.0", + "version": "1.3.1", "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", "keywords": [ "pdf", diff --git a/pdfparser.js b/pdfparser.js index 26dd513f..05bed84c 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -9,13 +9,11 @@ const fs = require("fs"), class PDFParser extends EventEmitter { // inherit from event emitter - //private static - static #nextId = 0; + //private static static #maxBinBufferCount = 10; static #binBuffer = {}; - //private - #id = 0; + //private #password = ""; #context = null; // service context object, only used in Web Service project; null in command line @@ -33,8 +31,6 @@ class PDFParser extends EventEmitter { // inherit from event emitter super(); // private - this.#id = PDFParser.#nextId++; - // service context object, only used in Web Service project; null in command line this.#context = context; this.#fq = async.queue( (task, callback) => { @@ -50,12 +46,10 @@ class PDFParser extends EventEmitter { // inherit from event emitter this.#password = password; } - get id() { return this.#id; } - get name() { return `${PDFParser.name}_${this.#id}`; } + //public getter get data() { return this.#data; } get binBufferKey() { return this.#pdfFilePath + this.#pdfFileMTime; } - get colorDict() {return kColors}; get fontFaceDict() { return kFontFaces; } get fontStyleDict() { return kFontStyles; } From 06a405f50c39f9a9b3c22d59ad0948c65c9758a7 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Fri, 29 Oct 2021 15:30:16 -0700 Subject: [PATCH 63/66] remove lodash, prep for v2 release --- base/display/canvas.js | 6 +- lib/p2jcmd.js | 19 ++-- lib/pdf.js | 32 +++--- lib/pdfcanvas.js | 13 ++- lib/pdffield.js | 29 +++--- lib/pdffont.js | 39 ++++--- package-lock.json | 231 +---------------------------------------- package.json | 4 +- pdfparser.js | 16 +-- readme.md | 36 ++++--- 10 files changed, 97 insertions(+), 328 deletions(-) diff --git a/base/display/canvas.js b/base/display/canvas.js index 09d31890..55149342 100755 --- a/base/display/canvas.js +++ b/base/display/canvas.js @@ -1099,11 +1099,9 @@ var CanvasGraphics = (function CanvasGraphicsClosure() { //MQZ Dec.04.2013 handles leading word spacing var tx = 0; if (wordSpacing !== 0) { - var firstGlyph = _.find(glyphs, function(g) { return _.isObject(g);}); + var firstGlyph = glyphs.filter(g => g && ('fontChar' in g || 'unicode' in g))[0]; if (firstGlyph && (firstGlyph.fontChar === ' ' || firstGlyph.unicode === ' ')) { - if (_.find(glyphs, function(g) { return _.isObject(g) && g.unicode !== ' ';})) { - tx = wordSpacing * fontSize * textHScale; - } + tx = wordSpacing * fontSize * textHScale; } } diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index e9824010..1ea02ca6 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -2,7 +2,6 @@ const nodeUtil = require("util"), fs = require("fs"), path = require("path"), - _ = require("lodash"), async = require("async"), {ParserStream, StringifyStream} = require("./parserstream"), pkInfo = require("../package.json"), @@ -32,15 +31,15 @@ const yargs = require('yargs') .describe('r', '(optional) when specified, will process and parse with buffer/object transform stream rather than file system (Experimental).\n'); const argv = yargs.argv; -const ONLY_SHOW_VERSION = _.has(argv, 'v'); -const ONLY_SHOW_HELP = _.has(argv, 'h'); -const VERBOSITY_LEVEL = (_.has(argv, 's') ? 0 : 5); -const HAS_INPUT_DIR_OR_FILE = _.has(argv, 'f'); - -const PROCESS_RAW_TEXT_CONTENT = _.has(argv, 'c'); -const PROCESS_FIELDS_CONTENT = _.has(argv, 't'); -const PROCESS_MERGE_BROKEN_TEXT_BLOCKS = _.has(argv, 'm'); -const PROCESS_WITH_STREAM = _.has(argv, 'r'); +const ONLY_SHOW_VERSION = ('v' in argv); +const ONLY_SHOW_HELP = ('h' in argv); +const VERBOSITY_LEVEL = (('s' in argv) ? 0 : 5); +const HAS_INPUT_DIR_OR_FILE = ('f' in argv); + +const PROCESS_RAW_TEXT_CONTENT = ('c' in argv); +const PROCESS_FIELDS_CONTENT = ('t' in argv); +const PROCESS_MERGE_BROKEN_TEXT_BLOCKS = ('m' in argv); +const PROCESS_WITH_STREAM = ('r' in argv); const INPUT_DIR_OR_FILE = argv.f; diff --git a/lib/pdf.js b/lib/pdf.js index 7b6db72f..16061794 100644 --- a/lib/pdf.js +++ b/lib/pdf.js @@ -2,7 +2,6 @@ const nodeUtil = require("util"), {EventEmitter} = require("events"), {Blob} = require("buffer"), fs = require("fs"), - _ = require("lodash"), DOMParser = require("@xmldom/xmldom").DOMParser, PDFCanvas = require("./pdfcanvas"), PDFUnit = require("./pdfunit"), @@ -79,8 +78,10 @@ class PDFPageParser { scale = 0; viewport = null; renderingState = -1; + Fields = null; Boxsets = null; + ctxCanvas = null; #_addField (field) { if (!PDFField.isFormElement(field)) { @@ -110,15 +111,15 @@ class PDFPageParser { this.Fields = []; //form elements: radio buttons and check boxes this.Boxsets = []; + this.ctxCanvas = {}; } - get width() { - return PDFUnit.toFormX(this.viewport.width); - } - - get height() { - return PDFUnit.toFormY(this.viewport.height); - } + get width() { return PDFUnit.toFormX(this.viewport.width); } + get height() { return PDFUnit.toFormY(this.viewport.height); } + get HLines() { return this.ctxCanvas.HLines; } + get VLines() { return this.ctxCanvas.VLines; } + get Fills() { return this.ctxCanvas.Fills; } + get Texts() { return this.ctxCanvas.Texts; } destroy() { this.pdfPage.destroy(); @@ -127,6 +128,7 @@ class PDFPageParser { this.ptiParser = null; this.Fields = null; this.Boxsets = null; + this.ctxCanvas = null; } getPagePoint(x, y) { @@ -154,10 +156,10 @@ class PDFPageParser { else { if (this.ptiParser) { const extraFields = this.ptiParser.getFields(parseInt(this.id) + 1); - _.each(extraFields, _.bind(this.#_addField, this)); + extraFields.forEach( field => this.#_addField(field) ); } - - _.extend(this, ctx.canvas); + + this.ctxCanvas = ctx.canvas; this.stats = this.pdfPage.stats; nodeUtil.p2jinfo(`Success: Page ${this.id + 1}`); @@ -174,7 +176,7 @@ class PDFPageParser { data => { this.pdfPage.getAnnotations().then( fields => { - _.each(fields, _.bind(this.#_addField, this)); + fields.forEach(field => this.#_addField(field)); pageViewDrawCallback.call(this, null); }, err => errorCallBack("pdfPage.getAnnotations error:" + err)); @@ -316,7 +318,7 @@ class PDFJSClass extends EventEmitter { Height: pageParser.height, HLines: pageParser.HLines, VLines: pageParser.VLines, - Fills:pageParser.Fills, + Fills: pageParser.Fills, //needs to keep current default output format, text content will output to a separate file if '-c' command line argument is set // Content:pdfPage.getTextContent(), Texts: pageParser.Texts, @@ -350,9 +352,9 @@ class PDFJSClass extends EventEmitter { if (!this.needRawText) return retVal; - _.each(this.rawTextContents, function(textContent, index) { + this.rawTextContents.forEach( (textContent, index) => { let prevText = null; - _.each(textContent.bidiTexts, function(textObj, idx) { + textContent.bidiTexts.forEach( (textObj, idx) => { if (prevText) { if (Math.abs(textObj.y - prevText.y) <= 9) { prevText.str += textObj.str; diff --git a/lib/pdfcanvas.js b/lib/pdfcanvas.js index de2c2ab8..39901ec7 100644 --- a/lib/pdfcanvas.js +++ b/lib/pdfcanvas.js @@ -1,5 +1,4 @@ const nodeUtil = require("util"), - _ = require('lodash'), PDFLine = require('./pdfline'), PDFFill = require('./pdffill'), PDFFont = require('./pdffont'); @@ -187,13 +186,13 @@ class CanvasRenderingContext2D_ { this.miterLimit = 1; this.globalAlpha = 1; - if (!_.has(canvasTarget, "HLines") || !_.isArray(canvasTarget.HLines)) + if (!("HLines" in canvasTarget) || !Array.isArray(canvasTarget.HLines)) canvasTarget.HLines = []; - if (!_.has(canvasTarget, "VLines") || !_.isArray(canvasTarget.VLines)) + if (!("VLines" in canvasTarget) || !Array.isArray(canvasTarget.VLines)) canvasTarget.VLines = []; - if (!_.has(canvasTarget, "Fills") || !_.isArray(canvasTarget.Fills)) + if (!("Fills" in canvasTarget) || !Array.isArray(canvasTarget.Fills)) canvasTarget.Fills = []; - if (!_.has(canvasTarget, "Texts") || !_.isArray(canvasTarget.Texts)) + if (!("Texts" in canvasTarget) || !Array.isArray(canvasTarget.Texts)) canvasTarget.Texts = []; this.canvas = canvasTarget; @@ -210,7 +209,7 @@ class CanvasRenderingContext2D_ { //private helper methods #drawPDFLine(p1, p2, lineWidth, color) { - let dashedLine = _.isArray(this.dashArray) && (this.dashArray.length > 1); + let dashedLine = Array.isArray(this.dashArray) && (this.dashArray.length > 1); let pL = new PDFLine(p1.x, p1.y, p2.x, p2.y, lineWidth, color, dashedLine); pL.processLine(this.canvas); } @@ -265,7 +264,7 @@ class CanvasRenderingContext2D_ { } setFont(fontObj) { - if ((!!this.currentFont) && _.isFunction(this.currentFont.clean)) { + if ((!!this.currentFont) && typeof(this.currentFont.clean) === "function") { this.currentFont.clean(); this.currentFont = null; } diff --git a/lib/pdffield.js b/lib/pdffield.js index 9ff0d0af..be56c76f 100644 --- a/lib/pdffield.js +++ b/lib/pdffield.js @@ -1,5 +1,4 @@ const nodeUtil = require("util"), - _ = require("lodash"), PDFUnit = require("./pdfunit"); const kFBANotOverridable = 0x00000400; // indicates the field is read only by the user @@ -121,11 +120,11 @@ class PDFField { anData.TM = field.alternativeID; } - return _.extend(anData, this.#getFieldPosition(field)); + return Object.assign(anData, this.#getFieldPosition(field)); } #addAlpha(field) { - let anData = _.extend({ + const anData = Object.assign({ style: 48, T: { Name: field.TName || "alpha", @@ -144,7 +143,7 @@ class PDFField { } #addCheckBox(box) { - let anData = _.extend({ + const anData = Object.assign({ style: 48, T: { Name: "box", @@ -156,7 +155,7 @@ class PDFField { } #addRadioButton(box) { - let anData = _.extend({ + const anData = Object.assign({ style: 48, T: { Name: "box", @@ -165,12 +164,12 @@ class PDFField { }, this.#getFieldBaseData(box)); anData.id.Id = box.value; - if (_.has(box, 'checked')) { + if ('checked' in box) { anData.checked = box.checked; } - const rdGroup = _.find(this.Boxsets, boxset => _.has(boxset, 'id') && _.has(boxset.id, 'Id') && (boxset.id.Id === box.fullName)); - if ((!!rdGroup) && (_.has(rdGroup, 'boxes'))) { + const rdGroup = this.Boxsets.filter(boxset => ('id' in boxset) && ('Id' in boxset.id) && (boxset.id.Id === box.fullName))[0]; + if ((!!rdGroup) && ('boxes' in rdGroup)) { rdGroup.boxes.push(anData); } else { @@ -179,7 +178,7 @@ class PDFField { } #addLinkButton(field) { - let anData = _.extend({ + const anData = Object.assign({ style: 48, T: { Name: "link" @@ -193,7 +192,7 @@ class PDFField { } #addSelect(field) { - let anData = _.extend({ + const anData = Object.assign({ style: 48, T: { Name: "alpha", @@ -203,7 +202,7 @@ class PDFField { anData.w -= 0.5; //adjust combobox width anData.PL = {V: [], D: []}; - _.each(field.value, (ele, idx) => { + field.value.forEach( (ele, idx) => { if (Array.isArray(ele)) { anData.PL.D.push(ele[0]); anData.PL.V.push(ele[1]); @@ -253,10 +252,10 @@ class PDFField { }; let retVal = []; - _.each(data.Pages, page => { - _.each(page.Boxsets, boxsets => { + data.Pages.forEach( page => { + page.Boxsets.forEach( boxsets => { if (boxsets.boxes.length > 1) { //radio button - _.each(boxsets.boxes, box => { + boxsets.boxes.forEach( box => { retVal.push({id: boxsets.id.Id, type: "radio", calc: isFieldReadOnly(box), value: box.id.Id}); }); } @@ -265,7 +264,7 @@ class PDFField { } }); - _.each(page.Fields, field => retVal.push(getFieldBase(field))); + page.Fields.forEach(field => retVal.push(getFieldBase(field))); }); return retVal; diff --git a/lib/pdffont.js b/lib/pdffont.js index 5b2168de..0a43a55e 100644 --- a/lib/pdffont.js +++ b/lib/pdffont.js @@ -1,5 +1,4 @@ const nodeUtil = require("util"), - _ = require("lodash"), PDFUnit = require("./pdfunit"), {kFontFaces, kFontStyles} = require("./pdfconst"); @@ -22,9 +21,9 @@ class PDFFont { let bold = false; let nameArray = this.typeName.split('+'); - if (_.isArray(nameArray) && nameArray.length > 1) { + if (Array.isArray(nameArray) && nameArray.length > 1) { subType = nameArray[1].split("-"); - if (_.isArray(subType) && subType.length > 1) { + if (Array.isArray(subType) && subType.length > 1) { let subName = subType[1].toLowerCase(); bold = _boldSubNames.indexOf(subName) >= 0; subType = subType[0]; @@ -264,33 +263,33 @@ class PDFFont { // when this.fontStyleId === -1, it means the text style doesn't match any entry in the dictionary // adding TS to better describe text style [fontFaceId, fontSize, 1/0 for bold, 1/0 for italic]; - let TS = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; - - let clrId = PDFUnit.findColorIndex(color); + const TS = [this.faceIdx, this.fontSize, this.bold?1:0, this.italic?1:0]; + + const clrId = PDFUnit.findColorIndex(color); + const colorObj = (clrId > 0 && clrId < PDFUnit.colorCount()) ? {clr: clrId} : {oc: this.color}; + + let textRun = { + T: this.flash_encode(text), + S: this.fontStyleId, + TS: TS + }; + const rAngle = this.#textRotationAngle(matrix2D); + if (rAngle != 0) { + nodeUtil.p2jinfo(str + ": rotated " + rAngle + " degree."); + textRun = {...textRun, RA: rAngle}; + } let oneText = {x: PDFUnit.toFormX(p.x) - 0.25, y: PDFUnit.toFormY(p.y) - 0.75, w: PDFUnit.toFixedFloat(maxWidth), sw: this.spaceWidth, //font space width, use to merge adjacent text blocks - clr: clrId, A: "left", - R: [{ - T: this.flash_encode(text), - S: this.fontStyleId, - TS: TS - }] + R: [textRun] }; //MQZ.07/29/2013: when color is not in color dictionary, set the original color (oc) - if (clrId < 0) { - oneText = _.extend({oc: color}, oneText); - } + oneText = {...oneText, ...colorObj}; - let rAngle = this.#textRotationAngle(matrix2D); - if (rAngle != 0) { - nodeUtil.p2jinfo(str + ": rotated " + rAngle + " degree."); - _.extend(oneText.R[0], {RA: rAngle}); - } targetData.Texts.push(oneText); } diff --git a/package-lock.json b/package-lock.json index f1b8e81a..3c90a846 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,230 +1,8 @@ { "name": "pdf2json", - "version": "1.3.0", - "lockfileVersion": 2, + "version": "1.3.1", + "lockfileVersion": 1, "requires": true, - "packages": { - "": { - "name": "pdf2json", - "version": "1.3.0", - "bundleDependencies": [ - "async", - "lodash", - "@xmldom/xmldom", - "yargs" - ], - "license": "Apache-2.0", - "dependencies": { - "@xmldom/xmldom": "^0.7.5", - "async": "^3.2.1", - "lodash": "^4.17.21", - "yargs": "^17.2.1" - }, - "bin": { - "pdf2json": "bin/pdf2json" - }, - "devDependencies": {}, - "engines": { - "node": ">=14.18.0", - "npm": "~6.14.15" - } - }, - "node_modules/@xmldom/xmldom": { - "version": "0.7.5", - "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.7.5.tgz", - "integrity": "sha512-V3BIhmY36fXZ1OtVcI9W+FxQqxVLsPKcNjWigIaa81dLC9IolJl5Mt4Cvhmr0flUnjSpTdrbMTSbXqYqV5dT6A==", - "inBundle": true, - "engines": { - "node": ">=10.0.0" - } - }, - "node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "inBundle": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "inBundle": true, - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/async": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/async/-/async-3.2.1.tgz", - "integrity": "sha512-XdD5lRO/87udXCMC9meWdYiR+Nq6ZjUfXidViUZGu2F1MO4T3XwZ1et0hb2++BgLfhyJwy44BGB/yx80ABx8hg==", - "inBundle": true - }, - "node_modules/cliui": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", - "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", - "inBundle": true, - "dependencies": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.0", - "wrap-ansi": "^7.0.0" - } - }, - "node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "inBundle": true, - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "inBundle": true - }, - "node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "inBundle": true - }, - "node_modules/escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", - "inBundle": true, - "engines": { - "node": ">=6" - } - }, - "node_modules/get-caller-file": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", - "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", - "inBundle": true, - "engines": { - "node": "6.* || 8.* || >= 10.*" - } - }, - "node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "inBundle": true, - "engines": { - "node": ">=8" - } - }, - "node_modules/lodash": { - "version": "4.17.21", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==", - "inBundle": true - }, - "node_modules/require-directory": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=", - "inBundle": true, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "inBundle": true, - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "inBundle": true, - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/wrap-ansi": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "inBundle": true, - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/y18n": { - "version": "5.0.8", - "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", - "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", - "inBundle": true, - "engines": { - "node": ">=10" - } - }, - "node_modules/yargs": { - "version": "17.2.1", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.2.1.tgz", - "integrity": "sha512-XfR8du6ua4K6uLGm5S6fA+FIJom/MdJcFNVY8geLlp2v8GYbOXD4EB1tPNZsRn4vBzKGMgb5DRZMeWuFc2GO8Q==", - "inBundle": true, - "dependencies": { - "cliui": "^7.0.2", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.0", - "y18n": "^5.0.5", - "yargs-parser": "^20.2.2" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/yargs-parser": { - "version": "20.2.9", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", - "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==", - "inBundle": true, - "engines": { - "node": ">=10" - } - } - }, "dependencies": { "@xmldom/xmldom": { "version": "0.7.5", @@ -292,11 +70,6 @@ "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==" }, - "lodash": { - "version": "4.17.21", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", - "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" - }, "require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", diff --git a/package.json b/package.json index c0085144..784f1a5e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.3.1", + "version": "2.0.0", "description": "PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js", "keywords": [ "pdf", @@ -48,14 +48,12 @@ }, "dependencies": { "async": "^3.2.1", - "lodash": "^4.17.21", "@xmldom/xmldom": "^0.7.5", "yargs": "^17.2.1" }, "devDependencies": {}, "bundledDependencies": [ "async", - "lodash", "@xmldom/xmldom", "yargs" ], diff --git a/pdfparser.js b/pdfparser.js index 05bed84c..1d48f250 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -1,7 +1,6 @@ const fs = require("fs"), {EventEmitter} = require("events"), - nodeUtil = require("util"), - _ = require("lodash"), + nodeUtil = require("util"), async = require("async"), PDFJS = require("./lib/pdf"), {ParserStream} = require("./lib/parserstream"), @@ -9,6 +8,11 @@ const fs = require("fs"), class PDFParser extends EventEmitter { // inherit from event emitter + //public static + static get colorDict() {return kColors; } + static get fontFaceDict() { return kFontFaces; } + static get fontStyleDict() { return kFontStyles; } + //private static static #maxBinBufferCount = 10; static #binBuffer = {}; @@ -50,10 +54,6 @@ class PDFParser extends EventEmitter { // inherit from event emitter get data() { return this.#data; } get binBufferKey() { return this.#pdfFilePath + this.#pdfFileMTime; } - get colorDict() {return kColors}; - get fontFaceDict() { return kFontFaces; } - get fontStyleDict() { return kFontStyles; } - //private methods, needs to invoked by [funcName].call(this, ...) #onPDFJSParseDataReady(data) { if (!data) { //v1.1.2: data===null means end of parsed data @@ -86,12 +86,12 @@ class PDFParser extends EventEmitter { // inherit from event emitter } #processBinaryCache() { - if (_.has(PDFParser.#binBuffer, this.binBufferKey)) { + if (this.binBufferKey in PDFParser.#binBuffer) { this.#startParsingPDF(); return true; } - const allKeys = _.keys(PDFParser.#binBuffer); + const allKeys = Object.keys(PDFParser.#binBuffer); if (allKeys.length > PDFParser.#maxBinBufferCount) { const idx = this.id % PDFParser.#maxBinBufferCount; const key = allKeys[idx]; diff --git a/readme.md b/readme.md index bae722f4..0d4c2a1b 100644 --- a/readme.md +++ b/readme.md @@ -70,7 +70,7 @@ Or, call directly with buffer: }) ```` -Or, use more granular page level parsing events (v1.3.0) +Or, use more granular page level parsing events (v2.0.0) ````javascript pdfParser.on("readable", meta => console.log("PDF Metadata", meta) ); @@ -122,7 +122,7 @@ Alternatively, you can pipe input and output streams: (requires v1.1.4) inputStream.pipe(new PDFParser()).pipe(new StringifyStream()).pipe(outputStream); ```` -With v1.3.0, last line above changes to +With v2.0.0, last line above changes to ````javascript inputStream.pipe(this.pdfParser.createParserStream()).pipe(new StringifyStream()).pipe(outputStream); ```` @@ -153,7 +153,7 @@ See [p2jcmd.js](https://github.com/modesty/pdf2json/blob/master/lib/p2jcmd.js) f * pdfParser_dataError: will be raised when parsing failed * pdfParser_dataReady: when parsing succeeded -* alternative events: (v1.3.0) +* alternative events: (v2.0.0) * readable: first event dispatched after PDF file metadata is parsed and before processing any page * data: one parsed page succeeded, null means last page has been processed, signle end of data stream * error: exception or error occured @@ -163,7 +163,7 @@ See [p2jcmd.js](https://github.com/modesty/pdf2json/blob/master/lib/p2jcmd.js) f function loadPDF(pdfFilePath); ```` If failed, event "pdfParser_dataError" will be raised with error object: {"parserError": errObj}; -If success, event "pdfParser_dataReady" will be raised with output data object: {"formImage": parseOutput}, which can be saved as json file (in command line) or serialized to json when running in web service. __note__: "formImage" is removed from v1.3.0, see breaking changes for details. +If success, event "pdfParser_dataReady" will be raised with output data object: {"formImage": parseOutput}, which can be saved as json file (in command line) or serialized to json when running in web service. __note__: "formImage" is removed from v2.0.0, see breaking changes for details. * Get all textual content from "pdfParser_dataReady" event handler: ````javascript @@ -182,8 +182,8 @@ returns an array of field objects. Current parsed data has four main sub objects to describe the PDF document. * 'Transcoder': pdf2json version number -* 'Agency': the main text identifier for the PDF document. If Id.AgencyId present, it'll be same, otherwise it'll be set as document title; (_deprecated since v1.3.0, see notes below_) -* 'Id': the XML meta data that embedded in PDF document (_deprecated since v1.3.0, see notes below_) +* 'Agency': the main text identifier for the PDF document. If Id.AgencyId present, it'll be same, otherwise it'll be set as document title; (_deprecated since v2.0.0, see notes below_) +* 'Id': the XML meta data that embedded in PDF document (_deprecated since v2.0.0, see notes below_) * all forms attributes metadata are defined in "Custom" tab of "Document Properties" dialog in Acrobat Pro; * v0.1.22 added support for the following custom properties: * AgencyId: default "unknown"; @@ -191,7 +191,7 @@ Current parsed data has four main sub objects to describe the PDF document. * MC: default false; * Max: default -1; * Parent: parent name, default "unknown"; - * *_v1.3.0_*: 'Agency' and 'Id' are replaced with full metadata, example: for `./test/pdf/fd/form/F1040.pdf`, full metadata is: + * *_v2.0.0_*: 'Agency' and 'Id' are replaced with full metadata, example: for `./test/pdf/fd/form/F1040.pdf`, full metadata is: ````json Meta: { PDFFormatVersion: '1.7', @@ -228,7 +228,7 @@ Current parsed data has four main sub objects to describe the PDF document. Each page object within 'Pages' array describes page elements and attributes with 5 main fields: * 'Height': height of the page in page unit -* 'Width': width of the page in page unit, moved from root to page object in v1.3.0 +* 'Width': width of the page in page unit, moved from root to page object in v2.0.0 * 'HLines': horizontal line array, each line has 'x', 'y' in relative coordinates for positioning, and 'w' for width, plus 'l' for length. Both width and length are in page unit * 'Vline': vertical line array, each line has 'x', 'y' in relative coordinates for positioning, and 'w' for width, plus 'l' for length. Both width and length are in page unit; * v0.4.3 added Line color support. Default is 'black', other wise set in 'clr' if found in color dictionary, or 'oc' field if not found in dictionary; @@ -375,15 +375,15 @@ It does require the client of the payload to have the same dictionary definition [5, 12, 0, 0] //60 ]; ```` -v1.3.0: to access these dictionary programactically, do either +v2.0.0: to access these dictionary programactically, do either ````javascript const {kColors, kFontFaces, kFontStyles} = require("./lib/pdfconst"); ```` -or via getters of your instanace of PDFParser: +or via public static getters of PDFParser: ````javascript - console.dir(this.pdfParser.colorDict); - console.dir(this.pdfParser.fontFaceDict); - console.dir(this.pdfParser.fontStyleDict); + console.dir(PDFParser.colorDict); + console.dir(PDFParser.fontFaceDict); + console.dir(PDFParser.fontStyleDict); ```` ## Interactive Forms Elements @@ -871,22 +871,24 @@ In order to support this auto merging capability, text block objects have an add * v1.1.4 unified event data structure: **only when you handle these top level events, no change if you use commandline** * event "pdfParser_dataError": {"parserError": errObj} - * event "pdfParser_dataReady": {"formImage": parseOutput} __note__: "formImage" is removed from v1.3.0, see breaking changes for details. + * event "pdfParser_dataReady": {"formImage": parseOutput} __note__: "formImage" is removed from v2.0.0, see breaking changes for details. * v1.0.8 fixed [issue 27](https://github.com/modesty/pdf2json/issues/27), it converts x coordinate with the same ratio as y, which is 24 (96/4), rather than 8.7 (96/11), please adjust client renderer accordingly when position all elements' x coordinate. -* v1.3.0 output data field, `Agency` and `Id` are replaced with `Meta`, JSON of the PDF's full metadata. (See above for details). Each page object also added `Width` property besides `Height`. +* v2.0.0 output data field, `Agency` and `Id` are replaced with `Meta`, JSON of the PDF's full metadata. (See above for details). Each page object also added `Width` property besides `Height`. **Major Refactoring** -* v1.3.0 has the major refactoring since 2015. Primary updates including: +* v2.0.0 has the major refactoring since 2015. Primary updates including: * Full PDF metadata support (see page format and breaking changes for details) - * Simplify root properties, besides the addition of `Meta` as root property, unnecessary "formImage" is removed from v1.3.0, also `Width` is move from root to each page object under `Pages`. + * Simplify root properties, besides the addition of `Meta` as root property, unnecessary "formImage" is removed from v2.0.0, also `Width` is move from root to each page object under `Pages`. * Improved Stream support with test _`npm run parse-r`_, plus new events are added to PDF.js, including _`readable`_, _`data`_, _`end`_, _`error`_. These new Readable Stream like events can be optional replacement for customed events (_`pdfjs_parseDataReady`_, and _`pdfjs_parseDataError`_). It offers more granular data chunk flow control, like _`readable`_ with Meta, _`data`_ sequence for each PDF page result, instead of _`pdfjs_parseDataReady`_ combines all pages in one shot. See `./lib/parserstream.js` for more details + * Object with {clr:-1} (like HLines, VLines, Fills, etc.) is replaced with {oc: "#xxxxxx"}. If `clr` index value is valid, then `oc` (original color) field is removed. * Greater performance, near ~20% improvements with PDFs under _test_ directory * Better exception handling, fixes a few uncaught exception errors * More test coverage, 4 more test scripts added, see _package.json_ for details * Easier access to dictionaries, including color, font face and font style, see Dictionary reference section for details * Refactor to ES6 class for major entry modules + * Dependency is removed: lodash. * Upgrade to Node v14.18.0 LTSs ### Install on Ubuntu From ef0fe963ca88d6e4e52ab7424a85ef5f79e96b02 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Sat, 30 Oct 2021 16:24:23 -0700 Subject: [PATCH 64/66] remove dependency: async --- bin/pdf2json | 4 +--- lib/p2jcmd.js | 53 +++++++++++++++++++++++------------------------ package-lock.json | 7 +------ package.json | 2 -- pdfparser.js | 45 +++++++++++++++++----------------------- readme.md | 2 +- 6 files changed, 48 insertions(+), 65 deletions(-) diff --git a/bin/pdf2json b/bin/pdf2json index 9ccf33bc..8e9b6868 100755 --- a/bin/pdf2json +++ b/bin/pdf2json @@ -1,6 +1,4 @@ #!/usr/bin/env node -'use strict'; - -var P2JCMD = require('../lib/p2jcmd'); +const P2JCMD = require('../lib/p2jcmd'); new P2JCMD().start(); diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index 1ea02ca6..f93b7a10 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -1,8 +1,6 @@ - const nodeUtil = require("util"), fs = require("fs"), path = require("path"), - async = require("async"), {ParserStream, StringifyStream} = require("./parserstream"), pkInfo = require("../package.json"), PDFParser = require("../pdfparser"); @@ -96,35 +94,36 @@ class PDFProcessor { this.pdfParser.getAllFieldsTypesStream().pipe(new StringifyStream()).pipe(outputStream); } - #processAdditionalStreams(outputTasks, callback) { - if (PROCESS_FIELDS_CONTENT) {//needs to generate fields.json file - outputTasks.push(cbFunc => this.#generateFieldsTypesStream(cbFunc)); - } - if (PROCESS_RAW_TEXT_CONTENT) {//needs to generate content.txt file - outputTasks.push(cbFunc => this.#generateRawTextContentStream(cbFunc)); - } - if (PROCESS_MERGE_BROKEN_TEXT_BLOCKS) {//needs to generate json file with merged broken text blocks - outputTasks.push(cbFunc => this.#generateMergedTextBlocksStream(cbFunc)); - } - - if (outputTasks.length > 0) { - async.series(outputTasks, (err, results) => {//additional streams process complete - if (err) { - this.curCLI.addStatusMsg(err, `[+]=> ${err}`); - } else { - results.forEach( r => this.curCLI.addStatusMsg(null, `[+]=> ${r}`)); - } - this.#continue(callback); - }); - } - else { - this.#continue(callback); - } + #processAdditionalStreams(callback) { + const outputTasks = []; + if (PROCESS_FIELDS_CONTENT) {//needs to generate fields.json file + outputTasks.push(cbFunc => this.#generateFieldsTypesStream(cbFunc)); + } + if (PROCESS_RAW_TEXT_CONTENT) {//needs to generate content.txt file + outputTasks.push(cbFunc => this.#generateRawTextContentStream(cbFunc)); + } + if (PROCESS_MERGE_BROKEN_TEXT_BLOCKS) {//needs to generate json file with merged broken text blocks + outputTasks.push(cbFunc => this.#generateMergedTextBlocksStream(cbFunc)); + } + + let taskId = 0; + function sequenceTask() { + if (taskId < outputTasks.length) { + outputTasks[taskId]((err, ret) => { + this.curCLI.addStatusMsg(err, `[+]=> ${ret}`); + taskId++; + sequenceTask.call(this); + }); + } + else + this.#continue(callback); + } + sequenceTask.call(this); } #onPrimarySuccess(callback) { this.curCLI.addResultCount(); - this.#processAdditionalStreams([], callback); + this.#processAdditionalStreams(callback); } #onPrimaryError(err, callback) { diff --git a/package-lock.json b/package-lock.json index 3c90a846..c7c19c56 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "pdf2json", - "version": "1.3.1", + "version": "2.0.0", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -22,11 +22,6 @@ "color-convert": "^2.0.1" } }, - "async": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/async/-/async-3.2.1.tgz", - "integrity": "sha512-XdD5lRO/87udXCMC9meWdYiR+Nq6ZjUfXidViUZGu2F1MO4T3XwZ1et0hb2++BgLfhyJwy44BGB/yx80ABx8hg==" - }, "cliui": { "version": "7.0.4", "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", diff --git a/package.json b/package.json index 784f1a5e..8d9a4df0 100644 --- a/package.json +++ b/package.json @@ -47,13 +47,11 @@ "pdf2json": "./bin/pdf2json" }, "dependencies": { - "async": "^3.2.1", "@xmldom/xmldom": "^0.7.5", "yargs": "^17.2.1" }, "devDependencies": {}, "bundledDependencies": [ - "async", "@xmldom/xmldom", "yargs" ], diff --git a/pdfparser.js b/pdfparser.js index 1d48f250..340be777 100644 --- a/pdfparser.js +++ b/pdfparser.js @@ -1,7 +1,7 @@ const fs = require("fs"), + { readFile } = require("fs/promises"), {EventEmitter} = require("events"), nodeUtil = require("util"), - async = require("async"), PDFJS = require("./lib/pdf"), {ParserStream} = require("./lib/parserstream"), {kColors, kFontFaces, kFontStyles} = require("./lib/pdfconst"); @@ -21,7 +21,6 @@ class PDFParser extends EventEmitter { // inherit from event emitter #password = ""; #context = null; // service context object, only used in Web Service project; null in command line - #fq = null; //async queue for reading files #pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started #pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache @@ -37,9 +36,6 @@ class PDFParser extends EventEmitter { // inherit from event emitter // private // service context object, only used in Web Service project; null in command line this.#context = context; - this.#fq = async.queue( (task, callback) => { - fs.readFile(task.path, callback); - }, 1); this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache @@ -104,37 +100,34 @@ class PDFParser extends EventEmitter { // inherit from event emitter return false; } - #processPDFContent(err, data) { - nodeUtil.p2jinfo("Load PDF file status:" + (!!err ? "Error!" : "Success!") ); - if (err) { - this.#data = null; - this.emit("pdfParser_dataError", err); - } - else { - PDFParser.#binBuffer[this.binBufferKey] = data; - this.#startParsingPDF(); - } - }; - //public APIs createParserStream() { return new ParserStream(this, {objectMode: true, bufferSize: 64 * 1024}); } - loadPDF(pdfFilePath, verbosity) { + async loadPDF(pdfFilePath, verbosity) { nodeUtil.verbosity(verbosity || 0); nodeUtil.p2jinfo("about to load PDF file " + pdfFilePath); this.#pdfFilePath = pdfFilePath; - this.#pdfFileMTime = fs.statSync(pdfFilePath).mtimeMs; - if (this.#processFieldInfoXML) { - this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath); - } - - if (this.#processBinaryCache()) - return; - this.#fq.push({path: pdfFilePath}, this.#processPDFContent.bind(this)); + try { + this.#pdfFileMTime = fs.statSync(pdfFilePath).mtimeMs; + if (this.#processFieldInfoXML) { + this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath); + } + + if (this.#processBinaryCache()) + return; + + PDFParser.#binBuffer[this.binBufferKey] = await readFile(pdfFilePath); + nodeUtil.p2jinfo(`Load OK: ${pdfFilePath}`); + this.#startParsingPDF(); + } + catch(err) { + nodeUtil.p2jerror(`Load Failed: ${pdfFilePath} - ${err}`); + this.emit("pdfParser_dataError", err); + } } // Introduce a way to directly process buffers without the need to write it to a temporary file diff --git a/readme.md b/readme.md index 0d4c2a1b..1077062b 100644 --- a/readme.md +++ b/readme.md @@ -888,7 +888,7 @@ In order to support this auto merging capability, text block objects have an add * More test coverage, 4 more test scripts added, see _package.json_ for details * Easier access to dictionaries, including color, font face and font style, see Dictionary reference section for details * Refactor to ES6 class for major entry modules - * Dependency is removed: lodash. + * Dependencies removed: lodash, async * Upgrade to Node v14.18.0 LTSs ### Install on Ubuntu From 1271d2a62fe6f18631f64cf1e4d9745d5cd18685 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Fri, 12 Nov 2021 17:48:50 -0800 Subject: [PATCH 65/66] remove dependency: yargs --- lib/p2jcmd.js | 31 ++++------- lib/p2jcmdarg.js | 136 ++++++++++++++++++++++++++++++++++++++++++++++ package-lock.json | 113 -------------------------------------- package.json | 24 ++++---- readme.md | 2 +- test/p2j.one.sh | 2 +- 6 files changed, 160 insertions(+), 148 deletions(-) create mode 100644 lib/p2jcmdarg.js diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index f93b7a10..1e32a33f 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -7,26 +7,17 @@ const nodeUtil = require("util"), const _PRO_TIMER = `${pkInfo.name}@${pkInfo.version} [${pkInfo.homepage}]`; -const yargs = require('yargs') - .usage("\n" + _PRO_TIMER + "\n\nUsage: $0 -f|--file [-o|output_dir]") - .alias('v', 'version') - .describe('v', 'Display version.\n') - .alias('h', 'help') - .describe('h', 'Display brief help information.\n') - .alias('f', 'file') - .describe('f', '(required) Full path of input PDF file or a directory to scan for all PDF files. When specifying a PDF file name, it must end with .PDF, otherwise it would be treated as a input directory.\n') - .alias('o', 'output_dir') - .describe('o', '(optional) Full path of output directory, must already exist. Current JSON file in the output folder will be replaced when file name is same.\n') - .alias('s', 'silent') - .describe('s', '(optional) when specified, will only log errors, otherwise verbose.\n') - .alias('t', 'fieldTypes') - .describe('t', '(optional) when specified, will generate .fields.json that includes fields ids and types.\n') - .alias('c', 'content') - .describe('c', '(optional) when specified, will generate .content.txt that includes text content from PDF.\n') - .alias('m', 'merge') - .describe('m', '(optional) when specified, will generate .merged.json that includes auto-merged broken text blocks from PDF (Experimental).\n') - .alias('r', 'stream') - .describe('r', '(optional) when specified, will process and parse with buffer/object transform stream rather than file system (Experimental).\n'); +const yargs = require('./p2jcmdarg') + .usage(`\n${_PRO_TIMER}\n\nUsage: ${pkInfo.name} -f|--file [-o|output_dir]`) + .alias('v', 'version', 'Display version.') + .alias('h', 'help', 'Display brief help information.') + .alias('f', 'file', '(required) Full path of input PDF file or a directory to scan for all PDF files.\n\t\t When specifying a PDF file name, it must end with .PDF, otherwise it would be treated as a input directory.') + .alias('o', 'output', '(optional) Full path of output directory, must already exist.\n\t\t Current JSON file in the output folder will be replaced when file name is same.') + .alias('s', 'silent', '(optional) when specified, will only log errors, otherwise verbose.') + .alias('t', 'fieldTypes', '(optional) when specified, will generate .fields.json that includes fields ids and types.') + .alias('c', 'content', '(optional) when specified, will generate .content.txt that includes text content from PDF.') + .alias('m', 'merge', '(optional) when specified, will generate .merged.json that includes auto-merged broken text blocks from PDF.') + .alias('r', 'stream', '(optional) when specified, will process and parse with buffer/object transform stream rather than file system.'); const argv = yargs.argv; const ONLY_SHOW_VERSION = ('v' in argv); diff --git a/lib/p2jcmdarg.js b/lib/p2jcmdarg.js new file mode 100644 index 00000000..c7cc0084 --- /dev/null +++ b/lib/p2jcmdarg.js @@ -0,0 +1,136 @@ +class CLIArgParser { + args = []; + #aliases = {}; + + #usage = ""; + #argv = null; + + // constructor + constructor(args) { + if (Array.isArray(args)) + this.args = args; + } + + usage(usageMsg) { + this.#usage = usageMsg + '\n\nOptions:\n'; + return this; + } + + alias(key, name, description) { + this.#aliases[key] = {name, description}; + return this; + } + + showHelp() { + let helpMsg = this.#usage; + for (const [key, value] of Object.entries(this.#aliases)) { + helpMsg += `-${key},--${value.name}\t ${value.description}\n`; + } + console.log(helpMsg); + } + + get argv() { + return this.#argv ? this.#argv : this.#parseArgv(); + } + + static isNumber (x) { + if (typeof x === 'number') + return true; + if (/^0x[0-9a-f]+$/i.test(x)) + return true; + return /^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(x); + } + + #setArg(key, val, argv) { + const value = CLIArgParser.isNumber(val) ? Number(val) : val; + this.#setKey(argv, key.split('.'), value); + + const aliasKey = (key in this.#aliases) ? [this.#aliases[key].name] : []; + if (aliasKey.length < 1) { + for (const [akey, avalue] of Object.entries(this.#aliases)) { + if (key === avalue.name) { + aliasKey.push(akey); + break; + } + } + } + aliasKey.forEach(x => this.#setKey(argv, x.split('.'), value)); + } + + #setKey(obj, keys, value) { + let o = obj; + for (let i = 0; i < keys.length-1; i++) { + let key = keys[i]; + if (key === '__proto__') return; + if (o[key] === undefined) o[key] = {}; + if (o[key] === Object.prototype || o[key] === Number.prototype + || o[key] === String.prototype) o[key] = {}; + if (o[key] === Array.prototype) o[key] = []; + o = o[key]; + } + + let key = keys[keys.length - 1]; + if (key === '__proto__') return; + if (o === Object.prototype || o === Number.prototype + || o === String.prototype) o = {}; + if (o === Array.prototype) o = []; + if (o[key] === undefined) { + o[key] = value; + } + else if (Array.isArray(o[key])) { + o[key].push(value); + } + else { + o[key] = [ o[key], value ]; + } + } + + #parseArgv() { + let aliases=this.#aliases, args = this.args; + let argv = {}; + + for (let i = 0; i < args.length; i++) { + let arg = args[i]; + + if (/^--.+/.test(arg)) { + let key = arg.match(/^--(.+)/)[1]; + let next = args[i + 1]; + if (next !== undefined && !/^-/.test(next)) { + this.#setArg(key, next, argv); + i++; + } + else if (/^(true|false)$/.test(next)) { + this.#setArg(key, next === 'true', argv); + i++; + } + else { + this.#setArg(key, true, argv); + } + } + else if (/^-[^-]+/.test(arg)) { + let key = arg.slice(-1)[0]; + if (key !== '-') { + if (args[i+1] && !/^(-|--)[^-]/.test(args[i+1])) { + this.#setArg(key, args[i+1], argv); + i++; + } + else if (args[i+1] && /^(true|false)$/.test(args[i+1])) { + this.#setArg(key, args[i+1] === 'true', argv); + i++; + } + else { + this.#setArg(key, true, argv); + } + } + } + else { + console.warn("Unknow CLI options:", arg); + } + } + + this.#argv = argv; + return argv; + } +} + +module.exports = new CLIArgParser(process.argv.slice(2)); \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index c7c19c56..0024257a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,119 +8,6 @@ "version": "0.7.5", "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.7.5.tgz", "integrity": "sha512-V3BIhmY36fXZ1OtVcI9W+FxQqxVLsPKcNjWigIaa81dLC9IolJl5Mt4Cvhmr0flUnjSpTdrbMTSbXqYqV5dT6A==" - }, - "ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==" - }, - "ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "requires": { - "color-convert": "^2.0.1" - } - }, - "cliui": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", - "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", - "requires": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.0", - "wrap-ansi": "^7.0.0" - } - }, - "color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "requires": { - "color-name": "~1.1.4" - } - }, - "color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" - }, - "emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" - }, - "escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==" - }, - "get-caller-file": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", - "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==" - }, - "is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==" - }, - "require-directory": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=" - }, - "string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "requires": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - } - }, - "strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "requires": { - "ansi-regex": "^5.0.1" - } - }, - "wrap-ansi": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "requires": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - } - }, - "y18n": { - "version": "5.0.8", - "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", - "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==" - }, - "yargs": { - "version": "17.2.1", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.2.1.tgz", - "integrity": "sha512-XfR8du6ua4K6uLGm5S6fA+FIJom/MdJcFNVY8geLlp2v8GYbOXD4EB1tPNZsRn4vBzKGMgb5DRZMeWuFc2GO8Q==", - "requires": { - "cliui": "^7.0.2", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.0", - "y18n": "^5.0.5", - "yargs-parser": "^20.2.2" - } - }, - "yargs-parser": { - "version": "20.2.9", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", - "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==" } } } diff --git a/package.json b/package.json index 8d9a4df0..48da5924 100644 --- a/package.json +++ b/package.json @@ -29,15 +29,15 @@ "scripts": { "test": "cd ./test && sh p2j.forms.sh", "test-misc": "cd ./test && sh p2j.one.sh misc . \"Expected: 5 success, 2 exception with stack trace\" ", - "parse": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form", - "parse-s": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s", - "parse-t": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", - "parse-c": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", - "parse-m": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", - "parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r", - "parse-242": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", - "parse-e": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc", - "parse-e2": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc" + "parse": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form", + "parse-s": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s", + "parse-t": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", + "parse-c": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", + "parse-m": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", + "parse-r": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r", + "parse-242": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", + "parse-e": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc", + "parse-e2": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc" }, "engines": { "node": ">=14.18.0", @@ -47,13 +47,11 @@ "pdf2json": "./bin/pdf2json" }, "dependencies": { - "@xmldom/xmldom": "^0.7.5", - "yargs": "^17.2.1" + "@xmldom/xmldom": "^0.7.5" }, "devDependencies": {}, "bundledDependencies": [ - "@xmldom/xmldom", - "yargs" + "@xmldom/xmldom" ], "maintainers": [ { diff --git a/readme.md b/readme.md index 1077062b..e330c677 100644 --- a/readme.md +++ b/readme.md @@ -888,7 +888,7 @@ In order to support this auto merging capability, text block objects have an add * More test coverage, 4 more test scripts added, see _package.json_ for details * Easier access to dictionaries, including color, font face and font style, see Dictionary reference section for details * Refactor to ES6 class for major entry modules - * Dependencies removed: lodash, async + * Dependencies removed: lodash, async and yargs * Upgrade to Node v14.18.0 LTSs ### Install on Ubuntu diff --git a/test/p2j.one.sh b/test/p2j.one.sh index 7d1b759f..85e92e5c 100755 --- a/test/p2j.one.sh +++ b/test/p2j.one.sh @@ -16,7 +16,7 @@ echo "-----------------------------------------------------" echo "Update $AGENCY_NAME PDF" echo "-----------------------------------------------------" mkdir -p $OUT_DIR_BASE/$AGENCY_NAME/$FORM_BASE -node --trace-deprecation $PDF2JSON -f $IN_DIR_BASE/$AGENCY_NAME/$FORM_BASE -o $OUT_DIR_BASE/$AGENCY_NAME/$FORM_BASE -s -t -c -m +node --trace-deprecation --trace-warnings $PDF2JSON -f $IN_DIR_BASE/$AGENCY_NAME/$FORM_BASE -o $OUT_DIR_BASE/$AGENCY_NAME/$FORM_BASE -s -t -c -m # diff -rq $OUT_DIR_BASE$AGENCY_NAME/$FORM_BASE/ $DATA_DIR_BASE$AGENCY_NAME/$FORM_BASE/ echo "-----------------------------------------------------" From 17ef9da1d616d1668a9365e419023f10ed5978b3 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Fri, 19 Nov 2021 17:55:29 -0800 Subject: [PATCH 66/66] catch exceptions for StartRenderPage event --- base/core/core.js | 18 +++++++++++------- package.json | 3 ++- readme.md | 5 ++++- test/pdf/misc/i200_test.pdf | Bin 0 -> 62176 bytes 4 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 test/pdf/misc/i200_test.pdf diff --git a/base/core/core.js b/base/core/core.js index 8ea87394..c25797da 100755 --- a/base/core/core.js +++ b/base/core/core.js @@ -170,13 +170,17 @@ var Page = (function PageClosure() { var opList = new OperatorList(handler, self.pageIndex); - - handler.send('StartRenderPage', { - transparency: partialEvaluator.hasBlendModes(self.resources), - pageIndex: self.pageIndex - }); - partialEvaluator.getOperatorList(contentStream, self.resources, opList); - pageListPromise.resolve(opList); + try { + handler.send('StartRenderPage', { + transparency: partialEvaluator.hasBlendModes(self.resources), + pageIndex: self.pageIndex + }); + partialEvaluator.getOperatorList(contentStream, self.resources, opList); + pageListPromise.resolve(opList); + } + catch(e) { + pageListPromise.reject(e); + } }); var annotationsPromise = pdfManager.ensure(this, 'annotations'); diff --git a/package.json b/package.json index 48da5924..c0f7c380 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,8 @@ "parse-r": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r", "parse-242": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", "parse-e": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc", - "parse-e2": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc" + "parse-e2": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc", + "parse-e3": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i200_test.pdf -o ./test/target/misc" }, "engines": { "node": ">=14.18.0", diff --git a/readme.md b/readme.md index e330c677..d65e776c 100644 --- a/readme.md +++ b/readme.md @@ -31,7 +31,10 @@ After install, run command line: > npm run test-misc -It'll scan and parse 7 PDF files under *_./test/pdf/misc_*, also runs with *_-s -t -c -m_* command line options, generates primary output JSON, additional text content JSON, form fields JSON and merged text JSON file for 5 PDF fields, while catches exceptions with stack trace, one for _unsupported encryption algorithm_, another one for _Invalid XRef stream header_. +It'll scan and parse all PDF files under *_./test/pdf/misc_*, also runs with *_-s -t -c -m_* command line options, generates primary output JSON, additional text content JSON, form fields JSON and merged text JSON file for 5 PDF fields, while catches exceptions with stack trace for: + * _bad XRef entry_ for `pdf/misc/i200_test.pdf` + * _unsupported encryption algorithm_ for `pdf/misc/i43_encrypted.pdf` + * _Invalid XRef stream header_ for `pdf/misc/i243_problem_file_anon.pdf` ### Test Streams After install, run command line: diff --git a/test/pdf/misc/i200_test.pdf b/test/pdf/misc/i200_test.pdf new file mode 100644 index 0000000000000000000000000000000000000000..bcf40e4728d1aff269ac1beb1e43e31be2fbe3a5 GIT binary patch literal 62176 zcmb5V2UHW?);|n_B1jQXL~4*GHIM+I2c)BdRO!8!1PBm%R8&Bu3yO4+BE5GAi1glj zmEL>k`Qk0_d++nSYyH0A|OGC2v8j$2m&eqgoMB#0iYTH1k?tI2#N}e0F?oT z05OON03;-6EGtDSL}$lozR$X~|;3xI|HDiY?y~s#}aHjuv{wmr#xm&vd6`jo8t>vvP;a1i_6@Z|qDDasz%+}owASf&b)B*?! z;6_KdyIGq%labiXT9}!cS(=$GSesd!39OpW+sfbtq|)*b+#zTqey=1>O1jEG`vRZ! z>HHJET>7zX!mPTF;h*a~U%h7G%l}4jgN}B5a4>C)Jkpa5K+!@;4pVcgHyZAn;Ee{wVby3jC+La0=jp zOHlyur*`2wDuQq>`X>bj{KWr*7e3ZTY98iU2_9pO}gu z0Q3*p(zxT_mG~cn{>xBL%n{a#IB`IEeQiw*RURdr`t}~qKzVC~CC;wqF!w)C5r&|D z7|$4}0K-L(JP|)EQ^~PTnA?VC&U1M3i-bc{%4f`(BRKx z{;Gk3wu-*$(|>Egr-Jj4z2)B&`BOarn*To)`47+kMUg)${!5YnBT4?19zvp`|C<FL!V?7Uy~B8u#DPO8PbV8c-ebvNi_$w%dVYaNn0W68zI^*5xamdW6#s{N-__?4 zS7$5Hqdg4|gM_atp3QU=c39J1w9I9-4W2D+6*gB1Mjy#lMtkkTw(C?j`?fDL>d)NA z)NQe&M*&7b$7d&{((Lu?3!78) z*E8aUyT~&tCh)%b?Mdo4os7raW=Ft0z`55EZ!@pO-Pp#}*f!YK*rwUks8sgYFK_g9 z#7KQp+reEjzpZdQ<}PaCBbM8Kstp`_T;8{SUJDq^Bj#rbrNcemUZwpgf=W83^+~*z zrt7(0BcIoD5c2l|JV}YZ>&G@#tbc=9ar%rF*W^ z*%=Xv%}UwM3%|3?7Gg^EpfggWe$o_p`2ci#TU2^?!a` zjCqA0mM-@3dhVrV@(Nqt2XqfJw+U(R|5aiB#L@pdS_FjtD_UZ@U_S-!Dqzk&@UDL& z5%GTY_R#>ICXIh~8S~xf*NWU~RzKgM3;heH6!_Q&qojU|orwO}aCScD@-CJ0jpUL( z&Mb9fgjK|zK3<)bS8X$%Q)9cHdJ0xlIiCIq>`{n*4q;d>-ud2a`QQ5aL-cXuzcZc@z&-V1)PAISjweaiRXGh6a!cIqld>7Ddnbi_L2 z{%cF(BKhO3iR=b_Gmpo7A)U(4FmQ1)jW(@|5xqR!j0@WuLOTj?MLj%{CnS25Ac~eZ zkp!Ag@NZAV1y4}xG94urHFbyY5x#g!@`AD=n7GSRnUQL%_~ZR-v3sKF^IA@~kIbD( zr0wxe?}_0{b73@&79Ku6T{67k|2S(tCWvrPMNVw5<3^*>;obXwB`@&(G?TO6sCR<< z_c}fk&Qf#@>`e@44+Y(~JAX^KH}Q^eZ=mib88f5Je7gSY6X`o+5)F<|d{3|Ud#I~9 z%x8SaRjIk|wcpCsUTU9vR)55CxY{I(@r3oRpwZ0rSAxkXOyAlJ?)kom&bF!!f0#zV zXKtjwuY}KqoaEU4zW;G( zbL@xDi^`Hst%&_6^0{*`6rDfnj;>&~0|X06g?}G%xDNP4%a@c(bIo+6Ns_tR3A+=w zT-SHUlf*M>EmY36XwZ|($86_iuUG6_F`OM~7rm<}?VIJD!73p8dteqOsWpFIa+zg9 zOyux(J`9^P5F#hux#(9QCw|wnq~wh1O;U&;J&fh#*G*~R9b#JEUsdT|B2nHps{6_d z>9B*hL|Kc~UL5>qi1%16SO3q-VYS7@+8ksdGCvU6F2r{jiw=W|PQI^1WF3X)tDj6& zpq{Ha-t{;MPb_Tl;VNjf8rA6UeAe1I%$D3q>t?dxbAu~fzxPh0Z|mGS11F;~9hH=2 z;loqG_mjG8FFr}qnfreWJ5WBniyHIt z35*r4_X-DUK)?>H9?9Mxz`?@n4mo>f=67A5@B2L}xivdA0Xuw_iRnDy?9YqHrnjhX zi<7W|`?7g;YADy5t2b(E$&1i6*@S4bKk63E;ez=v?A9~-T}|2a)_%+`kE@bvEo!`> z-I^MGyK)dOMui{}Kq|5Ei7s9z#IG}ig!0ZkqIQLM0ZO%{x@o#jQR$7o@9e@)YXBkU zvET8~H>p~?7%i3DR3vh>(lgSSF&)xJfQN{tGYoYh4!KmuTe&oD}!N zq=9F?^1BDt<|8xNNk8*Ksw2L{^5jPgzUT_|o?+8Pv!AM_z#e<2m^7!1RTYe$ zy_ffkqcE^gb>!5c#&RueJ=HE8Q&6toSgHD|Q1|=2j{C0AyN*2VuxO*^A3)b3_VrTw z(;RsNnY+|A_}F0lH@LX$A0#m-Iq`B;dwul{)hp>_kc$2 zY@QN%uPhkPPc`W)Z$0p1Vq%9I;<#MmnQ-W}~amD=hP!7a-_jOR;lQf2X+i^#GMc z{S*xetGv?;!>y=-k3cp3jhB8%Kh0c+lny=ZvTiFD#@lfZe!j>k%JU|4u)Iw)GXXl| zM$Sr)O?M_9gx}l^OK@!Sd`53sm3J#2jG^9Y%Q8v|%I?*9Il$LC;A@=&IA_;?yD{+6 zT4*hAoT(CHldxZ6bi1{ywfd!Pfaz;* zL(4+08*r9}R^eAM82j1$U*mM{9$(W$-$&oO;~ZE}VITtCE%mI&o?ee>7@as`FTNvv zOQM&>3Kz?reffb(%P@2XM%r_l_<3z=df!~e8fyFUy(N!Fg4S7_z!)mr)}UE3OubS- zo$>?!?sXxQj|uq{z4_*?(|eu`@A_Jb%Za+FI%wtZU05F<({S$v3L$k(iE-5Eq8-RN zO}@6o>Pp&_POo1a_{aGte4aX8T!WlkoS>bITCgv}hkP)nyGiA+{JqPrx<2A#Def1&^uqP02BwyV^>LQ%4<{+B!p5A*P>= zJ#9BpkFAPwNM@t4oI4*0tz34rK ziyahV`Sy|=z0he>K^bmRzN^d>VvwoQhU!u$-YMUdhhk0ZSPFzkA-1Is)An`m^s1@V z8C3ZA#9Xp;{Wxi56ih)8Uzc;GIt%UDz@W-b1|4SfYRyHI?2SeCio_Gbq@cKSyHyYyj#-80=P8^VM-svuZk4 z`&i?R|EVl zEuji9Te!_t0A}q%6acw&1KKzv5gu|VTcm>kP*MX>v3p>KkN^p?&H~jI z5SPt%t_VSi@^Yva7Uu4K>I}R-J9idDyQ4;6j+?vg&Z6Led4WfVyY6SMaI_kH*B|)O z*~Z-)VG2Te13X14)5?WCxyz+Q0OAlyAuvSzQVIh7Q3{4ELiUOP!j&Rc<^Cdo<4Q=X z2<{gX8U%8wm#W|ZLmB`g&{NI>h$yE7M0%|OC1wsrUKE2MK95VF&} zRywGo7qh@)Y+{vEuY}EQQpt z;A{b|!HAlbREpvB6~*~iQ~)<$krJpV2Tr~{4if@bRtte37il;io*ompkm+L zP+prjJ_#R)C~FByk_*?5tpXy!asUy{2BAbB<8;N1CnZz@@dn}IM-YS&OLaqv13WQ4 zhn^5ImlCKrE|#Ute39Z(IQz1K&|axvh$KrXvP#Go=X(fR;t}p@?gpu2K|<>Q;!vEe zP~zDVm7;dIv2!X>l8;5OI6Way0I2fx(K${(aOLG=+;jE{+!8K=@xtjW$>LV&XI25Z zbP2i@Q7$V%@cXG5SG)PZki)EieQez8PQ(AG6=pfFZV* zVTW)V`yE@L`=$+S)y)}ZX?^JH3e*KI!)>81)(7?|D>%Z%-f|fT$L&I_VXwVB0tykp zWuV8d2MP{c0otQs#)UfmN6tWFxFdYa8MYeGkdK5}!+jj2K&6k09>Z;-r%2mf zn0qQpN)+g5j!-@J#%!MmKwJZ$y4Jpq&ag`{J`e~3RDzk?BFsuXp}M#ozSh=VKf`>K zr|>ta!xCX$kTg`3RnW^9Vhrqsqx{wEotJ?s&Q2#Dp7x02LKSz{EoWcZU0Ww>R}k>! z@iJ-@P+J8Y@pZF?tD#_?@*W6BjJN9%+zqJr+<)En)Pq|Xfe4T$L&B}#mcA68C4gdH ztObk=+5lJF4)8$w02N`l?W102Tey`oP|X%)>)}{*fOLgQBm8`pE#YKp=gG$UeXw6? z?lTw0ytbw(rWasuH)RPA&6ih86}b($<+-arWQ35{{#Q`X5kAfZ+=8GqRpC%^aMbs3 z;7wNG;83#@4QJz#0C#=a!zRR|!_mRq1rrEH2+p(tVbIy*!$$=x< z+v2{7Oj2{-^06uszLiWbT*`LU0$=({3bn*kll4Fa6Z?UG6#enE*s~by?l`v*jlZg<2CbTtQ+0u+Az{kV~Vg!vNX{+ z%=_JZbG7fIFqR*SMO>QXIqn9>_f(gSW4cXS{6B?bbMp5&@;3AXSTf&5-h3Wz3)@Bb zxmh2=ftY|J?4rX=1y~68>u%M7QPK~n^#p$IiUwN%%>y*h!h> zBv8xW!rlM$3TOjA*@ll?c&tLl`)qcskw;MJ!vKV}EV#YgZdtjphxO< zOaF=GTu#V_C#1H|6nWIV-*z_l3$n^-J0S(UMq%M}>t4>st7AU3mx!I}ResD#?QG2% zV4Cv$0vTF+#rd>1Y6P*E{0aP!0*bZq9?QQT5WJ8|ho98uW=NGi$Q{C9m`^UIui-Q6 zSz6v>gbAiqRUqcfc!!g|EK8qduavZ7$DxMdLxruwwum|~#^b#_yXJRTUMo0OKhrT|kT|FX? zFy8Q9!3JMrDC!tnr?9=L+BM>}HgiWRZTs_B|849+1>+uahUVG;1F!Un0IFlC)l>Q{ zEG?~}XpBXQzJua|0Xc@xU9H@rzw-$tbAWdzgK! zawzvCEW|&cP~!tF+yQe^KGT8W>FM+Eor9aGuhi&)O?(g6``b!JSH;j+p!KFdW3C^D z#?ge=Y36FS0&_jhs@R)7NebC$)k$ODVBS36?fZPp-z(uQr8CuRZWIbeCHBlfmcK<* zcB*cg%{;Ad*!Ez?hNI+cS2fW5)js(N7dqKne zl)?<{;tQMKNc6jjG3!1XtCgV+T_ghIQKmlqwa1@^wby$(*-BJ?o&OQV)VN%I(>D_A zb!AVDe(`0(_Q~beN<%R$>0-a)qH=KP*%f$(k8cwyR)#1s+TiG-@8jq0PS*2D&`G&* ziiQ3daO~^&IaaIX(TCYCVRn>;OHoYEohG~--I+T<;kN@BReDsNcb5>J2y63gGF#lo zI~zx5kfeo6HWqgA+)Y9REI@|a%!OMxMu3n2uVT2RPneW6u+`hi!`Th42zT{dR&@T2 zva&8TvPRin+KCC4lu0$9pRdB;?snyiAj#Ac7E{~?nFqq(!}&RhZ^|k*q|W4}$I6t$ zTG4BJ+k(Cwtn?$6^)s` zFx7Eacp0!!*P{7Zs&Y%Bq)!dS%r~cNvN?0OY1DG9e+bhlK$e|hn5B-MT3^pzRi~ss zMqyreWnE);r|0IHbtc`E^08Zk*Z#iP3oZ0j;4TV^-Sr!aaa=j`t-@~ZtQw+cB+h;p z2K+_}wfOs>miL@}QPrq(bz6S*Htf)_RIM7*1hyP=aSdiis^(cjhL@~RUurik3oyoT+JC6S57F>@he{Gk-pEQL(S&rX6J z*j{jHkiy%7gTB5llJ4{9R;=|H3L|Yeht@!yr^H?@mNuNC_IG!#yHLohbu8>^dmjbA z9KNVO#$H~bbUaa*Wo*qw+rb | z#o9dg?01LwkBOEsh{Mg5zNCV)KC)q&OG)O3(wM>N%j(Y#0RiUtyPAa`?-gR0j@m6p z&AK;8`O7qc^!-FNdZGx$n$Z1K6LCZ<>egZ8Zr{-GF&Tjy0f(Bn8FlmO`Ve9Wc*;*9WBAVvvK>iQy8a8K{(x6ARW zOr^&QvQzP!wrVJmj{Iv+tM99(FZqrJlv4B9Da0P3Vb&o|!Rd%^z|3!N?OOg3A47h-AkuI(*3A z-8aE9As@E)zL=9%Cq6sWzVo*Bt>dP5XB}w=I{0Q(?Hk?A&{Xan62ZQ`OYhDo3_4hN z_=SEX{Z|z^gj`^_(Ar}i>jeeg=0(4t^;$GK6F`Bw`kOZSN^k<7Q7 zon?B8<$Jn(KdJc2@x*S6-SocqZ7KlYC2(kbON!86o+a0gl$z`vy|xVt8O=A4ifpl6 zbatvKARLciY^PUyWd#SCLYQ`!-r9%9Eu(t~H%M9v-{D4RXA1>TZ{K(I&&P zz`eAGS_LLQ=&p-j=RIl5%dsi%Sr;_%ZlwiN*Y?$4OL>wyYA4SkC}r;lvA-Lag0&Ei z*ty6?N2;~owgOv0mk5_wf?*QF2PQ_B{BI}Jj3uH*;(sUys@@1}QMDl()09;=Un7Pn zE!Y|uc>#JNVePb5OeKV3#NLDlgi8-^e^T;)c)-{m0l<<|UTuB$_$Kz@^?cXR2h*&D zX!|NFI{x_gPc#2X1|+@5p=0)^9(4aj>R$S6t68Dd$-R%c5U)9`^j*L!h z zmb`DO?Va&FJ+vKrm?&FYoWbWfIt5mN2kdW$CiHG?yzrhh9AbL`s0hHX@O*LOiipb5 zhUNKZ6E<4(qs4?94O}IRzUyB0b7oWMMM$}e&wHUoaP9@Cj z$>Ubqr3x57^8?m@KPDRAR*1~_m_{h@>4m^2y!TXV{P?8oum0TQy_d<4RAM2Nvr6g} z7D$h$otZA;)%yPW(c>y*%J7pAq5H)qcT3eul?!cn^j}%ITlFIqWtDEd?`bT0NGn3n zY-2V~N}7N#4w6y2wbqWu_|Oma&uc0xfxFc`_q-WmStZYSFD*wy4J!w5zKY6boR*Mw zGWoz_C*N0%{3x25C(h!6Ohe^N^_#N?3#&U!s${U~8sT!$rQT<4ZoUxDxDzO7+xbvW ztK9SFype+SiSgh=nBFGtLTe7$RdDQ>8PYr%U|ar1A-cFIy0!DmU|r&Ajl-FB3Xf zg~gY;KNv#aeb^xxk|*Ym8yKL8(0Eb%MldK>o6DEu!6xGfv0DCbArTZvzzfb|bAXBH zA&DYBQSsOvPDEEHRI=IZ&T2#LLjdx{gwEZ!Q^@pm=#su#e|0XF+1jo33>Pu%FiL22 znp0?K)*=`-*zX_<&3`RK`(5AL#XQ#LowEEV20lgOyeV~Lw8v*;$s8HG6o<|gQH>p< zQyV2aW|BJh)!`7aky4*yAvHlL^=tNrpaDZ%uEEQ3d#eeL*MOg-v9pUMCjNI2=oWjy z?3*p(3>}audz%roLDR29+Da<*!=I|NlJSK0D)aos7NxWgeYZXoQv?3PuL?GjWK! zsej`Zoa&R#0heCuv#@^klq^N_&w|QW_Qe~&sCZkU4mvCQ33=XoygbEu79&)G8b6wX zJqOL;0^cHpPQP!x)f|2FV^Jx`?YpK@j*a1x!{C3e!*?~G``6x5FQ= z6?}O1NMmiQ#J-zXpH9byP7vWB+`W-Syu4V*Hrn~rUH*mkub}Cp-iDy8oxJUbx4kWh zZQO>O@f+TC%$9kJQ>Ac2?j-C?oeDd;74d0yoaq@nKBn#2Y0;v<%=u(>!C>|kTYN9= zW2e64KFdtFBU@iTT9|gtODOpO=aC{`tFo=lmKD2+ zA<&MOCO6KP!*FjyyJy9Y?L5@?_UV21n3}kE8|hI~cmvv@r(tSbi%1JLHM4PIQi<2* zoB~^f?~`O+QQvmqruDqhtVY?KV@);7L%YLfx$V?g#LyqYQ%T3_5^S1P5qvBgra!Ux!0gwvoxv4&OLgB4m#-KS&L_;q&+(%}ps>W$urO25Sxk`r)A| z0*~-7rC-ev?OosnFiW-*Q5-Y{`t1S!o6Pd+$CYNLrNC=HO(7-RM`!mN2@G!jWN50< zcXk+VuZ-c(=_sRV$xkCgDQ6XfZ?MFiWChy7b@C45w!P_<;d2dY3Gb5?{4g{)Yu~=@fdSu$R*JaL z(WqtZ@3_>id%(h4``VAxy%a{`+2-dzy&c$qg64DVn$j%1%FP?B0v6HQLy z@!SXR7&N>Z=X>g^DP-9-F!r5{`%S^EH}VJbZjeFZ4prH(ZtRkJI2prNn#s(H%=D2a z)i3-@iW1Q_(&;K{y}b`=)wk9uFfgMUTQ+pP7USBs9(MFZAa^v!qeYt9IRia7RLGWk z?WDdoWZnd?S&y5R7nCU(Nv?&>L1z|Wnd%;7tX5A!4dwd@M64>p&A#Nhke0^z{rbbMpU2J@sC;VsBXOEU%!l7qp7mkPOe%rsKQlL`p49o$i!-L-iJWkYi z#SfM-eH74xSd7#G`~ghrD4ux+)85BF?bCG#jsE@E-7>;~!^`*fL|6zRGhY!y-BMTdux*q&eb^oZ27|pPm($%urVgT8o^uDxq6P z_G9AnIAX%dU4c{1pV3be-+JAGN~g$2Y?=ULkCu z)z{ziulYj9=v|4q6JB4b1Q*kliifh!=b%td38{{5<3dK`cbV5cJ~3_V(WoEW-XdIG zMuUYV2!BL3N#;NDyB-SF_R3&lzGb?;T>qu0>PCTBz=TomLUi5gacjAkCKwzHCjGYA zU!o0&;Q2&JB(%79B3dU!4v%P_`i&zWvyyiP2Z9z;KmIpa@S+vBKzw3xW*4Gq&zNEU z#d^>iDf86VlnvxvUBEvu58;rlW1F*v<f&d15$@e3xlnXuu^=Ca?Jp+{BqIE&Qm*+{2IOw#wDd5ej| z0E4<(3E>L#pcwafX}QNyoz~i1k+uo8H-TqUdjA{|PMzXwmc%Y1ZRA>53x68!jl}dP zp;2YI0<}DqUT(jzhnLqghjKl?;lBH8mO{0cRh3KV<)jpY;FTM#KTn7{I_Gq;LKk0r z@El`aYkxg_8(=uaKr8b>o^}Vm!MguNu<0@iP4{dVU0D3(W8zD&)1jU5>5={ii?*s= zO<7GHL}A`JtYkw1rJ}ldSylg9Z& zN$Unp@IxZ+O4QPLZPo?3TBTQQ#q{C8)sfN_@#iR>XNhN_5i|9UBJl})`yCA-UwRz| z1=CK_Lzma~dVcd6Qp_GMJ>{fs>7@;03z@t)uN=SX4tAbJ0hln zdaMF9`FL1hp|K3tX4%1BN>N{^@!K~%{lA51ARg%{m(RM1qA=pUT&~Gu^#+=p*j^#;oT>t zfxa|lK8ZVRj=(lGG!*x?48^<)tc6gp`Rrc*+$_|hsJmu7e|!Ooxz~qvXvfCG@!`TQ z78FO4sBpBOq=)KQyvZvxp8mG6!XWj45kq}Bx3m_82yDJ-Tt`y zW!5*H^<@qJkkhs|G5SsZx-KdIP3ly^aqIU^=?yYn`=02rZ0(o9#xvjEg()z-kPP~D zMwZ-c_k?$9$a}4l;`=CkGCItLGbpFyV77SW^4BO$siJd{UiTD?Pb_i5z!8CJy>)%} zGB!ggiv!LSPFQO*>2vbYdd(Nou-l$0Hp)t>VCZ+gG=BZMBX9)JT;xByv1TVKKANz% z+NqHcNM`HV?NQdd!^OerP|yE-EYr|z|2Lba?2))E_SoBjN?KSUdz-4@uNpb5#p(OXn zr!G}dlh)yXv=W`c-@E?vyxrFIKj#;-hYMc+bLtbfjqS*@E@&CODHif5W5w#r{xDcb z*bJxqG4VUOVqXfQb^zWSYZ0O=7ujZ-t;+o}BObMx2z9$`_sgoUMlFncd2VjKGPXQ8 zZ1qZ=HYIlNEBBRn5I*beo)#k=Ydl?30EC^i&KQ{@e9}#ICZ~U^lR7!@^^J*3T&$3X z2eq}n|G-kpIr%+2BZ17Jp#>g%uj}=tS9Pg&0+i&fnmHM7X-d;#nPrc^x3{TJd6NR2 zk>gS>zt}Tz_4OsA5gvX7@nVeMhd9t7KULE`Z%9wf$0$B9Z=1>Xp5T+K-@4LUW0e!e z`=6$$(@YV@R~%Qa@YWvEdW=T07tT9S;gxucO}kv+i1ozpH`J_tL7I ze8xK@uZRsY%x`KK8!qHx5=<&JEb!TLu)m>o>DMi4djXX;s`jYwk$zQ*tpPWRp5s*z zH;UkobKp%8nkC?!P&WqL7+1%;AU1>IZ*ji5A}mV5fg~q5bK}^@)$?qYc<68ODBYdu z)E_Qz#60Sl#sB<#*vIf=F^$J^W8A)S075w9q+)gPDnnX&{@L*_R=IRAU(Jh`ur22q zmZXVs!{I{Ace#pfBJQDwq43kCNB#xn&4n+85sJA(pK8v7SVL!+lzar#(TUrn594i& z2cv3g3PI^(8%FviiknUEP}w2`%2G$Z89nJ#zs~4woa?Pj&EvNOqCmvOo&oV&g?pd0 zCK>O3vMUMvlAqpGbP)I2Zaf@vIBhWBV`v5!Z0tssj%Z+cN`I9I$piaeV2aIzP73|*3&t~P4Q)e&yK(h(u>UuHgE@x4Kim7IQ8 zBZO008xV{ZDj#s6h?q!%mP^WCs`_d`? zO2l^&zN8^0X6mOD0n5rK%gpVouC(<}WcK-BF~{?wT4AyNsfCD# zB~!3TTDbAc4>mA^@lSvU_m|%e)g-bC)@5lvXs65^si;I}@aH9aJM?;oZP=9OcUXS7 zO0CK2^sRx@xgG7&F42cW2Q)op>u(YEh$Q+UgZp@+SC5r@Ws}L+`qraX7upXUJ0}2Q zndx_jD8FX#M_PZhSO9V4mOUv(ar_3YA)e(OGbj9ye5oJ# zaQLsfh$0JMi;2A+b3j<>m1o_kJVo(Sv1msB@mT4twdU}@H5TD-CAT|o@-pUYYyO(3 z^7BakYP>J}y6PQZS?x-1=h@;`M?s|-yArdN=c;VC? z=lID_dG=}WSX_QZ)tdThmS<(|$mCC4QpAWbdoa1sLP5m4 z@q%~bl>%c?HUW)Cqb9lDFn*}GnFbDzHanv$*&3!uZ?nO=5Jv?ts!tX6QHKWD5QlV3(<5ZjDAPr*j9lGE>IU>cx zq>N}_Va24Ajg%a84t!ULP)^qJ%0v+kLi7ZM8usy53@}e0NFc*TM#8%BrwcvxXv&umsrk5aa2?eCfz-rII%352%XR4pScUC3J7?6Q z*_P!e*Ob^XRq5d84?$S(3C}CB@{n77orE8jD<>by=(t?DF6iaQXR0O|PH$ zj<~rjC-2(Zn@O);QwzAb2JECR}U?M3Nsz@j}9?J-E7@ zA7svE@KzdIL{F2hzyBz^!tyjSCQ=EYE|dH^G05*O*T#(SejYYe;V33S z4|BuY?RPr8IqE{s{0rZ+LoS_J11*Kd!QPX#4%k^ID^KO=K<$1wL>6$%37}-CBFus}=b~I;gY|9VR`+-QQ#Mg$ zPTfN((+3TP+y#qqug@6=XbPoEuM6g)qvIShl1rl3dg#hrW91$De+fA{kynbHWT|fa z4DRz{SZv@K&3*v<(6^{8p(^`KwUjq0%9H;oMuX}(;iuRi``SvhjM>YGTMr%|$lb)( z5MKJoDb}=N0COH1sQZxU;ldqZS7QD3^NEF%0tBo9eaf80+WW=#7w!79v6;Zl!-|^t zEL7r;Thz!4(ajMkiPF!C6#M5ixoUSw$rE%i6q1`2oP5Q(;R5CucGuNoZ*F;o`+l1p zkBPWVL7 zHhhFW%wfek+4pi~_j>uI^kFhNeEJvamFGYXPu4@YY2l2Nqy~b$U0IU0Bj_$lZRk?L zfyyAZ0#DgEcJ1xf9ct4fz8LDWK@TVW$raY!hd+-a{iIZrY4rBTMmX)iU!gzLVUbZ^ z4JODEe;U&v1^4-%^WIk1;7R^=t9=Bonr`$Z!IHx(BeKZc1uh4#4|K`j-e+b1H~Hu5 zjr_?yVMzr({yU!}2e{6ng32UiOYRFlVOm^93%ccy6jCn~fZkQ%o49v;LU5KxSA(HN4hH%Sh^?ZK+%MEDuTR+emM_US z?9n{*MCGml2`jI%PR`rKV?II@AKF22_F#p5{=qLL4Jvf!Z`tp(-dDe6;2^59=dpQ6dk|lV<+q4~oG&9PTm5ZMF_E@wPX^>K%M#sNOrrXy3!o zjfS?dk3{>&$v2lqS)p1`A>(iAza=V1Iy|N<%hQ*N?pO$mh%&L|bGtpDzW(d2Ag?Kx zFz3P2*ELsnxijLhvh3uazwF$3j~~zkHUBo76&6)Oxj5DJTh@sS39+t4+w?vpwiMvgHsp z)(sN7<2QrQ%KhNTO6DBBsCqyQq5KtX+X)Z}j<^z*t6{N>^lqnGVq2nYr;fD!MSc6$ zt8u)$^mrBoGEOgK>O48wTJ1}tn`1$U+7_&4B!By3t2;}0OKR=Skr+8?{4<87`|Xu1 zg--(1ZqSER<6g#XV34D=xROJi*5$;5eX$N(Hs!J$;eyz#=UbX;Z4<(t($=iueyPp$_9P_KjEWh)4Yo(8rec~xmt zv+-+$5&EfBRdO8Z*e=z>>q5_v_P`{b1o`lh=a2S_FVNI|o#^7)H|nVPi4L>4sSSrDC&-|FiH`f3W%#sF%QJ_U>tm>&*AkM= zA8-0aFYrgJzPS{7C0Ag)zC0QG|FQShQFSzX*B}xsxNCqwfZ%#?3GVLh?(Xg$+#z^y zclY4IgS)#s%ps55=e~3Atar_te`c+(IbHIr>guX$FLv#HRSQd*0Xd=XI6+wwjt@Qy z49IVt9kLpL+9Bm2(HC(Co>M4hb<16oM{swG5$Ika3;n}cPtevRoGENn@x?GJM5K#_Vw!J_|Jg1Y?INo7RgY(1CP8x0 z$1ZQc6D)@IVL9yesQzqwkxP0 zDXtn?)(-nQi9;7lb~gN(e1s*h9Q#w@=tSV?{!%l|K--roM{W)VS~TX{feJ7AlJAnrhvp>rqYNy`RTKLdYj#5i_UU>XfG<7dNBeZcVQO<8yX?Ff%C)c5`VPL*U5B9!S#v&l6kGt%Y3r$fdO&}=ME!)cW zWx?Vg4-ysb$mbs_br4Za8G>L)i2hnsDVjNfBY8ljZ}xuSutTt8uF%oF4N{y+eT>t{ zDy3v43zsU7esJ`bMPl->M4|aZlUTQ07D93lZ^i7Sfzlj2MuQ}oB<>}*;wwsxI+stS z?hR4s^H$t9r6L+D2!6Gy6}4Ohr<{_QCOaJtMjYeM-(PwVUzMH- zUbe%Z4NmbkYr;3vo}@zxEHBu;G>b}?tdmBx(;v}x&ahn^vP_^GMTsqh^SR5K(7rzw z%x?Q!iy3&6^HU3LA)MmPK7s4~yc0P`3e4*ji1VDBrkyRs3yqyykl8GQ@l zJbx47`96VYcD3pL^j8pQeGe#K4h3#X9*82ZN67nf=oi6P{{`X8NdPCDE6*0{*KuAg zUk}_1imlzeA4IYnZ~qxc_A8K^dwGb&GH=8=aHU)q4Kehlw|PqRKVIZ{jG}Jg_S!$` zAuvy>ZrfOF5SlB?qUbA-ZgZ}^Y*H63xRZ=svGh2AI(?uU%~)Mbi;-*g$ZD`3<|0sc zI~ztskhCB+{dT1hD2C{DR`$Zz3vWTlb95=x0U?xmH6crSk4a>*k4aY>89{8csi zvE&%8?_pTNr9B+^jQ#-vEE}h|mzZ=YMc00JsPTSx&KLKnlvVxsIs07juKS~T>pMX~ zx|4Jg2gwGsnK91b)O&{GX{x<=UYGT&A@#vUTsc~p-w?w~b&HPsd+w-0>>TZ--~^Fe zp|=`#^30mA<6DdW9Ch;XjM8Ma<~ohFqXKX}scpd&1Q zU^Qc&7QrvG6W^H)bZ~fVeb_AaSembK-br+}*kaUgiB6~5zCBrylVo#CVn)QcT0haS zE@e zm(y=zqqkkWarfAn+Bm$;Ec>%g06MyVvG4rm^7)hO;=l3#@QVt_yV?Vbeq*x&QVL1i z85r6Dd4E3t#rs1}D`#kI3S?k%{mcvGcQYgh(g4}mSQuKqbqH{0v;xY63_xPIf3FbX z@7|O9n=%Nvo3{*aRR{9Ny^#pLMQN4Q>D8G5OoWU;PA3K+mltrZFd=}BQIka-0HiZw z1hS7Y0hz@BK&CS$AoJSq9Lx-a00tmG6Oc)a8OX23O2_=0tBLV#O8`9~Baod80NjR& zne9KW3=DtTflOaaK=LF2Bd{zw)<4Pz9wOjWU0`$mwJjqnuzX;if0p-_{#GYuCZNv> ztRJwh001!0TbaN*GXh!c-c|r3Ga>VvofSC266n9Rl!=8^6W9dckM19wc?^HilF&0T zvC;j--vr#v|AMT9m5mTc?QJD0FiLO~UnUkM)QN#f*3 z$P2cH;0J;kgHk60D;l~WC|YkInus}=Xz3dBfl{iqw~E^NA#cjoeP5Krj>A%XLxO#b zHGJ}1VAsHw;W&LNDvmWSZpygXyqOa%ajCtHSTA(i12VSdfr|pko z0UrbQ21UDL&dU8c5VG887kwGEKL$7+H6JI^F=l(FAe|(!?q+bb-s~&vx zB8%V{6A^EruE#~hJrU{pb{~Lv!s?Xlt#KB-`|GRitoLFm0c=4priv9_1Of>O6eO!6om|U!{?tM4HPz@4dgZ; z&Yvs6Uh@dwKMC<_v_rf<8~BmP)LQqPV7asMD#=;>Y0R61sBMg=^LcceK-*JtcWbVb zh}|bsJY*?clYLQJejh2y@bkMvtt(LC!N3NsA zPotJ(f)Cgyt0I{k{+rH&cnil%Y2n^$t$8AfBj@4Dji<3WW|UC`WEeAxE^T752Q25$ zuM$=|m*LuLE5wvdkD~aip4tf{!oRpTWR|P$?5|p6uemOWj??@F8^<>jqVhe@ZJl6K z^c$$USt_&=Pd@jY1lMW%CTu@W$%9sQ_@`jG49Hfeq!UHU&SG>@-#T^|c?qwn?n0a5 z^9Vbb7ij4wk4t6gV@PKZUw68yge}$soTKPQ*Ux^wJg6FR;8-w984$5j68NR*H|9$3AWkEsElYR~WCnwnbP6Bc}w z6@5<66hdVA%(X;#?SoLh!GExGex^!OWn^|dJs;<|I`8{XON#=7nDN}7T2!<8{gG&D z>Mq{MG|#2Yyh9zk219?=z@~G!O(F-;f!_0>b}3A7%xm+?bk%ezc#?bID`FOwjMQ4c zDU^oC9-h5jK zc^)|E4@s9=MT2WcOUUx}M1{l!DmGAKEy?CJ1kPjG$O@C1J+1at^GQRDPWoR;JxO&C zU%ggBJr*7n)Y@x=_x_Jr2F#K+<93+W?$8GUP9-F(3jm=P_{1ysZ2Q$oQbn5 z|JMd#>)2QT9M61o~0JrvIXEz=U@YdO^^|j;H0Z^2TOcv>2MT~|!jer|e+OOT zffWJ2Ad+|{1Z~@-Hor!IjPAVz`2=bW{yo|m+!;|!2SFr#E`=P2)$qvta=neGK z0Sc!tk+*N{$2BK)OHE9=t{=i>mJ&=DGj_ZMZ9m5`IMh`7d;4;zEvoAT6D)LN)B8!x zh_}58ELi9l^myj9psS^VLuVHJEsChBOGk}rq9B`}64VRAw&C4XX`1Z5X;udD$+pl= z$^O6r_snEhE;7N`VM<6i+KnrTZ5nN{E|=zFpTk!7v^%u6j9JoYk-WySzt(o)X0G)% zu#oOJy*OOa=q*RAE#xRfILbP&Y&GiAiAbqdmG8|fE{14VMk;%_T395D6-F3cz;Fu8 zZ5_@0mR(eR$NV_H6t2^e)e1YtJNYHk&O55M8>I|5KWIhyOFJYK4IyZ}kFE|e9n=ye zCp0GnCt@q42QHFP*JiZs8KqAk3gpj<HOfgApXH5zoy%F zaYX99Y=}RM0XH_t-wxz*r4joNx*D%pPuc|lLVH=R1|V>Gg4=ktK3`A^k9R*iR(%&b zrYoxDjfz!8V8|IUtOBP_VcW?T$>%C=ejN!I-iTD8ejZ8ws%87My_u@geq?`BzA|H8 zX?J+CQrApg?N!P}Uu`XORIb23FV^3c;)m-@KvgdtHKU)ME{&xA{Oq4wLTaC(4!PN^ z<6P1|5N$D}zF~g4%6xuRC|$cqgfIVH=Ht=EHUzOVbi~~c=8Ifc6KO#uF&09#PZ$D~ zgnkgnR3SRN?_h~kkh=V!|6#KDomfB`wwPbEm>*j2?S`*OCla=&$gH-LZ|W*whWv}s zGW%pm2HwOL#Q4j~tmIH9bS2C$*d+wccbxF8Agu@THYP@c?v zMV=fhUvEJCYy4jklukZO#$lCjo=Xt)^v%=G?u8>^6 z#%NPZm=MFJ^@(q{e16|!ds>n>bnvmE-0NX|)3W~i91-i1o3cuXtTnEdN7{X0bBKnm z5Atrd9r^}xoqdvlBRWyMh2tQL%N^wE89aV_mp={u>c%=Qr-y&A_1SMr&s!qm>Kf1p z;G7Z1yqbh)bKs@b{3(w4L7Iy4aWTI~6Y2El zz^H=LM8!(uQp}r6TByf{bp|%RUvK;_Ah3fuYZK0cRu^c0cQ_nWaYKfmpm0&okD zitcV=h|4AEqD@`aYQRQrptMwauIWg>WD)e-PW!9O*7?*yhizC~=dk#BICpKz*nCRek(n62mmt>O5A74~Uhd+a}L05$(YMCi+CyOper{dpW_@~PgnKUI0)#z3S z%1c!pK?pM(OuhGYQSa3rdN4DeIxOCg;m<1GX^7Zaiuuf`1lORHrk~JCtAQtlCY*Ns z%n^NCi-;+|Bu0P2GB8PR+=>+?*?Sy#U|_p<=K#DJ?l2ilew1!yJ2~He&>IwU@82PJ za0sS+?4LTZ`3hN%UkRk$fZHb-*cC77@cw(VaDV)^YQ5XlC(wZ)*J6Zg{WYPH7(56DnQ;|VvJys(W(tF5oZS(u0MJs z(A+oPig^&4FD{wmHLWiZdF}it8S_opvid*~R?wXpd*=4`U9PQJ{?*1Jo^R0M){1K~ zUmyuhTTHA)K1!*AUxL`=DP1LUGPo5u{<}=DOeihzcKFXq6kTE@IJERV^Jjv{5Y z&XP1OV=iH4x)_;?&iI9`aq{G5qqnzrSa-@!e`I?H&J>2J+d^ewOy+t4o=UvV3^KsM zk2N9(=zHYfzHXATgF2n>5Rsv}M2 z<%|-RnOD4VSOl&2~yAa7I@^J9@JBNpZTO=SUr)fF zDL7c$U?bl?#>GMhK9SW-#$X+Q^Ly%iM})if&E}??h6;MO1V1zj?2O}-t@nu81o?{5g{uZehMMu@zMf>e`AWYXMS;T1pXZi+we7i`DbE28&B!E(Z!FkMsC=YUvnnPT>MwA8Z{Qq3_*kcIBekTuizo1H$DwrRSAj>9<9OC8sC0QUXkSpdn+HLJg z8MFp_2dwbU6@w2x8dIzY*c$}*j53a_)*j7s8W~6G+^NMbwLClRSkA_uMRPx|rI(l! zMT0C)&#YrN>kAKeS)#LXaC^2UpjcEkUCn+)s&TPsJZ0M&(3XFdp5$dLA#$MtmHgoa z-T29sJv@t;Gs@KsKU3H8Oa}sS77|hE&Hs-MpL-+pPnTiZ;YydQ4D;S4Jcl@cE4!bP z2+f1MENpwEP|I82(*9ut{7#RxO8x1~@nBVWI2jVp^H~+_T4q$(UVT)~>ani=yd5FB zWBv#skl(6i%wX=kDtRt_zA{JtOnBtPhV_Vfky=lCj1_0-A)<{dQ-9ohck*_e8!b*_ zu#@b#(m`JQ;)?RsW`9HZB${HS=j% z{Sc?Ml5p?N_jgKnwZRso(WClRMMP4AA7kx~oc+v={msd^cizoO*&P2DC-iXM8K-3J zy(Wb6mZFUU#lj?|AoT@PigJOd8>DO?dk%`I>AFw36uF~quo1bFS^Pdj6V-wlPcLoD zRD+cfMJc`6X#P52e=-l6c1`0vF;E-2z_+J{(tprRxUz=m2CiaEs3gIP)_Nxw!=Eid z^1W^XBT}Rwr_qYR#C{HP19^>Y>+s&0?Roc(yON+%o*yMeN~WA$&(_e|*@{{o4kumR zVWO6~+4aQv@~Z<=ofBOgzDCxdmFcVX>z*=w&YLhF796Kaq#%TeOk z9lQ1d^Es<*mKXHN4?Hc3f`-{HM@74k#uB@m6rbDs?hWNKPs=$R zSbR?4(tmjfi;=mMAs^iymCd3~4P}l#{^iZz-fS&6gtc$M+?dxb;qXlSXdcb&!Kcwmjl$(ltc>cg+iA_j-e9oaJXhz2q2X8>?@Ib znhI9h0n7?$G`eQKr!-gEIQueSUO-@CLlpRUcG9oOcQST{gOp0Q!#<(i!JmPCo0fQW zA*(ey5K_35+oJ81-~LEVy>U5Gr)Mfbk!eQv!81>2=vn7%uOyGhit2u1vzHJfaaPqCx;v(AyySMNwzyN`B~7NoWb%Punecu4iQyxK zznwz^gS^B)UEV22e%VC+Oa}%b^xODRp$!u>)z*1(Tr~FBU+*E0^O#eW*sy^~e^E5| zg2L2rRHyY~D3xXdJD*Od@HK!*a%#*)F22u6lO(3P&%=Xa3kA1tH9Es=-k-f;Fm(j| zsR>XQMcPu)#JGlwhiy#Cf)}ySlSAI@wkg5PE^8iza7=Kne0cq}u*cj%?MkhS$goq0 zogDX>I}QsPLLg53Dp4QoQtvbczTV)MEj2%d1cACsLkTgHm!9}`Vv3&I6n{+#&+E@J zSM^?F7!GAlI}hVMe)-!_V{NZ(O6e7xHp7mtrBBD?sHSuMf(yxH+Z0u0vLpo-^PP!h zU5W5-{(o{Je_Gt<#NQDqtELj$GeK`B3g#1XA;&vgdC^GjVpJ5WMu+JOA$iKEHg|gPlrYsqZ{mM)M%{I7Y`4KI@$2W;MW9!A=3u zaW^zQ;e5ODu(0f3JeNGoV>E(fwR;m6%?s(pPmI7w#*e^s?Qk z!Q5{(j{1Jzh4Yxsc;JkSIpS=W;W*m8%XE$N7|_5|a<85= zU`C*Rb3Qt_2y9M|+y|LJ@Ne>m5E(RRD5EKk{2%;QjcS`L zgenHqEf{9R4R1TMBO$X)pL!kCzk7>o+YH0AoT|UBABU6$M}Jts`PEL}v>(%yNPQU8 zi`d5FzIPXs5i^?hP6FMoOj*+W6C&O2?jW24)29?u$l`^L_=!{D!}5HJMa?XegyVAQ z`XM2A(d;CJ+$1zNp#NVTgi)NhxV+zYpq zjd*$0OXL_N&*@9i!ZNApRhQ6ARR1#fo&z`*c3ea&s5X|mQd920y!e$X^nKfM($*X? zf%fQT;%w5=K>FhpI``3X|?I(30>V&2?&e) zUfE2o?CWr>WlzCFJ~;lNa{vJE-J!6!P>tk(ZF=hdJV(pN543lYREur)J;egw0A{Fo zG{I|?kj;~BO+>8rPgl=CY9vAJIS1|1c0J3(oyUc()U_p!RX67L>cvIC3}$(Bh4r$B zcga)#S&A_}PZcrY!-#kvUy%}lsF6ia%fnL_ABi4flbO1^=Z!=ez;?5$C3@r`H)oz` z2HyHhUK>1iEh6Jx$LQ3LVuPQYPo2?;pX;6yr3(&dOqBwR`htuEO@RJ?b^7u{mTg6~ zI)CB-hfQ_KY$B1_4kjre6g4zemzk)`tVol>aN`p58Tu!S)uJ5DioS*X{QhuP94{6F z-_pn3Y-+!x1MR4wFZtXTA$XAthQtRL0g-XmBQDJVM>;jdZKhqXDFhT4NMG=C4QZTtZChF7h++V zDY!3^@_w!l0{(I)dqK8eDJa#6m|-RH=*u)>qc7(yWr{{54CY@xTywelBUTKl-F!4Q z7oYs>(u3`t)53PdP{IUfyO$f9taWTb(Fe%oGh7U%x7M}P7GpWDbscY9nX9kgpmd1m zA0^IE1rj-{0B73v+HG>&j2d3d-JF$k+fJ!hc#tZe6Fhb+&sB{Fy3KOlcTR9x=!E5= zHfAJ*cWjjFNXihdzIe-(+>1nRJZwa2g#4T!6P(^MX1yc0Vv{a*!iTaVmpc4f`;XuK z;n%-@Hpm|syhq?_DqF(KtJJYlU9?KRa6YhFH94fcgI?fRn@6Jp}EOhR=p=8&Sbv)${blS=Ey@}r0&bS{UNtN)fc8AM#@O_Lb zZT~^iq;V8%7#{iIm1skx3RyTTgjldQ1{g-R%?n|b6Tl-UibDI_oXw>X&s>WuF-soj zm@=reRG|n_-Pk3PDHCl#JGy_r+L-j@(BSRN}pk97r74n+b#zLPG!~j?Z5- zK}l6v2A}5v3-mE&09s>-8{PWaMbNS{l|0@*|5}q*{&HdAAnl+lRKDU!?C96@W>hbP zer;=gq4lf4*m8^e_F6x)@M<>CDm@VAU^upq2&F+0$${(GJ9HgVQkP4Xim%Q!)pT8P zs=hs{bG#B8U4LDlFI?|jAZ+F@nH_^pnFe{9hWlB5c6HY%d3Rd+qXbqwn?_PnA`Csp zlBU5zDZO$pR#X6-?FnQ?BEW3K&)0qi+^-D(~T{4sQOW@aPd1QFJCIc*n)I8MVI6?AwFRyhaG(di{|h z5K9N2dPCx!eVEKB6;&!tTs3mvI^-5)*cq~4FU3Z>QtH^d_=b;^YC91J@bc3R_sZgk zpB^G43#iI2U-6WNN;aX}bO)tYgwl8R0vipt1I@)7T=(xaGd?K`G>t z?6mHqj;4>tV2J0cQ3{q&O_gAi5YOE6#n^O`iy-=aLItJj?`Mbb{$b)Ajv#qE3gxs-K~e~=pVP9?Ppj=^n9rdeS zpDcQE)RZz-BB-_Tu3QZ~%j~0e9%4>$h^Q;lH6A_atT+?22L^=iOMSU&ZrJK7KqPdYlPq|?)1Rj%x1>thbSD?|IZGjKh>k;l(dHDkWtOY8 zCQt4(?`4b=Hfy7RX=35T?7CpES{HUMu%Y8Xs-_=Q*dgCg_CXC7Q?QG3n2s#`my;)kmk9;G#ypQxa0Vx7{7 z6ps#@GEo&`8q~Z7$Wl^U$yAr?to<2^X~ziMWpXJ?^?^xlDXAuBE^QBcS8Hdi>Z^H2 z!XhzI&t(U}D=#ln?&8`@t#s=w5a9Tl)2`RX{og!Y8}&tPyM8SDD>kN;5+VLXVF|`l zps^LlvGJjVVp{Wo#QW(3j$`u$9Koa$6rS}P=wHU%?6;swVwT}|Y=(;{qYnjfSvwV! zH9Q0FNsl6RJkTWoy#QBU2KnIq$GUf#I=nv+}|DN*XA^v1gl_ z$YW@32f;8Q5ButN-Bz)@E(c3mL;BTaBkgo`-rmNO*Uf8fq%gYfCm<_^o99bbd7x9C z@jS_KWth|6tEL_Xfi~-K#*D%@5ccT9gk@)rHqmBBLZj>Tf@Jzm&mk5hXzJEN0IX1g zFo%KTB~x35`W?n&P=OSesGC+o;_&w<6{V6Y2yEHK?&|H>NT=R*dWY9__#FS?(&H%W zHtRsbFn4@!&yMfN>e>(6-4(AJp@G`oXKlV3>$tEIC*kAtmf_mg!@3@{oA2E;y2v%V ziX);yQQ=H3o~!9&jT9A+Hy&6_Oz*=wr(pLw$(bG z?E7Ex2U5;9^3BT6G6H$1S|MOH+Di2>7iTJHYCwW1ZKk`(Yi=6wtQHBn`(4;SdZ>}$ zxCS*CKIp}pn}{#Fjl|S z^WOSaoffEo3M6m`%JT93;qm|BiP9<2@c`zBZiRPuXs|CRbb1n~Zr)l)XL;eK4d+R<9jT5A3QqWf#}f1E-80kufa`j1P21sMB7 z^zRL|NXPQ$rNHoq*6Z8({ReW983-k#W_(k3eUmYzr=zF)<8t_Y_KO%=I2k&a>g!VT zSz8#;{`0i|3#drX%=|B)-+w|XGBW(5*8eNPH3sH?7hIERu?nC=I=X+K_8=bY2txIJy`eVQ@h8aZd`^~sUG$-F|OQf4~`vD%q|j2e6L=*GLR z_(J({OvG|99fG)FMKv(g+U$=Dl3yw&uWZadnN&QvJy56<=|V$D&Xm^#7ftw+zkfik zt#LjCg%Ez&`NWz(z%PSm+M@CZE^oqzwX5z=w{Bneg~1|ur4SdU=J;KsRP55)1F*KGGsn zzI>?vwc8h%BikY^Ks6S4`BSo$S}ULF3EL`?gcn`=0@j<`*YQ8y4*x3eDi@=)!(+a_}luH&Ir8dey0J`8Cl;T zpFl|FpZ;HE{2%|1zWhJ^@PGQ@|0n0bKOe*Y1T_P1H2 z8bGm+zhC?QyQnJzP|6o5Th2%;r)y zWMy3$tCbwKx7(7ERtbzf&9mBWxr|ReXPZI3wmvttI;gz&VaoI&*w_|}jtDEze*}d) z0|6fa(Wz!&1molC@e$$1x4M#uLA@h__F-}Er37J#Z1vxBIC~XMP3Xc5nS2MFfH^v7 z5ZWlyTQu(nSLG?xJAH>n-}9ONQVm1Wo79%N+dPALPRsy}iR`5v)FRfqK2+W5l^cW0*Qna^#i1fUNZ%LpP4Fr$; zoHFV)3?d!R_fF1gAG^uJN-x%@e0VZ9ZGM4aZzcUmA?k|`TCYJq-MvM#x|O zMh1_#kcN8m_aC!HK>ZJ)oqi*2^n(to7s*if7Lf`+o?Ez_YtO3Z1Z_7ikO);-ODB)2 zzEg;A`ivkKw;jKj5Wxt^57Zw!m2a~{cS%GEe^fHk4~~s7@7SsIZ}`1rzwV#0T+Q4@ z^zFZVMqJK_>qIMfzqip`H7P$8=;3Ar8P}JM`|KL>YP+VL)k7B$*ST)*k`rPx5pU}n zn&Wi&O9PZ8LwNV%MIQB4_g7Xe>l~3R4mYS-Dg9TRZ(v(<+&(!-peXiVYQdi;SiC)0 zkvdwTXF~g$Ua9t0Yq||LNaE7&*v6PP)q!jUy+nGa`;kGw7mZL+1eU>P$j6hDzyMX9Y{;(LaJbY8$=C4{? zqzrx%#HTV?Xb^(J*x5(2E8n+2W0(QVJ`|%VMzZ^v315G+$`O)f{4Q<6pax^M+i#a|GZy7H74wWtTf^}LTCiX7~ZFc zUJ0oJWkHx2imnr0>0UKZxm(Hq%l1ee54kXmalmyW$CgtAv>e45%^8CwAgyzBGyhD@ zg~Nj|GjhByX&v`W^A7Zh)hm-w^5;iZq;BZJfXDy}KG=1X^^fc5F(L|N3n>1f8XJ&$ z=;y5G?``AnqCW9Yk~PKXiSSK2P3lg%PI66BGNhDBI*|&;L2MmwkzNR1=wGPAgauRu zM88wlYZHhRXzBgh3oF{1mzWp6MKQ=ZC^i@u3m!Wq=R$5t&O%-=&0d64Brn$^S2Kxb z%DDe!pJ`uiKX+0&pUkk5VSxdk!6Jn(g`WY_u>Ui(p#O%Dt}L@G&gZ1hn+kA1ObWkb zL844Y?v1>$fuRAlVUMA`fw{r>kEowWsm!CniDHzw8Q+$(+mhrH?PE57SjL&h;KruK zX%n4NjmOX?S|@Z+ziRoGuVsJDsg%#=2TDqc$!gQ9(5s*OY>@QrlT4G`h^vT)C8ev_ zX~QT@D!-Ri%XJVbk~URR6`U7m<9DsN5ZpFJPYtUPs}>6!QcWON3@gs1P_E;a)nu zV4h&k5Zv!4Vbnfc>C=(2er_baGYwG{@e;BAOv1!`WwMt>H&EFLrv zz!Eqv*eDz?93hY@P#)QepO4iSc@WVr6fDdlG+utDQsv&~5}_y|JCc#WPHC^CrJ3JI z;W(3`GN@v7G|1|pA+R*jh&b}ah|8XNh0II)32r#DF8DhAc%)oM+7^#>J3aJWaKJ^t zpg?f1W~jWVlxTwJlm?3x;MTC(+0ODpLriCAJw9+cWO|lKLrr6WJQI#Ka4xPqHZzVp zS@)}KJbm?o)%D2?qX&O;3&Vj>ztr-Gkk= zeZ#iYmg|mZ3uIkTT>*zSwYUDuo?23!9}5oB0*_%9ABRha5so^K){Yx%rc8HZ?Dj8OFRC;_TNWty(EHFUoCi+Y)l5}Y zTY@Wv8;$dZI*RQpc2+*P^C*xhu9n-DnJyRBS}(RYJs&;uL!~3uPE*V!PPg;?;Bsjg zD=BZ&YH7bf+L4*fE`(o)Z(C((w=`3*pfY`ONp^PG68RC6Nt#wrkSiuUoVsqLdpTW| ztk2SJ|5{$t_dUNnVOGr!4-XUH`^IzW)>-NLTw{1T?&wH%LI<7|r#v(elIyCOut`fRSdMc>3oyQ?ZxxM0U zjJ(S4%DNYcPq9wjR)t%$lE+_so@K6OPxa@>I;0UXCowQyAi$F^pZ>##_S*>4A7ekX zqWt`CpV|h5zh6q_{vH4VPBHxZ9hJ}2!CuDDj^EnS#@gy_`Xq2%#@fQ#PX29l=l5{c z?+SV+2=EW&brFOg1nlnx92f$B2Xw*TLA-lgARr-rFOZN> zP|#3NkT5Xs-^0Mb{rmD(2q-u>IK(>$7)VGM`1dgH;Smvl3nJq0jDLD?f5iTM+rMM4 zogm22AoL)$;GoDLV9220$e^#iAOs*Fpb((H7mz;}(6`D#LP5j4hXrP+LjnNe>0hyvM-A!p0#X zCHqWH!31DtVP#|I7Z4N@77-PbS5Q<^R#8>cH!w6ZHZe7`cW`uac5!v{4+snj4hap5 zjf+o6OiE5k&B@KnFDU$8R9szCTUX!E*wozJ)7#fS@MCaja%y^JcJ9~w!urPM*7nZs z-u}V)#pTuY&F$U&!&|#RLBRiL7SR2pVgI9Dz)k=Kd-o3f9n@R9K*5}WD>(8yh>!G; zD7>;zy0)l948G84d@UGh3>6zsUm6bO%sgX$+E3Y6c?G2~)b$-8&Yj9+^S-t<&iZ@f$vc@CL+{4<%CP05YL}?gS-e$_2s5XlSF67 zZ!GUpg$$)xqba15zcT;8Bzw%8n>-70P6|okdj}c#QX}!Nxr-mK)M;Qd%PeymcnEp(|Z3Nwz%nuFG zYA$8^6SFJ_?$moFtoBbPS=kj=!+xz;i*4LP#1p=>&m)_rh)T-rk&K|VxBvCT%i3}>&6|w=Ny&yyguL6ykawAWvTRt| z{Ki!TdGsB9UW169DZ!YYuT#M_LYLFIZn1pWcH7kBE6A``uoY>BD7U$4MGH`cO&)xG zw?D(_$G+$$Zpq^_{>j6oXj9YlQDl3zv8~y0gM-atcANz-4&M;B@(R|G1CE{zz4KPJ zleBPd$N0%(3$||6q|$DSvbkb$!|>_jW0u-0h>41)|D*Ep>?_E8rtm^Xt;fs-XJpIM zDhr2`<*t)$l=;B6b6p)*l%e@5-?pf5riaj7{#taPn__0+CYKo()4`mm?zjE#a1E(} zTLJIiG$v5C$AukgPdIFr|ZTMOk9kr)9vW(p_bo;Q)pW` ziX|GDtH~D*x3pZVL}a2*X*|s}2D;&j^Ebs)(-TKo9_l+TnBl7o`sKj8~8YEtK?`_X(8j6~{tp4sl*ii(@yZ`sL@= z{nT0m-R%hHNdX=_)@7#nWGR-0K|4m`At!kxa?7&p89xrxV`5IVGL8EHMeCpS&T8Y= z6&FSq#H3B;T?@)xU5(#Ql!R9V^eNh8khtf&G?r~UZ%tL@e)ITpKGP84!1Lba6{LsV zE7n8N+Lk%kzEA@@jH}u;8f~ox%;^>j`}(*YmSdm?TN*wxAIn`~Nkj8^sC>HNaUzK2 zQ>Qe9JgQ#_`6FrkbV9|*QM%J())O~bR6C{1W%oiuI7DF}(<{iG&=cd0X7gAsQ{pR# zDGOb&g8wHNb*JraW2~Jf#;8vNYb8qCxdevdNh@)-J3Gp?-YF{w6ZaV@EbwdewI-KJ zNp&Sa*%xxVixw^Ip)zAvk7(XA5loqsF9OA^YO^zvZBncAX;3?^- z36kdzxK9w>ZZ6WjSTKDZ59DONpRD87Ia)j2&S<{iWa3*UtyLy(^JOTrn+@4jJ2~H$ zS$;^`%G}DQ+hMEjF4WGBK5GszN{iE^SjA z`R-Ytk`2qbsGW0+q++#S!o@DOYnRpzY0P~ zn|-8sO*Rw=qSV%i;pFUWV4&9Xsn?w-@g;tv{D_=`GAq$Ed+AB~F51(cdh>u`UTDE< zZ#n0ZRK+OsBY z^J#(WQ1LG1Z1)9eJ9bL8{u~pCY07MeJ3E4)rAkyeIi6gZ5`JWqwyc7NZBrkifMwf+ z-Q|Z>Uob0!x4LoXXwn+L;)2=u{>b%VH$U;v(GXYDE58l@6{IT*zHS`pj(7ujN~E4d zniph2-lgAf%D4HQt!Rqe%+dSx3fg|Es?@Ip@$E;Xur1P}$S28dn-esrR?ZKGkeMnb-AHfpJH9Sua>l+=%Jjmq&`%0;DjuSX6|A^n)D;_g6k(k zZ$)TTP~?zr8`eP)i%~Pj(WACQ!|Z}HeHrx!h4PGRrDf;((c?m`{;Ve|T=ggzF+>zx_)|gz6e;3yij^wo={m z;=;qR`NJznC-*^1b3;{KQ(H@dPF+ptZc|G0C)*+;{UeZ3j|a&X;#Er$CofbFo^#qT z9y9L++&&&`!2O3-mUT9Di96@KC022B+B!y(QuD?DB@t(FN@piHm;bB1^Nwn&S@$s5 zr3fNO5dlF&KuCZPdQj3H^%3cP@8Y{Ula+F4aMe{PML zwPxAeF-*sPati!F-1Gc^qfE~c%hC?RZ2^CQK5Ow3Sfll;8&7YS#<9~%kWJZmotNK_ zi8W5)>fjM265!5S&9y?2;wrN63h zb7$r7o0b+undYa?CWbPxOIwAWXkI7gSxmjz9!uhktE^O}walE;Ug0LE;`-c+V*=B# zkP3m&qJw7%=G1di>m4D%OJcdHsa22VgP18IPQlFM3(wf}Z>!bUGx|GDXt+)|9uwpo znYDFRtqDeHI6B9~9hK}e79X(`3_vjC66hp=SmEXO z9b@?9b&3Yf4z27lL|X@|S6%a_S@Qz<6~x?%D~Uz8bk%zvR=?(xVy1cY@-f*f%})Y>tIaw??>I?3Yvy zO0;~kyJV}J^IYeUVcROW#`-WpIl$c1GjDp@8Mv#xT7+-t43OM(&3mGjahr-ey+9z;q!Vq zjcTpl81u^BCCsKdE|`qbQ)Dl7DvVxt@9FSTeTBd8J3;g%bDgZw*OyCz+aNs60bWBw zcVrYtwXQmao0g5*=TXb6Pab|&ifijsQPA>%HQOj8%{;tA{FYUd|5o*Wee`N8FIo!C zJ?Db$08Mo!V$l@bIoT+>nBctPi`+#5QE46*ZbY`V-#jCCDwolmwwZn~|4F@R-^XIF zY%Iq{q;2&mb7J*aWWm75mZ+6C+wH8t)aAE=UxHN=v(sQ z*3Sp#tcub(fv)6ayT_GR@o)^Y#V|&{C8T$15z4zl7G>6Fr@_M$b;+WtYT|lzKhj!U zZM4C(oYacz4kX1eIz|9g>*F>yVfdwg@5$OoV?}B3G7j-x+ts6y36=5a2laDJt&-w! z#c9ZebX3dHE=y7sB1Po0ozr@LqiP68%^ZkTVWV*u_?X4_Y^);1>y&F1h&44(9=Or1 zoDNo8fzQk_0U2XGV*n$$C+#$;;ln=Ndh+=i7THeVGADCK`j$U*BYNNlz4>Di+)$^C z7|M1>>NRq&r9Wd?LpXX?8LrbZ60XgfHHgX3lSoDm`Cp>UI51O9+}9g+snnWtv7Rql znfIbLbo69WK1UXap6S5}U~5P zh=Xt}QwySmX|8j1-dFF6x(@afRYZ+7b`(Bb5Z28ZDvd@)ejnu!U%fHxCWMZ1KGe5> z4J#)c5xZHkr5@?WZoIPepc0gr#w5M4L?=6XArWrYYfDq6$yYs{;dUc~$X+!LJi*j~3 z(p-TBx zmA8>CTr6%z60?@+gVpzWG=3P?aiF#1 zGE{^+Cb0I(>)hm-vEGDwR*GW|Fz%~!VWm5zTKhQYQH@T5Xv7XfYv%+AIFkmuB({KG z9uhsWJw?{PN2X=LQZii~)Qft%`O3P)-sd-+6KXM~+N9CG-Ty49Rbdkre}!6DEpV*y zNM_cs&d}d_4TIJ4!?Cm0jJYp@Rv0g523J~8zY%G&YvJMk8gNj&{k2M|G>?62$Q`se z_vA^PmmwLA)7THp#@CU-qbr{BWuR}aY??7*(2@M?g=uq8Nl3c?hod%82c13I^`~Cr zO7)4(59Y2}(+^tl(F-3~t_sfiW&`=uP2z+wPB_(Utj8o=dO&;nm_Wsr+5cXWN{Q^^ zn6wOKYQw%Moo6u*d2oz$JF?y_nj@M2j_3?^Kg&)t7VQfGQQ)rZiKn@CNtN`YapG7w zbI&4iw6?+U_iHpp<-t`}(v#~ekH}OE6K3}woI`7>MfjOJv^0J%_M(GrvNuj|1lm2* zo9~L?P%wM97~8v;#I+~3W;y}yE(X3o&9HsKSt1K1)Pv3gr1q_MT~s);N3)u zDHl$K*AvTk>u%(Aj(I)cyMyijkRF)jCRUz2*>?cQ5#wM)Z% zXHbgvBDn%2gux|SM3*9}_Vax^{eF9#)NO~(gjXI#wrcI*ZMGI7KqkoH^D%*C*sv3R?SE%STJZ|aP^4VQyf#=gR@&F zt{$BU7 z4822}&GE(CH`kk+(0+`hw*m*7G2~lhL*J70R2dM^l0_W>~-7g`M6vBEsrs3L-I^ zFnbBRs2sy3TZVz4BrYpfxn;D-xiPXX9ut%p`flY7YYt+q#<|xV4M~jGss`h_LQMJ) z?R29KpOX=m;Q}vHRly`CHf?5NSzrC-R3D04w{d%0v!DydN0LN0E9{y)xXm~0$0`qV z2fzMkuALyqonOB+u(A6oiKn~D$TI!&$BC|#@p{*fpX=?j{jXZTe-fTl{7tJ~<4sQn z^|f71F?X6cjb6uZsWzQ+?9*gn$!Dtwvq1qkvvrC3B>KTiy|<#=d?=J?&=ttLa={F5 zYB+U-oNpDmy;X#}6mJZF!JNNUg)dNsnTD%<{~2s?Ll|=*Y)FOTlFv z@^Z9DC(%$+2YKFfPDrnzTQ9Bjdz&~^a?}K^C(j4q!ryH(7?QeJb7|mB(ZTgm^HMeN z(+xq@W{r~3cva&zn;sjYdelwN1w-8@nOWGaiU>>$deoUze8 zEygU>VwhxyTHxidg>6lt{gg#)c`_E#Kql}vDLm9|cB<1D?*G}jFh%_ghQ;zY1$Qi(bErPpbl%cz2u z%h8q*pH{fIdSw{A=Xt5l6kJ()B!qxhgl>3kYi;avdCNqG&c5nN%b^;PL5pLQVz;_* zH^#3Y6ZnbG`qY_WyY^(oOMHrbus08+l6MSoH$8Be{QdY6M91O4==ZTszKy}CmAUOO zf6d~YoPCd@^o(VrZJ$7a>ebZpO7DOKPr1XC;v7wnF|v9nXUfpmMV>0UEZGhR(AU{Z z<;tfrE8noXP5br@PHG<$FlgnmtJF6~@WcUHt}q!N$#`AMw1Cy2^MqgC(hM{xB&qsV zS%1%ULTmPTEaaxigyd`;69gua_dCHaO)Yj#XcU`!svy!8W+oQwTY1=}>%lKNo#oGg zZEfo?W|QAJ>Sy~9w~I2HM!$F+6KITJa(Zw9L9WI%8k*{}$%Rq-rWJYJyZT~xCc0qY z!)zFjcc(CQK+bZpuHg$E%0g(;%e`s?FL|!;)%&G8c{Urtr!L0oS9tdC^c7gagyBV# zySvT@z+Iiw*mdnx&oeA$oh7@a2mP2UEl#U@9EMFJdkoi!Yilllx!!cPSHjJW`QU@c zvj>OjgZeOvdZ~k+$b*=9w_?FUior^nyy~O$HU^f%H$LWJAuv#WS#m=~E@ld~qr0FZ z?T4hdYR%SWRIaH+2@@dBSKBDdo%mfB_bj@}vR<(i3s);C z*<8yH^^z0sbXE$>4^B^p1<}pbzP^Lw@=DWPeCGk@Jtk-vHZN;i`0;*6xg^z+B%}OV zTfF@b6EDmC2n@L@HoLBd>_T-et$LNsc4**&_~v5qz}!|p14CBBS0b8r-JORq#PV%& z+LcgDTS~72{UDAqvVQ#XI%91z;obn8_uOFG`mGqFRLfwfeskI)c^9PnbaI7!3i-X- zA8|*ICvke2>Q&diy%NgV)!h_NF2h`;vn}eG3*JZXPHYG!4@eWF9Z}|Wx8IH?30ReW?&yWdKJy9C-PeI@kXpO z6a6&Xb<23ab*iguLk=*__)>+h3sb>j0{%NyS+S73fk83M{XP8yW*o~g!85}t>K(Qx zo|iK}y@Z8KiNt?4YS_K}y#DTHyLus7kcF+9eNVb;^}u2}_5&7bE#moSdtei&MT}okp;V&NHID>4yKcNCyhuOT2al0mQ}} znMa+g5gj9`T_Stvw=G}Jd8Rd_w#{|(5Ge&wJf9S8zmo4pVS5>{F?fy^$%l1{CeI+anN->~crD%8Ini-C1@T zzn0t3z-?RQhoU!Gb+UVs@`J@YCo)B=Ovd@B1iMquVc(~UZ(!y>i$97l-wGQBO^-jx z5O}sFm%CMY`j+ox+j}p<@7R}S2cj2KNsIM&&mxM0aw$p9H}I&|sYJ}WSDREXxdjEh zpAO2RZY|Q0Iw;+Mh$^kgOvW8e=zyLSgHFvmkCP16Fm`0?%~OEFQFfVfc8(8PENj3vs|&cZrE4Vt z>l|p(MY=J}Ge?i-CVe*CcXCXw7RaHiXOMFA1Q(h+zFRifHyE2;oDxb9il`jX%1tjx z38=qsTfdR9QHrbyf|Gqt9-6Mhkb7EA9$M;uIXdMD+3WSnG91IasL}#{G!PijG&6oE zNAlwp8G6vxj;_$md3t2IkfSd{T_p1x<{SCExN1mSi@4C#U#eHq>w3MF*`?(tms5D->bVWYLdZ045Gc!BMH%? zmUCr+#dNI+SQk#nNKhDWZ-v5lm~Q|| zdmqSth@UAE4>E8jzUI)_r1FfK&u5C6S>K-)D;h{erkBHr%*;s z&SYN6SA#_^hisH)33Q;aUevaWE510GHMxx^B9~<&#W4QRNIlLrWHc((G&-c@&?zz+s>%!hxjTQm4^F?mB5|40-O>8xP)4- zvmB9Q_K9_m3DAd?D-&N~@aQhkvLT}Bn1Jo`fg;Rv+SD6x_4Kp*>K;<1t5heM>-0&{b?MCSz=|cVSmkK%fUQC}(Ed-P9x029_qFW=kL; z!fn#l(gI;-GRs=I6<4E%40{bf9bP%#Oq+D|6^dP2o}(>Re`HXpAuHXxbNqDo_i0Q| z9{(70=gz*1=HM(lftd8nU0aXP?ZivxJ{irX%WJT(_A7R+%JyKcBi6}nv*_%D=CMw@ zP^szUA)iUJhJ2H8p*7Z;Y6l})Sy1pUv|uK``s=X5sc0*beA+xr?d-$so0LW>xaE<9 z?lqcG3~6v=l)UAPV@zsn6Uy-8+;ZM~*@)V;Af<{3zNiDIvBRnL#mLbL#s)3D&1J{Z z4@YvK7W$W;ZI%1g451UVoj=-k-1pqx>na#wzjEW))8Gwprcb}1US6JWvpnMwaL_Q= zJgY6yuD?zbxUn&ZayKvDA3Y$c6x4PvCK0<(gNyO503-`IhK@RtiC1il%OhHk_9SbM zx*VLFpiFZlF{X0VLT2Um?6Rg^vXIyakTh#EE>& ziNh|9x&9#4FkP#zDyh)g(O=Se*T|!ZG~cPPRe$7nzQiu@@EUgq=OgB6`4(~R<~gBI zEg42uj#6vGJcr2ju4RQa@u^F`lZ=;^zF4-U;{Hu4d-eJLCKHmdLGg=wm~{Qf8(o5XaFF}MBeU8m)~?2sBf_xGqv@XnJoWOAp;EVeAx8KCdo+;Zk4%n}s> zY}IEv+I%#Jg~@^@^3kBVagATzf3gb>MN?iTcmV6F_!1!WLRcV7lM9CP+oLcT6dv_OwgY+an zwzqa26wCHSe)L+NE`{U|4Y1HEMWp6<%m>z*lzGzcFO2V$WcyCVENicwo>~q3?rAv{ zMlv?M7p6jdnD6m^yv?LE`^{~)>HEqh1ZN+UctlEH5p6VjyNm@nSvz?UQ-@>i_<)!v zRl9}74QnpT3I~F&#Erw$p`tvKsBF)B*z%O}?l~_t7`I`Tw2UCqkueu-l|jDcOmkyIsX z?)hLZTTT(b))}5|h#9F0t6?fL#3yet}B?yjRFSAsP&xJ0F7Kx&a}s;5TJsGtO0FOR`KS zYoKWbE2~!M>g&xm*d9?7jgDH4?>LLOOdb;~@2|yA!gEv6YrS<#@8H?bV;=S)%aRrs zzukY>#GpO)nMm(t){`#zsis+t^$_8L>XP~Jp{H#h9`&lHSg!UbHQJ%kF2o~a_pY~7 zAljd5Tnh0Gk9u_Pn-+z&Md(0ecar+V8E;0pb!JeY3!PX0#0xIOz4W0`110WQOWW{$ z=I`UR3H9_-pqIjVr*9g?zTQ6c&dc9nKd6br+}h1=*(Nd=87^LRpRei}o-n)8RJN`BP4+j57Opy9mq`7^ot634l)k_Gtkz{eewfsvrq_ zwCzb${}b#0fC`udA-wt^IMM`V^lz!kMgYqPNCf<>r;2nyJJ|zdAJfTZFjbH$+R@1I zB&7dOOiCHx5dUHte%UT|f?@rgj`)L#07>%!9O8+sf1@Kn50#89kcg9tasX2G+Z&ey z0?z+d`mcumfE!O*K>gfu1~9RX$lo`eJwV!+o$M$BIL^Pel({A}K9mcamZZ$>tjXwA z5JvH)posl$UH;HiT;58eN6z%5jLAMl=!qcHIfw&xb+g~NISOLqlAan)m7b}G&M5Wl z@?Mp0fB&M|B*`R!2r2n46OvgA<$I;(esWL&ig1k2JOF)3#-hx33yE=H{K` zrCwBuTw!Dx^`@sf(BL+YUN<>7Fo)ZG9`lHJ30;V&#H^Ibka)-JjXhc+)_Yi0*FSD` zOz%~Bc9YVxOzvpR4no_1pA{`5Z)@?Od~I-k?mkr#*zH5$tPP|3e&ZtVO~&wb{}=qR z)(iy_@6sn@Ysr^=^MT8lj|jK0*cQr$K|cR(f9h&Xd8GoYwK+bLSINUdn@VD6TY+t z!EtIqr{sk4Ez*6*P_L0E`BPZu!~(c^ePY!`*{ThJ3JqYK1e0HcUrA(LQtVi%ISPl;<eJodiQ$6jL=3 zykWN1ryO;?J-&;HY!d|*E&YWMOhR)dD& zS{;&~MI~k#pWV#~4@tYl#}O{wt-khV{6*n7O`Ql~W7Ee9h0jUE=dPW(vS(`(Lu6%3 z`_+HFD+tF)m<1z?N)w{Yn_nY0wqyD5%HyUP`B#^9@Ve}b@!Pwd0WTG9Q)c!pYri97 zOfEkl%8lFCyMgr24if^G<@< zLO8|8*&wyTcQ>UJ`5ZjqnHwneh|DuGn(sNXOwY>Me~bDi0=A;2S|G$Z$Qm2FpKGL5 zwb$URcre4Upm92uf>)!}_4Y0gZ(IzkV^5_yb*Zwg7rRa|2%`Ua8 z#tQTrSD#@aT|<0Pe$V5%h(Q)`^}7|HN%)s(Yxs<%;djN}2~za47<4?+XUctVq~yQ- zyoPLdr1ah~#ctYVU^FqD{!VqjVN%rd-RLoe`*Al?I+jx7F{AjYNtO-PH;q#V>4f9r z&6XBdT*F>Ne0QIJL#=V2`B?FRJPdfTl_kB#5Kd3Ldbtqx4qibPkWomOed!uOe;&fM zE526byn*SD`nm(ffTlGAR<~QOVR!84tN0X$yw}}?tF+1+*E-ELQIG5>Z(DPaW*fil z`BYIF(^NdM>r-9iHlLa}Nv`FLnq(O9=t7va!NqIy>pHlZ!Tu~C_HrQ7i^*pmO<-dcZbz0Xxg3o1qnqtX=_r#P=7 zJ9c)3AhMy}2o-9C;e4V_u14&$acwr5W|$?_zy@^Vg;(GZf9{LZMRsm!l}=tIG4ebT z8Vof`R-DM$VRNDPFM9nZtzc{n@|hv(t&JRF{f!}D->9uCjL z;d!|KBOdOLU`M*Y;BY7R9sZ=@ehE(WpJBK^g3{nwxIc+8fOf;PaCjCD&%)tZI6Mo7 zXW{TH9G->4vv7D84$s2jSvWikhiBpNEF7MN!?SRB77owC;aNC53x{Xn@GKmjg~PLO zcoq)N!r@ssJPU_s;qWXRo`u7+aCjCD&%)tZxPJiT{+WgQMV3th?F0aAydVXX37}B- z6J-0TqNDn|6x|8N=I|F@_KO7B?<^S~4<8_j_BwY^G(*{lF|Sr)n3+%}V$53na4_6f0%?wtb+<>Vxhtw8+$|A8Cd}e6B2iai zS8H2qz#5aQwUv#7u&WrekqO!uDGWS6Nd_@9{Vd{WDaL&7B!lT8px7*dwnsAYb3?fh zU>+!w5DzyGln=@Sy#uJ;0Xlg=oONCyE*_{bzo0Ngi0Su(8Ab#=6SX%n6;_p${=GTi zl^C?*3K@ z$N(zw{bMD-pK$npZp+&GA8VQ*{;X!}WN-D;6D9}{(h6yPf>Q!5@__zy0MI{CpebPq zd*lg!2>ihS=L2r%F3bOUb_+^wS0$ZGtj&`v)6*{K8=9 zuQq-u`L8y9KO_HFR(>t}iygqN!pin&6DI`H9tNmQTA_@YfXM$2Xd5Fdl)I7R$>*$w zw0B0@GYN6?aP!*oFx^&`RTcm}K-oCCa=8jZ4WSTrQPAJ){9^y-M7GN|FHW@-CyjQAcRfP z_SQy@Fe6)AD?m5$!~-Y269xU1^?P|y(8&n+Wkmco+#C(hJuB7z&~f1Y+x`OI|!H_{#yY){-0CG-wOOO zySx3ro89UC`}9tao&B%nlB+$^l$7X1XB+tc=fMQ!=LhtePjXHUU<$_z#2z_0m~8$` zI}uO+*ED{9Ad1P~%R#{qAmqp2(*y-h1n2)X4GLi7e@o*Rgq$q+{xwbDU*8L`a1#8b zuA{vX$_i;uN(A8OYAAOk&^eHtq^>CcJ&1s$G(-|u4@yC$B&GQ72>>0FmX?qPOM!); z_h6#;AW*4$f|B>4{L=hTDG1~qM34_6B_sspg$fDsOG>~_^kj*ENvgf$Z;s>Pfszuj LvPvmQlM?+8`>Y9= literal 0 HcmV?d00001