From 1271d2a62fe6f18631f64cf1e4d9745d5cd18685 Mon Sep 17 00:00:00 2001 From: "modesty.zhang" Date: Fri, 12 Nov 2021 17:48:50 -0800 Subject: [PATCH] remove dependency: yargs --- lib/p2jcmd.js | 31 ++++------- lib/p2jcmdarg.js | 136 ++++++++++++++++++++++++++++++++++++++++++++++ package-lock.json | 113 -------------------------------------- package.json | 24 ++++---- readme.md | 2 +- test/p2j.one.sh | 2 +- 6 files changed, 160 insertions(+), 148 deletions(-) create mode 100644 lib/p2jcmdarg.js diff --git a/lib/p2jcmd.js b/lib/p2jcmd.js index f93b7a10..1e32a33f 100644 --- a/lib/p2jcmd.js +++ b/lib/p2jcmd.js @@ -7,26 +7,17 @@ const nodeUtil = require("util"), const _PRO_TIMER = `${pkInfo.name}@${pkInfo.version} [${pkInfo.homepage}]`; -const yargs = require('yargs') - .usage("\n" + _PRO_TIMER + "\n\nUsage: $0 -f|--file [-o|output_dir]") - .alias('v', 'version') - .describe('v', 'Display version.\n') - .alias('h', 'help') - .describe('h', 'Display brief help information.\n') - .alias('f', 'file') - .describe('f', '(required) Full path of input PDF file or a directory to scan for all PDF files. When specifying a PDF file name, it must end with .PDF, otherwise it would be treated as a input directory.\n') - .alias('o', 'output_dir') - .describe('o', '(optional) Full path of output directory, must already exist. Current JSON file in the output folder will be replaced when file name is same.\n') - .alias('s', 'silent') - .describe('s', '(optional) when specified, will only log errors, otherwise verbose.\n') - .alias('t', 'fieldTypes') - .describe('t', '(optional) when specified, will generate .fields.json that includes fields ids and types.\n') - .alias('c', 'content') - .describe('c', '(optional) when specified, will generate .content.txt that includes text content from PDF.\n') - .alias('m', 'merge') - .describe('m', '(optional) when specified, will generate .merged.json that includes auto-merged broken text blocks from PDF (Experimental).\n') - .alias('r', 'stream') - .describe('r', '(optional) when specified, will process and parse with buffer/object transform stream rather than file system (Experimental).\n'); +const yargs = require('./p2jcmdarg') + .usage(`\n${_PRO_TIMER}\n\nUsage: ${pkInfo.name} -f|--file [-o|output_dir]`) + .alias('v', 'version', 'Display version.') + .alias('h', 'help', 'Display brief help information.') + .alias('f', 'file', '(required) Full path of input PDF file or a directory to scan for all PDF files.\n\t\t When specifying a PDF file name, it must end with .PDF, otherwise it would be treated as a input directory.') + .alias('o', 'output', '(optional) Full path of output directory, must already exist.\n\t\t Current JSON file in the output folder will be replaced when file name is same.') + .alias('s', 'silent', '(optional) when specified, will only log errors, otherwise verbose.') + .alias('t', 'fieldTypes', '(optional) when specified, will generate .fields.json that includes fields ids and types.') + .alias('c', 'content', '(optional) when specified, will generate .content.txt that includes text content from PDF.') + .alias('m', 'merge', '(optional) when specified, will generate .merged.json that includes auto-merged broken text blocks from PDF.') + .alias('r', 'stream', '(optional) when specified, will process and parse with buffer/object transform stream rather than file system.'); const argv = yargs.argv; const ONLY_SHOW_VERSION = ('v' in argv); diff --git a/lib/p2jcmdarg.js b/lib/p2jcmdarg.js new file mode 100644 index 00000000..c7cc0084 --- /dev/null +++ b/lib/p2jcmdarg.js @@ -0,0 +1,136 @@ +class CLIArgParser { + args = []; + #aliases = {}; + + #usage = ""; + #argv = null; + + // constructor + constructor(args) { + if (Array.isArray(args)) + this.args = args; + } + + usage(usageMsg) { + this.#usage = usageMsg + '\n\nOptions:\n'; + return this; + } + + alias(key, name, description) { + this.#aliases[key] = {name, description}; + return this; + } + + showHelp() { + let helpMsg = this.#usage; + for (const [key, value] of Object.entries(this.#aliases)) { + helpMsg += `-${key},--${value.name}\t ${value.description}\n`; + } + console.log(helpMsg); + } + + get argv() { + return this.#argv ? this.#argv : this.#parseArgv(); + } + + static isNumber (x) { + if (typeof x === 'number') + return true; + if (/^0x[0-9a-f]+$/i.test(x)) + return true; + return /^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(x); + } + + #setArg(key, val, argv) { + const value = CLIArgParser.isNumber(val) ? Number(val) : val; + this.#setKey(argv, key.split('.'), value); + + const aliasKey = (key in this.#aliases) ? [this.#aliases[key].name] : []; + if (aliasKey.length < 1) { + for (const [akey, avalue] of Object.entries(this.#aliases)) { + if (key === avalue.name) { + aliasKey.push(akey); + break; + } + } + } + aliasKey.forEach(x => this.#setKey(argv, x.split('.'), value)); + } + + #setKey(obj, keys, value) { + let o = obj; + for (let i = 0; i < keys.length-1; i++) { + let key = keys[i]; + if (key === '__proto__') return; + if (o[key] === undefined) o[key] = {}; + if (o[key] === Object.prototype || o[key] === Number.prototype + || o[key] === String.prototype) o[key] = {}; + if (o[key] === Array.prototype) o[key] = []; + o = o[key]; + } + + let key = keys[keys.length - 1]; + if (key === '__proto__') return; + if (o === Object.prototype || o === Number.prototype + || o === String.prototype) o = {}; + if (o === Array.prototype) o = []; + if (o[key] === undefined) { + o[key] = value; + } + else if (Array.isArray(o[key])) { + o[key].push(value); + } + else { + o[key] = [ o[key], value ]; + } + } + + #parseArgv() { + let aliases=this.#aliases, args = this.args; + let argv = {}; + + for (let i = 0; i < args.length; i++) { + let arg = args[i]; + + if (/^--.+/.test(arg)) { + let key = arg.match(/^--(.+)/)[1]; + let next = args[i + 1]; + if (next !== undefined && !/^-/.test(next)) { + this.#setArg(key, next, argv); + i++; + } + else if (/^(true|false)$/.test(next)) { + this.#setArg(key, next === 'true', argv); + i++; + } + else { + this.#setArg(key, true, argv); + } + } + else if (/^-[^-]+/.test(arg)) { + let key = arg.slice(-1)[0]; + if (key !== '-') { + if (args[i+1] && !/^(-|--)[^-]/.test(args[i+1])) { + this.#setArg(key, args[i+1], argv); + i++; + } + else if (args[i+1] && /^(true|false)$/.test(args[i+1])) { + this.#setArg(key, args[i+1] === 'true', argv); + i++; + } + else { + this.#setArg(key, true, argv); + } + } + } + else { + console.warn("Unknow CLI options:", arg); + } + } + + this.#argv = argv; + return argv; + } +} + +module.exports = new CLIArgParser(process.argv.slice(2)); \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index c7c19c56..0024257a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,119 +8,6 @@ "version": "0.7.5", "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.7.5.tgz", "integrity": "sha512-V3BIhmY36fXZ1OtVcI9W+FxQqxVLsPKcNjWigIaa81dLC9IolJl5Mt4Cvhmr0flUnjSpTdrbMTSbXqYqV5dT6A==" - }, - "ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==" - }, - "ansi-styles": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "requires": { - "color-convert": "^2.0.1" - } - }, - "cliui": { - "version": "7.0.4", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz", - "integrity": "sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==", - "requires": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.0", - "wrap-ansi": "^7.0.0" - } - }, - "color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "requires": { - "color-name": "~1.1.4" - } - }, - "color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" - }, - "emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" - }, - "escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==" - }, - "get-caller-file": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", - "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==" - }, - "is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==" - }, - "require-directory": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha1-jGStX9MNqxyXbiNE/+f3kqam30I=" - }, - "string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "requires": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - } - }, - "strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "requires": { - "ansi-regex": "^5.0.1" - } - }, - "wrap-ansi": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "requires": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - } - }, - "y18n": { - "version": "5.0.8", - "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", - "integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==" - }, - "yargs": { - "version": "17.2.1", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.2.1.tgz", - "integrity": "sha512-XfR8du6ua4K6uLGm5S6fA+FIJom/MdJcFNVY8geLlp2v8GYbOXD4EB1tPNZsRn4vBzKGMgb5DRZMeWuFc2GO8Q==", - "requires": { - "cliui": "^7.0.2", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.0", - "y18n": "^5.0.5", - "yargs-parser": "^20.2.2" - } - }, - "yargs-parser": { - "version": "20.2.9", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz", - "integrity": "sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==" } } } diff --git a/package.json b/package.json index 8d9a4df0..48da5924 100644 --- a/package.json +++ b/package.json @@ -29,15 +29,15 @@ "scripts": { "test": "cd ./test && sh p2j.forms.sh", "test-misc": "cd ./test && sh p2j.one.sh misc . \"Expected: 5 success, 2 exception with stack trace\" ", - "parse": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form", - "parse-s": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s", - "parse-t": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", - "parse-c": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", - "parse-m": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", - "parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r", - "parse-242": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", - "parse-e": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc", - "parse-e2": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc" + "parse": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form", + "parse-s": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s", + "parse-t": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t", + "parse-c": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c", + "parse-m": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m", + "parse-r": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r", + "parse-242": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc", + "parse-e": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc", + "parse-e2": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc" }, "engines": { "node": ">=14.18.0", @@ -47,13 +47,11 @@ "pdf2json": "./bin/pdf2json" }, "dependencies": { - "@xmldom/xmldom": "^0.7.5", - "yargs": "^17.2.1" + "@xmldom/xmldom": "^0.7.5" }, "devDependencies": {}, "bundledDependencies": [ - "@xmldom/xmldom", - "yargs" + "@xmldom/xmldom" ], "maintainers": [ { diff --git a/readme.md b/readme.md index 1077062b..e330c677 100644 --- a/readme.md +++ b/readme.md @@ -888,7 +888,7 @@ In order to support this auto merging capability, text block objects have an add * More test coverage, 4 more test scripts added, see _package.json_ for details * Easier access to dictionaries, including color, font face and font style, see Dictionary reference section for details * Refactor to ES6 class for major entry modules - * Dependencies removed: lodash, async + * Dependencies removed: lodash, async and yargs * Upgrade to Node v14.18.0 LTSs ### Install on Ubuntu diff --git a/test/p2j.one.sh b/test/p2j.one.sh index 7d1b759f..85e92e5c 100755 --- a/test/p2j.one.sh +++ b/test/p2j.one.sh @@ -16,7 +16,7 @@ echo "-----------------------------------------------------" echo "Update $AGENCY_NAME PDF" echo "-----------------------------------------------------" mkdir -p $OUT_DIR_BASE/$AGENCY_NAME/$FORM_BASE -node --trace-deprecation $PDF2JSON -f $IN_DIR_BASE/$AGENCY_NAME/$FORM_BASE -o $OUT_DIR_BASE/$AGENCY_NAME/$FORM_BASE -s -t -c -m +node --trace-deprecation --trace-warnings $PDF2JSON -f $IN_DIR_BASE/$AGENCY_NAME/$FORM_BASE -o $OUT_DIR_BASE/$AGENCY_NAME/$FORM_BASE -s -t -c -m # diff -rq $OUT_DIR_BASE$AGENCY_NAME/$FORM_BASE/ $DATA_DIR_BASE$AGENCY_NAME/$FORM_BASE/ echo "-----------------------------------------------------"