Skip to content

Commit

Permalink
remove dependency: yargs
Browse files Browse the repository at this point in the history
  • Loading branch information
modestysn committed Nov 13, 2021
1 parent ef0fe96 commit 1271d2a
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 148 deletions.
31 changes: 11 additions & 20 deletions lib/p2jcmd.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,17 @@ const nodeUtil = require("util"),

// Banner string of the form "<name>@<version> [<homepage>]", built from pkInfo
// (presumably the package.json contents — confirm where pkInfo is loaded).
const _PRO_TIMER = `${pkInfo.name}@${pkInfo.version} [${pkInfo.homepage}]`;

// yargs-based option table: every short flag gets a long alias plus a help description,
// and the usage text is prefixed with the banner above.
// NOTE(review): in this diff view these lines look like the pre-change declaration that the
// "remove dependency: yargs" commit replaces — confirm before relying on them.
const yargs = require('yargs')
.usage("\n" + _PRO_TIMER + "\n\nUsage: $0 -f|--file [-o|output_dir]")
.alias('v', 'version')
.describe('v', 'Display version.\n')
.alias('h', 'help')
.describe('h', 'Display brief help information.\n')
.alias('f', 'file')
.describe('f', '(required) Full path of input PDF file or a directory to scan for all PDF files. When specifying a PDF file name, it must end with .PDF, otherwise it would be treated as a input directory.\n')
.alias('o', 'output_dir')
.describe('o', '(optional) Full path of output directory, must already exist. Current JSON file in the output folder will be replaced when file name is same.\n')
.alias('s', 'silent')
.describe('s', '(optional) when specified, will only log errors, otherwise verbose.\n')
.alias('t', 'fieldTypes')
.describe('t', '(optional) when specified, will generate .fields.json that includes fields ids and types.\n')
.alias('c', 'content')
.describe('c', '(optional) when specified, will generate .content.txt that includes text content from PDF.\n')
.alias('m', 'merge')
.describe('m', '(optional) when specified, will generate .merged.json that includes auto-merged broken text blocks from PDF (Experimental).\n')
.alias('r', 'stream')
.describe('r', '(optional) when specified, will process and parse with buffer/object transform stream rather than file system (Experimental).\n');
// CLI option table built with the project-local ./p2jcmdarg module instead of the yargs package.
// Each alias(short, long, description) call registers one option together with its help text;
// the calls are chained, so every one of them returns the parser object.
// NOTE(review): the usage line advertises "[-o|output_dir]" while the long option registered
// below is "output" — confirm which spelling is intended.
const yargs = require('./p2jcmdarg')
.usage(`\n${_PRO_TIMER}\n\nUsage: ${pkInfo.name} -f|--file [-o|output_dir]`)
.alias('v', 'version', 'Display version.')
.alias('h', 'help', 'Display brief help information.')
.alias('f', 'file', '(required) Full path of input PDF file or a directory to scan for all PDF files.\n\t\t When specifying a PDF file name, it must end with .PDF, otherwise it would be treated as a input directory.')
.alias('o', 'output', '(optional) Full path of output directory, must already exist.\n\t\t Current JSON file in the output folder will be replaced when file name is same.')
.alias('s', 'silent', '(optional) when specified, will only log errors, otherwise verbose.')
.alias('t', 'fieldTypes', '(optional) when specified, will generate .fields.json that includes fields ids and types.')
.alias('c', 'content', '(optional) when specified, will generate .content.txt that includes text content from PDF.')
.alias('m', 'merge', '(optional) when specified, will generate .merged.json that includes auto-merged broken text blocks from PDF.')
.alias('r', 'stream', '(optional) when specified, will process and parse with buffer/object transform stream rather than file system.');

// Parsed command-line options, read from the parser's `argv` property.
const argv = yargs.argv;
// True whenever a "v" key exists on the parsed options object, whatever its value is.
const ONLY_SHOW_VERSION = ('v' in argv);
Expand Down
136 changes: 136 additions & 0 deletions lib/p2jcmdarg.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/**
 * Minimal command-line argument parser.
 *
 * Supported token shapes:
 *   --name            -> argv.name = true
 *   --name value      -> argv.name = value (numeric strings become numbers,
 *                        the literals "true"/"false" become booleans)
 *   --a.b value       -> argv.a.b = value (dots build nested objects)
 *   -x / -x value     -> same as above; only the LAST character of the token
 *                        is used as the key (so "-abc" sets just "c")
 * A key given more than once collects its values into an array.
 * Every value is stored under both the short key and its registered long
 * name (and vice versa), see {@link CLIArgParser#alias}.
 */
class CLIArgParser {
  /** Raw argument tokens to parse. */
  args = [];

  // Null-prototype dictionary: option names such as "constructor" or
  // "toString" must never resolve to members inherited from Object.prototype.
  #aliases = Object.create(null);

  #usage = "";
  #argv = null;

  /**
   * @param {string[]} [args] Tokens to parse; anything that is not an array
   *   is ignored and the parser starts with an empty token list.
   */
  constructor(args) {
    if (Array.isArray(args)) {
      this.args = args;
    }
  }

  /**
   * Sets the text printed at the top of the help output.
   * @param {string} usageMsg Usage text; an "Options:" header is appended.
   * @returns {CLIArgParser} this, for chaining.
   */
  usage(usageMsg) {
    this.#usage = usageMsg + '\n\nOptions:\n';
    return this;
  }

  /**
   * Registers an option.
   * @param {string} key Short option name (used as "-key").
   * @param {string} name Long option name (used as "--name").
   * @param {string} description Help text shown by showHelp().
   * @returns {CLIArgParser} this, for chaining.
   */
  alias(key, name, description) {
    this.#aliases[key] = { name, description };
    return this;
  }

  /** Prints the usage text followed by one line per registered option. */
  showHelp() {
    let helpMsg = this.#usage;
    for (const [key, value] of Object.entries(this.#aliases)) {
      helpMsg += `-${key},--${value.name}\t ${value.description}\n`;
    }
    console.log(helpMsg);
  }

  /**
   * Parsed options. Parsing happens on first access and the resulting object
   * is cached, so later reads return the same object.
   * @returns {Object}
   */
  get argv() {
    return this.#argv ? this.#argv : this.#parseArgv();
  }

  /**
   * Tells whether a value is a number or a string spelling one
   * (hexadecimal "0x..", decimal, optional sign, fraction and exponent).
   * @param {*} x
   * @returns {boolean}
   */
  static isNumber(x) {
    if (typeof x === 'number') {
      return true;
    }
    if (/^0x[0-9a-f]+$/i.test(x)) {
      return true;
    }
    return /^[-+]?(?:\d+(?:\.\d*)?|\.\d+)(e[-+]?\d+)?$/.test(x);
  }

  /**
   * Stores a value under `key` and under its alias counterpart.
   * @param {string} key Option name as typed, without leading dashes.
   * @param {string|boolean} val Raw value; numeric strings are converted.
   * @param {Object} argv Result object being filled.
   */
  #setArg(key, val, argv) {
    const value = CLIArgParser.isNumber(val) ? Number(val) : val;
    this.#setKey(argv, key.split('.'), value);

    // Find the counterpart name: short -> long first, otherwise long -> short.
    let counterpart;
    if (key in this.#aliases) {
      counterpart = this.#aliases[key].name;
    }
    else {
      const match = Object.entries(this.#aliases).find(([, spec]) => spec.name === key);
      if (match) {
        counterpart = match[0];
      }
    }

    if (typeof counterpart === 'string') {
      this.#setKey(argv, counterpart.split('.'), value);
    }
  }

  /**
   * Writes `value` at the nested path `keys` inside `obj`.
   * Only own properties are consulted, so inherited members (constructor,
   * toString, ...) are never walked into or merged with.
   * @param {Object} obj Root object.
   * @param {string[]} keys Path segments.
   * @param {*} value Value to store; repeated writes accumulate in an array.
   */
  #setKey(obj, keys, value) {
    const hasOwn = (target, prop) => Object.prototype.hasOwnProperty.call(target, prop);

    let node = obj;
    for (let i = 0; i < keys.length - 1; i++) {
      const segment = keys[i];
      // Assigning "__proto__" would swap the object's prototype.
      if (segment === '__proto__') {
        return;
      }
      if (!hasOwn(node, segment)) {
        node[segment] = {};
      }
      const child = node[segment];
      // A scalar already stored here cannot hold nested keys; writing a
      // property on a primitive throws in strict mode, so drop the write.
      if (child === null || typeof child !== 'object') {
        return;
      }
      node = child;
    }

    const leaf = keys[keys.length - 1];
    if (leaf === '__proto__') {
      return;
    }
    if (!hasOwn(node, leaf)) {
      node[leaf] = value;
    }
    else if (Array.isArray(node[leaf])) {
      node[leaf].push(value);
    }
    else {
      node[leaf] = [node[leaf], value];
    }
  }

  /**
   * Walks the token list once and builds the options object.
   * @returns {Object} The parsed options (also cached for the argv getter).
   */
  #parseArgv() {
    const args = this.args;
    const argv = {};
    const isBoolLiteral = (token) => /^(true|false)$/.test(token);

    for (let i = 0; i < args.length; i++) {
      const arg = args[i];
      const next = args[i + 1];

      if (/^--.+/.test(arg)) {
        const key = arg.match(/^--(.+)/)[1];
        // Boolean literals are tested first: they also look like plain
        // values, so testing them second would never be reached.
        if (isBoolLiteral(next)) {
          this.#setArg(key, next === 'true', argv);
          i++;
        }
        else if (next !== undefined && !/^-/.test(next)) {
          this.#setArg(key, next, argv);
          i++;
        }
        else {
          this.#setArg(key, true, argv);
        }
      }
      else if (/^-[^-]+/.test(arg)) {
        // Only the last character of a short token is used as the key.
        const key = arg.slice(-1);
        if (key !== '-') {
          if (next && isBoolLiteral(next)) {
            this.#setArg(key, next === 'true', argv);
            i++;
          }
          else if (next && !/^(-|--)[^-]/.test(next)) {
            this.#setArg(key, next, argv);
            i++;
          }
          else {
            this.#setArg(key, true, argv);
          }
        }
      }
      else {
        console.warn("Unknow CLI options:", arg);
      }
    }

    this.#argv = argv;
    return argv;
  }
}

module.exports = new CLIArgParser(process.argv.slice(2));
113 changes: 0 additions & 113 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 11 additions & 13 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@
"scripts": {
"test": "cd ./test && sh p2j.forms.sh",
"test-misc": "cd ./test && sh p2j.one.sh misc . \"Expected: 5 success, 2 exception with stack trace\" ",
"parse": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form",
"parse-s": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s",
"parse-t": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t",
"parse-c": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c",
"parse-m": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m",
"parse-r": "node --trace-deprecation pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r",
"parse-242": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc",
"parse-e": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc",
"parse-e2": "node --trace-deprecation pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc"
"parse": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form",
"parse-s": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s",
"parse-t": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t",
"parse-c": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c",
"parse-m": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form/F1040.pdf -o ./test/target/fd/form -s -t -c -m",
"parse-r": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/fd/form -o ./test/target/fd/form -t -c -m -r",
"parse-242": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i242_testingWithTable.pdf -o ./test/target/misc",
"parse-e": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i43_encrypted.pdf -o ./test/target/misc",
"parse-e2": "node --trace-deprecation --trace-warnings pdf2json.js -f ./test/pdf/misc/i243_problem_file_anon.pdf -o ./test/target/misc"
},
"engines": {
"node": ">=14.18.0",
Expand All @@ -47,13 +47,11 @@
"pdf2json": "./bin/pdf2json"
},
"dependencies": {
"@xmldom/xmldom": "^0.7.5",
"yargs": "^17.2.1"
"@xmldom/xmldom": "^0.7.5"
},
"devDependencies": {},
"bundledDependencies": [
"@xmldom/xmldom",
"yargs"
"@xmldom/xmldom"
],
"maintainers": [
{
Expand Down
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -888,7 +888,7 @@ In order to support this auto merging capability, text block objects have an add
* More test coverage, 4 more test scripts added, see _package.json_ for details
* Easier access to dictionaries, including color, font face and font style, see Dictionary reference section for details
* Refactor to ES6 class for major entry modules
* Dependencies removed: lodash, async
* Dependencies removed: lodash, async and yargs
* Upgrade to Node v14.18.0 LTS
### Install on Ubuntu
Expand Down
2 changes: 1 addition & 1 deletion test/p2j.one.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ echo "-----------------------------------------------------"
echo "Update $AGENCY_NAME PDF"
echo "-----------------------------------------------------"
mkdir -p $OUT_DIR_BASE/$AGENCY_NAME/$FORM_BASE
node --trace-deprecation $PDF2JSON -f $IN_DIR_BASE/$AGENCY_NAME/$FORM_BASE -o $OUT_DIR_BASE/$AGENCY_NAME/$FORM_BASE -s -t -c -m
node --trace-deprecation --trace-warnings $PDF2JSON -f $IN_DIR_BASE/$AGENCY_NAME/$FORM_BASE -o $OUT_DIR_BASE/$AGENCY_NAME/$FORM_BASE -s -t -c -m
# diff -rq $OUT_DIR_BASE$AGENCY_NAME/$FORM_BASE/ $DATA_DIR_BASE$AGENCY_NAME/$FORM_BASE/

echo "-----------------------------------------------------"
Expand Down

0 comments on commit 1271d2a

Please sign in to comment.