Skip to content

Commit

Permalink
remove dependency: async
Browse files Browse the repository at this point in the history
  • Loading branch information
modestysn committed Oct 30, 2021
1 parent 06a405f commit ef0fe96
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 65 deletions.
4 changes: 1 addition & 3 deletions bin/pdf2json
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
#!/usr/bin/env node

'use strict';

var P2JCMD = require('../lib/p2jcmd');
const P2JCMD = require('../lib/p2jcmd');
new P2JCMD().start();
53 changes: 26 additions & 27 deletions lib/p2jcmd.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@

const nodeUtil = require("util"),
fs = require("fs"),
path = require("path"),
async = require("async"),
{ParserStream, StringifyStream} = require("./parserstream"),
pkInfo = require("../package.json"),
PDFParser = require("../pdfparser");
Expand Down Expand Up @@ -96,35 +94,36 @@ class PDFProcessor {
this.pdfParser.getAllFieldsTypesStream().pipe(new StringifyStream()).pipe(outputStream);
}

#processAdditionalStreams(outputTasks, callback) {
if (PROCESS_FIELDS_CONTENT) {//needs to generate fields.json file
outputTasks.push(cbFunc => this.#generateFieldsTypesStream(cbFunc));
}
if (PROCESS_RAW_TEXT_CONTENT) {//needs to generate content.txt file
outputTasks.push(cbFunc => this.#generateRawTextContentStream(cbFunc));
}
if (PROCESS_MERGE_BROKEN_TEXT_BLOCKS) {//needs to generate json file with merged broken text blocks
outputTasks.push(cbFunc => this.#generateMergedTextBlocksStream(cbFunc));
}

if (outputTasks.length > 0) {
async.series(outputTasks, (err, results) => {//additional streams process complete
if (err) {
this.curCLI.addStatusMsg(err, `[+]=> ${err}`);
} else {
results.forEach( r => this.curCLI.addStatusMsg(null, `[+]=> ${r}`));
}
this.#continue(callback);
});
}
else {
this.#continue(callback);
}
#processAdditionalStreams(callback) {
const outputTasks = [];
if (PROCESS_FIELDS_CONTENT) {//needs to generate fields.json file
outputTasks.push(cbFunc => this.#generateFieldsTypesStream(cbFunc));
}
if (PROCESS_RAW_TEXT_CONTENT) {//needs to generate content.txt file
outputTasks.push(cbFunc => this.#generateRawTextContentStream(cbFunc));
}
if (PROCESS_MERGE_BROKEN_TEXT_BLOCKS) {//needs to generate json file with merged broken text blocks
outputTasks.push(cbFunc => this.#generateMergedTextBlocksStream(cbFunc));
}

let taskId = 0;
function sequenceTask() {
if (taskId < outputTasks.length) {
outputTasks[taskId]((err, ret) => {
this.curCLI.addStatusMsg(err, `[+]=> ${ret}`);
taskId++;
sequenceTask.call(this);
});
}
else
this.#continue(callback);
}
sequenceTask.call(this);
}

#onPrimarySuccess(callback) {
this.curCLI.addResultCount();
this.#processAdditionalStreams([], callback);
this.#processAdditionalStreams(callback);
}

#onPrimaryError(err, callback) {
Expand Down
7 changes: 1 addition & 6 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,11 @@
"pdf2json": "./bin/pdf2json"
},
"dependencies": {
"async": "^3.2.1",
"@xmldom/xmldom": "^0.7.5",
"yargs": "^17.2.1"
},
"devDependencies": {},
"bundledDependencies": [
"async",
"@xmldom/xmldom",
"yargs"
],
Expand Down
45 changes: 19 additions & 26 deletions pdfparser.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
const fs = require("fs"),
{ readFile } = require("fs/promises"),
{EventEmitter} = require("events"),
nodeUtil = require("util"),
async = require("async"),
PDFJS = require("./lib/pdf"),
{ParserStream} = require("./lib/parserstream"),
{kColors, kFontFaces, kFontStyles} = require("./lib/pdfconst");
Expand All @@ -21,7 +21,6 @@ class PDFParser extends EventEmitter { // inherit from event emitter
#password = "";

#context = null; // service context object, only used in Web Service project; null in command line
#fq = null; //async queue for reading files

#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started
#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache
Expand All @@ -37,9 +36,6 @@ class PDFParser extends EventEmitter { // inherit from event emitter
// private
// service context object, only used in Web Service project; null in command line
this.#context = context;
this.#fq = async.queue( (task, callback) => {
fs.readFile(task.path, callback);
}, 1);

this.#pdfFilePath = null; //current PDF file to load and parse, null means loading/parsing not started
this.#pdfFileMTime = null; // last time the current pdf was modified, used to recognize changes and ignore cache
Expand Down Expand Up @@ -104,37 +100,34 @@ class PDFParser extends EventEmitter { // inherit from event emitter
return false;
}

#processPDFContent(err, data) {
nodeUtil.p2jinfo("Load PDF file status:" + (!!err ? "Error!" : "Success!") );
if (err) {
this.#data = null;
this.emit("pdfParser_dataError", err);
}
else {
PDFParser.#binBuffer[this.binBufferKey] = data;
this.#startParsingPDF();
}
};

//public APIs
createParserStream() {
return new ParserStream(this, {objectMode: true, bufferSize: 64 * 1024});
}

loadPDF(pdfFilePath, verbosity) {
async loadPDF(pdfFilePath, verbosity) {
nodeUtil.verbosity(verbosity || 0);
nodeUtil.p2jinfo("about to load PDF file " + pdfFilePath);

this.#pdfFilePath = pdfFilePath;
this.#pdfFileMTime = fs.statSync(pdfFilePath).mtimeMs;
if (this.#processFieldInfoXML) {
this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath);
}

if (this.#processBinaryCache())
return;

this.#fq.push({path: pdfFilePath}, this.#processPDFContent.bind(this));
try {
this.#pdfFileMTime = fs.statSync(pdfFilePath).mtimeMs;
if (this.#processFieldInfoXML) {
this.#PDFJS.tryLoadFieldInfoXML(pdfFilePath);
}

if (this.#processBinaryCache())
return;

PDFParser.#binBuffer[this.binBufferKey] = await readFile(pdfFilePath);
nodeUtil.p2jinfo(`Load OK: ${pdfFilePath}`);
this.#startParsingPDF();
}
catch(err) {
nodeUtil.p2jerror(`Load Failed: ${pdfFilePath} - ${err}`);
this.emit("pdfParser_dataError", err);
}
}

// Introduce a way to directly process buffers without the need to write it to a temporary file
Expand Down
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -888,7 +888,7 @@ In order to support this auto merging capability, text block objects have an add
* More test coverage, 4 more test scripts added, see _package.json_ for details
* Easier access to dictionaries, including color, font face and font style, see Dictionary reference section for details
* Refactor to ES6 class for major entry modules
* Dependency is removed: lodash.
* Dependencies removed: lodash, async
* Upgrade to Node v14.18.0 LTS
### Install on Ubuntu
Expand Down

0 comments on commit ef0fe96

Please sign in to comment.