Skip to content

Commit

Permalink
v1.0.8: breaking change for x coordinate page unit conversion, client…
Browse files Browse the repository at this point in the history
… renderer needs update. See readme for more details
  • Loading branch information
modestysn committed Mar 6, 2016
1 parent eab8f05 commit 70ca4c7
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 117 deletions.
51 changes: 25 additions & 26 deletions lib/p2jcmd.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'use strict';

let fs = require('fs'),
let nodeUtil = require("util"),
fs = require('fs'),
path = require('path'),
_ = require('underscore'),
PDFParser = require("../pdfparser"),
Expand Down Expand Up @@ -36,7 +37,7 @@ let PDF2JSONUtil = (function () {
// Report an optional error and hand control back to the caller.
//   callback - optional completion function; any non-function value is ignored
//   err      - optional error value or message; logged with console.warn when truthy
//              and passed to callback unchanged (undefined when omitted)
let _continue = function(callback, err) {
    if (err) {
        console.warn(err);
    }
    // A single typeof test is sufficient to detect a callable; it avoids the
    // deprecated util.isFunction helper and needs no utility library.
    if (typeof callback === "function") {
        callback(err);
    }
};

Expand All @@ -45,35 +46,35 @@ let PDF2JSONUtil = (function () {
let fieldsTypesPath = this.outputPath.replace(".json", ".fields.json");
let fieldTypesFile = this.outputFile.replace(".json", ".fields.json");

fs.writeFile(fieldsTypesPath, JSON.stringify(pJSON), function(err) {
fs.writeFile(fieldsTypesPath, JSON.stringify(pJSON), err => {
if (err) {
console.warn(this.inputFile + " => " + fieldTypesFile + " Exception: " + err);
} else {
console.log(this.inputFile + " => " + fieldTypesFile + " [" + this.outputDir + "] OK");
}
callback(err, fieldTypesFile);
}.bind(this));
});
};

// Write the parser's raw text content to "<output>.content.txt", derived from the JSON output path.
// Must be invoked with `this` set to the owning converter (e.g. through .call), because it reads
// this.outputPath, this.outputFile, this.inputFile, this.outputDir and this.pdfParser.
//   data     - parsed PDF data; unused here, the text comes from this.pdfParser.getRawTextContent()
//   callback - invoked as callback(err, contentFile) once the write has finished or failed
let _generateRawTextContentFile = function(data, callback) {
    let contentPath = this.outputPath.replace(".json", ".content.txt");
    let contentFile = this.outputFile.replace(".json", ".content.txt");

    // The arrow callback keeps the caller-supplied `this`, so exactly one write is
    // issued and no .bind(this) is required.
    fs.writeFile(contentPath, this.pdfParser.getRawTextContent(), err => {
        if (err) {
            console.warn(this.inputFile + " => " + contentFile + " Exception: " + err);
        } else {
            console.log(this.inputFile + " => " + contentFile + " [" + this.outputDir + "] OK");
        }
        callback(err, contentFile);
    });
};


let _writeOneJSON = function(data, callback) {
let pJSON = JSON.stringify({"formImage":data});

fs.writeFile(this.outputPath, pJSON, function(err) {
fs.writeFile(this.outputPath, pJSON, err => {
if(err) {
console.warn(this.inputFile + " => " + this.outputFile + " Exception: " + err);
this.curProcessor.failedCount++;
Expand All @@ -83,7 +84,7 @@ let PDF2JSONUtil = (function () {

}
callback(err, this.outputFile);
}.bind(this));
});
};

let _writeOneJSONWithMergedTextBlocks = function(data, callback) {
Expand All @@ -92,33 +93,31 @@ let PDF2JSONUtil = (function () {
let outputPath = this.outputPath.replace(".json", ".merged.json");
let contentFile = this.outputFile.replace(".json", ".merged.json");

fs.writeFile(outputPath, pJSON, function(err) {
fs.writeFile(outputPath, pJSON, err => {
if (err) {
console.warn(err);
} else {
console.log(this.inputFile + " => " + this.outputFile + " [" + outputPath + "] OK");
}
callback(err, contentFile);
}.bind(this));
});
};

let _parseOnePDF = function(callback) {
let processRawTextContent = _.has(argv, 'c');
let self = this;
this.pdfParser = new PDFParser(null, processRawTextContent);

this.pdfParser.on("pdfParser_dataReady", function (evtData) {
this.pdfParser.on("pdfParser_dataReady", evtData => {
if ((!!evtData) && (!!evtData.data)) {

let outputTasks = [function(cbFunc) { _writeOneJSON.call(self, evtData.data, cbFunc);}];
let outputTasks = [cbFunc => _writeOneJSON.call(this, evtData.data, cbFunc)];
if (_.has(argv, 't')) {//needs to generate fields.json file
outputTasks.push(function(cbFunc) {_generateFieldsTypesFile.call(self, evtData.data, cbFunc);});
outputTasks.push(cbFunc => _generateFieldsTypesFile.call(this, evtData.data, cbFunc));
}
if (processRawTextContent) {//needs to generate content.txt file
outputTasks.push(function(cbFunc) {_generateRawTextContentFile.call(self, evtData.data, cbFunc);});
outputTasks.push(cbFunc => _generateRawTextContentFile.call(this, evtData.data, cbFunc));
}
if (_.has(argv, 'm')) {//needs to generate json file with merged broken text blocks
outputTasks.push(function(cbFunc) {_writeOneJSONWithMergedTextBlocks.call(self, evtData.data, cbFunc);});
outputTasks.push(cbFunc => _writeOneJSONWithMergedTextBlocks.call(this, evtData.data, cbFunc));
}

async.series(outputTasks, function(err, results){
Expand All @@ -128,20 +127,20 @@ let PDF2JSONUtil = (function () {
console.log("Output files OK", results);
}

_continue.call(self, callback);
_continue.call(this, callback);
});
}
else {
this.curProcessor.failedCount++;
_continue.call(this, callback, "Exception: empty parsing result - " + this.inputPath);
}
}.bind(this));
});

this.pdfParser.on("pdfParser_dataError", function (evtData) {
this.pdfParser.on("pdfParser_dataError", evtData => {
this.curProcessor.failedCount++;
let errMsg = "Exception: " + evtData.data;
_continue.call(this, callback, errMsg);
}.bind(this));
});

console.log("\nTranscoding " + this.inputFile + " to - " + this.outputPath);
this.pdfParser.loadPDF(this.inputPath, (_.has(argv, 's') ? 0 : 5));
Expand Down Expand Up @@ -290,11 +289,11 @@ let PDFProcessor = (function () {
let statusMsg = "\n%d input files\t%d success\t%d fail\t%d warning.";
console.log(statusMsg, this.inputCount, this.successCount, this.failedCount, this.warningCount);

process.nextTick( function() {
process.nextTick( () => {
console.timeEnd(_PRO_TIMER);
let exitCode = (this.inputCount === this.successCount) ? 0 : 1;
process.exit(exitCode);
}.bind(this));
});
};

cls.prototype.processOneFile = function () {
Expand All @@ -303,7 +302,7 @@ let PDFProcessor = (function () {

this.inputCount = 1;
this.p2j = new PDF2JSONUtil(inputDir, inputFile, this);
this.p2j.processFile(_.bind(this.complete, this));
this.p2j.processFile( () => this.complete() );
};

cls.prototype.processFiles = function(inputDir, files) {
Expand Down Expand Up @@ -334,7 +333,7 @@ let PDFProcessor = (function () {
cls.prototype.processOneDirectory = function () {
let inputDir = path.normalize(argv.f);

fs.readdir(inputDir, function(err, files) {
fs.readdir(inputDir, (err, files) => {
let _iChars = "!@#$%^&*()+=[]\\\';,/{}|\":<>?~`.-_ ";
let pdfFiles = files.filter(function(file) {
return file.substr(-4).toLowerCase() === '.pdf' && _iChars.indexOf(file.substr(0,1)) < 0;
Expand All @@ -348,7 +347,7 @@ let PDFProcessor = (function () {
console.log("No PDF files found. [" + inputDir + "].");
this.complete(null);
}
}.bind(this));
});
};

return cls;
Expand Down
Loading

0 comments on commit 70ca4c7

Please sign in to comment.