-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdf.js
81 lines (70 loc) · 2.49 KB
/
pdf.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
const muhammara = require("muhammara");
const muhammaraRecipe = require("muhammara").Recipe;
// Public API of this module: in-place text replacement inside PDF page
// content (replaceText) and combining several PDF files (mergePdfFiles).
// Both are function declarations further down and are hoisted, so they can
// be referenced here before their definitions.
module.exports = {
replaceText,
mergePdfFiles,
};
/**
 * Creates a PDF at `intoFilename` based on the first file in `fromFilenames`
 * and calls `insertPage(1, filename, 1)` on the recipe for every remaining file.
 *
 * NOTE(review): only page number 1 of each additional file is passed to
 * `insertPage`, and every insertion targets position 1 — confirm against the
 * Recipe API that this yields the intended page order / page coverage.
 *
 * The caller's array is not modified.
 *
 * @param {string[]} fromFilenames - Source PDF paths; the first entry is the base document.
 * @param {string} intoFilename - Path of the PDF to write.
 * @throws {Error} If `fromFilenames` is not a non-empty array.
 */
function mergePdfFiles(fromFilenames, intoFilename) {
  if (!Array.isArray(fromFilenames) || fromFilenames.length === 0) {
    throw new Error("mergePdfFiles requires at least one source filename");
  }

  // Destructure instead of shift() so the caller's array is left intact.
  const [baseFilename, ...additionalFilenames] = fromFilenames;
  const pdfDoc = new muhammaraRecipe(baseFilename, intoFilename);

  for (const filename of additionalFilenames) {
    pdfDoc.insertPage(1, filename, 1);
  }

  pdfDoc.endPDF();
}
/**
 * @desc Searches a PDF file and makes replacements to text elements.
 *
 * For every page, the object referenced by the page's `Contents` entry is read
 * into a string, passed through `makeReplacements`, and written back as an
 * unfiltered stream under the same object ID. The result is saved to
 * `newFilePath`.
 *
 * Pages that have no `Contents` entry are skipped, since there is nothing to
 * rewrite on them.
 *
 * NOTE(review): this assumes `Contents` is a single indirect reference to one
 * stream (it must expose `getObjectID()`); a `Contents` array is not handled.
 * NOTE(review): the stream is round-tripped through a JS string by
 * `readStreamIntoString` / `strToByteArray` — confirm the content streams of
 * the target PDFs survive that conversion.
 *
 * @param {string} filePath - Path of the PDF to read.
 * @param {string} newFilePath - Path the modified PDF is written to.
 * @param {Array<{regEx: string, text: string}>} replacements - Rules applied in
 *   order; `regEx` is a regular-expression source, `text` is the replacement.
 */
function replaceText(filePath, newFilePath, replacements) {
  const modPdfWriter = loadPdf(filePath, newFilePath);
  const copyingContext = modPdfWriter.createPDFCopyingContextForModifiedFile();
  const objectsContext = modPdfWriter.getObjectsContext();
  const documentParser = copyingContext.getSourceDocumentParser();
  const numPages = documentParser.getPagesCount();

  for (let page = 0; page < numPages; page++) {
    const pageDictionary = documentParser.parsePage(page).getDictionary();

    // Check for the entry first: without this guard a page lacking Contents
    // would abort the whole run with a TypeError.
    const contentsReference = pageDictionary.toJSObject().Contents;
    if (contentsReference == null) {
      continue;
    }
    const textObjectID = contentsReference.getObjectID();

    // Read the existing content before starting to write its replacement.
    const textStream = documentParser.queryDictionaryObject(pageDictionary, "Contents");
    const readStream = documentParser.startReadingFromStream(textStream);
    const pdfPageAsString = readStreamIntoString(readStream);
    const processedPdf = makeReplacements(pdfPageAsString, replacements);

    // Overwrite the original content object in place, reusing its object ID.
    objectsContext.startModifiedIndirectObject(textObjectID);
    const stream = objectsContext.startUnfilteredPDFStream();
    stream.getWriteStream().write(strToByteArray(processedPdf));
    objectsContext.endPDFStream(stream);
    objectsContext.endIndirectObject();
  }

  modPdfWriter.end();
}
/**
 * Opens `filePath` with muhammara's modify-writer so that changes are saved
 * to `newFileName`, with the `compress` option turned off.
 *
 * @param {string} filePath - Path of the PDF to open.
 * @param {string} newFileName - Value passed as `modifiedFilePath`.
 * @returns {*} Whatever `muhammara.createWriterToModify` returns.
 */
function loadPdf(filePath, newFileName) {
  const writerOptions = { modifiedFilePath: newFileName, compress: false };
  return muhammara.createWriterToModify(filePath, writerOptions);
}
/**
 * Runs every replacement rule over the page text, one after another.
 *
 * Each rule's `regEx` is compiled with the "g" flag, so all matches are
 * replaced; later rules see the output of earlier ones. `text` is handed to
 * `String.prototype.replace` unchanged, so `$1`-style patterns are honoured.
 *
 * @param {string} pdfPageAsString - Text to transform.
 * @param {Iterable<{regEx: string, text: string}>} replacements - Rules, applied in order.
 * @returns {string} The text after all rules have been applied.
 */
function makeReplacements(pdfPageAsString, replacements) {
  return [...replacements].reduce(
    (text, rule) => text.replace(new RegExp(rule.regEx, "g"), rule.text),
    pdfPageAsString
  );
}
/**
 * Drains `readStream` and returns its bytes decoded as a string.
 *
 * Chunks are collected as Buffers and joined once at the end, which avoids
 * re-copying everything read so far on each iteration. Decoding happens only
 * after joining, so a multi-byte character split across two chunks is still
 * decoded correctly.
 *
 * NOTE(review): `Buffer#toString()` decodes as UTF-8; bytes that are not valid
 * UTF-8 will not survive the conversion — confirm this is acceptable for the
 * streams being read.
 *
 * @param {{notEnded: function(): boolean, read: function(number): number[]}} readStream
 *   Source stream; `read(n)` is expected to return an array of byte values.
 * @returns {string} The decoded stream content ("" for an empty stream).
 */
function readStreamIntoString(readStream) {
  const CHUNK_SIZE = 10000;
  const chunks = [];

  while (readStream.notEnded()) {
    chunks.push(Buffer.from(readStream.read(CHUNK_SIZE)));
  }

  return Buffer.concat(chunks).toString();
}
/**
 * Encodes `str` with `Buffer.from` (UTF-8 by default) and returns the bytes
 * as a plain array of numbers.
 *
 * @param {string} str - Text to encode.
 * @returns {number[]} One entry per encoded byte.
 */
function strToByteArray(str) {
  return Array.from(Buffer.from(str));
}