From 431e872eaed18587e038f102f46c7b546221a601 Mon Sep 17 00:00:00 2001 From: Michael Weiser Date: Fri, 6 Mar 2020 19:39:10 +0100 Subject: [PATCH] Avoid TypeError on reference resolution With the previous change deferring reading of objects from the decoded stream until references can be resolved, it now runs into jesparza/peepdf#70. This change takes a different approach to fixing it than #6 by syncing this code with the other locations where identical code is in use: 1. Force the numbers extracted by re.findall to int() as before, avoiding the TypeError exception: Traceback (most recent call last): File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/main.py", line 409, in main ret, pdf = pdfParser.parse(fileName, options.isForceMode, options.isLooseMode, options.isManualAnalysis) File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 7117, in parse ret = body.updateObjects() File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 4291, in updateObjects object.resolveReferences() File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 3256, in resolveReferences ret = PDFParser.readObject(objectsSection[offset:]) TypeError: slice indices must be integers or None or have an __index__ method 2. 
Instantiate a new PDFParser object by adding the missing parentheses, avoiding another TypeError because readObject is not a class method: Traceback (most recent call last): File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/main.py", line 409, in main ret, pdf = pdfParser.parse(fileName, options.isForceMode, options.isLooseMode, options.isManualAnalysis) File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 7118, in parse ret = body.updateObjects() File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 4292, in updateObjects object.resolveReferences() File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 3256, in resolveReferences ret = PDFParser.readObject(objectsSection[offset:]) TypeError: unbound method readObject() must be called with PDFParser instance as first argument (got str instance instead) 3. Explicitly force the id to be an int() as well and append it to the list of indices, as is done at the other call sites of this code. This solves no issue I have run into but seems sensible to avoid other potential TypeErrors and keep internal bookkeeping of the object consistent. This should conclusively resolve jesparza/peepdf#70 and supersedes #6. 
Signed-off-by: Michael Weiser --- peepdf/PDFCore.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/peepdf/PDFCore.py b/peepdf/PDFCore.py index 4fc312a..6905bb8 100644 --- a/peepdf/PDFCore.py +++ b/peepdf/PDFCore.py @@ -3251,8 +3251,9 @@ def resolveReferences(self): numbers = re.findall('\d{1,10}', offsetsSection) if numbers != [] and len(numbers) % 2 == 0: for i in range(0, len(numbers), 2): - offset = numbers[i+1] - ret = PDFParser.readObject(objectsSection[offset:]) + id = int(numbers[i]) + offset = int(numbers[i+1]) + ret = PDFParser().readObject(objectsSection[offset:]) if ret[0] == -1: if isForceMode: object = None @@ -3261,7 +3262,8 @@ def resolveReferences(self): return ret else: object = ret[1] - self.compressedObjectsDict[numbers[i]] = [offset, object] + self.compressedObjectsDict[id] = [offset, object] + self.indexes.append(id) else: errorMessage = 'Missing offsets in object stream' if isForceMode: