From 431e872eaed18587e038f102f46c7b546221a601 Mon Sep 17 00:00:00 2001
From: Michael Weiser <michael.weiser@gmx.de>
Date: Fri, 6 Mar 2020 19:39:10 +0100
Subject: [PATCH] Avoid TypeError on reference resolution

With the previous change deferring reading of objects from the decoded
stream until references can be resolved, it now runs into
jesparza/peepdf#70. This change takes a different approach to fixing it
than #6, by syncing the code with the other locations where identical
code is in use:

1. Force the numbers extracted by re.findall to int() as before,
   avoiding the TypeError exception:

Traceback (most recent call last):
  File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/main.py", line 409, in main
    ret, pdf = pdfParser.parse(fileName, options.isForceMode, options.isLooseMode, options.isManualAnalysis)
  File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 7117, in parse
    ret = body.updateObjects()
  File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 4291, in updateObjects
    object.resolveReferences()
  File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 3256, in resolveReferences
    ret = PDFParser.readObject(objectsSection[offset:])
TypeError: slice indices must be integers or None or have an __index__ method

2. Instantiate a new PDFParser object by adding the missing parentheses,
   avoiding another TypeError because readObject is not a class method:

Traceback (most recent call last):
  File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/main.py", line 409, in main
    ret, pdf = pdfParser.parse(fileName, options.isForceMode, options.isLooseMode, options.isManualAnalysis)
  File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 7118, in parse
    ret = body.updateObjects()
  File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 4292, in updateObjects
    object.resolveReferences()
  File "peepdf-venv2/lib64/python2.7/site-packages/peepdf/PDFCore.py", line 3256, in resolveReferences
    ret = PDFParser.readObject(objectsSection[offset:])
TypeError: unbound method readObject() must be called with PDFParser instance as first argument (got str instance instead)

3. Explicitly force the id to be an int() as well and append it to the
   list of indices, as is done at the other call sites of this code.
   This solves no issue I have run into but seems sensible to avoid other
   potential TypeErrors and keep the object's internal bookkeeping consistent.

This should conclusively resolve jesparza/peepdf#70 and supersedes #6.

Signed-off-by: Michael Weiser <michael.weiser@gmx.de>
---
 peepdf/PDFCore.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/peepdf/PDFCore.py b/peepdf/PDFCore.py
index 4fc312a..6905bb8 100644
--- a/peepdf/PDFCore.py
+++ b/peepdf/PDFCore.py
@@ -3251,8 +3251,9 @@ def resolveReferences(self):
                 numbers = re.findall('\d{1,10}', offsetsSection)
                 if numbers != [] and len(numbers) % 2 == 0:
                     for i in range(0, len(numbers), 2):
-                        offset = numbers[i+1]
-                        ret = PDFParser.readObject(objectsSection[offset:])
+                        id = int(numbers[i])
+                        offset = int(numbers[i+1])
+                        ret = PDFParser().readObject(objectsSection[offset:])
                         if ret[0] == -1:
                             if isForceMode:
                                 object = None
@@ -3261,7 +3262,8 @@ def resolveReferences(self):
                                 return ret
                         else:
                             object = ret[1]
-                        self.compressedObjectsDict[numbers[i]] = [offset, object]
+                        self.compressedObjectsDict[id] = [offset, object]
+                        self.indexes.append(id)
                 else:
                     errorMessage = 'Missing offsets in object stream'
                     if isForceMode: