From 7cb1f5051b831927a44421bdb7f184604ad4df9e Mon Sep 17 00:00:00 2001
From: Philipp Schrader <philipp.schrader@gmail.com>
Date: Sun, 5 May 2013 13:03:53 -0700
Subject: [PATCH 1/5] Adding a basic read_pdf.py script.

This is intended to grow into the counterpart of generate_pdf.py.
---
 read_pdf.py | 36 ++++++++++++++++++++++++++++++++++++
 scantron.py |  9 +++++++++
 2 files changed, 45 insertions(+)
 create mode 100755 read_pdf.py

diff --git a/read_pdf.py b/read_pdf.py
new file mode 100755
index 0000000..13f8216
--- /dev/null
+++ b/read_pdf.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python2
+
+import os
+import sys
+from argparse import ArgumentParser
+from scantron import ScantronParser
+
+# Set up command-line argument parsing so the script is easy to invoke
+parser = ArgumentParser(description='Parse scanned scantron sheets.')
+parser.add_argument(
+        'data',
+        metavar='input_data',
+        help='File where the field data is stored. ' +
+                'This must be a python script with an array called "data" ' +
+                'of Field entries.')
+
+args = parser.parse_args()
+
+# Accept a name with a .py extension by reducing it to the module name
+if args.data.endswith('.py'):
+    args.data = os.path.splitext(args.data)[0]
+
+# Import the file specified on the command line
+try:
+    __import__(args.data)
+    data = sys.modules[args.data].data
+except ImportError:
+    print('Failed to import %s.' % args.data)
+    quit(1)
+except AttributeError:
+    print('Could not find data array.')
+    quit(1)
+
+# If everything went well, proceed to parse the filled scantron
+st = ScantronParser()
+st.scan(data, 'pages/page-1.jpg')
diff --git a/scantron.py b/scantron.py
index e97d8ac..5b39cc9 100755
--- a/scantron.py
+++ b/scantron.py
@@ -6,6 +6,15 @@
 from qrcode import *
 
 
+class ScantronParser:
+    def __init__(self):
+        pass
+
+
+    def scan(self, data, path):
+        pass
+
+
 class Scantron:
     def __init__(self, filename, spacing=0.3*inch):
         self._fontSize = 0.15*inch

From 01d68947a9c30059609f291eb6455c709c2c244b Mon Sep 17 00:00:00 2001
From: Philipp Schrader <philipp.schrader@gmail.com>
Date: Sun, 5 May 2013 15:27:29 -0700
Subject: [PATCH 2/5] Adding some basic processing to a scantron image.

The parser can roughly detect the outer marker squares, but the
detection still needs a lot of refinement.
---
 scantron.py | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/scantron.py b/scantron.py
index 5b39cc9..bc5ea9e 100755
--- a/scantron.py
+++ b/scantron.py
@@ -5,6 +5,11 @@
 from reportlab.pdfgen import canvas
 from qrcode import *
 
+import numpy as np
+import scipy as sp
+from scipy import ndimage
+from PIL import Image, ImageDraw
+
 
 class ScantronParser:
     def __init__(self):
@@ -12,7 +17,45 @@ def __init__(self):
 
 
     def scan(self, data, path):
-        pass
+        img = Image.open(path).convert('RGB')
+        im = sp.misc.fromimage(img, flatten=True)
+        im = np.where(im > 128, 0, 1)
+        label_im, num = ndimage.label(im, structure=np.ones((3, 3)).tolist())
+        centroids = ndimage.measurements.center_of_mass(im, label_im, xrange(1, 
+                num+1))
+        slices = ndimage.find_objects(label_im)
+
+        for i in range(len(slices)):
+            sub_img = np.where(label_im[slices[i]] == i + 1, 1, 0)
+            num_ones = np.sum(sub_img)
+            num_all = sub_img.size
+            shape = sub_img.shape
+
+            ratio = float(shape[0]) / float(shape[1])
+            brightness = float(num_ones)/float(num_all)
+
+            if brightness > 0.95 and abs(ratio - 1.0) < 0.1 and shape[0] > 14:
+                print('s: ' + str(slices[i]))
+                x1, x2 = slices[i][1].start, slices[i][1].stop
+                y1, y2 = slices[i][0].start, slices[i][0].stop
+
+                draw = ImageDraw.Draw(img)
+                draw.rectangle([x1, y1, x2, y2], outline='blue')
+                del draw
+
+        for centroid in centroids:
+            x = int(centroid[1])
+            y = int(centroid[0])
+
+            red = (255, 0, 0)
+
+            img.putpixel([x-1, y], red)
+            img.putpixel([x+1, y], red)
+            img.putpixel([x, y], red)
+            img.putpixel([x, y-1], red)
+            img.putpixel([x, y+1], red)
+
+        img.show()
 
 
 class Scantron:

From 30aa64df17a0d48eaf963ba36908d4035b07c449 Mon Sep 17 00:00:00 2001
From: Philipp Schrader <philipp.schrader@gmail.com>
Date: Mon, 6 May 2013 16:42:13 -0700
Subject: [PATCH 3/5] Adding a third square to make re-orientation easier.

---
 scantron.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scantron.py b/scantron.py
index bc5ea9e..684c7fc 100755
--- a/scantron.py
+++ b/scantron.py
@@ -36,6 +36,7 @@ def scan(self, data, path):
 
             if brightness > 0.95 and abs(ratio - 1.0) < 0.1 and shape[0] > 14:
                 print('s: ' + str(slices[i]))
+                print('c: ' + str(centroids[i]))
                 x1, x2 = slices[i][1].start, slices[i][1].stop
                 y1, y2 = slices[i][0].start, slices[i][0].stop
 
@@ -156,6 +157,7 @@ def set_box_sizes(self, box_size, box_spacing):
     def add_sheet(self, data, match=1, position=1):
         # Draw boxes for determining boundaries
         self.draw_box(1*inch, 1.2*inch, size=0.4*inch, filled=True)
+        self.draw_box(1*inch, (10.2-0.4)*inch, size=0.4*inch, filled=True)
         self.draw_box((7.5-0.4)*inch, (10.2-0.4)*inch, size=0.4*inch, 
                 filled=True)
 

From 5f222998891670fcde86d1587550d72b98361db3 Mon Sep 17 00:00:00 2001
From: Philipp Schrader <philipp.schrader@gmail.com>
Date: Tue, 7 May 2013 14:22:54 -0700
Subject: [PATCH 4/5] Adding rotation detection.

---
 scantron.py | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/scantron.py b/scantron.py
index 684c7fc..a8b55c6 100755
--- a/scantron.py
+++ b/scantron.py
@@ -9,6 +9,7 @@
 import scipy as sp
 from scipy import ndimage
 from PIL import Image, ImageDraw
+import math
 
 
 class ScantronParser:
@@ -25,6 +26,8 @@ def scan(self, data, path):
                 num+1))
         slices = ndimage.find_objects(label_im)
 
+        squares = []
+
         for i in range(len(slices)):
             sub_img = np.where(label_im[slices[i]] == i + 1, 1, 0)
             num_ones = np.sum(sub_img)
@@ -35,8 +38,6 @@ def scan(self, data, path):
             brightness = float(num_ones)/float(num_all)
 
             if brightness > 0.95 and abs(ratio - 1.0) < 0.1 and shape[0] > 14:
-                print('s: ' + str(slices[i]))
-                print('c: ' + str(centroids[i]))
                 x1, x2 = slices[i][1].start, slices[i][1].stop
                 y1, y2 = slices[i][0].start, slices[i][0].stop
 
@@ -44,19 +45,26 @@ def scan(self, data, path):
                 draw.rectangle([x1, y1, x2, y2], outline='blue')
                 del draw
 
-        for centroid in centroids:
-            x = int(centroid[1])
-            y = int(centroid[0])
+                squares.append(i)
+
+        if len(squares) != 3:
+            print('Could not uniquely identify the three page markers.')
+            raise Exception
+
+        squares = zip(squares, map(lambda s: sum(centroids[s]), squares))
+        squares = sorted(squares, key=lambda x: x[1])
+
+        for s in squares:
+            print('square ' + str(s))
 
-            red = (255, 0, 0)
+        tl = centroids[squares[0][0]]
+        bl = centroids[squares[1][0]]
+        br = centroids[squares[2][0]]
 
-            img.putpixel([x-1, y], red)
-            img.putpixel([x+1, y], red)
-            img.putpixel([x, y], red)
-            img.putpixel([x, y-1], red)
-            img.putpixel([x, y+1], red)
+        rotation = math.atan2(bl[1] - tl[1], bl[0] - tl[0])
+        print('rotation: ' + str(rotation))
 
-        img.show()
+        #img.show()
 
 
 class Scantron:

From 5361cf34b81f492442f04895408f36d2d0ddf311 Mon Sep 17 00:00:00 2001
From: Philipp Schrader <philipp.schrader@gmail.com>
Date: Sun, 19 May 2013 09:21:40 -0700
Subject: [PATCH 5/5] Adding script to generate a bunch of filled scantrons.

---
 scantron.py |  6 ++----
 testcase.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 4 deletions(-)
 create mode 100755 testcase.py

diff --git a/scantron.py b/scantron.py
index a8b55c6..b8ebb5a 100755
--- a/scantron.py
+++ b/scantron.py
@@ -35,9 +35,9 @@ def scan(self, data, path):
             shape = sub_img.shape
 
             ratio = float(shape[0]) / float(shape[1])
-            brightness = float(num_ones)/float(num_all)
+            darkness = float(num_ones)/float(num_all)
 
-            if brightness > 0.95 and abs(ratio - 1.0) < 0.1 and shape[0] > 14:
+            if darkness > 0.95 and abs(ratio - 1.0) < 0.1 and shape[0] > 14:
                 x1, x2 = slices[i][1].start, slices[i][1].stop
                 y1, y2 = slices[i][0].start, slices[i][0].stop
 
@@ -64,8 +64,6 @@ def scan(self, data, path):
         rotation = math.atan2(bl[1] - tl[1], bl[0] - tl[0])
         print('rotation: ' + str(rotation))
 
-        #img.show()
-
 
 class Scantron:
     def __init__(self, filename, spacing=0.3*inch):
diff --git a/testcase.py b/testcase.py
new file mode 100755
index 0000000..ea0fa9d
--- /dev/null
+++ b/testcase.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python2
+
+from scantron import *
+import PythonMagick
+from pyPdf import PdfFileReader
+from PIL import Image
+
+data = [
+    Field('foo', 'Foo foo foo', int),
+    Field('bar', 'Bar bar bar', int),
+    Field('baz', 'Baz baz baz', int),
+    Field('laber', 'Laber laber', bool),
+]
+
+# Generate PDF
+st = Scantron('test.pdf')
+st.set_box_sizes(box_size=0.2*inch, box_spacing=0.3*inch)
+st.populate(data, matches=1, collate='no')
+st.save()
+
+# Render each page of the PDF to its own PNG image
+pages = []
+
+pdf = PdfFileReader(file('test.pdf', 'rb'))
+
+for page in range(pdf.getNumPages()):
+    #page += 1
+    name = 'test_image_%d.png' % page
+
+    im = PythonMagick.Image()
+    im.density('200')
+    im.read('test.pdf[%d]' % page)
+    im.write(name)
+
+    pages.append(name)
+
+# Create a series of transformations to apply
+transformations = [
+    lambda x: x.rotate(10, expand=False),
+    lambda x: x.rotate(-10, expand=False),
+]
+
+tf = 0
+
+# Overwrite each page image with a transformed copy, cycling through the list
+for page in pages:
+    im = Image.open(page).convert('L')
+    im = transformations[tf](im)
+    im.save(page)
+
+    tf = (tf + 1) % len(transformations)