From 7cb1f5051b831927a44421bdb7f184604ad4df9e Mon Sep 17 00:00:00 2001 From: Philipp Schrader Date: Sun, 5 May 2013 13:03:53 -0700 Subject: [PATCH 1/5] Adding a basic read_pdf.py script. This is intended to grow into the counterpart of generate_pdf.py. --- read_pdf.py | 36 ++++++++++++++++++++++++++++++++++++ scantron.py | 9 +++++++++ 2 files changed, 45 insertions(+) create mode 100755 read_pdf.py diff --git a/read_pdf.py b/read_pdf.py new file mode 100755 index 0000000..13f8216 --- /dev/null +++ b/read_pdf.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python2 + +import os +import sys +from argparse import ArgumentParser +from scantron import ScantronParser + +# Make this file easier to use by adding nice arguments +parser = ArgumentParser(description='Parse scanned scantron sheets.') +parser.add_argument( + 'data', + metavar='input_data', + help='File where the field data is stored. ' + + 'This must be a python script with an array called "data" ' + + 'of Field entries.') + +args = parser.parse_args() + +# If the file has a .py extension, we should still accept it +if args.data.endswith('.py'): + args.data = os.path.splitext(args.data)[0] + +# Import the file specified on the command line +try: + __import__(args.data) + data = sys.modules[args.data].data +except ImportError: + print('Failed to import %s.' % args.data) + quit(1) +except AttributeError: + print('Could not find data array.') + quit(1) + +# If everything went well, proceed to parse the filled scantron +st = ScantronParser() +st.scan(data, 'pages/page-1.jpg') diff --git a/scantron.py b/scantron.py index e97d8ac..5b39cc9 100755 --- a/scantron.py +++ b/scantron.py @@ -6,6 +6,15 @@ from qrcode import * +class ScantronParser: + def __init__(self): + pass + + + def scan(self, data, path): + pass + + class Scantron: def __init__(self, filename, spacing=0.3*inch): self._fontSize = 0.15*inch From 01d68947a9c30059609f291eb6455c709c2c244b Mon Sep 17 00:00:00 2001 From: Philipp Schrader Date: Sun, 5 May 2013 15:27:29 -0700 Subject: [PATCH 2/5] Adding some basic processing to a scantron image. This parser can sort of detect the outer squares, but a lot still needs to be refined. --- scantron.py | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/scantron.py b/scantron.py index 5b39cc9..bc5ea9e 100755 --- a/scantron.py +++ b/scantron.py @@ -5,6 +5,11 @@ from reportlab.pdfgen import canvas from qrcode import * +import numpy as np +import scipy as sp +from scipy import ndimage +from PIL import Image, ImageDraw + class ScantronParser: def __init__(self): @@ -12,7 +17,45 @@ def __init__(self): def scan(self, data, path): - pass + img = Image.open(path).convert('RGB') + im = sp.misc.fromimage(img, flatten=True) + im = np.where(im > 128, 0, 1) + label_im, num = ndimage.label(im, structure=np.ones((3, 3)).tolist()) + centroids = ndimage.measurements.center_of_mass(im, label_im, xrange(1, + num+1)) + slices = ndimage.find_objects(label_im) + + for i in range(len(slices)): + sub_img = np.where(label_im[slices[i]] == i + 1, 1, 0) + num_ones = np.sum(sub_img) + num_all = sub_img.size + shape = sub_img.shape + + ratio = float(shape[0]) / float(shape[1]) + brightness = float(num_ones)/float(num_all) + + if brightness > 0.95 and abs(ratio - 1.0) < 0.1 and shape[0] > 14: + print('s: ' + str(slices[i])) + x1, x2 = slices[i][1].start, slices[i][1].stop + y1, y2 = slices[i][0].start, slices[i][0].stop + + draw = ImageDraw.Draw(img) + draw.rectangle([x1, y1, x2, y2], outline='blue') + del draw + + for centroid in centroids: + x = int(centroid[1]) + y = int(centroid[0]) + + red = (255, 0, 0) + + img.putpixel([x-1, y], red) + img.putpixel([x+1, y], red) + img.putpixel([x, y], red) + img.putpixel([x, y-1], red) + img.putpixel([x, y+1], red) + + img.show() class Scantron: From 30aa64df17a0d48eaf963ba36908d4035b07c449 Mon Sep 17 00:00:00 2001 From: Philipp Schrader Date: Mon, 6 May 2013 16:42:13 -0700 Subject: [PATCH 3/5] Adding a third square to make re-orientation easier. --- scantron.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scantron.py b/scantron.py index bc5ea9e..684c7fc 100755 --- a/scantron.py +++ b/scantron.py @@ -36,6 +36,7 @@ def scan(self, data, path): if brightness > 0.95 and abs(ratio - 1.0) < 0.1 and shape[0] > 14: print('s: ' + str(slices[i])) + print('c: ' + str(centroids[i])) x1, x2 = slices[i][1].start, slices[i][1].stop y1, y2 = slices[i][0].start, slices[i][0].stop @@ -156,6 +157,7 @@ def set_box_sizes(self, box_size, box_spacing): def add_sheet(self, data, match=1, position=1): # Draw boxes for determining boundaries self.draw_box(1*inch, 1.2*inch, size=0.4*inch, filled=True) + self.draw_box(1*inch, (10.2-0.4)*inch, size=0.4*inch, filled=True) self.draw_box((7.5-0.4)*inch, (10.2-0.4)*inch, size=0.4*inch, filled=True) From 5f222998891670fcde86d1587550d72b98361db3 Mon Sep 17 00:00:00 2001 From: Philipp Schrader Date: Tue, 7 May 2013 14:22:54 -0700 Subject: [PATCH 4/5] Adding rotation detection. --- scantron.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/scantron.py b/scantron.py index 684c7fc..a8b55c6 100755 --- a/scantron.py +++ b/scantron.py @@ -9,6 +9,7 @@ import scipy as sp from scipy import ndimage from PIL import Image, ImageDraw +import math class ScantronParser: @@ -25,6 +26,8 @@ def scan(self, data, path): num+1)) slices = ndimage.find_objects(label_im) + squares = [] + for i in range(len(slices)): sub_img = np.where(label_im[slices[i]] == i + 1, 1, 0) num_ones = np.sum(sub_img) @@ -35,8 +38,6 @@ def scan(self, data, path): brightness = float(num_ones)/float(num_all) if brightness > 0.95 and abs(ratio - 1.0) < 0.1 and shape[0] > 14: - print('s: ' + str(slices[i])) - print('c: ' + str(centroids[i])) x1, x2 = slices[i][1].start, slices[i][1].stop y1, y2 = slices[i][0].start, slices[i][0].stop @@ -44,19 +45,26 @@ def scan(self, data, path): draw.rectangle([x1, y1, x2, y2], outline='blue') del draw - for centroid in centroids: - x = int(centroid[1]) - y = int(centroid[0]) + squares.append(i) + + if len(squares) != 3: + print('Could not uniquely identify the three page markers.') + raise Exception + + squares = zip(squares, map(lambda s: sum(centroids[s]), squares)) + squares = sorted(squares, key=lambda x: x[1]) + + for s in squares: + print('square ' + str(s)) - red = (255, 0, 0) + tl = centroids[squares[0][0]] + bl = centroids[squares[1][0]] + br = centroids[squares[2][0]] - img.putpixel([x-1, y], red) - img.putpixel([x+1, y], red) - img.putpixel([x, y], red) - img.putpixel([x, y-1], red) - img.putpixel([x, y+1], red) + rotation = math.atan2(bl[1] - tl[1], bl[0] - tl[0]) + print('rotation: ' + str(rotation)) - img.show() + #img.show() class Scantron: From 5361cf34b81f492442f04895408f36d2d0ddf311 Mon Sep 17 00:00:00 2001 From: Philipp Schrader Date: Sun, 19 May 2013 09:21:40 -0700 Subject: [PATCH 5/5] Adding script to generate a bunch of filled scantrons. --- scantron.py | 6 ++---- testcase.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 4 deletions(-) create mode 100755 testcase.py diff --git a/scantron.py b/scantron.py index a8b55c6..b8ebb5a 100755 --- a/scantron.py +++ b/scantron.py @@ -35,9 +35,9 @@ def scan(self, data, path): shape = sub_img.shape ratio = float(shape[0]) / float(shape[1]) - brightness = float(num_ones)/float(num_all) + darkness = float(num_ones)/float(num_all) - if brightness > 0.95 and abs(ratio - 1.0) < 0.1 and shape[0] > 14: + if darkness > 0.95 and abs(ratio - 1.0) < 0.1 and shape[0] > 14: x1, x2 = slices[i][1].start, slices[i][1].stop y1, y2 = slices[i][0].start, slices[i][0].stop @@ -64,8 +64,6 @@ def scan(self, data, path): rotation = math.atan2(bl[1] - tl[1], bl[0] - tl[0]) print('rotation: ' + str(rotation)) - #img.show() - class Scantron: def __init__(self, filename, spacing=0.3*inch): diff --git a/testcase.py b/testcase.py new file mode 100755 index 0000000..ea0fa9d --- /dev/null +++ b/testcase.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python2 + +from scantron import * +import PythonMagick +from pyPdf import PdfFileReader +from PIL import Image + +data = [ + Field('foo', 'Foo foo foo', int), + Field('bar', 'Bar bar bar', int), + Field('baz', 'Baz baz baz', int), + Field('laber', 'Laber laber', bool), +] + +# Generate PDF +st = Scantron('test.pdf') +st.set_box_sizes(box_size=0.2*inch, box_spacing=0.3*inch) +st.populate(data, matches=1, collate='no') +st.save() + +# Convert PDF to a series of pictures +pages = [] + +pdf = PdfFileReader(file('test.pdf', 'rb')) + +for page in range(pdf.getNumPages()): + #page += 1 + name = 'test_image_%d.png' % page + + im = PythonMagick.Image() + im.density('200') + im.read('test.pdf[%d]' % page) + im.write(name) + + pages.append(name) + +# Create a series of transformations to apply +transformations = [ + lambda x: x.rotate(10, expand=False), + lambda x: x.rotate(-10, expand=False), +] + +tf = 0 + +# Take all pictures and modify them in different ways +for page in pages: + im = Image.open(page).convert('L') + im = transformations[tf](im) + im.save(page) + + tf = (tf + 1) % len(transformations)