diff --git a/read_pdf.py b/read_pdf.py
new file mode 100755
--- /dev/null
+++ b/read_pdf.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python2
+"""Parse a scanned scantron sheet from the command line."""
+
+import os
+import sys
+from argparse import ArgumentParser
+from scantron import ScantronParser
+
+
+def main():
+    # Make this file easier to use by adding nice arguments
+    parser = ArgumentParser(description='Parse scanned scantron sheets.')
+    parser.add_argument(
+        'data',
+        metavar='input_data',
+        help='File where the field data is stored. ' +
+        'This must be a python script with an array called "data" ' +
+        'of Field entries.')
+    parser.add_argument(
+        'image',
+        metavar='input_image',
+        nargs='?',
+        default='pages/page-1.jpg',
+        help='Scanned page to parse (default: %(default)s).')
+
+    args = parser.parse_args()
+
+    # If the file has a .py extension, we should still accept it
+    module_path = args.data
+    if module_path.endswith('.py'):
+        module_path = os.path.splitext(module_path)[0]
+
+    # A path like "sheets/exam" cannot be imported as-is: make its
+    # directory importable and import the bare module name instead
+    module_dir, module_name = os.path.split(module_path)
+    if module_dir:
+        sys.path.insert(0, module_dir)
+
+    # Import the file specified on the command line
+    try:
+        __import__(module_name)
+    except (ImportError, ValueError):
+        # ValueError: the path left an empty module name (e.g. "dir/")
+        print('Failed to import %s.' % module_path)
+        sys.exit(1)
+
+    # Look the array up outside the import so that an AttributeError
+    # raised by the module's own code is not reported as a missing array
+    try:
+        data = sys.modules[module_name].data
+    except AttributeError:
+        print('Could not find data array.')
+        sys.exit(1)
+
+    # If everything went well, proceed to parse the filled scantron
+    st = ScantronParser()
+    st.scan(data, args.image)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/scantron.py b/scantron.py
index e97d8ac..b8ebb5a 100755
--- a/scantron.py
+++ b/scantron.py
@@ -5,6 +5,82 @@
 from reportlab.pdfgen import canvas
 from qrcode import *
 
+import numpy as np
+import scipy as sp
+from scipy import ndimage
+from PIL import Image, ImageDraw
+import math
+
+
+class ScantronParser:
+    """Locate the page markers of a scanned sheet and measure its rotation."""
+
+    def __init__(self):
+        pass
+
+    def scan(self, data, path):
+        """Find the three page markers in a scan and print its rotation.
+
+        data -- field description of the sheet; not used yet.
+        path -- path of the scanned page image.
+
+        Returns the angle, in radians, that math.atan2 yields for the
+        offset between the centroids of the first two markers. Raises
+        Exception unless exactly three markers are found.
+        """
+        img = Image.open(path).convert('RGB')
+        # Same result as scipy.misc.fromimage(img, flatten=True), without
+        # depending on a function that SciPy has since removed
+        im = np.asarray(img.convert('F'))
+        # Binarise: pixels no brighter than 128 become 1, the rest 0
+        im = np.where(im > 128, 0, 1)
+        # A full 3x3 structure also joins diagonally touching pixels
+        label_im, num = ndimage.label(im, structure=np.ones((3, 3)))
+        centroids = ndimage.center_of_mass(im, label_im, range(1, num + 1))
+        slices = ndimage.find_objects(label_im)
+
+        # Matches are outlined on the in-memory image only
+        draw = ImageDraw.Draw(img)
+        squares = []
+
+        for i, region in enumerate(slices):
+            # Component i has label i + 1; ignore pixels of other
+            # components that fall inside the same bounding box
+            sub_img = np.where(label_im[region] == i + 1, 1, 0)
+            height, width = sub_img.shape
+
+            ratio = float(height) / float(width)
+            darkness = float(np.sum(sub_img)) / float(sub_img.size)
+
+            # Markers are nearly solid, nearly square and over 14px tall
+            if darkness > 0.95 and abs(ratio - 1.0) < 0.1 and height > 14:
+                x1, x2 = region[1].start, region[1].stop
+                y1, y2 = region[0].start, region[0].stop
+                draw.rectangle([x1, y1, x2, y2], outline='blue')
+                squares.append(i)
+
+        del draw
+
+        if len(squares) != 3:
+            raise Exception(
+                'Could not uniquely identify the three page markers.')
+
+        # Order the markers by the sum of their centroid coordinates
+        squares = sorted(((s, sum(centroids[s])) for s in squares),
+                         key=lambda x: x[1])
+
+        for s in squares:
+            print('square ' + str(s))
+
+        # NOTE(review): presumably the top-left and bottom-left markers,
+        # going by the original variable names -- confirm
+        tl = centroids[squares[0][0]]
+        bl = centroids[squares[1][0]]
+
+        rotation = math.atan2(bl[1] - tl[1], bl[0] - tl[0])
+        print('rotation: ' + str(rotation))
+        return rotation
+
 
 class Scantron:
     def __init__(self, filename, spacing=0.3*inch):
@@ -104,6 +180,7 @@ def set_box_sizes(self, box_size, box_spacing):
     def add_sheet(self, data, match=1, position=1):
         # Draw boxes for determining boundaries
         self.draw_box(1*inch, 1.2*inch, size=0.4*inch, filled=True)
+        self.draw_box(1*inch, (10.2-0.4)*inch, size=0.4*inch, filled=True)
         self.draw_box((7.5-0.4)*inch, (10.2-0.4)*inch,
                       size=0.4*inch, filled=True)
 
diff --git a/testcase.py b/testcase.py
new file mode 100755
--- /dev/null
+++ b/testcase.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python2
+"""Generate a test PDF, render each page to PNG and rotate the images."""
+
+from scantron import *
+import PythonMagick
+from pyPdf import PdfFileReader
+from PIL import Image
+
+data = [
+    Field('foo', 'Foo foo foo', int),
+    Field('bar', 'Bar bar bar', int),
+    Field('baz', 'Baz baz baz', int),
+    Field('laber', 'Laber laber', bool),
+]
+
+# Generate PDF
+st = Scantron('test.pdf')
+st.set_box_sizes(box_size=0.2*inch, box_spacing=0.3*inch)
+st.populate(data, matches=1, collate='no')
+st.save()
+
+# Count the pages, closing the PDF again as soon as that is known
+with open('test.pdf', 'rb') as pdf_file:
+    num_pages = PdfFileReader(pdf_file).getNumPages()
+
+# Convert PDF to a series of pictures
+pages = []
+
+for page in range(num_pages):
+    name = 'test_image_%d.png' % page
+
+    im = PythonMagick.Image()
+    im.density('200')
+    im.read('test.pdf[%d]' % page)
+    im.write(name)
+
+    pages.append(name)
+
+# Create a series of transformations to apply
+transformations = [
+    lambda x: x.rotate(10, expand=False),
+    lambda x: x.rotate(-10, expand=False),
+]
+
+# Take all pictures and modify them in different ways, going through
+# the transformations in turn and starting over when they run out
+for index, page in enumerate(pages):
+    transform = transformations[index % len(transformations)]
+    im = Image.open(page).convert('L')
+    im = transform(im)
+    im.save(page)