Skip to content

Commit cad7f01

Browse files
committed
first
1 parent f4bee3c commit cad7f01

22 files changed

+251
-0
lines changed

.document

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
README.rdoc
2+
lib/**/*.rb
3+
bin/*
4+
features/**/*.feature
5+
LICENSE

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
*.sw?
2+
.DS_Store
3+
coverage
4+
rdoc
5+
pkg

LICENSE

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Copyright (c) 2009 Aitor García
2+
3+
Permission is hereby granted, free of charge, to any person obtaining
4+
a copy of this software and associated documentation files (the
5+
"Software"), to deal in the Software without restriction, including
6+
without limitation the rights to use, copy, modify, merge, publish,
7+
distribute, sublicense, and/or sell copies of the Software, and to
8+
permit persons to whom the Software is furnished to do so, subject to
9+
the following conditions:
10+
11+
The above copyright notice and this permission notice shall be
12+
included in all copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

README.rdoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
= congressmen
2+
3+
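Parses PDF session records with PDF::Reader into a SessionRecord made of one SessionRecordPage per PDF page, extracting the date, record number and page number from each page's "CONGRESO … NÚM." header line.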
4+
5+
== Copyright
6+
7+
Copyright (c) 2009 Aitor García. See LICENSE for details.

Rakefile

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
require 'rubygems'
2+
require 'rake'
3+
4+
# Gem packaging tasks come from Jeweler. When it is not installed we only
# print a hint, so the remaining tasks in this file stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "congressmen"
    gem.summary = %Q{TODO}
    gem.email = "[email protected]"
    gem.homepage = "http://github.com/aitor/congressmen"
    gem.authors = ["Aitor García"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# `rake test` runs every test/**/*_test.rb with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# `rake rcov` runs the same tests under RCov; without RCov the task still
# exists but aborts with installation instructions.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :default => :test

# The RDoc task class lives in 'rdoc/task' on newer toolchains and in
# 'rake/rdoctask' on older ones; prefer the new location, fall back to the old.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

# YAML is needed to read VERSION.yml below; do not rely on another library
# having loaded it already.
require 'yaml'

rdoc_task_class.new do |rdoc|
  # The documentation title carries the version from VERSION.yml when present.
  if File.exist?('VERSION.yml')
    config = YAML.load(File.read('VERSION.yml'))
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "congressmen #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
55+

lib/congressmen.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
require 'rubygems'
2+
require 'pdf/reader'
3+
4+
require 'congressmen/parser'
5+
require 'congressmen/session_record'
6+
require 'congressmen/actor'
7+
8+
# Backport of Ruby 1.9's Object#tap for interpreters that lack it.
# The guard asks the Object class object itself; since it is also an Object,
# it responds to :tap exactly when instances already do.
unless Object.respond_to?(:tap)
  class Object
    # Yields the receiver to the block, then returns the receiver unchanged.
    def tap
      yield self
      self
    end
  end
end

lib/congressmen/actor.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
module Congressmen

  # Simple value holder with a single read/write +name+ attribute.
  # NOTE(review): presumably the element type of SessionRecord#actors — confirm.
  class Actor
    attr_accessor :name
  end

end

lib/congressmen/parser.rb

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Shorthand so callers can write Congressmen("file.pdf").
# Forwards +file+ and +opts+ unchanged to Congressmen.parse and returns
# whatever that call returns.
def Congressmen(file, opts = {})
2+
Congressmen.parse(file, opts)
3+
end
4+
5+
module Congressmen

  # Runs PDF::Reader over +file+, feeding its callbacks into a PDFScanner
  # that wraps a fresh SessionRecord.
  #
  # file - path handed straight to PDF::Reader.file.
  # opts - accepted for interface compatibility; not used by this method.
  #
  # Returns the PDFScanner (not the record); the record is available through
  # the scanner's +doc+ accessor.
  #
  # NOTE(review): PDF::Reader.file is the callback-style entry point of the
  # pdf-reader gem — confirm it exists in the installed version.
  def self.parse(file, opts = {})
    scanner = PDFScanner.new(Congressmen::SessionRecord.new)
    PDF::Reader.file(file, scanner)
    scanner
  end

end
14+
15+
16+
# Callback receiver that accumulates the text drawn on each page and hands
# the finished page text to +doc+ (any object responding to +add_page+ and
# +pages+).
class PDFScanner
  attr_accessor :doc

  # doc - the object that collects pages.
  def initialize(doc)
    @doc = doc
    # Start with an empty buffer so a text callback arriving before the first
    # begin_page cannot hit nil; begin_page discards it anyway.
    @current_page = ""
  end

  # Called when page parsing starts: begins a fresh text buffer.
  def begin_page(arg = nil)
    @current_page = ""
  end

  # Called when a page is finished: stores the buffer as a new page on +doc+
  # and prints the stored page's text followed by blank lines.
  def end_page
    doc.add_page(@current_page)
    puts doc.pages.last.text + "\n\n\n\n\n"
  end

  # Records text that is drawn on the page. Extra arguments are ignored.
  def show_text(string, *params)
    @current_page << string
  end

  # The next three callbacks carry no text themselves; each adds a single
  # space to the buffer.
  def end_text_object
    @current_page << " "
  end

  def move_to_start_of_next_line
    @current_page << " "
  end

  def set_character_spacing(string)
    @current_page << " "
  end

  # there's a few text callbacks, so make sure we process them all
  alias :super_show_text :show_text
  alias :move_to_next_line_and_show_text :show_text
  alias :set_spacing_next_line_show_text :show_text

  # This final text callback takes slightly different arguments: its first
  # argument is a list mixing strings with non-string entries. Only the
  # strings are recorded. A missing first argument is treated as an empty list.
  def show_text_with_positioning(*params)
    Array(params.first).each do |piece|
      show_text(piece) if piece.kind_of?(String)
    end
  end
end

lib/congressmen/session_record.rb

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
module Congressmen

  # Inclusive year ranges, one constant per legislature (I to IX).
  # NOTE(review): ranges I-IV share their end year with the next range's start
  # year, while V-IX leave gaps (e.g. 1995 -> 1996) — confirm which convention
  # is intended before relying on these for lookups.
  I_LEGISLATURE    = 1979..1982
  II_LEGISLATURE   = 1982..1986
  III_LEGISLATURE  = 1986..1989
  IV_LEGISLATURE   = 1989..1993
  V_LEGISLATURE    = 1993..1995
  VI_LEGISLATURE   = 1996..1999
  VII_LEGISLATURE  = 2000..2003
  VIII_LEGISLATURE = 2004..2007
  IX_LEGISLATURE   = 2008..2011

  # A whole session record: an ordered list of pages plus the date and record
  # number taken from the first page header that supplies them.
  class SessionRecord
    attr_accessor :pages, :actors, :date, :number

    def initialize
      @pages = []
      @actors = []
    end

    # Wraps +text+ in a SessionRecordPage, appends it to +pages+, prints a
    # one-line trace with the page number, and returns the new page.
    def add_page(text)
      page = Congressmen::SessionRecordPage.new(self, text)
      pages << page
      puts "Pag. #{page.number} -------------------------"
      page
    end

  end

  # One page of a SessionRecord. On construction the raw text is cleaned and
  # scanned for the page header.
  class SessionRecordPage
    # Header pattern; the three captures are used, in order, as the record
    # date, the record number and this page's number.
    HEADER_PATTERN = /CONGRESO(.*). –NÚM. (.*)— (.*)—/

    attr_accessor :record, :number, :text

    # record - the owning record; must respond to date, date=, number, number=.
    # text   - raw page text; it is left unmodified.
    def initialize(record, text)
      @record = record
      @original_text = text
      @text = clean_text(text)
      analyze
    end

    private

    # Returns a cleaned copy of +text+: a hyphen between two characters is
    # removed, a space is inserted after a dot that sits between two
    # characters, runs of spaces are collapsed and the ends are trimmed.
    #
    # Non-destructive methods are used on purpose: the bang variants mutate
    # the caller's string, and strip! returns nil when there is nothing to
    # strip, which used to leave @text nil.
    def clean_text(text)
      dehyphenated = text.gsub(/(.)-(.)/, '\1\2')
      spaced = dehyphenated.gsub(/(.)\.(.)/, '\1. \2')
      spaced.squeeze(" ").strip
    end

    # Reads the header from the cleaned text. The record's date and number are
    # only filled in when still unset; this page's number is always assigned
    # (nil when the header is not found).
    def analyze
      header = HEADER_PATTERN.match(text)
      date, record_number, page_number = header ? header.captures : []

      # TODO: the date is kept as the raw captured string; the original author
      # left a hint about parsing it: Date.strptime('28/03/2008', '%d/%m/%Y')
      record.date ||= date
      puts record.date
      record.number ||= record_number
      self.number = page_number
    end
  end

end

test/congressmen_test.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
require 'test_helper'
2+
3+
# Smoke test: parsing the sample PDF under test/pdfs must complete and hand
# back the scanner that Congressmen.parse builds.
class CongressmenTest < Test::Unit::TestCase
  should "probably rename this file and start testing for real" do
    sample = File.join(File.dirname(__FILE__), "pdfs", "PL_184.pdf")
    receiver = Congressmen(sample)
    # Previously the result was assigned and never checked, so the test could
    # only fail by raising.
    assert_kind_of PDFScanner, receiver
  end
end

0 commit comments

Comments
 (0)