-
Notifications
You must be signed in to change notification settings - Fork 1
/
haikufinder.rb
executable file
·83 lines (76 loc) · 2.35 KB
/
haikufinder.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#! /usr/bin/ruby
# USAGE: haikufinder [file|url]
# Takes one argument: either a path to a local text file or a URL starting
# with "http". URLs are fetched by running the external `wget` program and
# capturing its standard output; anything else is read as a file.
# With no arguments the usage line is printed and the script exits.
if ARGV.empty?
  puts "Usage: haikufinder [file|url]"
  exit
end

# \A anchors to the start of the whole argument (^ would also match after an
# embedded newline).
if ARGV[0] =~ /\Ahttp/
  url = ARGV[0]
  # Run wget with an explicit argument vector instead of a backtick shell
  # command, so shell metacharacters in the URL (";", "`", "$(...)", "&")
  # are passed to wget literally and never interpreted by a shell.
  text = IO.popen(['wget', url, '-O', '-'], &:read)
else
  text = File.read(ARGV[0])
end
# Build SYLHASH, a lookup table from lowercased word to syllable count, by
# parsing the hyphenated dictionary file 'hyphenated_dict.txt' (resolved
# relative to the current working directory).
# Each line of the file is one word whose syllables are separated by "|" or,
# for hyphenated words, "-". The word is the pieces joined back together and
# its syllable count is the number of pieces.
# Note that some words can be pronounced with different numbers of syllables,
# so this isn't perfect: when a word appears more than once, the smallest
# positive count seen is the one that is kept.
SYLHASH = {}
# File.foreach closes the file when iteration finishes (or raises), unlike a
# bare File.open whose handle was never closed.
File.foreach('hyphenated_dict.txt') do |line|
  splitline = line.chomp.split(/[\|-]/)
  word = splitline.join('').downcase
  syllables = splitline.size
  existing = SYLHASH[word].to_i
  # Only overwrite when there is no usable entry yet or the new count is not
  # larger than the stored one.
  SYLHASH[word] = syllables unless existing > 0 && existing < syllables
end
# Takes a line/sentence and returns an array with the number of syllables
# per word, looked up in SYLHASH.
#
# The characters . ! ? , " : ; ( ) _ and any "--" are removed and the line is
# lowercased before it is split into words on whitespace. Leading and
# trailing whitespace is ignored, so it never produces an empty "word".
#
# line - String containing the sentence to analyse.
#
# Returns an Array of Integers, one per word in order. Returns [] when the
# line has no words, or as soon as any word has no positive entry in SYLHASH
# (the line cannot be scored reliably in that case).
def getsyllist(line)
  cleaned = line.gsub(/[\.!?,":;()_]/, '').gsub(/--/, '').downcase
  # split with no pattern splits on runs of whitespace and drops leading
  # whitespace, unlike split(/\s+/) which would yield a leading "" token.
  words = cleaned.split

  syllist = []
  words.each do |word|
    count = SYLHASH[word].to_i
    # Unknown word: give up on the whole line.
    return [] unless count > 0
    syllist.push(count)
  end
  syllist
end
# Finds every sentence in the text whose words add up to exactly 17 syllables.
#
# The text is flattened so that every run of whitespace (including line
# breaks) becomes a single space, split into sentences ending in ".", "!" or
# "?", and each sentence is scored with getsyllist.
#
# text - String containing the whole document to search.
#
# Returns an Array of [sentence, syllist] pairs in document order, where
# sentence is the stripped sentence text (terminator included) and syllist is
# the Array of per-word syllable counts that getsyllist returned for it.
def get_haiku_list(text)
  wholetext = text.gsub(/\s+/, ' ')
  # Need to split on punctuation but avoid ending prematurely on punctuated
  # honorifics: a "." directly after Mr, Mrs or Dr is consumed as part of the
  # sentence body, so "Mr. Smith went home." stays one sentence. (A negative
  # lookbehind before the terminator would instead make the match restart
  # after "Mr." and silently drop the honorific from the sentence.)
  sentences = wholetext.scan(/(?:[^.!?]|(?<=Mr|Mrs|Dr)\.)+[.!?]/).map(&:strip)

  haikulist = []
  sentences.each do |sentence|
    syllist = getsyllist(sentence)
    sylcount = syllist.inject(0) { |total, count| total + count.to_i }
    haikulist.push([sentence, syllist]) if sylcount == 17
  end
  haikulist
end
# Report every 17-syllable sentence found in the input: the sentence itself on
# one line, then its per-word syllable counts joined with "-" on the next.
found = get_haiku_list(text)
found.each do |sentence, counts|
  puts sentence
  puts counts.join('-')
end