-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathHaikuBook.py
185 lines (133 loc) · 3.84 KB
/
HaikuBook.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# -*- coding: utf-8 -*-
import re
import sys
from random import sample
import random
import syllables_en
from sentences import split_into_sentences
import time
import argparse
#Function to count syllables
def CountSyllables(word, isName=True):
    """Return the syllable count that ``syllables_en.count`` reports for *word*.

    Args:
        word: Text handed unchanged to ``syllables_en.count``.
        isName: Accepted for interface compatibility; not used.
    """
    syllable_total = syllables_en.count(word)
    return syllable_total
def GenSentence(rword, slist, att):
    """Pick a random sentence from *slist* that contains *rword* as a word.

    A sentence is drawn at random up to ``att + 1`` times. A draw matches
    when *rword* equals one of the sentence's whitespace-separated tokens,
    so a word with punctuation attached (``"cat."``) does not match ``"cat"``.

    Args:
        rword: Word that must appear in the chosen sentence.
        slist: Candidate sentences (strings) to draw from.
        att: Index of the last permitted attempt; ``att + 1`` draws are made.

    Returns:
        A one-element list holding the matching sentence, or the string
        ``'fail'`` when no draw matched, *slist* is empty, or *att* is
        negative.
    """
    # sample() raises ValueError on an empty population; report that the same
    # way as an unsuccessful search so callers only have one case to check.
    if not slist:
        return 'fail'
    for _ in range(att + 1):
        picked = sample(slist, 1)
        if rword in ''.join(picked).split():
            return picked
    return 'fail'
#Process arguments
# -r/--random picks seed words at random; -s/--specific WORD builds the
# haiku around WORD.
parser = argparse.ArgumentParser()
parser.add_argument('-r', '--random', help='Create Haiku based on random word', action="store_true")
parser.add_argument('-s', '--specific', help='Create Haiku based on specific word', action="store")
args = parser.parse_args()
# With neither option the seed word would be None (or empty), which can never
# match a word in a sentence; fail fast with a usage message instead of
# searching in vain.
if not args.random and not args.specific:
    parser.error("choose a seed word with -s/--specific WORD or use -r/--random")
# Start main program
attempts = 4000 # attempts at finding sentence with given word
print("Haiku generator\n")
start = time.time()  # reference point for the elapsed-time report
if args.random:
    # Read the word list inside a context manager so the file is closed, and
    # strip the whole line ending instead of blindly dropping the last
    # character (which would truncate a final line that has no newline).
    with open('20k.txt') as word_file:
        candidate_words = [line.strip() for line in word_file if line.strip()]
    cword = random.choice(candidate_words)
    word_is_random = True
else:
    cword = args.specific
    word_is_random = False
    print("Specific word chosen:", args.specific)
# Sentences bucketed by syllable count: five-syllable and seven-syllable.
fives = []
sevens = []
print("Extracting and sorting sentences.")
with open('40b.txt') as f:
    sentences = split_into_sentences(f.read())
for sentence in sentences:
    # Count once per sentence; a sentence cannot be in both buckets, so the
    # second comparison is only needed when the first one fails.
    syllable_count = CountSyllables(sentence)
    if syllable_count == 5:
        fives.append(sentence)
    elif syllable_count == 7:
        sevens.append(sentence)
# Number of completed generation rounds; starts at zero so the figure
# reported afterwards equals the rounds actually run.
trials = 0
sen1 = 'fail'
sen2 = 'fail'
sen3 = 'fail'
if word_is_random:
    # Load the seed word list once (and close the file) rather than
    # re-reading it on every round of the loop below.
    with open('20k.txt') as word_file:
        seed_words = [line.strip() for line in word_file if line.strip()]
# Generate 3 sentences
print("Generating Haiku.")
while 'fail' in (sen1, sen2, sen3):
    sen1 = GenSentence(cword, fives, attempts)
    sen2 = GenSentence(cword, sevens, attempts)
    sen3 = GenSentence(cword, fives, attempts)
    trials += 1
    # Lines must be distinct; any two equal results (which includes two
    # 'fail' markers) void the whole round.
    if sen1 == sen2 or sen1 == sen3 or sen2 == sen3:
        sen1 = sen2 = sen3 = 'fail'
    # Remember the word this round used, because cword may be replaced below.
    prevword = cword
    if word_is_random:
        cword = random.choice(seed_words)
    elif trials >= 1000 and 'fail' in (sen1, sen2, sen3):
        # let's make sure it gives up eventually -- but only while the
        # search is still unsuccessful, never after a haiku was found.
        print("Giving up on", cword)
        sys.exit()
# Characters removed from every haiku line.
_UNWANTED_CHARS = str.maketrans('', '', '"[]*_-')


def _clean_haiku_line(sentence):
    """Return *sentence* as a tidy display line.

    Args:
        sentence: A string, or an iterable of strings that is joined first
            (GenSentence returns a one-element list).

    Returns:
        The text starting at its first alphabetic character, with the
        characters ``" [ ] * _ -`` removed, surrounding whitespace stripped
        and the first character upper-cased.
    """
    text = ''.join(sentence)
    # Fall back to index 0 when there is no letter at all, so the text is
    # kept as-is instead of being cut at an undefined or stale position.
    first_alpha = next((i for i, ch in enumerate(text) if ch.isalpha()), 0)
    text = text[first_alpha:].translate(_UNWANTED_CHARS).strip()
    # str methods return new strings, so the result has to be kept. Only the
    # first character is upper-cased: str.capitalize() would also lower-case
    # the rest of the line (proper nouns, "I").
    return text[:1].upper() + text[1:]


print("Sanitising text.")
#Convert and clean sentences
sen1 = _clean_haiku_line(sen1)
sen2 = _clean_haiku_line(sen2)
sen3 = _clean_haiku_line(sen3)
final = sen1 + '\n' + sen2 + '\n' + sen3
# Stop the clock, then report the seed word, the trial count and the
# elapsed whole seconds before showing the haiku itself.
end = time.time()
elapsed_seconds = round(end-start)
print(
    "\nHaiku completed using seed word:", prevword,
    "after", trials,
    "trials and", elapsed_seconds,
    "seconds.\n",
)
#Print results
print(final)
print("\n")