-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
130 lines (112 loc) · 3.51 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
'''
This version uses a Markov Chain to model the text using bigrams.
After scanning a corpus of song lyrics,
the lyrics are tokenized and entered into memory as
a 'bag of words'. ex// [(('in', 'the'), 1413)]
When generating the next possible word, the most frequently
occuring bigram is chosen. In case of a tie, a random bigram
from among the most probabilistic next words is chosen
inspired by: http://www.samansari.info/2016/01/generating-sentences-with-markov-chains.html
'''
import sys, random
import argparse
import nltk
import re
import operator
import pronouncing
import config #contains genius api key
import pickle
# https://stackoverflow.com/questions/3679694/a-weighted-version-of-random-choice
# weighted version of random.choice
def weighted_choice(choices):
# gets total count of all choices
total = sum(c[1]['frequency'] for c in choices)
# create uniform distribution (same probability)
r = random.uniform(0, total)
upto = 0
for c in choices:
if upto + c[1]['frequency'] > r:
# return ngram tuple
return c[0]
upto += c[1]['frequency']
'''
:return: generated line based on last rhyming word
Generates a line given a rhyming word, works backwords through ngram freq
'''
def generate_line_backward(word, gram):
line = []
for __ in range(8):
line.insert(0, word)
choices = [element for element in gram if element[0][1] == word]
if not choices:
break
word = weighted_choice(choices)[0]
return line
'''
:return: generated line based on given starting word
Generates a line given a starting word, based on ngram frequencies
Example of choices:
If first word is 'up', then choices are (('up', 'i'), 3), (('up', 'you'), 2)
'''
def generate_line_forward(word, gram):
line = []
for __ in range(8):
line.append(word)
choices = [element for element in gram if element[0][0] == word]
if not choices:
break
word = weighted_choice(choices)[1]
return line
def generate_lyrics(num_lines, gram):
lines = []
line_len = 0
for i in range(num_lines):
# generate first line (no rhyming)
if i % 2 == 0:
while True:
starting_word = random.choice(gram[0][0])
line = generate_line_forward(starting_word, gram)
if len(line) == 8:
lines.append(line)
break
# generate rhyming line
# if no rhyming line found, recreate first line (no rhyming)
else:
rhyming_word = lines[-1][-1]
possible_rhymes = pronouncing.rhymes(rhyming_word)
while True:
if len(possible_rhymes) == 0:
return []
rhyme = random.choice(possible_rhymes)
rhyme_tag = nltk.pos_tag(rhyme)
# remove particle, determine, and conjunction/coordinates
bad_tags = ['RP', 'DT', 'CC']
if rhyme_tag in bad_tags:
possible_rhymes.remove(rhyme)
line = generate_line_backward(rhyme, gram)
if len(line) < 8:
possible_rhymes.remove(rhyme)
else:
lines.append(line)
break
return lines
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("num_lines", help="number of lines the program generates", type=int)
parser.add_argument("pickle_file", help="pickle file to read from", type=str)
args = parser.parse_args()
num_lines = args.num_lines # number of lines the program generates
pickle_in = open(args.pickle_file, "rb")
gram = pickle.load(pickle_in)
lines = generate_lyrics(num_lines, gram)
while len(lines) < num_lines:
lines = generate_lyrics(num_lines, gram)
# print generated lyrics
for line in lines:
counter = 0
for l in line:
if counter == 7:
print(l, end=',\n')
else:
print(l, end=' ')
counter += 1