-
Notifications
You must be signed in to change notification settings - Fork 0
/
rdf.py
110 lines (100 loc) · 3.28 KB
/
rdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""
Reality Distortion Field.
A python module to distort a sentence as it would be spoken by
a certain sales executive.
Copyright (C) 2014. Akshay Dewan <[email protected]>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import nltk
from random import choice
from random import random
#Before past tense verb:
vbd = ["kind of", "smartly", "already"]
vbd_ignore = []
vbd_prob = 0.75
#Before past participle verb:
vbn = ["kind of", "smartly", "already"]
vbn_ignore = []
vbn_prob = 0.30
#Before gerund verb:
vbg = ["kind of", "smartly"]
vbg_ignore = []
vbg_prob = 0.6
#Before present tense verb:
vbp = ["smartly"]
vbp_ignore = ["are"]
vbp_prob = 0.75
#Before present tense 3rd person singular
vbz = ["smartly", "kind of"]
vbz_ignore = ["is", "'s"]
vbz_prob = 0.75
#Before superlative adjective
jjs = ["kind of"]
jjs_ignore = []
jjs_prob = 0.5
#Before noun
nn = ["smart", "fancy", "tremendous"]
nn_ignore = []
nn_prob = 0.30
#
#vbd_after = ["in production"]
def make_rdf(text):
tokens = nltk.word_tokenize(text)
pos_tags = nltk.pos_tag(tokens)
index = 0
fluff_list = []
for t in pos_tags:
pos = t[1]
token = t[0]
#print("Found POS: " + token + "->" + pos)
fluff = ''
if pos == "VBD":
fluff = prepend_random(token, vbd, vbd_ignore, vbd_prob)
elif pos == "VBG":
fluff = prepend_random(token, vbg, vbg_ignore, vbg_prob)
elif pos == "VBP":
fluff = prepend_random(token, vbp, vbp_ignore, vbp_prob)
elif pos == "VBZ":
fluff = prepend_random(token, vbz, vbz_ignore, vbz_prob)
elif pos == "JJS":
fluff = prepend_random(token, jjs, jjs_ignore, jjs_prob)
elif pos[:2] == "NN":
fluff = prepend_random(token, nn, nn_ignore, nn_prob)
if(len(fluff) > 0):
fluff_list.append((fluff, index))
index = index + 1
return combine(fluff_list, tokens)
#
# Chooses a random word based on the probability and ignore_list.
# Returns an empty string if no word is to be added
#
def prepend_random(token, word_list, ignore_list, probability):
if token in ignore_list:
return ''
rand = random()
if(0 <= rand <= probability):
return choice(word_list)
else:
return ''
def combine(fluff_list, tokens):
offset = 0
for fluff in fluff_list:
pos = fluff[1] + offset
tokens.insert(pos, fluff[0])
offset = offset + 1
return ' '.join(tokens)
def main():
print "Start"
text = raw_input("Enter a string: ")
print "===RDF form==="
print make_rdf(text)
if __name__ == "__main__":main()