forked from quadrismegistus/prosodic
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.py
244 lines (227 loc) · 6.78 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
#encoding=utf-8
############################################
# [config.py]
# CONFIGURATION SETTINGS FOR PROSODIC
#
# Here you may change the runtime settings for prosodic.
# For more help on this file, please see the README in this folder,
# or visit it online: <https://github.com/quadrismegistus/prosodic>.
# If you have any questions, please email Ryan <[email protected]>.
#
############################################
############################################
# METRICAL PARSING
#
# Set the Meter ID: the name of the meter's configuration file
# in the "meters" subdirectory, e.g. "kiparskyhanson_shakespeare"
# (omit the .py from the filename).
#
# meter = 'kiparskyhanson_shakespeare'
#
# With no Meter ID set here, PROSODIC's interactive mode asks you to
# choose a meter. When PROSODIC is used as a python module, you must
# instead create the meter yourself and pass it to the Text object.
#
meter = "meter_arto"
############################################
############################################
# SELECT THE LANGUAGE
#
# Select the language that will be used in PROSODIC,
# when typing text directly or loading text.
#
# Treat every text as English:
lang = "en"
#
# All text is Finnish:
#lang='fi'
#
# Detect language from first two characters of filename:
# e.g. "en.[filename].txt" is English, "fi.[filename].txt" is Finnish
#lang='**'
############################################
############################################
# CONFIGURE TEXT-TO-SPEECH ENGINE (for English)
#
# To parse unknown English words, you'll need a TTS engine installed.
# For instructions, please see the README.
#
# Option 1 (recommended): espeak
# [syllabification is then done with the CMU Syllabifier]
en_TTS_ENGINE = "espeak"
#
# Option 2: OpenMary
#en_TTS_ENGINE = "openmary"
#
# Option 3: no TTS at all
# [lines containing unknown words are skipped during metrical parsing]
#en_TTS_ENGINE = "none"
#
# Remember the TTS result for each unknown word, so that TTS does not
# have to be used for that word again [set to 0 to disable]
en_TTS_cache = 1
############################################
############################################
# CONFIGURE METRICALTREE
#
# Whether to parse text using metrical tree (English only).
parse_using_metrical_tree = False
############################################
############################################
# OPTIONS ABOUT PRINTING TO SCREEN
#
# Show loaded words, parses, etc. on screen:
#print_to_screen = True
#
# Keep loaded words, parses, etc. off the screen. Although hidden,
# any output may still be saved to disk using the /save command:
print_to_screen = False
#
# Default line length used when printing
linelen = 60
############################################
############################################
# OPTIONS ABOUT LINES
#
######
# [Line SIZE]
#
# Upper bound on the size of a line to parse; larger lines are
# skipped during parsing.
# [Beyond approximately 20 syllables the number of possibilities
#  becomes too large and the algorithm slows down to a halt.]
# NOTE(review): 60 is well above that rough limit; presumably
# parse_maxsec is what bounds parsing time in practice -- confirm.
line_maxsylls = 60
#
# The minimum size of the line to parse:
# [useful if lines are determined by punctuation,
# because sometimes they can be very very short
# and so pointless for metrical parsing.]
#line_minsylls=9
#
# Alternatively, a time limit: the number of seconds after which
# Prosodic gives up trying to parse a (long or ambiguous) line.
parse_maxsec = 30
#
#
######
# [Line DIVISIONS]
#
# Here you may decide how texts divide into lines.
# This is significant only because the line,
# with its words and syllables, is the unit passed
# to the metrical parser for parsing.
#
# Break lines only where the processed text file itself has a
# linebreak (good for metrical poetry):
linebreak = "line"
#
# Linebreaks occur only upon encountering any of these
# punctuation marks (good for prose):
#linebreak=',;:.?!()[]{}<>'
#
# Linebreaks occur both at linebreaks in the text,
# *and* at any of these punctuation marks (good for
# prose and free-verse poetry):
#linebreak='line,;:.?!()[]{}<>'
#
#
######
# [MISCELLANEOUS line options]
#
# Headedness [optional]
# When several parses are tied for the lowest score, break the tie
# in favor of lines that begin with this pattern:
line_headedness = "ws"
#line_headedness='sw'
#line_headedness='wws'
#line_headedness='ssw'
############################################
############################################
# OPTIONS ABOUT WORDS
#
######
# [Tokenization]
#
# How are lines of text split into words? Define the regular
# expression that is applied to a string of text in order
# to split it into a list of words.
#
# Words are tokenized against [^] white-spaces [\s+] and hyphens [-]
#tokenizer=r'[^\s+-]+'
#
# Words are tokenized against [^] white-spaces [\s+]
# [The pattern is a raw string (r'...') so that the backslash in \s
#  reaches the regex engine untouched; without the r prefix Python
#  reports \s as an invalid escape sequence.]
# [Note: inside the [...] character class the + is a literal plus sign,
#  so plus signs separate words as well as white-space.]
tokenizer=r'[^\s+]+'
#
######
# [Resolving stress ambiguity]
#
# Some words have multiple stress profiles: ambiguous polysyllabic
# words, and also ambiguous monosyllabic words. Words in the
# "maybestressed.txt" file of a language folder (e.g. dicts/en)
# will be given two stress profiles, one stressed and the other
# unstressed. The CMU Pronunciation Dictionary also has multiple
# stress profiles for some words.
#
# Let the metrical parser parse every stress profile of every word in
# the line, choosing for each word the stress profile that best fits
# the metrical parse:
resolve_optionality = 1
#resolve_optionality=0
#
#
######
# [ELISIONS of Syllables: English only]
#
# Some syllables are elided in English verse,
# e.g. with music sweet as love, which overflows her bower
# --> with|MU|sic|SWEET|as|LOVE|which|OV|er|FLOWS|her|BOW'R
# or e.g. scattering unbeholden
# --> SCAT|tring|UN|be|HOLD|en
#
# Also add pronunciations for words that could have elided syllables:
add_elided_pronunciations = 1
#add_elided_pronunciations=0
#
#
######
# [Output formatting]
#
# Here you may change the format under which the syllabified,
# phonetic output will appear. The options are:
# - ipa
# - cmu (the formatting used in the CMU Pronunciation Dictionary)
# - orth (the orthography itself [good for Finnish])
#
# Phonetic output used by default, for all languages:
output = "ipa"
#
# Phonetic output used for English:
output_en = "ipa"
#
# Phonetic output used for Finnish
# (orthography, since Finnish pronunciation is essentially identical to it):
output_fi = "orth"
############################################
############################################
# PATHS USED BY PROSODIC
#
# If these are relative paths (no leading /),
# they are defined from the point of view of
# the root directory of PROSODIC.
#
# Corpora folder
# [it should contain folders, each of which contains text files]
folder_corpora = "corpora/"
#
# Folder in which results (statistics, etc) are stored
folder_results = "results/"
#
# Folder in which tagged samples (hand-parsed lines) are stored:
folder_tagged_samples = "tagged_samples/"
############################################
####
# MEMORY DECISIONS
#
# NOTE(review): presumably the maximum number of bounded parses that
# are kept in memory; confirm against the parser code.
num_bounded_parses_to_store = 100
#
###