-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathVerbPreferences.py
78 lines (72 loc) · 3.15 KB
/
VerbPreferences.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import xml.etree.ElementTree as Et
import os
from utility import deaccent
def proieltbs(treebank):
    """Print every matching use of the verb 'περισσευω' in a PROIEL treebank.

    One line is printed per matching verb token, holding: author, ":",
    title, the form of the verb's 'sub' dependent, the verb's own form,
    'ἐν' when a token with that lemma depends on the verb, and the form of
    a token depending on that 'ἐν'. Any slot that finds no token is printed
    as 'ellipsed'. When several tokens qualify for a slot the last one in
    sentence order wins.

    Args:
        treebank: parsed tree exposing ``getroot()``; the driver passes the
            result of ``xml.etree.ElementTree.parse``.

    Returns:
        None. Results are written to standard output only.
    """
    author = 'unknown'
    title = 'unknown'
    for source in treebank.getroot():
        for division in source:
            # Title/author elements are siblings of the text divisions, so
            # the most recently seen values label the verbs that follow.
            if division.tag == 'title':
                title = division.text
            if division.tag == 'author':
                author = division.text
            for sentence in division:
                # Direct children of the sentence that carry a form attribute.
                tokens = sentence.findall(".*[@form]")
                for token in tokens:
                    lemma = token.get('lemma')
                    # A token without a lemma cannot be the verb; skipping
                    # it also keeps None away from deaccent().
                    if lemma is None or deaccent(lemma) != 'περισσευω':
                        continue
                    # Index 4 of the morphology tag must be 'a' (presumably
                    # the voice slot, 'a' = active -- confirm against the
                    # PROIEL tag set). Slicing tolerates a missing or
                    # truncated tag instead of raising.
                    if (token.get('morphology') or '')[4:5] != 'a':
                        continue
                    verb_id = token.get('id')
                    subject = 'ellipsed'
                    en = 'ellipsed'
                    prepobj = 'ellipsed'
                    for word in tokens:
                        if word.get('head-id') != verb_id:
                            continue
                        if word.get('relation') == 'sub':
                            subject = word.get('form')
                        if word.get('lemma') == 'ἐν':
                            en = 'ἐν'
                            en_id = word.get('id')
                            # Whatever hangs off the preposition is
                            # reported as its object.
                            for candidate in tokens:
                                if candidate.get('head-id') == en_id:
                                    prepobj = candidate.get('form')
                    print(author, ":", title, subject, token.get('form'), en, prepobj)
def perseustbs(treebank):
    """Print every matching use of the verb 'περισσευω' in a Perseus treebank.

    One line is printed per matching verb word, holding: author, ":",
    title, the form of the verb's 'sub' dependent, the verb's own form,
    'ἐν' when a word with that lemma depends on the verb, and the form of
    a word depending on that 'ἐν'. Any slot that finds no word is printed
    as 'ellipsed'. When several words qualify for a slot the last one in
    sentence order wins.

    Args:
        treebank: parsed tree exposing ``getroot()``; the driver passes the
            result of ``xml.etree.ElementTree.parse``.

    Returns:
        None. Results are written to standard output only.
    """
    froot = treebank.getroot()
    # find() returns None when the element is absent; fall back to the
    # same 'unknown' placeholder the PROIEL reader uses instead of raising.
    author_node = froot.find(".//author")
    author = author_node.text if author_node is not None else 'unknown'
    title_node = froot.find(".//title")
    title = title_node.text if title_node is not None else 'unknown'
    for body in froot:
        for sentence in body:
            # Direct children of the sentence that carry a form attribute.
            words = sentence.findall(".*[@form]")
            for verb in words:
                lemma = verb.get('lemma')
                # A word without a lemma cannot be the verb; skipping it
                # also keeps None away from deaccent().
                if lemma is None or deaccent(lemma) != 'περισσευω':
                    continue
                # Index 5 of the postag must be 'a' (presumably the voice
                # slot, 'a' = active -- confirm against the Perseus tag
                # set). Slicing tolerates a missing or truncated tag.
                if (verb.get('postag') or '')[5:6] != 'a':
                    continue
                verb_id = verb.get('id')
                subject = 'ellipsed'
                en = 'ellipsed'
                prepobj = 'ellipsed'
                for word in words:
                    if word.get('head') != verb_id:
                        continue
                    # NOTE(review): 'sub' is the label the PROIEL reader
                    # tests for; Perseus files may label subjects
                    # differently (e.g. 'SBJ') -- confirm against the data.
                    if word.get('relation') == 'sub':
                        subject = word.get('form')
                    if word.get('lemma') == 'ἐν':
                        en = 'ἐν'
                        en_id = word.get('id')
                        # Dependency links in this format live in 'head'
                        # (as used for the verb above), not 'head-id'.
                        for candidate in words:
                            if candidate.get('head') == en_id:
                                prepobj = candidate.get('form')
                print(author, ":", title, subject, verb.get('form'), en, prepobj)
# Directory holding the treebank files; named once so the working
# directory and the listing below cannot drift apart.
TREEBANK_DIR = '/home/chris/Desktop/Treebanks'
# Et.parse() below receives bare file names, which resolve against the
# current working directory.
os.chdir(TREEBANK_DIR)
# os.listdir() returns names in arbitrary order; sorting makes the printed
# results reproducible from run to run.
for file_name in sorted(os.listdir(TREEBANK_DIR)):
    # Skip the repository bookkeeping entries, and any other directory,
    # because Et.parse() can only read regular files.
    if file_name in ('README.md', '.git') or os.path.isdir(file_name):
        continue
    tb = Et.parse(file_name)
    # A <proiel> root selects the PROIEL reader; every other file is
    # handed to the Perseus reader.
    if tb.getroot().tag == 'proiel':
        proieltbs(tb)
    else:
        perseustbs(tb)