Skip to content

Commit 1d2f072

Browse files
committed
update utilities
1 parent 7c6908e commit 1d2f072

File tree

3 files changed

+60
-0
lines changed

3 files changed

+60
-0
lines changed

clean.cmd

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,10 @@ del C:\python\autoindex\sentiments\*.* /Q
77
del C:\python\autoindex\extract\*.* /Q
88
del C:\python\autoindex\kvextract\*.* /Q
99
del C:\python\autoindex\category\*.* /Q
10+
del C:\python\autoindex\FL_sentiment\*.* /Q
11+
del C:\python\autoindex\HF_sentiment\*.* /Q
12+
del C:\python\autoindex\NER_Extraction\*.* /Q
13+
del C:\python\autoindex\Rake_Extraction\*.* /Q
14+
del C:\python\autoindex\relation_extraction\*.* /Q
15+
del C:\python\autoindex\topic_modeling\*.* /Q
16+
del C:\python\autoindex\VADER_sentiment\*.* /Q

javakill.cmd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
rem Forcefully terminate (/F) every running process whose image name (/IM) is java.exe.
taskkill /F /IM java.exe

relation_extraction.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
"""Extract KBP relations from text files with Stanford CoreNLP.

Every regular file found in ``INPUT_DIR`` is read, annotated by a CoreNLP
server driven through stanza's ``CoreNLPClient``, and the relations attached
to each sentence are written, one per line, to ``<name>_relations.txt`` in
``OUTPUT_DIR``.
"""
import os
import stanza
from stanza.server import CoreNLPClient  # Client class that drives the CoreNLP server
from stanza.server.client import PermanentlyFailedException

# Directory that is scanned for input documents.
INPUT_DIR = "C:\\python\\autoindex\\txt_output"
# Directory that receives one "<name>_relations.txt" file per input document.
OUTPUT_DIR = "C:\\python\\autoindex\\relation_extraction\\"

# Download the Stanford CoreNLP package and English models
stanza.install_corenlp()
stanza.download_corenlp_models(model='english-kbp', version='4.2.0')

# Create a client object that uses the CoreNLP server and includes the kbp processor.
# The annotator names are comma-separated, so split on "," (a bare split() only
# splits on whitespace and would yield a single-element list).
client = CoreNLPClient(
    annotators="tokenize,pos,lemma,depparse,sentiment,ner,kbp".split(","),
    kbp_model_name='english-kbp',
    timeout=600000,
    memory='6G',
    use_gpu=True,
    pos_batch_size=16,
)

# Create the output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

try:
    # Iterate through the entries of the input directory.
    for filename in os.listdir(INPUT_DIR):
        source_path = f"{INPUT_DIR}\\{filename}"

        # listdir() also returns sub-directories; open() on one would abort the run.
        if not os.path.isfile(source_path):
            continue

        # Read the file.
        # NOTE(review): no encoding is given, so the platform default is used for
        # both reading and writing -- confirm the encoding of the input files.
        with open(source_path, "r") as f:
            document = f.read()

        # Try to annotate the document with the client
        try:
            doc = client.annotate(document)
        except PermanentlyFailedException as e:
            # Anything other than a port conflict is re-raised unchanged.
            if "unable to start the CoreNLP server on port" not in str(e):
                raise
            # Port conflict: stop the previous server, start a new one, retry once.
            print("Trying to stop the previous server and start a new one...")
            client.stop()
            client.start()
            doc = client.annotate(document)

        # Extract the relations from the document.
        relations = []
        for sentence in doc.sentence:
            relations.extend(sentence.relation)

        # Name the output after the input file without its extension; a separate
        # variable keeps the loop variable intact.
        stem, _extension = os.path.splitext(filename)

        # Write the relations to a file, one per line.
        with open(f"{OUTPUT_DIR}{stem}_relations.txt", "w") as f:
            for relation in relations:
                f.write(str(relation) + "\n")
finally:
    # Always shut the CoreNLP server down, even when a document fails, so that
    # no Java process is left running. stop() is the client's shutdown call.
    client.stop()

0 commit comments

Comments
 (0)