|
| 1 | +import os |
| 2 | +import stanza |
| 3 | +from stanza.server import CoreNLPClient # Import the client module |
| 4 | +from stanza.server.client import PermanentlyFailedException |
| 5 | + |
| 6 | + |
# Relation-extraction script: annotate every .txt file in INPUT_DIR with the
# CoreNLP KBP pipeline and write the relations found in each document to
# "<name>_relations.txt" in OUTPUT_DIR.

# Download the Stanford CoreNLP package and the English KBP models.
stanza.install_corenlp()
stanza.download_corenlp_models(model='english-kbp', version='4.2.0')

# Input and output locations, named once so the loop below cannot drift
# out of sync with the directory that gets created.
INPUT_DIR = "C:\\python\\autoindex\\txt_output"
OUTPUT_DIR = "C:\\python\\autoindex\\relation_extraction\\"

# Message fragment used to recognise a port-conflict start-up failure.
PORT_CONFLICT_MARKER = "unable to start the CoreNLP server on port"

# Create a client object that uses the CoreNLP server and includes the kbp
# processor. The annotator string is split on commas so that each annotator
# is its own list element (a bare .split() splits on whitespace and would
# leave the whole string as a single element).
client = CoreNLPClient(
    annotators="tokenize,pos,lemma,depparse,sentiment,ner,kbp".split(","),
    kbp_model_name='english-kbp',
    timeout=600000,
    memory='6G',
    use_gpu=True,
    pos_batch_size=16,
)

# Create the output directory if it doesn't exist.
os.makedirs(OUTPUT_DIR, exist_ok=True)

try:
    # Iterate through the txt files in INPUT_DIR.
    for entry in os.listdir(INPUT_DIR):
        source_path = os.path.join(INPUT_DIR, entry)

        # os.listdir() also returns subdirectories and non-text files;
        # opening a directory fails and other files are not meant to be
        # annotated, so skip anything that is not a regular .txt file.
        if not os.path.isfile(source_path) or not entry.lower().endswith(".txt"):
            continue

        # Read the file.
        # NOTE(review): no explicit encoding is given, so the platform
        # default is used - confirm how the input files were written.
        with open(source_path, "r") as f:
            document = f.read()

        # Try to annotate the document with the client.
        try:
            doc = client.annotate(document)
        except PermanentlyFailedException as e:
            # Anything other than a port conflict is re-raised unchanged
            # (bare raise keeps the original traceback).
            if PORT_CONFLICT_MARKER not in str(e):
                raise
            # Port conflict: stop the previous server, start a new one and
            # retry the annotation once.
            print("Trying to stop the previous server and start a new one...")
            client.stop()
            client.start()
            doc = client.annotate(document)

        # Extract the relations from every sentence of the document.
        relations = [
            relation
            for sentence in doc.sentence
            for relation in sentence.relation
        ]

        # Drop the extension so the output is named after the input file.
        stem, _extension = os.path.splitext(entry)
        target_path = os.path.join(OUTPUT_DIR, f"{stem}_relations.txt")

        # Write one relation per line.
        with open(target_path, "w") as f:
            f.writelines(str(relation) + "\n" for relation in relations)
finally:
    # Shut the server down even when a file fails part-way through.
    # stop() is the same shutdown call the retry path above relies on.
    client.stop()
0 commit comments