Skip to content

Commit

Permalink
pos given lid resulted in 0.0 performance, something wrong with groundtruth
Browse files Browse the repository at this point in the history
  • Loading branch information
ctarnold committed Apr 25, 2024
1 parent d5d5172 commit 1f42c0e
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions miamiCorpusPOS.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

out_dir = '/scratch/gpfs/ca2992/jpLLM/jpLLM/pos_cond_lid_out'
out_dir = '/scratch/gpfs/ca2992/jpLLM/jpLLM/pos_list_lid_out'
data_dir = '/scratch/gpfs/ca2992/jpLLM/bangor/crowdsourced_bangor'

pos_model = pipeline('ner', model=model, tokenizer=tokenizer)
Expand Down Expand Up @@ -49,7 +49,7 @@ def tokenToWordPred(message, trueWords, lid):
# to the pos word level predictions
pos = posResult[index].get('entity')
# have the pos and the true lid to get pos given lid stats
pos_pred.append([pos + " " + lid])
pos_pred.append([pos, lid])
# if token word mismatch impossible to handle
if (word != posToken and word[0] != posToken[0]):
print("MISMATCH", word, posToken)
Expand Down Expand Up @@ -97,7 +97,7 @@ def tokenToWordPred(message, trueWords, lid):
# if it is not a contraction, use the truth tag
message = message + " " + word
words.append(word)
pos_truth.append([pos + " " + lid])
pos_truth.append([pos, lid])
lid_truth.append([lid])
# at the end of each sentence, pass into the model
if (word == '.'):
Expand Down

0 comments on commit 1f42c0e

Please sign in to comment.