Skip to content

Commit

Permalink
report
Browse files Browse the repository at this point in the history
  • Loading branch information
olafurjohannsson committed Nov 24, 2023
1 parent 834b136 commit 217136d
Showing 1 changed file with 20 additions and 11 deletions.
31 changes: 20 additions & 11 deletions src/generate_classification_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from pprint import pprint
from transformers import AutoTokenizer, AutoModelForSequenceClassification
Expand Down Expand Up @@ -115,7 +115,8 @@ def generate_report(self) -> (str | dict):
y_true.extend(labels.tolist())
y_pred.extend(prediction.indices.tolist())
report = classification_report(y_true, y_pred, output_dict=True)
return report
acc = accuracy_score(y_true, y_pred)
return acc


class DataFrameLoader():
Expand Down Expand Up @@ -144,6 +145,22 @@ def __init__(self, pdf_src, sample_size=None, random_state=42):

self.X_test = X_test
self.y_test = y_test

def call_model(X_all, y_all, folder, device):
    """Evaluate the checkpoint stored in ``folder`` on the supplied data.

    The model and tokenizer are both loaded from ``folder`` via
    ``from_pretrained``; the model is moved to ``device`` before being
    handed to ``RoBERTaClassificationReport``.

    Args:
        X_all: Forwarded unchanged to ``RoBERTaClassificationReport``
            (presumably the input samples -- confirm against that class).
        y_all: Forwarded unchanged to ``RoBERTaClassificationReport``
            (presumably the matching labels -- confirm against that class).
        folder: Location passed to ``from_pretrained`` for the model and
            the tokenizer.
        device: Device the model is moved to; also passed on to the report.

    Returns:
        Whatever ``RoBERTaClassificationReport.generate_report()`` returns.
    """
    # ``.to`` hands back the module itself, so loading and placement chain.
    classifier = AutoModelForSequenceClassification.from_pretrained(folder).to(device)
    tok = AutoTokenizer.from_pretrained(folder)
    return RoBERTaClassificationReport(
        classifier, tok, X_all, y_all, device
    ).generate_report()

def generate_report(filename, folder, device):
    """Print the evaluation result for one data file / model folder pair.

    Builds a ``DataFrameLoader`` from ``filename``, passes its ``X_all`` and
    ``y_all`` attributes to ``call_model`` together with ``folder`` and
    ``device``, and pretty-prints the result between two rows of asterisks.

    Args:
        filename: Passed to ``DataFrameLoader`` as the data source.
        folder: Model location forwarded to ``call_model``.
        device: Device forwarded to ``call_model``.

    Returns:
        None. All output goes to stdout.
    """
    separator = "*" * 50

    print(separator)
    print("Loading model from folder {} using file {}".format(folder, filename))

    frames = DataFrameLoader(filename)
    result = call_model(frames.X_all, frames.y_all, folder, device)

    pprint(result)
    print(separator)

def eval_files():
import gc
gc.collect()
Expand Down Expand Up @@ -172,15 +189,7 @@ def eval_files():
for d in data:
folder = d['folder']
filename = d['filename']
print("*"*50)
print("Loading model from folder {} using file {}".format(folder, filename))
dfl = DataFrameLoader(filename)
model = AutoModelForSequenceClassification.from_pretrained(folder)
model.to(device)
tokenizer = AutoTokenizer.from_pretrained(folder)
report = RoBERTaClassificationReport(model, tokenizer, dfl.X_all, dfl.y_all, device)
pprint(report.generate_report())
print("*"*50)
generate_report(filename, folder, device)

if __name__ == '__main__':
eval_files()
Expand Down

0 comments on commit 217136d

Please sign in to comment.