Skip to content

Commit

Permalink
Add first version of expected calibration error in evaluations
Browse files (browse the repository at this point in the history)
  • Loading branch information
adelmemariani committed Jan 22, 2025
1 parent 01619c2 commit c1d955c
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 14 deletions.
12 changes: 11 additions & 1 deletion dicee/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,16 @@ def evaluate_lp_k_vs_all(self, model, triple_idx, info=None, form_of_labelling=N
predictions[j, id_e_target] = target_value
# (5) Sort predictions.
sort_values, sort_idxs = torch.sort(predictions, dim=1, descending=True)

#probabilities = torch.sigmoid(sort_values)
#non_zero_probs = probabilities[probabilities != 0.0]
#one_minus_non_zero_probs = 1 - non_zero_probs

non_zero_probs = sort_values[sort_values != 0.0]
probabilities = torch.sigmoid(non_zero_probs)
one_minus_non_zero_probs = 1 - probabilities

ECE = one_minus_non_zero_probs.sum().item()
# (6) Compute the filtered ranks.
for j in range(data_batch.shape[0]):
# index between 0 and \inf
Expand All @@ -327,7 +337,7 @@ def evaluate_lp_k_vs_all(self, model, triple_idx, info=None, form_of_labelling=N
hit_10 = sum(hits[10]) / num_triples
mean_reciprocal_rank = np.mean(1. / np.array(ranks))

results = {'H@1': hit_1, 'H@3': hit_3, 'H@10': hit_10, 'MRR': mean_reciprocal_rank}
results = {'H@1': hit_1, 'H@3': hit_3, 'H@10': hit_10, 'MRR': mean_reciprocal_rank, 'ECE': ECE }
if info and self.during_training is False:
print(info)
print(results)
Expand Down
30 changes: 17 additions & 13 deletions dicee/scripts/bayesian_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ def objective(trial, model, dataset, loss):
embedding_dim = 32 #trial.suggest_categorical("embedding_dim", [32, 64])
optimizer = "Adam" #trial.suggest_categorical("optimizer", ["Adam", "Adopt"])
batch_size = 1024 #trial.suggest_categorical("batch_size", [512, 1024])
learning_rate = trial.suggest_float("learning_rate", 0.01, 0.1)
learning_rate = 0.1 #trial.suggest_float("learning_rate", 0.01, 0.1)

label_relaxation_alpha = trial.suggest_float("label_relaxation_alpha", 0.01, 0.1,) if loss == "LRLoss" else 0.0
label_smoothing_rate = trial.suggest_float("label_smoothing_rate", 0.01, 0.1) if loss == "LS" else 0.0
label_relaxation_alpha = trial.suggest_float("label_relaxation_alpha", 0.01, 0.2,) if loss == "LRLoss" else 0.0
label_smoothing_rate = trial.suggest_float("label_smoothing_rate", 0.01, 0.2) if loss == "LS" else 0.0

parser.add_argument('--loss_fn', type=str, default=loss)
parser.add_argument("--label_smoothing_rate", type=float, default=label_smoothing_rate)
Expand Down Expand Up @@ -71,14 +71,14 @@ def objective(trial, model, dataset, loss):

# Set according to your environment. TODO: make this a parameter.
main_math = "../../../KGs/Datasets_Perturbed/"
report_folder_name = "./bo_outputs/"
report_folder_name = "./bo_outputs/1024/"
report_file_name = "bayesian_optimization_report.txt"

datasets = ["UMLS", "KINSHIP", "NELL-995-h100", "WN18RR", "FB15k-237"]
models = ["Keci", "Pykeen_MuRE", "QMult", "Pykeen_DistMult", "Pykeen_ComplEx", "Pykeen_RotatE", "Pykeen_BoxE"]
losses = ["LRLoss", "LS"]
models = ["Keci", "Pykeen_MuRE", "QMult", "Pykeen_DistMult", "Pykeen_ComplEx"] #, "Pykeen_RotatE", "Pykeen_BoxE"
losses = ["LRLoss", "LS", "BCELoss"]

number_of_runs = 30
number_of_runs = 50

for dataset in datasets:
for model in models:
Expand All @@ -94,6 +94,7 @@ def objective(trial, model, dataset, loss):

os.makedirs(os.path.dirname(report_folder_name), exist_ok=True)


fig1 = plot_parallel_coordinate(study)
fig1.write_image(report_folder_name + f"parallel_coordinate-{dataset}-{model}-{loss}"+ ".png")

Expand All @@ -102,18 +103,21 @@ def objective(trial, model, dataset, loss):

fig4 = plot_optimization_history(study)
fig4.write_image(report_folder_name + f"plot_optimization_history-{dataset}-{model}-{loss}" + ".png")

"""
if loss == "LRLoss":
fig2 = plot_contour(study, params=["label_relaxation_alpha", "learning_rate"])
fig2.write_image(report_folder_name + f"contour-{dataset}-{model}-{loss}" + ".png")
if loss == "LS":
fig2 = plot_contour(study, params=["label_smoothing_rate", "learning_rate"])
fig2.write_image(report_folder_name + f"contour-{dataset}-{model}-{loss}" + ".png")

"""

with open(report_folder_name + report_file_name, "a") as file:
file.write(f"Value: {best_trial.value}, Params: {best_trial.params}, Dataset: {dataset}, Model: {model}, Loss: {loss} \n")


0
file.write(f"Value: {best_trial.value}, "
f"Params: {best_trial.params}, "
f"Dataset: {dataset}, "
f"Model: {model}, "
f"Loss: {loss} "
f"\n")

0 comments on commit c1d955c

Please sign in to comment.