Merge pull request #73 from sbintuitions/leaderboard/rosetta-glucosev2
[Leaderboard] Add `pkshatech/RoSEtta-base-ja` and `pkshatech/GLuCoSE-base-ja-v2`
akiFQC authored Sep 11, 2024
2 parents 66a9e7b + 73a304d commit d0a2218
Showing 3 changed files with 141 additions and 3 deletions.
62 changes: 62 additions & 0 deletions docs/results/pkshatech/GLuCoSE-base-ja-v2/summary.json
@@ -0,0 +1,62 @@
{
    "Classification": {
        "amazon_counterfactual_classification": {
            "macro_f1": 0.7528271196943096
        },
        "amazon_review_classification": {
            "macro_f1": 0.5561679575066396
        },
        "massive_intent_classification": {
            "macro_f1": 0.8058990735631814
        },
        "massive_scenario_classification": {
            "macro_f1": 0.8729457394926279
        }
    },
    "Reranking": {
        "esci": {
            "ndcg@10": 0.9289703513027785
        }
    },
    "Retrieval": {
        "jagovfaqs_22k": {
            "ndcg@10": 0.6842208748694516
        },
        "jaqket": {
            "ndcg@10": 0.666162910609933
        },
        "mrtydi": {
            "ndcg@10": 0.3679312414893066
        },
        "nlp_journal_abs_intro": {
            "ndcg@10": 0.8961561684616985
        },
        "nlp_journal_title_abs": {
            "ndcg@10": 0.9465973412523236
        },
        "nlp_journal_title_intro": {
            "ndcg@10": 0.7514787290834406
        }
    },
    "STS": {
        "jsick": {
            "spearman": 0.8499279029619572
        },
        "jsts": {
            "spearman": 0.8150603412605322
        }
    },
    "Clustering": {
        "livedoor_news": {
            "v_measure_score": 0.5165568486237136
        },
        "mewsc16": {
            "v_measure_score": 0.4970285237567235
        }
    },
    "PairClassification": {
        "paws_x_ja": {
            "binary_f1": 0.6239830208701804
        }
    }
}
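These raw scores map directly onto the leaderboard tables below, apparently as simple unweighted means scaled to percent: for pkshatech/GLuCoSE-base-ja-v2, the STS average of 83.25 is (0.84993 + 0.81506) / 2 ≈ 0.8325, and the mean of the six Retrieval ndcg@10 values above is ≈ 0.7188, matching the 71.88 in the Retrieval table.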
62 changes: 62 additions & 0 deletions docs/results/pkshatech/RoSEtta-base-ja/summary.json
@@ -0,0 +1,62 @@
{
    "Classification": {
        "amazon_counterfactual_classification": {
            "macro_f1": 0.7006688790331752
        },
        "amazon_review_classification": {
            "macro_f1": 0.5299983831023539
        },
        "massive_intent_classification": {
            "macro_f1": 0.7952268533717546
        },
        "massive_scenario_classification": {
            "macro_f1": 0.869707847800633
        }
    },
    "Reranking": {
        "esci": {
            "ndcg@10": 0.9267539503767978
        }
    },
    "Retrieval": {
        "jagovfaqs_22k": {
            "ndcg@10": 0.6379929234552755
        },
        "jaqket": {
            "ndcg@10": 0.6533570255483011
        },
        "mrtydi": {
            "ndcg@10": 0.3407337609040446
        },
        "nlp_journal_abs_intro": {
            "ndcg@10": 0.9577227924391506
        },
        "nlp_journal_title_abs": {
            "ndcg@10": 0.9282272189004226
        },
        "nlp_journal_title_intro": {
            "ndcg@10": 0.7938878816204916
        }
    },
    "STS": {
        "jsick": {
            "spearman": 0.8302539464008364
        },
        "jsts": {
            "spearman": 0.7961383132420531
        }
    },
    "Clustering": {
        "livedoor_news": {
            "v_measure_score": 0.5503116157834466
        },
        "mewsc16": {
            "v_measure_score": 0.389105324755125
        }
    },
    "PairClassification": {
        "paws_x_ja": {
            "binary_f1": 0.6218727662616155
        }
    }
}
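A minimal sketch of how one of these summary.json files rolls up into a leaderboard row, assuming unweighted means within each task and an overall Avg. taken over the unrounded task means (both assumptions are borne out by the 71.23 and 72.63 overall values in leaderboard.md below; the helper name and usage path are illustrative, not part of the repository's tooling):

```python
import json
from statistics import mean

# Task names exactly as they appear in the summary.json files above.
TASKS = ["Classification", "Reranking", "Retrieval",
         "STS", "Clustering", "PairClassification"]

def leaderboard_row(path: str) -> dict:
    """Per-task averages (percent, 2 decimals) plus the overall average."""
    with open(path) as f:
        summary = json.load(f)
    # Each dataset entry holds a single metric value (macro_f1, ndcg@10, ...),
    # so flattening the nested dicts yields one score per dataset.
    task_means = {
        task: mean(score
                   for dataset in summary[task].values()
                   for score in dataset.values())
        for task in TASKS
    }
    row = {task: round(100 * m, 2) for task, m in task_means.items()}
    # Assumption: the overall Avg. is the mean of the *unrounded* task means.
    row["Avg."] = round(100 * mean(task_means.values()), 2)
    return row

# Hypothetical usage against the file added in this commit:
# leaderboard_row("docs/results/pkshatech/RoSEtta-base-ja/summary.json")
# -> {'Classification': 72.39, 'Reranking': 92.68, 'Retrieval': 71.87,
#     'STS': 81.32, 'Clustering': 46.97, 'PairClassification': 62.19,
#     'Avg.': 71.23}
```

Averaging the already-rounded task scores instead would give 71.24 for RoSEtta-base-ja, so the unrounded mean is the variant that reproduces the table.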
20 changes: 17 additions & 3 deletions leaderboard.md
@@ -10,8 +10,10 @@ The summary shows the average scores within each task.
| OpenAI/text-embedding-3-large | **73.97** | **74.48** | 82.52 | **77.58** | **93.58** | 53.32 | 62.35 |
| cl-nagoya/ruri-large | 73.45 | 73.02 | 83.13 | 77.43 | 92.99 | 51.82 | 62.29 |
| cl-nagoya/ruri-base | 72.95 | 69.82 | 82.87 | 75.58 | 92.91 | **54.16** | 62.38 |
+| pkshatech/GLuCoSE-base-ja-v2 | 72.63 | 71.88 | **83.25** | 74.70 | 92.90 | 50.68 | 62.40 |
| cl-nagoya/ruri-small | 72.45 | 69.41 | 82.79 | 76.22 | 93.00 | 51.19 | 62.11 |
| intfloat/multilingual-e5-large | 71.65 | 70.98 | 79.70 | 72.89 | 92.96 | 51.24 | 62.15 |
+| pkshatech/RoSEtta-base-ja | 71.23 | 71.87 | 81.32 | 72.39 | 92.68 | 46.97 | 62.19 |
| OpenAI/text-embedding-3-small | 70.86 | 66.39 | 79.46 | 73.06 | 92.92 | 51.06 | 62.27 |
| pkshatech/GLuCoSE-base-ja | 70.44 | 59.02 | 78.71 | 76.82 | 91.90 | 49.78 | **66.39** |
| intfloat/multilingual-e5-base | 70.12 | 68.21 | 79.84 | 69.30 | 92.85 | 48.26 | 62.26 |
@@ -20,7 +22,7 @@ The summary shows the average scores within each task.
| cl-nagoya/sup-simcse-ja-base | 68.56 | 49.64 | 82.05 | 73.47 | 91.83 | 51.79 | 62.57 |
| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 66.89 | 47.38 | 78.99 | 73.13 | 91.30 | 48.25 | 62.27 |
| oshizo/sbert-jsnli-luke-japanese-base-lite | 66.75 | 43.00 | 76.60 | 76.61 | 91.56 | 50.33 | 62.38 |
-| cl-nagoya/sup-simcse-ja-large | 66.51 | 37.62 | **83.18** | 73.73 | 91.48 | 50.56 | 62.51 |
+| cl-nagoya/sup-simcse-ja-large | 66.51 | 37.62 | 83.18 | 73.73 | 91.48 | 50.56 | 62.51 |
| cl-nagoya/unsup-simcse-ja-large | 66.27 | 40.53 | 80.56 | 74.66 | 90.95 | 48.41 | 62.49 |
| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 66.23 | 46.36 | 77.49 | 73.30 | 91.16 | 46.68 | 62.38 |
| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 65.28 | 40.82 | 78.28 | 73.47 | 90.95 | 45.81 | 62.35 |
@@ -37,7 +39,9 @@ The summary shows the average scores within each task.
| Model | Avg. | jagovfaqs_22k<br>(ndcg@10) | jaqket<br>(ndcg@10) | mrtydi<br>(ndcg@10) | nlp_journal_abs_intro<br>(ndcg@10) | nlp_journal_title_abs<br>(ndcg@10) | nlp_journal_title_intro<br>(ndcg@10) |
|:----------------------------------------------|:----------|:-----------------------------|:----------------------|:----------------------|:-------------------------------------|:-------------------------------------|:---------------------------------------|
| OpenAI/text-embedding-3-large | **74.48** | 72.41 | 48.21 | 34.88 | **99.33** | 96.55 | **95.47** |
-| cl-nagoya/ruri-large | 73.02 | **76.68** | **61.74** | 38.03 | 87.12 | 96.58 | 77.97 |
+| cl-nagoya/ruri-large | 73.02 | **76.68** | 61.74 | 38.03 | 87.12 | 96.58 | 77.97 |
+| pkshatech/GLuCoSE-base-ja-v2 | 71.88 | 68.42 | **66.62** | 36.79 | 89.62 | 94.66 | 75.15 |
+| pkshatech/RoSEtta-base-ja | 71.87 | 63.80 | 65.34 | 34.07 | 95.77 | 92.82 | 79.39 |
| intfloat/multilingual-e5-large | 70.98 | 70.30 | 58.78 | **43.63** | 86.00 | 94.70 | 72.48 |
| cl-nagoya/ruri-base | 69.82 | 74.56 | 50.12 | 35.45 | 86.89 | 96.57 | 75.31 |
| cl-nagoya/ruri-small | 69.41 | 73.65 | 48.44 | 33.43 | 87.69 | **97.17** | 76.09 |
@@ -65,12 +69,14 @@ The summary shows the average scores within each task.
## STS
| Model | Avg. | jsick<br>(spearman) | jsts<br>(spearman) |
|:----------------------------------------------|:----------|:----------------------|:---------------------|
-| cl-nagoya/sup-simcse-ja-large | **83.18** | **83.80** | 82.57 |
+| pkshatech/GLuCoSE-base-ja-v2 | **83.25** | **84.99** | 81.51 |
+| cl-nagoya/sup-simcse-ja-large | 83.18 | 83.80 | 82.57 |
| cl-nagoya/ruri-large | 83.13 | 82.00 | **84.26** |
| cl-nagoya/ruri-base | 82.87 | 82.32 | 83.43 |
| cl-nagoya/ruri-small | 82.79 | 83.44 | 82.13 |
| OpenAI/text-embedding-3-large | 82.52 | 81.27 | 83.77 |
| cl-nagoya/sup-simcse-ja-base | 82.05 | 82.83 | 81.27 |
+| pkshatech/RoSEtta-base-ja | 81.32 | 83.03 | 79.61 |
| cl-nagoya/unsup-simcse-ja-large | 80.56 | 80.15 | 80.98 |
| intfloat/multilingual-e5-small | 80.07 | 81.50 | 78.65 |
| intfloat/multilingual-e5-base | 79.84 | 81.28 | 78.39 |
@@ -100,6 +106,7 @@ The summary shows the average scores within each task.
| oshizo/sbert-jsnli-luke-japanese-base-lite | 76.61 | 79.95 | 57.48 | 80.26 | 88.75 |
| cl-nagoya/ruri-small | 76.22 | 79.92 | 55.61 | 81.49 | 87.88 |
| cl-nagoya/ruri-base | 75.58 | 76.66 | 55.76 | 81.41 | 88.49 |
+| pkshatech/GLuCoSE-base-ja-v2 | 74.70 | 75.28 | 55.62 | 80.59 | 87.29 |
| cl-nagoya/unsup-simcse-ja-large | 74.66 | 76.79 | 55.37 | 79.13 | 87.36 |
| MU-Kindai/Japanese-DiffCSE-BERT-base | 73.77 | 78.10 | 51.56 | 78.79 | 86.63 |
| cl-nagoya/sup-simcse-ja-large | 73.73 | 73.21 | 54.76 | 79.23 | 87.72 |
@@ -113,6 +120,7 @@ The summary shows the average scores within each task.
| intfloat/multilingual-e5-large | 72.89 | 70.66 | 56.54 | 75.78 | 88.59 |
| MU-Kindai/Japanese-SimCSE-BERT-base-sup | 72.76 | 76.20 | 52.06 | 77.89 | 84.90 |
| sentence-transformers/LaBSE | 72.66 | 73.61 | 51.70 | 76.99 | 88.35 |
+| pkshatech/RoSEtta-base-ja | 72.39 | 70.07 | 53.00 | 79.52 | 86.97 |
| sentence-transformers/stsb-xlm-r-multilingual | 71.84 | 75.65 | 51.32 | 74.28 | 86.10 |
| pkshatech/simcse-ja-bert-base-clcmlp | 71.30 | 67.49 | 50.85 | 79.67 | 87.20 |
| OpenAI/text-embedding-ada-002 | 69.75 | 64.42 | 53.13 | 74.57 | 86.89 |
Expand All @@ -131,7 +139,9 @@ The summary shows the average scores within each task.
| intfloat/multilingual-e5-large | 92.96 | 92.96 |
| OpenAI/text-embedding-3-small | 92.92 | 92.92 |
| cl-nagoya/ruri-base | 92.91 | 92.91 |
+| pkshatech/GLuCoSE-base-ja-v2 | 92.90 | 92.90 |
| intfloat/multilingual-e5-base | 92.85 | 92.85 |
+| pkshatech/RoSEtta-base-ja | 92.68 | 92.68 |
| pkshatech/GLuCoSE-base-ja | 91.90 | 91.90 |
| cl-nagoya/sup-simcse-ja-base | 91.83 | 91.83 |
| sentence-transformers/LaBSE | 91.63 | 91.63 |
@@ -159,6 +169,7 @@ The summary shows the average scores within each task.
| intfloat/multilingual-e5-large | 51.24 | **57.13** | 45.34 |
| cl-nagoya/ruri-small | 51.19 | 50.96 | 51.41 |
| OpenAI/text-embedding-3-small | 51.06 | 54.57 | 47.55 |
+| pkshatech/GLuCoSE-base-ja-v2 | 50.68 | 51.66 | 49.70 |
| cl-nagoya/sup-simcse-ja-large | 50.56 | 50.75 | 50.38 |
| oshizo/sbert-jsnli-luke-japanese-base-lite | 50.33 | 46.77 | 53.89 |
| pkshatech/GLuCoSE-base-ja | 49.78 | 49.89 | 49.68 |
@@ -167,6 +178,7 @@ The summary shows the average scores within each task.
| intfloat/multilingual-e5-base | 48.26 | 55.03 | 41.49 |
| MU-Kindai/Japanese-SimCSE-BERT-large-unsup | 48.25 | 53.20 | 43.31 |
| pkshatech/simcse-ja-bert-base-clcmlp | 47.53 | 44.77 | 50.30 |
+| pkshatech/RoSEtta-base-ja | 46.97 | 55.03 | 38.91 |
| intfloat/multilingual-e5-small | 46.91 | 54.70 | 39.12 |
| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 46.68 | 53.02 | 40.35 |
| MU-Kindai/Japanese-SimCSE-BERT-large-sup | 45.81 | 48.45 | 43.17 |
@@ -188,6 +200,7 @@ The summary shows the average scores within each task.
| cl-nagoya/unsup-simcse-ja-base | 62.44 | 62.44 |
| pkshatech/simcse-ja-bert-base-clcmlp | 62.40 | 62.40 |
| OpenAI/text-embedding-ada-002 | 62.40 | 62.40 |
+| pkshatech/GLuCoSE-base-ja-v2 | 62.40 | 62.40 |
| MU-Kindai/Japanese-SimCSE-BERT-base-unsup | 62.38 | 62.38 |
| cl-nagoya/ruri-base | 62.38 | 62.38 |
| oshizo/sbert-jsnli-luke-japanese-base-lite | 62.38 | 62.38 |
@@ -204,6 +217,7 @@ The summary shows the average scores within each task.
| intfloat/multilingual-e5-base | 62.26 | 62.26 |
| sentence-transformers/stsb-xlm-r-multilingual | 62.20 | 62.20 |
| intfloat/multilingual-e5-small | 62.19 | 62.19 |
+| pkshatech/RoSEtta-base-ja | 62.19 | 62.19 |
| intfloat/multilingual-e5-large | 62.15 | 62.15 |
| cl-nagoya/ruri-small | 62.11 | 62.11 |
