diff --git a/src/jmteb/configs/jmteb.jsonnet b/src/jmteb/configs/jmteb.jsonnet
index f6b2bf0..66fd2dc 100644
--- a/src/jmteb/configs/jmteb.jsonnet
+++ b/src/jmteb/configs/jmteb.jsonnet
@@ -17,4 +17,6 @@
(import './tasks/jaqket.jsonnet') +
(import './tasks/nlp_journal_title_abs.jsonnet') +
(import './tasks/nlp_journal_title_intro.jsonnet') +
-(import './tasks/nlp_journal_abs_intro.jsonnet')
+(import './tasks/nlp_journal_abs_intro.jsonnet') +
+// Reranking
+(import './tasks/esci.jsonnet')
\ No newline at end of file
diff --git a/src/jmteb/configs/tasks/esci.jsonnet b/src/jmteb/configs/tasks/esci.jsonnet
new file mode 100644
index 0000000..0767336
--- /dev/null
+++ b/src/jmteb/configs/tasks/esci.jsonnet
@@ -0,0 +1,23 @@
+{
+ esci: {
+ class_path: 'RerankingEvaluator',
+ init_args: {
+ query_dataset: {
+ class_path: 'HfRerankingQueryDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'test',
+ name: 'esci-query',
+ },
+ },
+ doc_dataset: {
+ class_path: 'HfRerankingDocDataset',
+ init_args: {
+ path: 'sbintuitions/JMTEB',
+ split: 'corpus',
+ name: 'esci-corpus',
+ },
+ },
+ },
+ },
+}
diff --git a/src/jmteb/evaluators/__init__.py b/src/jmteb/evaluators/__init__.py
index b3da743..5de30ea 100644
--- a/src/jmteb/evaluators/__init__.py
+++ b/src/jmteb/evaluators/__init__.py
@@ -2,5 +2,6 @@
from .classification import ClassificationEvaluator
from .clustering import ClusteringEvaluator
from .pair_classification import PairClassificationEvaluator
+from .reranking import RerankingEvaluator
from .retrieval import RetrievalEvaluator
from .sts import STSEvaluator
diff --git a/src/jmteb/evaluators/reranking/__init__.py b/src/jmteb/evaluators/reranking/__init__.py
new file mode 100644
index 0000000..9931fcb
--- /dev/null
+++ b/src/jmteb/evaluators/reranking/__init__.py
@@ -0,0 +1,7 @@
+from .data import (
+ RerankingDoc,
+ RerankingDocDataset,
+ RerankingQuery,
+ RerankingQueryDataset,
+)
+from .evaluator import RerankingEvaluator
diff --git a/src/jmteb/evaluators/reranking/data.py b/src/jmteb/evaluators/reranking/data.py
new file mode 100644
index 0000000..8f7d55b
--- /dev/null
+++ b/src/jmteb/evaluators/reranking/data.py
@@ -0,0 +1,124 @@
+from __future__ import annotations
+
+import json
+from abc import ABC, abstractmethod
+
+import datasets
+import smart_open
+from loguru import logger
+from pydantic.dataclasses import dataclass
+
+
+@dataclass
+class RerankingQuery:
+ query: str
+ retrieved_docs: list[str | int]
+ relevance_scores: list[int]
+
+
+@dataclass
+class RerankingDoc:
+ id: str | int
+ text: str
+
+
+class RerankingQueryDataset(ABC):
+ @abstractmethod
+ def __len__(self):
+ pass
+
+ @abstractmethod
+ def __getitem__(self, idx) -> RerankingQuery:
+ pass
+
+
+class RerankingDocDataset(ABC):
+ @abstractmethod
+ def __len__(self):
+ pass
+
+ @abstractmethod
+ def __getitem__(self, idx) -> RerankingDoc:
+ pass
+
+
+class HfRerankingQueryDataset(RerankingQueryDataset):
+ def __init__(
+ self,
+ path: str,
+ split: str,
+ name: str | None = None,
+ query_key: str = "query",
+ retrieved_docs_key: str = "retrieved_docs",
+ relevance_scores_key: str = "relevance_scores",
+ ):
+ self.dataset = datasets.load_dataset(path, split=split, name=name, trust_remote_code=True)
+ self.query_key = query_key
+ self.retrieved_docs_key = retrieved_docs_key
+ self.relevance_scores_key = relevance_scores_key
+
+ def __len__(self):
+ return len(self.dataset)
+
+ def __getitem__(self, idx) -> RerankingQuery:
+ retrieved_docs = self.dataset[idx][self.retrieved_docs_key]
+ relevance_scores = self.dataset[idx][self.relevance_scores_key]
+
+ return RerankingQuery(
+ query=self.dataset[idx][self.query_key], retrieved_docs=retrieved_docs, relevance_scores=relevance_scores
+ )
+
+
+class JsonlRerankingQueryDataset(RerankingQueryDataset):
+ def __init__(
+ self,
+ filename: str,
+ query_key: str = "query",
+ retrieved_docs_key: str = "retrieved_docs",
+ relevance_scores_key: str = "relevance_scores",
+ ):
+ self.dataset: datasets.Dataset = datasets.load_dataset("json", data_files=filename)["train"]
+ self.query_key = query_key
+ self.retrieved_docs_key = retrieved_docs_key
+ self.relevance_scores_key = relevance_scores_key
+
+ def __len__(self):
+ return len(self.dataset)
+
+ def __getitem__(self, idx) -> RerankingQuery:
+ retrieved_docs = self.dataset[idx][self.retrieved_docs_key]
+ relevance_scores = self.dataset[idx][self.relevance_scores_key]
+
+ return RerankingQuery(
+ query=self.dataset[idx][self.query_key], retrieved_docs=retrieved_docs, relevance_scores=relevance_scores
+ )
+
+
+class HfRerankingDocDataset(RerankingDocDataset):
+ def __init__(self, path: str, split: str, name: str | None = None, id_key: str = "docid", text_key: str = "text"):
+ logger.info(f"Loading dataset {path} (name={name}) with split {split}")
+ self.dataset = datasets.load_dataset(path, split=split, name=name, trust_remote_code=True)
+ self.id_key = id_key
+ self.text_key = text_key
+
+ def __len__(self):
+ return len(self.dataset)
+
+ def __getitem__(self, idx) -> RerankingDoc:
+ return RerankingDoc(id=self.dataset[idx][self.id_key], text=self.dataset[idx][self.text_key])
+
+
+class JsonlRerankingDocDataset(RerankingDocDataset):
+ def __init__(self, filename: str, id_key: str = "docid", text_key: str = "text"):
+        logger.info(f"Loading dataset from {filename}")
+ with smart_open.open(filename, "r", encoding="utf-8", errors="ignore") as fin:
+ corpus = [json.loads(line.strip()) for line in fin.readlines()]
+ self.dataset = corpus
+ self.id_key = id_key
+ self.text_key = text_key
+
+ def __len__(self):
+ return len(self.dataset)
+
+ def __getitem__(self, idx) -> RerankingDoc:
+ return RerankingDoc(id=self.dataset[idx][self.id_key], text=self.dataset[idx][self.text_key].strip())
diff --git a/src/jmteb/evaluators/reranking/evaluator.py b/src/jmteb/evaluators/reranking/evaluator.py
new file mode 100644
index 0000000..93e2962
--- /dev/null
+++ b/src/jmteb/evaluators/reranking/evaluator.py
@@ -0,0 +1,166 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from os import PathLike
+from pathlib import Path
+from typing import Callable, TypeVar
+
+import numpy as np
+import torch
+import tqdm
+from loguru import logger
+from torch import Tensor
+
+from jmteb.embedders.base import TextEmbedder
+from jmteb.evaluators.base import EmbeddingEvaluator, EvaluationResults
+
+from .data import RerankingDocDataset, RerankingQueryDataset
+
+T = TypeVar("T")
+
+
+class RerankingEvaluator(EmbeddingEvaluator):
+ """
+ Evaluator for reranking task.
+
+ Args:
+ query_dataset (RerankingQueryDataset): query dataset
+ doc_dataset (RerankingDocDataset): document dataset
+        ndcg_at_k (list[int] | None): top k documents to consider in NDCG (Normalized Discounted Cumulative Gain).
+ """
+
+ def __init__(
+ self,
+ query_dataset: RerankingQueryDataset,
+ doc_dataset: RerankingDocDataset,
+ ndcg_at_k: list[int] | None = None,
+ ) -> None:
+ self.query_dataset = query_dataset
+ self.doc_dataset = doc_dataset
+ self.ndcg_at_k = ndcg_at_k or [10, 20, 40]
+ self.main_metric = f"ndcg@{self.ndcg_at_k[0]}"
+
+ def __call__(
+ self,
+ model: TextEmbedder,
+ cache_dir: str | PathLike[str] | None = None,
+ overwrite_cache: bool = False,
+ ) -> EvaluationResults:
+ if cache_dir is not None:
+ Path(cache_dir).mkdir(parents=True, exist_ok=True)
+
+ query_embeddings = model.batch_encode_with_cache(
+ text_list=[item.query for item in self.query_dataset],
+ cache_path=Path(cache_dir) / "query.bin" if cache_dir is not None else None,
+ overwrite_cache=overwrite_cache,
+ )
+
+ doc_embeddings = model.batch_encode_with_cache(
+ text_list=[item.text for item in self.doc_dataset],
+ cache_path=Path(cache_dir) / "corpus.bin" if cache_dir is not None else None,
+ overwrite_cache=overwrite_cache,
+ )
+
+ doc_indices = {item.id: i for i, item in enumerate(self.doc_dataset)}
+
+ logger.info("Start reranking")
+ results: dict[str, dict[str, float]] = {}
+
+ dist_metrics: dict[str, Callable] = {
+ "cosine_similarity": Similarities.cosine_similarity,
+ "dot_score": Similarities.dot_score,
+ "euclidean_distance": Similarities.euclidean_distance,
+ }
+
+ for dist_metric, dist_func in dist_metrics.items():
+ dist_scores: dict[str, float] = {}
+
+ with tqdm.tqdm(total=len(self.query_dataset), desc="Reranking docs") as pbar:
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ reranked_docs_list = []
+ for i, item in enumerate(self.query_dataset):
+ query_embedding = convert_to_tensor(query_embeddings[i], device=device)
+ doc_embedding = convert_to_tensor(
+ np.array(
+ [doc_embeddings[doc_indices[retrieved_doc]] for retrieved_doc in item.retrieved_docs]
+ ),
+ device=device,
+ )
+ similarity = dist_func(query_embedding, doc_embedding)
+
+ argsorted_indices = torch.argsort(
+ similarity,
+ dim=1,
+ descending=True,
+ )[0]
+ reranked_docs = [item.retrieved_docs[argsorted_indice] for argsorted_indice in argsorted_indices]
+ reranked_docs_list.append(reranked_docs)
+                    pbar.update(1)
+
+ retrieved_docs_list = [item.retrieved_docs for item in self.query_dataset]
+ relevance_scores_list = [item.relevance_scores for item in self.query_dataset]
+
+ for k in self.ndcg_at_k:
+ dist_scores[f"ndcg@{k}"] = ndcg_at_k(retrieved_docs_list, relevance_scores_list, reranked_docs_list, k)
+
+ results[dist_metric] = dist_scores
+
+ return EvaluationResults(
+ metric_name=self.main_metric,
+ metric_value=max([v[self.main_metric] for v in results.values()]),
+ details=results,
+ )
+
+
+def ndcg_at_k(
+ retrieved_docs_list: list[list[T]], relevance_scores_list: list[list[T]], reranked_docs_list: list[list[T]], k: int
+) -> float:
+ total_ndcg_scores = 0
+ for retrieved_docs, relevance_scores, reranked_docs in zip(
+ retrieved_docs_list, relevance_scores_list, reranked_docs_list
+ ):
+ dcg = 0
+ for rank, doc_id in enumerate(reranked_docs[:k], start=1):
+ relevance_score = relevance_scores[retrieved_docs.index(doc_id)]
+ dcg += relevance_score / np.log2(rank + 1)
+ idcg = sum(
+ [
+ relevance_score / np.log2(rank + 1)
+ for rank, relevance_score in enumerate(sorted(relevance_scores)[::-1][:k], start=1)
+ ]
+ )
+        total_ndcg_scores += dcg / idcg if idcg > 0 else 0.0
+ return total_ndcg_scores / len(retrieved_docs_list)
+
+
+def convert_to_tensor(embeddings: np.ndarray | Tensor, device: str) -> Tensor:
+ if not isinstance(embeddings, Tensor):
+ embeddings = torch.tensor(embeddings)
+ if len(embeddings.shape) == 1:
+ embeddings = embeddings.unsqueeze(0)
+ return embeddings.to(device=device)
+
+
+@dataclass
+class Similarities:
+ @staticmethod
+ def cosine_similarity(e1: Tensor, e2: Tensor) -> Tensor:
+ e1_norm = torch.nn.functional.normalize(e1, p=2, dim=1)
+ e2_norm = torch.nn.functional.normalize(e2, p=2, dim=1)
+ return torch.mm(e1_norm, e2_norm.transpose(0, 1))
+
+ @staticmethod
+    def manhattan_distance(e1: Tensor, e2: Tensor) -> Tensor:
+ # the more distant, the less similar, so we use 100 / dist as similarity
+ x = e1.unsqueeze(1)
+ y = e2.unsqueeze(0).repeat(e1.shape[0], 1, 1)
+ return 100 / ((x - y).abs().sum(dim=-1) + 1e-4)
+
+ @staticmethod
+ def euclidean_distance(e1: Tensor, e2: Tensor) -> Tensor:
+ # the more distant, the less similar, so we use 100 / dist as similarity
+ return 100 / (torch.cdist(e1, e2) + 1e-4)
+
+ @staticmethod
+ def dot_score(e1: Tensor, e2: Tensor) -> Tensor:
+ return torch.mm(e1, e2.transpose(0, 1))
diff --git a/tests/evaluator/test_reranking_evaluator.py b/tests/evaluator/test_reranking_evaluator.py
new file mode 100644
index 0000000..ceb863d
--- /dev/null
+++ b/tests/evaluator/test_reranking_evaluator.py
@@ -0,0 +1,61 @@
+from jmteb.evaluators.reranking import (
+ RerankingDoc,
+ RerankingDocDataset,
+ RerankingEvaluator,
+ RerankingQuery,
+ RerankingQueryDataset,
+)
+from jmteb.evaluators.reranking.data import (
+ JsonlRerankingDocDataset,
+ JsonlRerankingQueryDataset,
+)
+
+
+class DummyDocDataset(RerankingDocDataset):
+ def __init__(self):
+ self._items = [RerankingDoc(id=str(i), text=f"dummy document {i}") for i in range(30)]
+
+ def __len__(self):
+ return len(self._items)
+
+ def __getitem__(self, idx):
+ return self._items[idx]
+
+
+class DummyQueryDataset(RerankingQueryDataset):
+ def __init__(self):
+ self._items = [
+ RerankingQuery(query=f"dummy query {i}", retrieved_docs=[str(i)], relevance_scores=[1]) for i in range(10)
+ ]
+
+ def __len__(self):
+ return len(self._items)
+
+ def __getitem__(self, idx):
+ return self._items[idx]
+
+
+def test_reranking_evaluator(embedder):
+ evaluator = RerankingEvaluator(
+ query_dataset=DummyQueryDataset(),
+ doc_dataset=DummyDocDataset(),
+ )
+ results = evaluator(model=embedder)
+
+ assert results.metric_name == "ndcg@10"
+ assert set(results.details.keys()) == {"cosine_similarity", "euclidean_distance", "dot_score"}
+ for scores in results.details.values():
+ for score in scores.keys():
+ assert any(score.startswith(metric) for metric in ["ndcg"])
+
+
+def test_jsonl_reranking_datasets():
+ query = JsonlRerankingQueryDataset(
+ filename="tests/test_data/dummy_reranking/dev.jsonl",
+ )
+ assert len(query) == 10
+
+ corpus = JsonlRerankingDocDataset(
+ filename="tests/test_data/dummy_reranking/corpus.jsonl",
+ )
+ assert len(corpus) == 10
diff --git a/tests/test_data/dummy_reranking/corpus.jsonl b/tests/test_data/dummy_reranking/corpus.jsonl
new file mode 100644
index 0000000..02c3e54
--- /dev/null
+++ b/tests/test_data/dummy_reranking/corpus.jsonl
@@ -0,0 +1,10 @@
+{"docid": "B002JK6QTS", "text": "Nike Men's Air Force 1 '07 Shoes 315122 White/White 10.5: Originally released in 1982, the Nike Air Force 1 was the first Nike model to feature \"Air\" technology. This legendary basketball sneaker was designed by Bruce Kilgore, and named after the aircraft that carries the President of the United States, the Air Force One. The Air Force 1 is Nike's most popular sneaker to date, has been produced in nearly 2,000 different colorways, and is available in low, mid, and high-top models."}
+{"docid": "B01M4MCUAF", "text": "TruSkin ビタミンCフェイシャルセラム 1オンス(約30ml): 使い方: 洗顔後に化粧水などで肌を整えます。セラム3〜5滴を目安に、手の指または手のひらを使って顔に伸ばします。頬全体から目元にかけてやさしくなじませてください。目に入らないようにご注意ください。その後に乳液やクリームをご使用ください。顔、首、デコルテ、手の甲など、気になる部分にお使いください。"}
+{"docid": "B07PRZYJHZ", "text": "Lol Surprise Winter Disco Glitter Globe Series: L.O.L.サプライズ!に、スノーをテーマにしたNewサプライズ「ウィンターディスコ」が登場! グリッターグローブは、見たことのない新しいお水でサプライズが楽しめる! ドールにお水を飲ませると、透明になったヘアー部分に水が溜まり、スノードームのようにグリッターが舞いヘアカラーがチェンジします。 ドールをくるくる回したり、さかさまにしたりして、キラキラさせてみてね。"}
+{"docid": "B07HRR6K4G", "text": "LOLサプライズ L.O.L. サプライズ! バブリー サプライズ ピンク ドール & ペット プレゼント 誕生日 ギフト 子供 おもちゃ lolサプライズ: サプライズボールを開いてキネティックサンドを掘り下げることで6個のサプライズ! *色:ピンク 【内容】ハート型スタンド、6個のサプライズ 注意:バスタブ用ではありません。完全に溶解するまで触れないでください。 *表記は全て英語で和訳は付属しません。 【注意】小さなパーツなどで窒息の危険がありますので3歳未満のお子様にはお与えにならないでください。"}
+{"docid": "B0000U11LW", "text": "Intex Kiddie Pool - Kid's Summer Sunset Glow Design - 58\" x 13\": Your little swimmers will want to swim until the sun goes down in this Intex Sunset Glow Kids Pool. With fun, vibrant colors and a soft inflatable floor, there's no way your kiddos won't have a blast swimming in this pool. It's the perfect backyard pool to cool off in on those hot summer days. It allows for 9.5 inches of water and is constructed of durable vinyl for long-term use. This pool features a cushioned, inflatable floor that keeps your little backyard swimming bums comfortable and safe. It's perfect for kids ages 3 and up and provides hours of fun in the sun. You and your kids can swim until twilight and watch the sun go down as they spash around in the Sunset Glow Kids Pool."}
+{"docid": "B01NASZ0UM", "text": "世紀ワイヤレスリモート制御電気家庭用のコンセントスイッチアプライアンス、ホワイト(学習コード、3rx-1tx ): ワット数 1200ワット 電球電圧120ボルト。 屋内でのみ使用できます。"}
+{"docid": "B07H256MBK", "text": "Anker USB C to Lightning Cable (6ft, MFi Certified) Powerline II for iPhone 13 13 Pro 12 Pro Max 12 11 X XS XR 8 Plus, AirPods Pro, Supports Power Delivery (Charger Not Included)(White): Model Number: A8633
PowerLine II USB-C to Lightning CableThe Cable That’s Yours For Life
From Strength to StrengthPowerLine II tolerates being bent over 12,000 times, lasting an enormous 12x longer than other cables.
Certified CompatibilityMFi certification means total peace of mind because PowerLine II is completely Apple authorized. Designed to work flawlessly with iPhone, iPad, iPod or any other device with a Lightning port.
Lasts a LifetimeTo show our belief in PowerLine II, we are offering a hassle-free replacement for all quality issues. Not for half a year, not for 18 months, but for an entire lifetime. It may be the last cable you’ll ever need to buy.
Note:
Power Delivery Compatible iOS Devices:
iPhone 13, 13 Pro, 13 mini, 13 Pro max, 12, 12 mini, 12 Pro, 12 Pro max, 11, 11 Pro, 11 Pro Max, 8, 8 Plus, X, XS, XR, XS Max, iPad 8, iPad Pro (10.5-inch), iPad Pro (12.9-inch) 1st generation, iPad Pro (12.9-inch) 2nd generation, AirPods, AirPods with Wireless Charging Case and AirPods Pro. Compatible Devices (limited charging speeds) iPhone 7 / 7 Plus /iPhone SE(1st generation) / 6S / 6S Plus / 6 / 6 Plus / 5 / 5S / 5C"}
+{"docid": "B092QS1JVS", "text": "韓国語書籍, エッセイ/작은 별이지만 빛나고 있어 – 소윤/존재만으로도 충분한 너에게 해주고 싶은 말/韓国より配送: “존재만으로도 충분한 너에게 해주고 싶은 말.\" 고단한 일상에서 가끔 우리는 각자의 빛을 잊고 살 때가 있다. 빛나야 하는 이유도 점점 내가 아닌 타인, 혹은 다른 이유가 되어버리는 세상. 세상이 제멋대로 정의한 거대한 별만 바라보느라, 내 안의 빛을 보고 있지 못한 우리에게 건네는 작가의 위로. 잊지 말자, 우리는 모두 각자의 생김새대로, 제각기 다른 방식으로 빛나고 있다는 것을."}
+{"docid": "B0928KWTWX", "text": "韓国書籍, エッセイ/엑소(EXO) 멤버 세훈 (오세훈) 추천운다고 달라지는 일은 아무것도 없겠지만 - 박준/EXO-L 권장도서/韓国より配: 2017년 7월 1일에 출간한 박준 시인의 첫 산문 「운다고 달라지는 일은 아무것도 없겠지만」을 2020년 같은 날에 20만 부 기념 리커버 에디션으로 다시금 선보인다. 20만 부 기념 리커버 에디션은 초판 때 표지로 삼았던 것처럼 기드온 루빈의 작품이다. 또한 시인의 신작 산문 '바둑이점'을 커버에 수록하였다. 「당신의 이름을 지어다가 며칠은 먹었다」는 박준 시인이 그간 제 시를 함께 읽어주고 함께 느껴주고 함께 되새겨준 여러분들에게 보내는 한 권의 답서이자 연서이다. '시인 박준'이라는 '사람'을 정통으로 관통하는 글이 수록되어 있다. 총 4부로 나뉘어 있지만, 그런 나눔에 상관없이 아무 페이지나 살살 넘겨봐도 또 아무 대목이나 슬슬 읽어봐도 그 이야기의 편린들이 유기적으로 연결되어 있음을 확인하게 해주는 글이다. 드러낼 작정 없이 절로 드러난 이야기의 어린 손들을 우리들은 읽어가는 내내 잡기 바쁜데 불쑥 잡은 그 어린 손들이 우리들 손바닥을 펴서 손가락으로 적어주는 말들을 읽자면 그 이름에 가난이 있었고, 이별이 있었고, 죽음이 있었다. 더불어 이 책은 시와 산문의 유연한 결합체임을 증명해 보인다. 어느 날 보면 한 권의 시집으로 읽히고 또 어느 날 보면 한 권의 산문으로 읽힌다. 특히나 이번 산문집에서는 박준 시인만의 세심하면서도 집요한 관찰력이 소환해낸 추억의 장면들이 우리를 자주 눈물짓게 한다."}
+{"docid": "B091YBZ9F8", "text": "韓国書籍, 한국어 에세이/나는 내 파이를 구할 뿐 인류를 구하러 온 게 아니라 – 김진아 Kim Jina/자기 몫을 되찾고 싶은 여성들을 위한 야망 에세이/韓国より配送: 프리랜서 카피라이터이자 페미니즘 공간 '울프소셜클럽Woolf Social Club'을 운영하는 김진아가 탈혼과 유사 경력단절을 통해 경제적 위기감과 여성으로서의 자기 인식을 절박하게 느꼈던 시간들에 대한 솔직한 고백을 담고 있는 책이다. 여성이 국가, 종교, 제도, 관습 어디에도 종속되지 않고 독립된 자아로서 존엄 있게 존재하는 것. 저자는 이를 실현하기 위해 우리가 가져야 할 것, 하지만 아직 갖지 못한 것에 대해 촘촘히 이야기한다. 여성이 남성에게 빼앗긴 파이를 가져오는 데 필요한 건 무엇일까? 여성이 사회와 쉽게 단절되지 않고 존엄을 지키며 보란 듯이 살아남기 위해 필요한 것은 무엇일까? 저자는 '야망'으로 무장한 여성들이 '우먼소셜클럽'을 구축하고 '정치' 세력으로 성장해야 한다고 말한다. 페미니즘이 '파이 싸움'이라는 걸 이해하고 나면 무엇이 여성의 파이를 가져오는 데 도움이 되는지 가릴 수 있게 된다는 것이다. 이 책은 가부장제를 이탈해 매일의 곤경과 사투하며 자기만의 영역을 만들어가고 있는 한 40대 여성이 빠른 속도로 증가하고 있는 젊은 페미니스트와 비혼 여성들에게 보내는 연대의 메시지다. 더불어 지금 각성한 야망 있는 10대, 20대가 최종 결정권자의 자리에 올라 여성의 관점에서 만족스러운 아이디어와 메시지를 승인하는 날이 올 때까지 아낌없는 지지를 보내겠다는 선언이다."}
\ No newline at end of file
diff --git a/tests/test_data/dummy_reranking/dev.jsonl b/tests/test_data/dummy_reranking/dev.jsonl
new file mode 100644
index 0000000..4d91fd8
--- /dev/null
+++ b/tests/test_data/dummy_reranking/dev.jsonl
@@ -0,0 +1,10 @@
+{"query": "エアフォース1 07", "retrieved_docs": ["B002JK6QTS", "B004C4Q9LE", "B083WD42TW", "B06XRYMTW4", "B01MTTYEMO", "B00PPW6NZO", "B00IR9HOZK", "B083924CTQ"], "relevance_scores": [2, 3, 3, 3, 3, 3, 3, 3]}
+{"query": "cc melano serum", "retrieved_docs": ["B01M4MCUAF", "B01M4MCUAF", "B010FOFSH0", "B010FOFSH0", "B08W8H3JJF", "B08WMGW2S4", "B08WMJ1WF1", "B08WMJ7WN4", "B08WMJB5WV", "B08WMJC5BC", "B091HV7WZ5", "B08SC333QW", "B096ZPNKHP", "B098D3K3FS", "B098WMF8VH", "B099QSPPR5", "B08ZJ8JJPZ", "B08WMJ1WF4", "B088CXWV86", "B00K6BK1MA", "B00ORM1HCC", "B01LYH0VMF", "B086YCF7F1", "B07HKBHTFN", "B0845HJ1D9", "B07GC1K3QP", "B07GCG4MF1"], "relevance_scores": [2, 2, 3, 3, 2, 3, 2, 3, 3, 3, 2, 2, 2, 2, 3, 2, 0, 2, 3, 3, 3, 3, 2, 3, 3, 2, 3]}
+{"query": "lol ヘアーゴールズ", "retrieved_docs": ["B07PRZYJHZ", "B07HRR6K4G", "B07ZKGHCBT", "B07TWGSHWY"], "relevance_scores": [2, 2, 2, 3]}
+{"query": "ビニールプール", "retrieved_docs": ["B0000U11LW", "B07R4SP8MT", "B08974PPD5", "B088ZTFCRC", "B0892BBWR8", "B08965WNGF", "B0896P26GD", "B08979WJ1L", "B086VZN27F", "B00BGYQ63O"], "relevance_scores": [3, 1, 3, 3, 3, 1, 3, 3, 3, 1]}
+{"query": "電源 オンオフ リモコン", "retrieved_docs": ["B01NASZ0UM", "B01NASZ0UM", "B07B7NXV4R", "B07B7NXV4R", "B07TTH5TMW", "B07TTH5TMW", "B09C8986DP", "B0897767QY", "B08GK7GXD7", "B08F4Z3KK1", "B08C28GVHN", "B0897H4V3J", "B08P6YS13K", "B08MZRV32C", "B08R5YC2VJ", "B08LN2Y31T", "B07XMKW31K", "B000J3ORUM", "B01MUAR8G2", "B07B3DYCQ9"], "relevance_scores": [3, 3, 3, 3, 3, 3, 0, 3, 2, 3, 3, 3, 3, 0, 2, 2, 3, 3, 2, 3]}
+{"query": "anker 充電器 pd4", "retrieved_docs": ["B07H256MBK", "B07H256MBK", "B06ZXXQGZ8", "B01N533KZH", "B07SQPZG81", "B072KBG9W4", "B01IVTGE4M", "B00VTJS58U", "B07ZNFR6BF", "B07DPQ7WB7", "B019GNUT0C", "B07QTQ3R2J", "B082NJ4RK2", "B07SDF9JFM", "B07RRWDTJB", "B0725VK5ZC"], "relevance_scores": [1, 1, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]}
+{"query": "純正 ケーブル iphone", "retrieved_docs": ["B07H256MBK", "B07H256MBK", "B086YQNVDQ", "B01N40PO2M", "B07Q611G3S", "B08V8P6F5L", "B09DPWQTNF", "B092SFC6KH", "B09682FLRN", "B097JSWDKJ", "B099F32C7Y", "B09C5HQFGQ", "B097D5F8MS", "B091BS131D", "B07QXFG6HW", "B08VRR315H", "B08DP2MZ9D", "B096VD1G77", "B08XWRZCM2", "B08T9B857V", "B083XVYVRP", "B08FDTSLRF", "B09CGRLWCS", "B08L4QNJLC", "B08BLN5YFK", "B08939VGWQ", "B08W25QZ92", "B09H3YC57X", "B08MW5KHQZ", "B07WDPH515", "B081GQMWCY", "B08FSTPF7K"], "relevance_scores": [3, 3, 2, 3, 2, 3, 0, 2, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3]}
+{"query": "jellycat(ジェリーキャット) バシュフル バニー s ぬいぐるみ ウサギ 座高15cm", "retrieved_docs": ["B092QS1JVS", "B08VDCRGKK", "B08XZ27RMT", "B00163U4LK", "B00163U4LK", "B07WKJKG6D", "B07T8LS1ZV", "B0867FDNYR", "B0753HQC51", "B08VNKVXVG", "B08YN26H9Z", "B0995WKR5K", "B09DPSS6TV", "B08JGNM8TR", "B06X3VG7XS", "B0183DGK5O", "B00Q0WT3NC", "B00SWNQMXQ"], "relevance_scores": [0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2]}
+{"query": "scandal 会わないつもりの、元気でね", "retrieved_docs": ["B092QS1JVS", "B08X4D26FN", "B08WLQ3676", "B09C5MKMDQ", "B08VDCRGKK", "B08NGFD7MF", "B0963WWFDM", "B08Y5JQW53", "B08RRPMG5G", "B08YD5MCW1", "B0953112LF", "B08YDB9MKM", "B00I7Y7VF8", "B08Y55WXFS", "B09D7JRR87", "B09DNH1R8S"], "relevance_scores": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3]}
+{"query": "friday 桃月なしこ", "retrieved_docs": ["B0928KWTWX", "B08XJVN5FK", "B09B3477VQ", "B08X4D26FN", "B096JGZC2F", "B09C5MKMDQ", "B09C7MS48S", "B0922CKJPV", "B08YD18573", "B08XZ27RMT", "B08GS4729H", "B08NGFD7MF", "B088NP6JVW", "B0957B5MW5", "B08YDB9MKM", "B08YN6R6TL", "B089GBVNDN", "B095P9ZQ72", "B08P9R6WDQ", "B092VCDHJ4", "B08Y55WXFS", "B09789DGNG", "B0982F2HBV", "B09BJGCMKH", "B095YW5NTF", "B07RZQLP8G", "B083XYVBBP", "B07RML8546", "B08T7D77K8"], "relevance_scores": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 3, 2, 2, 0]}
\ No newline at end of file