From 8fa4e51a75cf3326a500eddf633a0927a19f8bce Mon Sep 17 00:00:00 2001
From: taharallouche
Date: Sat, 26 Oct 2024 11:31:42 +0200
Subject: [PATCH] feat: interface

---
 hakeem/core/aggregation/aggregators.py        |  64 ----------
 .../core/aggregation/aggregators/__init__.py  |   0
 .../core/aggregation/aggregators/condorcet.py |  30 +++++
 .../core/aggregation/aggregators/mallows.py   |  31 +++++
 hakeem/core/aggregation/base.py               |  60 ++++++---
 hakeem/paper_results/evaluation/accuracy.py   |  36 +++---
 tests/core/aggregation/test_base.py           | 120 +++++++++++++++++-
 7 files changed, 239 insertions(+), 102 deletions(-)
 delete mode 100644 hakeem/core/aggregation/aggregators.py
 create mode 100644 hakeem/core/aggregation/aggregators/__init__.py
 create mode 100644 hakeem/core/aggregation/aggregators/condorcet.py
 create mode 100644 hakeem/core/aggregation/aggregators/mallows.py

diff --git a/hakeem/core/aggregation/aggregators.py b/hakeem/core/aggregation/aggregators.py
deleted file mode 100644
index 2b89d93..0000000
--- a/hakeem/core/aggregation/aggregators.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import numpy as np
-import pandas as pd
-
-from hakeem.core.aggregation.base import WeightedAggregator
-from hakeem.core.utils.inventory import DEFAULT_RELIABILITY_BOUNDS
-
-
-class CondorcetAggregator(WeightedAggregator):
-    _name: str = "Condorcet Aggregator"
-
-    def __init__(
-        self,
-        lower_reliability_bound: float = DEFAULT_RELIABILITY_BOUNDS.lower,
-        upper_reliability_bound: float = DEFAULT_RELIABILITY_BOUNDS.upper,
-    ):
-        self.lower_reliability_bound = lower_reliability_bound
-        self.upper_reliability_bound = upper_reliability_bound
-
-    def _compute_weights(self, annotations: pd.DataFrame) -> pd.Series:
-        vote_size = annotations.sum(axis=1)
-        reliabilities = (len(annotations.columns) - vote_size - 1) / (
-            len(annotations.columns) - 2
-        )
-        reliabilities = reliabilities.clip(
-            self.lower_reliability_bound, self.upper_reliability_bound
-        )
-        weights = np.log(reliabilities / (1 - reliabilities))
-
-        return weights
-
-
-class StandardApprovalAggregator(WeightedAggregator):
-    _name: str = "Standard Approval Aggregator"
-
-    @staticmethod
-    def _compute_weights(annotations: pd.DataFrame) -> pd.Series:
-        return pd.Series(1, index=annotations.index)
-
-
-class EuclidAggregator(WeightedAggregator):
-    _name: str = "Euclidean Mallow Aggregator"
-
-    @staticmethod
-    def _compute_weights(annotations: pd.DataFrame) -> pd.Series:
-        vote_size = annotations.sum(axis=1)
-        return np.sqrt(vote_size + 1) - np.sqrt(vote_size - 1)
-
-
-class JaccardAggregator(WeightedAggregator):
-    _name: str = "Jaccard Mallow Aggregator"
-
-    @staticmethod
-    def _compute_weights(annotations: pd.DataFrame) -> pd.Series:
-        vote_size = annotations.sum(axis=1)
-        return 1 / vote_size
-
-
-class DiceAggregator(WeightedAggregator):
-    _name: str = "Dice Mallow Aggregator"
-
-    @staticmethod
-    def _compute_weights(annotations: pd.DataFrame) -> pd.Series:
-        vote_size = annotations.sum(axis=1)
-        return 2 / (vote_size + 1)
diff --git a/hakeem/core/aggregation/aggregators/__init__.py b/hakeem/core/aggregation/aggregators/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/hakeem/core/aggregation/aggregators/condorcet.py b/hakeem/core/aggregation/aggregators/condorcet.py
new file mode 100644
index 0000000..40481e1
--- /dev/null
+++ b/hakeem/core/aggregation/aggregators/condorcet.py
@@ -0,0 +1,30 @@
+import numpy as np
+import pandas as pd
+
+from hakeem.core.aggregation.base import WeightedAggregator
+from hakeem.core.utils.inventory import COLUMNS, DEFAULT_RELIABILITY_BOUNDS
+
+
+class CondorcetAggregator(WeightedAggregator):
+    def __init__(
+        self,
+        lower_reliability_bound: float = DEFAULT_RELIABILITY_BOUNDS.lower,
+        upper_reliability_bound: float = DEFAULT_RELIABILITY_BOUNDS.upper,
+        task_column: str = COLUMNS.question,
+        worker_column: str = COLUMNS.voter,
+    ):
+        super().__init__(task_column, worker_column)
+        self.lower_reliability_bound = lower_reliability_bound
+        self.upper_reliability_bound = upper_reliability_bound
+
+    def compute_weights(self, annotations: pd.DataFrame) -> pd.Series:
+        vote_size = annotations.sum(axis=1)
+        reliabilities = (len(annotations.columns) - vote_size - 1) / (
+            len(annotations.columns) - 2
+        )
+        reliabilities = reliabilities.clip(
+            self.lower_reliability_bound, self.upper_reliability_bound
+        )
+        weights = np.log(reliabilities / (1 - reliabilities))
+
+        return weights
diff --git a/hakeem/core/aggregation/aggregators/mallows.py b/hakeem/core/aggregation/aggregators/mallows.py
new file mode 100644
index 0000000..0e03d18
--- /dev/null
+++ b/hakeem/core/aggregation/aggregators/mallows.py
@@ -0,0 +1,31 @@
+import numpy as np
+import pandas as pd
+
+from hakeem.core.aggregation.base import WeightedAggregator
+
+
+class StandardApprovalAggregator(WeightedAggregator):
+    @staticmethod
+    def compute_weights(annotations: pd.DataFrame) -> pd.Series:
+        return pd.Series(1, index=annotations.index)
+
+
+class EuclidAggregator(WeightedAggregator):
+    @staticmethod
+    def compute_weights(annotations: pd.DataFrame) -> pd.Series:
+        vote_size = annotations.sum(axis=1)
+        return np.sqrt(vote_size + 1) - np.sqrt(vote_size - 1)
+
+
+class JaccardAggregator(WeightedAggregator):
+    @staticmethod
+    def compute_weights(annotations: pd.DataFrame) -> pd.Series:
+        vote_size = annotations.sum(axis=1)
+        return 1 / vote_size
+
+
+class DiceAggregator(WeightedAggregator):
+    @staticmethod
+    def compute_weights(annotations: pd.DataFrame) -> pd.Series:
+        vote_size = annotations.sum(axis=1)
+        return 2 / (vote_size + 1)
diff --git a/hakeem/core/aggregation/base.py b/hakeem/core/aggregation/base.py
index 165df38..694982e 100644
--- a/hakeem/core/aggregation/base.py
+++ b/hakeem/core/aggregation/base.py
@@ -6,25 +6,55 @@
 
 
 class Aggregator(ABC):
-    _name: str
+    def __init__(
+        self, task_column: str = COLUMNS.question, worker_column: str = COLUMNS.voter
+    ) -> None:
+        self.task_column = task_column
+        self.worker_column = worker_column
+
+    def fit_predict(self, annotations: pd.DataFrame) -> pd.DataFrame:
+        annotations = self._coerce_annotations(annotations)
+        return self._aggregate(annotations)
 
     @abstractmethod
-    def aggregate(self, annotations: pd.DataFrame) -> pd.DataFrame:
+    def _aggregate(self, annotations: pd.DataFrame) -> pd.DataFrame:
         raise NotImplementedError
 
-    def __str__(self) -> str:
-        return self._name
+    def _coerce_annotations(self, annotations: pd.DataFrame) -> pd.DataFrame:
+        all_columns = annotations.reset_index().columns
+        required = [self.task_column, self.worker_column]
+        if missing := set(required) - set(all_columns):
+            raise ValueError(
+                f"Annotations should have {self.task_column} and"
+                f" {self.worker_column} as columns or index levels, missing {missing}."
+            )
 
-class VoterMixin:
-    @staticmethod
-    def _get_aggregated_labels(votes: pd.DataFrame) -> pd.DataFrame:
-        scores = votes.groupby(COLUMNS.question, sort=False)[votes.columns].sum()
+        if set(all_columns) == set(required):
+            raise ValueError("Annotations should have at least one label column")
+
+        annotations = annotations.reset_index().set_index(required)[
+            [column for column in annotations.columns if column not in required]
+        ]
+
+        return annotations
 
-        scores = scores.reindex(votes.index.get_level_values(COLUMNS.question).unique())
 
+class WeightedApprovalMixin:
+    @staticmethod
+    def _get_aggregated_labels(
+        weighted_answers: pd.DataFrame, task_column: str
+    ) -> pd.DataFrame:
+        scores = weighted_answers.groupby(task_column, sort=False)[
+            weighted_answers.columns
+        ].sum()
+
+        scores = scores.reindex(
+            weighted_answers.index.get_level_values(task_column).unique()
+        )
 
         winning_alternatives = scores.idxmax(axis=1).astype(
-            pd.CategoricalDtype(categories=votes.columns)
+            pd.CategoricalDtype(categories=weighted_answers.columns)
         )
 
         aggregated_labels = pd.get_dummies(winning_alternatives)
@@ -32,14 +62,14 @@ def _get_aggregated_labels(votes: pd.DataFrame) -> pd.DataFrame:
         return aggregated_labels
 
 
-class WeightedAggregator(Aggregator, VoterMixin):
+class WeightedAggregator(Aggregator, WeightedApprovalMixin):
     @abstractmethod
-    def _compute_weights(self, annotations: pd.DataFrame) -> pd.Series:
+    def compute_weights(self, annotations: pd.DataFrame) -> pd.Series:
         raise NotImplementedError
 
-    def aggregate(self, annotations: pd.DataFrame) -> pd.DataFrame:
-        weights = self._compute_weights(annotations)
+    def _aggregate(self, annotations: pd.DataFrame) -> pd.DataFrame:
+        weights = self.compute_weights(annotations)
 
         weighted_answers = annotations.multiply(weights, axis="index")
 
-        return self._get_aggregated_labels(weighted_answers)
+        return self._get_aggregated_labels(weighted_answers, self.task_column)
diff --git a/hakeem/paper_results/evaluation/accuracy.py b/hakeem/paper_results/evaluation/accuracy.py
index 2117092..7ea1767 100644
--- a/hakeem/paper_results/evaluation/accuracy.py
+++ b/hakeem/paper_results/evaluation/accuracy.py
@@ -1,5 +1,4 @@
 import logging
-from collections.abc import Iterable
 from random import sample
 from typing import Mapping
 
@@ -10,8 +9,10 @@
 from sklearn.metrics import accuracy_score
 from tqdm import tqdm
 
-from hakeem.core.aggregation.aggregators import (
+from hakeem.core.aggregation.aggregators.condorcet import (
     CondorcetAggregator,
+)
+from hakeem.core.aggregation.aggregators.mallows import (
     DiceAggregator,
     EuclidAggregator,
     JaccardAggregator,
@@ -31,20 +32,19 @@ def compare_methods(
     groundtruth: pd.DataFrame,
     max_voters: int,
     n_batch: int,
-    aggregators: Iterable[Aggregator] = (
-        StandardApprovalAggregator(),
-        EuclidAggregator(),
-        JaccardAggregator(),
-        DiceAggregator(),
-        CondorcetAggregator(),
-    ),
+    aggregators: Mapping[str, Aggregator] = {
+        "Standard Approval Aggregator": StandardApprovalAggregator(),
+        "Euclidean Mallow Aggregator": EuclidAggregator(),
+        "Jaccard Mallow Aggregator": JaccardAggregator(),
+        "Dice Mallow Aggregator": DiceAggregator(),
+        "Condorcet Aggregator": CondorcetAggregator(),
+    },
 ) -> dict[str, NDArray]:
     accuracy = {
-        str(aggregator): np.zeros([n_batch, max_voters - 1])
-        for aggregator in aggregators
+        aggregator: np.zeros([n_batch, max_voters - 1]) for aggregator in aggregators
     }
     confidence_intervals = {
-        str(aggregator): np.zeros([max_voters - 1, 3]) for aggregator in aggregators
+        aggregator: np.zeros([max_voters - 1, 3]) for aggregator in aggregators
     }
 
     logging.info("Experiment started : running the different aggregators ...")
@@ -60,15 +60,15 @@
             annotations.index.get_level_values(COLUMNS.voter).isin(voters)
         ]
 
-        for aggregator in aggregators:
-            aggregated_labels = aggregator.aggregate(annotations_batch)
-            accuracy[str(aggregator)][batch, num - 1] = accuracy_score(
+        for name, aggregator in aggregators.items():
+            aggregated_labels = aggregator.fit_predict(annotations_batch)
+            accuracy[name][batch, num - 1] = accuracy_score(
                 groundtruth, aggregated_labels
             )
 
-    for aggregator in aggregators:
-        confidence_intervals[str(aggregator)][num - 1, :] = (
-            get_mean_confidence_interval(accuracy[str(aggregator)][:, num - 1])
+    for name in aggregators:
+        confidence_intervals[name][num - 1, :] = get_mean_confidence_interval(
+            accuracy[name][:, num - 1]
         )
 
     logging.info("Experiment completed, gathering the results ..")
diff --git a/tests/core/aggregation/test_base.py b/tests/core/aggregation/test_base.py
index 94a24dd..0d183dc 100644
--- a/tests/core/aggregation/test_base.py
+++ b/tests/core/aggregation/test_base.py
@@ -1,13 +1,16 @@
+from unittest.mock import patch
+
 import pandas as pd
 import pytest
 
+from hakeem.core.aggregation.base import Aggregator
 from hakeem.core.utils.inventory import COLUMNS
 
 
 @pytest.mark.ut
-def test_VoterMixin_handles_empty_input():
+def test_WeightedApprovalMixin_handles_empty_input():
     # Given
-    from hakeem.core.aggregation.base import VoterMixin
+    from hakeem.core.aggregation.base import WeightedApprovalMixin
 
     annotations = pd.DataFrame(
         columns=["a", "b"],
@@ -15,7 +18,9 @@
     )
 
     # When
-    result = VoterMixin._get_aggregated_labels(annotations)
+    result = WeightedApprovalMixin._get_aggregated_labels(
+        annotations, task_column=COLUMNS.question
+    )
 
     # Then
     assert result.empty
@@ -52,10 +57,115 @@ def test_Voter_Mixin_get_aggregated_labels_handles_one_question(
     weighted_votes: pd.DataFrame, expected_result: pd.DataFrame
 ) -> None:
     # Given
-    from hakeem.core.aggregation.base import VoterMixin
+    from hakeem.core.aggregation.base import WeightedApprovalMixin
+
+    # When
+    result = WeightedApprovalMixin._get_aggregated_labels(
+        weighted_votes, task_column=COLUMNS.question
+    )
+
+    # Then
+    pd.testing.assert_frame_equal(expected_result, result)
+
+@pytest.mark.ut
+@pytest.mark.parametrize(
+    ["annotations", "task_column", "worker_column", "expected_result"],
+    [
+        (
+            pd.DataFrame(
+                {
+                    "task": ["q1", "q1"],
+                    "worker": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["task", "worker"]),
+            "task",
+            "worker",
+            pd.DataFrame(
+                {
+                    "task": ["q1", "q1"],
+                    "worker": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["task", "worker"]),
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ),
+            "question",
+            "voter",
+            pd.DataFrame(
+                {
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["question", "voter"]),
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index("question"),
+            "question",
+            "voter",
+            pd.DataFrame(
+                {
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["question", "voter"]),
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "extra_index_level": ["l1", "l2"],
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["extra_index_level", "question"]),
+            "question",
+            "voter",
+            pd.DataFrame(
+                {
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["question", "voter"]),
+        ),
+    ],
+)
+@patch.multiple(Aggregator, __abstractmethods__=set())
+def test_Aggregator__coerce_annotations(
+    annotations: pd.DataFrame,
+    task_column: str,
+    worker_column: str,
+    expected_result: pd.DataFrame,
+):
 
     # When
-    result = VoterMixin._get_aggregated_labels(weighted_votes)
+    result = Aggregator(
+        task_column=task_column, worker_column=worker_column
+    )._coerce_annotations(annotations)
 
     # Then
     pd.testing.assert_frame_equal(expected_result, result)
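
Usage sketch (illustrative, not part of the diff): after this patch an aggregator
is driven through fit_predict, with the task and worker identifiers supplied
either as columns or as index levels. The snippet below is a hypothetical
example, assuming the patch is applied: the toy q1/v1 data is invented, and the
column names are taken from the COLUMNS inventory rather than hard-coded.

    import pandas as pd

    from hakeem.core.aggregation.aggregators.mallows import StandardApprovalAggregator
    from hakeem.core.utils.inventory import COLUMNS

    # One row per (task, worker) pair, one 0/1 column per candidate label.
    annotations = pd.DataFrame(
        {
            COLUMNS.question: ["q1", "q1", "q1"],
            COLUMNS.voter: ["v1", "v2", "v3"],
            "a": [1, 1, 1],
            "b": [0, 1, 0],
        }
    ).set_index([COLUMNS.question, COLUMNS.voter])

    # fit_predict first normalizes the frame via _coerce_annotations, then
    # weights each worker (uniformly for standard approval voting) and returns
    # one one-hot row per task holding the winning label ("a" here, 3 votes to 1).
    print(StandardApprovalAggregator().fit_predict(annotations))

Renaming aggregate to fit_predict and keying compare_methods by an explicit
name-to-aggregator mapping also removes the need for the _name/__str__
machinery that this patch deletes.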