From 8fa4e51a75cf3326a500eddf633a0927a19f8bce Mon Sep 17 00:00:00 2001
From: taharallouche
Date: Sat, 26 Oct 2024 11:31:42 +0200
Subject: [PATCH] feat: interface

---
 hakeem/core/aggregation/aggregators.py        |  64 ----------
 .../core/aggregation/aggregators/__init__.py  |   0
 .../core/aggregation/aggregators/condorcet.py |  30 +++++
 .../core/aggregation/aggregators/mallows.py   |  31 +++++
 hakeem/core/aggregation/base.py               |  60 ++++++---
 hakeem/paper_results/evaluation/accuracy.py   |  36 +++---
 tests/core/aggregation/test_base.py           | 120 +++++++++++++++++-
 7 files changed, 239 insertions(+), 102 deletions(-)
 delete mode 100644 hakeem/core/aggregation/aggregators.py
 create mode 100644 hakeem/core/aggregation/aggregators/__init__.py
 create mode 100644 hakeem/core/aggregation/aggregators/condorcet.py
 create mode 100644 hakeem/core/aggregation/aggregators/mallows.py

diff --git a/hakeem/core/aggregation/aggregators.py b/hakeem/core/aggregation/aggregators.py
deleted file mode 100644
index 2b89d93..0000000
--- a/hakeem/core/aggregation/aggregators.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import numpy as np
-import pandas as pd
-
-from hakeem.core.aggregation.base import WeightedAggregator
-from hakeem.core.utils.inventory import DEFAULT_RELIABILITY_BOUNDS
-
-
-class CondorcetAggregator(WeightedAggregator):
-    _name: str = "Condorcet Aggregator"
-
-    def __init__(
-        self,
-        lower_reliability_bound: float = DEFAULT_RELIABILITY_BOUNDS.lower,
-        upper_reliability_bound: float = DEFAULT_RELIABILITY_BOUNDS.upper,
-    ):
-        self.lower_reliability_bound = lower_reliability_bound
-        self.upper_reliability_bound = upper_reliability_bound
-
-    def _compute_weights(self, annotations: pd.DataFrame) -> pd.Series:
-        vote_size = annotations.sum(axis=1)
-        reliabilities = (len(annotations.columns) - vote_size - 1) / (
-            len(annotations.columns) - 2
-        )
-        reliabilities = reliabilities.clip(
-            self.lower_reliability_bound, self.upper_reliability_bound
-        )
-        weights = np.log(reliabilities / (1 - reliabilities))
-
-        return weights
-
-
-class StandardApprovalAggregator(WeightedAggregator):
-    _name: str = "Standard Approval Aggregator"
-
-    @staticmethod
-    def _compute_weights(annotations: pd.DataFrame) -> pd.Series:
-        return pd.Series(1, index=annotations.index)
-
-
-class EuclidAggregator(WeightedAggregator):
-    _name: str = "Euclidean Mallow Aggregator"
-
-    @staticmethod
-    def _compute_weights(annotations: pd.DataFrame) -> pd.Series:
-        vote_size = annotations.sum(axis=1)
-        return np.sqrt(vote_size + 1) - np.sqrt(vote_size - 1)
-
-
-class JaccardAggregator(WeightedAggregator):
-    _name: str = "Jaccard Mallow Aggregator"
-
-    @staticmethod
-    def _compute_weights(annotations: pd.DataFrame) -> pd.Series:
-        vote_size = annotations.sum(axis=1)
-        return 1 / vote_size
-
-
-class DiceAggregator(WeightedAggregator):
-    _name: str = "Dice Mallow Aggregator"
-
-    @staticmethod
-    def _compute_weights(annotations: pd.DataFrame) -> pd.Series:
-        vote_size = annotations.sum(axis=1)
-        return 2 / (vote_size + 1)
diff --git a/hakeem/core/aggregation/aggregators/__init__.py b/hakeem/core/aggregation/aggregators/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/hakeem/core/aggregation/aggregators/condorcet.py b/hakeem/core/aggregation/aggregators/condorcet.py
new file mode 100644
index 0000000..40481e1
--- /dev/null
+++ b/hakeem/core/aggregation/aggregators/condorcet.py
@@ -0,0 +1,30 @@
+import numpy as np
+import pandas as pd
+
+from hakeem.core.aggregation.base import WeightedAggregator
+from hakeem.core.utils.inventory import COLUMNS, DEFAULT_RELIABILITY_BOUNDS
+
+
+class CondorcetAggregator(WeightedAggregator):
+    def __init__(
+        self,
+        lower_reliability_bound: float = DEFAULT_RELIABILITY_BOUNDS.lower,
+        upper_reliability_bound: float = DEFAULT_RELIABILITY_BOUNDS.upper,
+        task_column: str = COLUMNS.question,
+        worker_column: str = COLUMNS.voter,
+    ):
+        super().__init__(task_column, worker_column)
+        self.lower_reliability_bound = lower_reliability_bound
+        self.upper_reliability_bound = upper_reliability_bound
+
+    def compute_weights(self, annotations: pd.DataFrame) -> pd.Series:
+        vote_size = annotations.sum(axis=1)
+        reliabilities = (len(annotations.columns) - vote_size - 1) / (
+            len(annotations.columns) - 2
+        )
+        reliabilities = reliabilities.clip(
+            self.lower_reliability_bound, self.upper_reliability_bound
+        )
+        weights = np.log(reliabilities / (1 - reliabilities))
+
+        return weights
diff --git a/hakeem/core/aggregation/aggregators/mallows.py b/hakeem/core/aggregation/aggregators/mallows.py
new file mode 100644
index 0000000..0e03d18
--- /dev/null
+++ b/hakeem/core/aggregation/aggregators/mallows.py
@@ -0,0 +1,31 @@
+import numpy as np
+import pandas as pd
+
+from hakeem.core.aggregation.base import WeightedAggregator
+
+
+class StandardApprovalAggregator(WeightedAggregator):
+    @staticmethod
+    def compute_weights(annotations: pd.DataFrame) -> pd.Series:
+        return pd.Series(1, index=annotations.index)
+
+
+class EuclidAggregator(WeightedAggregator):
+    @staticmethod
+    def compute_weights(annotations: pd.DataFrame) -> pd.Series:
+        vote_size = annotations.sum(axis=1)
+        return np.sqrt(vote_size + 1) - np.sqrt(vote_size - 1)
+
+
+class JaccardAggregator(WeightedAggregator):
+    @staticmethod
+    def compute_weights(annotations: pd.DataFrame) -> pd.Series:
+        vote_size = annotations.sum(axis=1)
+        return 1 / vote_size
+
+
+class DiceAggregator(WeightedAggregator):
+    @staticmethod
+    def compute_weights(annotations: pd.DataFrame) -> pd.Series:
+        vote_size = annotations.sum(axis=1)
+        return 2 / (vote_size + 1)
diff --git a/hakeem/core/aggregation/base.py b/hakeem/core/aggregation/base.py
index 165df38..694982e 100644
--- a/hakeem/core/aggregation/base.py
+++ b/hakeem/core/aggregation/base.py
@@ -6,25 +6,55 @@
 
 
 class Aggregator(ABC):
-    _name: str
+    def __init__(
+        self, task_column: str = COLUMNS.question, worker_column: str = COLUMNS.voter
+    ) -> None:
+        self.task_column = task_column
+        self.worker_column = worker_column
+
+    def fit_predict(self, annotations: pd.DataFrame) -> pd.DataFrame:
+        annotations = self._coerce_annotations(annotations)
+        return self._aggregate(annotations)
 
     @abstractmethod
-    def aggregate(self, annotations: pd.DataFrame) -> pd.DataFrame:
+    def _aggregate(self, annotations: pd.DataFrame) -> pd.DataFrame:
         raise NotImplementedError
 
-    def __str__(self) -> str:
-        return self._name
+    def _coerce_annotations(self, annotations: pd.DataFrame) -> pd.DataFrame:
+        all_columns = annotations.reset_index().columns
+        required = [self.task_column, self.worker_column]
+        if missing := set(required) - set(all_columns):
+            raise ValueError(
+                f"Annotations should have {self.task_column} and"
+                f" {self.worker_column} as columns or index levels, missing {missing}."
+            )
 
-class VoterMixin:
-    @staticmethod
-    def _get_aggregated_labels(votes: pd.DataFrame) -> pd.DataFrame:
-        scores = votes.groupby(COLUMNS.question, sort=False)[votes.columns].sum()
+        if set(all_columns) == set(required):
+            raise ValueError("Annotations should have at least one label column")
+
+        annotations = annotations.reset_index().set_index(required)[
+            [column for column in annotations.columns if column not in required]
+        ]
+
+        return annotations
 
-        scores = scores.reindex(votes.index.get_level_values(COLUMNS.question).unique())
 
+class WeightedApprovalMixin:
+    @staticmethod
+    def _get_aggregated_labels(
+        weighted_answers: pd.DataFrame, task_column: str
+    ) -> pd.DataFrame:
+        scores = weighted_answers.groupby(task_column, sort=False)[
+            weighted_answers.columns
+        ].sum()
+
+        scores = scores.reindex(
+            weighted_answers.index.get_level_values(task_column).unique()
+        )
 
         winning_alternatives = scores.idxmax(axis=1).astype(
-            pd.CategoricalDtype(categories=votes.columns)
+            pd.CategoricalDtype(categories=weighted_answers.columns)
         )
 
         aggregated_labels = pd.get_dummies(winning_alternatives)
@@ -32,14 +62,14 @@ def _get_aggregated_labels(votes: pd.DataFrame) -> pd.DataFrame:
         return aggregated_labels
 
 
-class WeightedAggregator(Aggregator, VoterMixin):
+class WeightedAggregator(Aggregator, WeightedApprovalMixin):
     @abstractmethod
-    def _compute_weights(self, annotations: pd.DataFrame) -> pd.Series:
+    def compute_weights(self, annotations: pd.DataFrame) -> pd.Series:
         raise NotImplementedError
 
-    def aggregate(self, annotations: pd.DataFrame) -> pd.DataFrame:
-        weights = self._compute_weights(annotations)
+    def _aggregate(self, annotations: pd.DataFrame) -> pd.DataFrame:
+        weights = self.compute_weights(annotations)
 
         weighted_answers = annotations.multiply(weights, axis="index")
 
-        return self._get_aggregated_labels(weighted_answers)
+        return self._get_aggregated_labels(weighted_answers, self.task_column)
diff --git a/hakeem/paper_results/evaluation/accuracy.py b/hakeem/paper_results/evaluation/accuracy.py
index 2117092..7ea1767 100644
--- a/hakeem/paper_results/evaluation/accuracy.py
+++ b/hakeem/paper_results/evaluation/accuracy.py
@@ -1,5 +1,4 @@
 import logging
-from collections.abc import Iterable
 from random import sample
 from typing import Mapping
 
@@ -10,8 +9,10 @@
 from sklearn.metrics import accuracy_score
 from tqdm import tqdm
 
-from hakeem.core.aggregation.aggregators import (
+from hakeem.core.aggregation.aggregators.condorcet import (
     CondorcetAggregator,
+)
+from hakeem.core.aggregation.aggregators.mallows import (
     DiceAggregator,
     EuclidAggregator,
     JaccardAggregator,
@@ -31,20 +32,19 @@ def compare_methods(
     groundtruth: pd.DataFrame,
     max_voters: int,
     n_batch: int,
-    aggregators: Iterable[Aggregator] = (
-        StandardApprovalAggregator(),
-        EuclidAggregator(),
-        JaccardAggregator(),
-        DiceAggregator(),
-        CondorcetAggregator(),
-    ),
+    aggregators: Mapping[str, Aggregator] = {
+        "Standard Approval Aggregator": StandardApprovalAggregator(),
+        "Euclidean Mallow Aggregator": EuclidAggregator(),
+        "Jaccard Mallow Aggregator": JaccardAggregator(),
+        "Dice Mallow Aggregator": DiceAggregator(),
+        "Condorcet Aggregator": CondorcetAggregator(),
+    },
 ) -> dict[str, NDArray]:
     accuracy = {
-        str(aggregator): np.zeros([n_batch, max_voters - 1])
-        for aggregator in aggregators
+        aggregator: np.zeros([n_batch, max_voters - 1]) for aggregator in aggregators
     }
     confidence_intervals = {
-        str(aggregator): np.zeros([max_voters - 1, 3]) for aggregator in aggregators
+        aggregator: np.zeros([max_voters - 1, 3]) for aggregator in aggregators
     }
 
     logging.info("Experiment started : running the different aggregators ...")
@@ -60,15 +60,15 @@
             annotations.index.get_level_values(COLUMNS.voter).isin(voters)
         ]
 
-        for aggregator in aggregators:
-            aggregated_labels = aggregator.aggregate(annotations_batch)
-            accuracy[str(aggregator)][batch, num - 1] = accuracy_score(
+        for name, aggregator in aggregators.items():
+            aggregated_labels = aggregator.fit_predict(annotations_batch)
+            accuracy[name][batch, num - 1] = accuracy_score(
                 groundtruth, aggregated_labels
             )
 
-    for aggregator in aggregators:
-        confidence_intervals[str(aggregator)][num - 1, :] = (
-            get_mean_confidence_interval(accuracy[str(aggregator)][:, num - 1])
+    for name in aggregators:
+        confidence_intervals[name][num - 1, :] = get_mean_confidence_interval(
+            accuracy[name][:, num - 1]
         )
 
     logging.info("Experiment completed, gathering the results ..")
diff --git a/tests/core/aggregation/test_base.py b/tests/core/aggregation/test_base.py
index 94a24dd..0d183dc 100644
--- a/tests/core/aggregation/test_base.py
+++ b/tests/core/aggregation/test_base.py
@@ -1,13 +1,16 @@
+from unittest.mock import patch
+
 import pandas as pd
 import pytest
 
+from hakeem.core.aggregation.base import Aggregator
 from hakeem.core.utils.inventory import COLUMNS
 
 
 @pytest.mark.ut
-def test_VoterMixin_handles_empty_input():
+def test_WeightedApprovalMixin_handles_empty_input():
     # Given
-    from hakeem.core.aggregation.base import VoterMixin
+    from hakeem.core.aggregation.base import WeightedApprovalMixin
 
     annotations = pd.DataFrame(
         columns=["a", "b"],
@@ -15,7 +18,9 @@
     )
 
     # When
-    result = VoterMixin._get_aggregated_labels(annotations)
+    result = WeightedApprovalMixin._get_aggregated_labels(
+        annotations, task_column=COLUMNS.question
+    )
 
     # Then
     assert result.empty
@@ -52,10 +57,115 @@ def test_Voter_Mixin_get_aggregated_labels_handles_one_question(
     weighted_votes: pd.DataFrame, expected_result: pd.DataFrame
 ) -> None:
     # Given
-    from hakeem.core.aggregation.base import VoterMixin
+    from hakeem.core.aggregation.base import WeightedApprovalMixin
+
+    # When
+    result = WeightedApprovalMixin._get_aggregated_labels(
+        weighted_votes, task_column=COLUMNS.question
+    )
+
+    # Then
+    pd.testing.assert_frame_equal(expected_result, result)
+
+@pytest.mark.ut
+@pytest.mark.parametrize(
+    ["annotations", "task_column", "worker_column", "expected_result"],
+    [
+        (
+            pd.DataFrame(
+                {
+                    "task": ["q1", "q1"],
+                    "worker": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["task", "worker"]),
+            "task",
+            "worker",
+            pd.DataFrame(
+                {
+                    "task": ["q1", "q1"],
+                    "worker": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["task", "worker"]),
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ),
+            "question",
+            "voter",
+            pd.DataFrame(
+                {
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["question", "voter"]),
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index("question"),
+            "question",
+            "voter",
+            pd.DataFrame(
+                {
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["question", "voter"]),
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "extra_index_level": ["l1", "l2"],
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["extra_index_level", "question"]),
+            "question",
+            "voter",
+            pd.DataFrame(
+                {
+                    "question": ["q1", "q1"],
+                    "voter": ["v1", "v2"],
+                    "a": [1, 0],
+                    "b": [0, 1],
+                }
+            ).set_index(["question", "voter"]),
+        ),
+    ],
+)
+@patch.multiple(Aggregator, __abstractmethods__=set())
+def test_Aggregator__coerce_annotations(
+    annotations: pd.DataFrame,
+    task_column: str,
+    worker_column: str,
+    expected_result: pd.DataFrame,
+):
 
     # When
-    result = VoterMixin._get_aggregated_labels(weighted_votes)
+    result = Aggregator(
+        task_column=task_column, worker_column=worker_column
+    )._coerce_annotations(annotations)
 
     # Then
     pd.testing.assert_frame_equal(expected_result, result)
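
Usage sketch (illustrative, not part of the diff): after this patch an aggregator
is driven through fit_predict, with the task and worker identifiers supplied
either as columns or as index levels. The snippet below is a hypothetical
example, assuming the patch is applied: the toy q1/v1 data is invented, and the
column names are taken from the COLUMNS inventory rather than hard-coded.

    import pandas as pd

    from hakeem.core.aggregation.aggregators.mallows import StandardApprovalAggregator
    from hakeem.core.utils.inventory import COLUMNS

    # One row per (task, worker) pair, one 0/1 column per candidate label.
    annotations = pd.DataFrame(
        {
            COLUMNS.question: ["q1", "q1", "q1"],
            COLUMNS.voter: ["v1", "v2", "v3"],
            "a": [1, 1, 1],
            "b": [0, 1, 0],
        }
    ).set_index([COLUMNS.question, COLUMNS.voter])

    # fit_predict first normalizes the frame via _coerce_annotations, then
    # weights each worker (uniformly for standard approval voting) and returns
    # one one-hot row per task holding the winning label ("a" here, 3 votes to 1).
    print(StandardApprovalAggregator().fit_predict(annotations))

Renaming aggregate to fit_predict and keying compare_methods by an explicit
name-to-aggregator mapping also removes the need for the _name/__str__
machinery that this patch deletes.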