From d275f4be00303a85a533ac1777430bbe261acddd Mon Sep 17 00:00:00 2001 From: taharallouche Date: Sat, 26 Oct 2024 11:55:12 +0200 Subject: [PATCH] test: aggregators --- .../core/aggregation/aggregators/mallows.py | 31 ++++----- .../aggregators.py/test_mallows.py | 67 +++++++++++++++++++ 2 files changed, 83 insertions(+), 15 deletions(-) create mode 100644 tests/core/aggregation/aggregators.py/test_mallows.py diff --git a/hakeem/core/aggregation/aggregators/mallows.py b/hakeem/core/aggregation/aggregators/mallows.py index 0e03d18..64b2c8d 100644 --- a/hakeem/core/aggregation/aggregators/mallows.py +++ b/hakeem/core/aggregation/aggregators/mallows.py @@ -5,27 +5,28 @@ class StandardApprovalAggregator(WeightedAggregator): - @staticmethod - def compute_weights(annotations: pd.DataFrame) -> pd.Series: - return pd.Series(1, index=annotations.index) + @staticmethod + def compute_weights(annotations: pd.DataFrame) -> pd.Series: + return pd.Series(1, index=annotations.index) class EuclidAggregator(WeightedAggregator): - @staticmethod - def compute_weights(annotations: pd.DataFrame) -> pd.Series: - vote_size = annotations.sum(axis=1) - return np.sqrt(vote_size + 1) - np.sqrt(vote_size - 1) + @staticmethod + def compute_weights(annotations: pd.DataFrame) -> pd.Series: + vote_size = annotations.sum(axis=1) + return np.sqrt(vote_size + 1) - np.sqrt(vote_size - 1) class JaccardAggregator(WeightedAggregator): - @staticmethod - def compute_weights(annotations: pd.DataFrame) -> pd.Series: - vote_size = annotations.sum(axis=1) - return 1 / vote_size + @staticmethod + def compute_weights(annotations: pd.DataFrame) -> pd.Series: + vote_size = annotations.sum(axis=1) + assert np.all(vote_size > 0), "Jaccard weights are not defined for empty votes" + return 1 / vote_size class DiceAggregator(WeightedAggregator): - @staticmethod - def compute_weights(annotations: pd.DataFrame) -> pd.Series: - vote_size = annotations.sum(axis=1) - return 2 / (vote_size + 1) + @staticmethod + def compute_weights(annotations: pd.DataFrame) -> pd.Series: + vote_size = annotations.sum(axis=1) + return 2 / (vote_size + 1) diff --git a/tests/core/aggregation/aggregators.py/test_mallows.py b/tests/core/aggregation/aggregators.py/test_mallows.py new file mode 100644 index 0000000..b3f677a --- /dev/null +++ b/tests/core/aggregation/aggregators.py/test_mallows.py @@ -0,0 +1,67 @@ +import pandas as pd +import pytest + + +@pytest.mark.ut +@pytest.mark.parametrize( + ["annotations", "expected_result"], + [ + ( + pd.DataFrame( + {"task": ["q1", "q1"], "worker": ["v1", "v2"], "a": [1, 0], "b": [0, 1]} + ).set_index(["task", "worker"]), + pd.Series( + [1, 1], + index=pd.MultiIndex.from_tuples( + [("q1", "v1"), ("q1", "v2")], names=["task", "worker"] + ), + ), + ), + ], +) +def test_StandardApprovalAggregator_compute_weights( + annotations: pd.DataFrame, expected_result: pd.Series +) -> None: + # Given + from hakeem.core.aggregation.aggregators.mallows import StandardApprovalAggregator + + # When + result = StandardApprovalAggregator().compute_weights(annotations) + + # Then + pd.testing.assert_series_equal(expected_result, result) + + +@pytest.mark.ut +@pytest.mark.parametrize( + ["annotations", "expected_result"], + [ + ( + pd.DataFrame( + { + "task": ["q1", "q1", "q2"], + "worker": ["v1", "v2", "v1"], + "a": [1, 1, 1], + "b": [0, 1, 1], + } + ).set_index(["task", "worker"]), + pd.Series( + [1, 0.5, 0.5], + index=pd.MultiIndex.from_tuples( + [("q1", "v1"), ("q1", "v2"), ("q2", "v1")], names=["task", "worker"] + ), + ), + ), + ], +) +def test_JaccardAggregator_compute_weights( + annotations: pd.DataFrame, expected_result: pd.Series +) -> None: + # Given + from hakeem.core.aggregation.aggregators.mallows import JaccardAggregator + + # When + result = JaccardAggregator().compute_weights(annotations) + + # Then + pd.testing.assert_series_equal(expected_result, result)