Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: split inventory #64

Merged
merged 1 commit into from
Oct 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 0 additions & 55 deletions hakeem/core/utils/inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,66 +9,11 @@ class _DEFAULT_RELIABILITY_BOUNDS:

DEFAULT_RELIABILITY_BOUNDS = _DEFAULT_RELIABILITY_BOUNDS()

"""
The rest of the file contains utilities specific to reproducing the paper results.
"""


@dataclass(frozen=True)
class Dataset:
    """Immutable record describing one raw dataset.

    The four fields are positional in this order: ``path``,
    ``alternatives``, ``nbr_questions``, ``nbr_voters``.
    """

    # Location of the raw CSV file for the dataset.
    path: str
    # Labels of the alternatives available in the dataset.
    alternatives: list
    # Number of questions in the dataset.
    nbr_questions: int
    # Number of voters in the dataset.
    nbr_voters: int


@dataclass(frozen=True)
class _COLUMNS:
    """Frozen namespace holding the column-name strings used by the code.

    Every field defaults to the literal column label, so a bare
    ``_COLUMNS()`` instance exposes all the names as attributes.
    """

    interface: str = "Interface"
    mechanism: str = "Mechanism"
    question: str = "Question"
    true_answer: str = "TrueAnswer"
    answer: str = "Answer"
    comments: str = "Comments"
    voter: str = "Voter"
    weight: str = "Weight"


# Shared module-level instance; import this rather than instantiating _COLUMNS.
COLUMNS = _COLUMNS()


# Animals dataset: 6 alternatives, 16 questions, 110 voters.
ANIMALS_DATASET = Dataset(
    path="data/animals/raw.csv",
    alternatives=["Leopard", "Tiger", "Puma", "Jaguar", "Lion(ess)", "Cheetah"],
    nbr_questions=16,
    nbr_voters=110,
)

# Textures dataset: 6 alternatives, 16 questions, 96 voters.
TEXTURES_DATASET = Dataset(
    path="data/textures/raw.csv",
    alternatives=["Gravel", "Grass", "Brick", "Wood", "Sand", "Cloth"],
    nbr_questions=16,
    nbr_voters=96,
)

# Languages dataset: 8 alternatives, 25 questions, 109 voters.
LANGUAGE_DATASET = Dataset(
    path="data/languages/raw.csv",
    alternatives=[
        "Hebrew", "Russian", "Japanese", "Thai",
        "Chinese", "Tamil", "Latin", "Hindi",
    ],
    nbr_questions=25,
    nbr_voters=109,
)

# Lookup table from the short dataset name to its description.
DATASETS = {
    "animals": ANIMALS_DATASET,
    "textures": TEXTURES_DATASET,
    "languages": LANGUAGE_DATASET,
}
2 changes: 1 addition & 1 deletion hakeem/paper_results/evaluation/accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
StandardApprovalAggregator,
)
from hakeem.core.aggregation.base import Aggregator
from hakeem.core.utils.inventory import COLUMNS
from hakeem.core.utils.utils import get_mean_confidence_interval
from hakeem.paper_results.inventory import COLUMNS

logging.basicConfig(
level=logging.INFO, format="'%(asctime)s - %(levelname)s - %(message)s'"
Expand Down
61 changes: 61 additions & 0 deletions hakeem/paper_results/inventory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from dataclasses import dataclass


@dataclass(frozen=True)
class Dataset:
    """Read-only description of a raw dataset used by the paper-results code.

    Fields, in constructor order:
        path: path to the raw CSV file.
        alternatives: list of alternative labels.
        nbr_questions: number of questions.
        nbr_voters: number of voters.
    """

    path: str
    alternatives: list
    nbr_questions: int
    nbr_voters: int


@dataclass(frozen=True)
class _COLUMNS:
    """Column-name constants, grouped as attributes of a frozen dataclass.

    Each attribute's default value is the column label string itself.
    """

    interface: str = "Interface"
    mechanism: str = "Mechanism"
    question: str = "Question"
    true_answer: str = "TrueAnswer"
    answer: str = "Answer"
    comments: str = "Comments"
    voter: str = "Voter"
    weight: str = "Weight"


# Singleton-style instance that the rest of the package imports.
COLUMNS = _COLUMNS()


# Each constant below describes one raw dataset: where its CSV lives, the
# alternatives it offers, and its question / voter counts.

ANIMALS_DATASET = Dataset(
    path="data/animals/raw.csv",
    alternatives=["Leopard", "Tiger", "Puma", "Jaguar", "Lion(ess)", "Cheetah"],
    nbr_questions=16,
    nbr_voters=110,
)

TEXTURES_DATASET = Dataset(
    path="data/textures/raw.csv",
    alternatives=["Gravel", "Grass", "Brick", "Wood", "Sand", "Cloth"],
    nbr_questions=16,
    nbr_voters=96,
)

LANGUAGE_DATASET = Dataset(
    path="data/languages/raw.csv",
    alternatives=[
        "Hebrew",
        "Russian",
        "Japanese",
        "Thai",
        "Chinese",
        "Tamil",
        "Latin",
        "Hindi",
    ],
    nbr_questions=25,
    nbr_voters=109,
)

# Registry keyed by short dataset name; insertion order is animals,
# textures, languages.
DATASETS = dict(
    animals=ANIMALS_DATASET,
    textures=TEXTURES_DATASET,
    languages=LANGUAGE_DATASET,
)
2 changes: 1 addition & 1 deletion hakeem/paper_results/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

import pandas as pd

from hakeem.core.utils.inventory import COLUMNS, DATASETS
from hakeem.paper_results.evaluation.accuracy import (
compare_methods,
plot_accuracies,
)
from hakeem.paper_results.inventory import COLUMNS, DATASETS


def _process_dataset() -> None:
Expand Down
2 changes: 1 addition & 1 deletion hakeem/paper_results/parsing/data_preparation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pandas as pd

from hakeem.core.utils.inventory import COLUMNS, Dataset
from hakeem.paper_results.inventory import COLUMNS, Dataset


def _get_column_names(name: str, nbr_questions: int) -> list[str]:
Expand Down
2 changes: 1 addition & 1 deletion tests/paper_results/evaluation/test_accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ def test_compare_methods() -> None:
# Given
import random

from hakeem.core.utils.inventory import COLUMNS, DATASETS
from hakeem.paper_results.evaluation.accuracy import compare_methods
from hakeem.paper_results.inventory import COLUMNS, DATASETS

random.seed(42)
dataset = DATASETS["animals"]
Expand Down
4 changes: 2 additions & 2 deletions tests/paper_results/parsing/test_data_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import pandas as pd
import pytest

from hakeem.core.utils.inventory import COLUMNS
from hakeem.paper_results.inventory import COLUMNS


@pytest.mark.e2e
@pytest.mark.parametrize("dataset", ["animals", "languages", "textures"])
def test_prepare_data(dataset) -> None:
# With
from hakeem.core.utils.inventory import DATASETS
from hakeem.paper_results.inventory import DATASETS
from hakeem.paper_results.parsing.data_preparation import prepare_data

dataset = DATASETS[dataset]
Expand Down