Skip to content

Commit

Permalink
test(api): Add end-to-end API integration test
Browse files Browse the repository at this point in the history
  • Loading branch information
aecio committed Jun 21, 2024
1 parent 838f7e6 commit 1e71758
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 7 deletions.
9 changes: 5 additions & 4 deletions bdikit/mapping_algorithms/value_mapping/value_mappers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd
from typing import Callable
from typing import Any, Callable
from collections import defaultdict


class ValueMapper:
Expand Down Expand Up @@ -52,12 +53,12 @@ class DictionaryMapper(ValueMapper):
values stored in the provided dictionary.
"""

def __init__(self, dictionary: dict):
self.dictionary = dictionary
def __init__(self, dictionary: dict, missing_data_value: Any = None):
self.dictionary = defaultdict(lambda: missing_data_value, dictionary)

def map(self, input_column: pd.Series) -> pd.Series:
"""
Transforms the values in the input_column to the values specified in
the dictionary provided using the object constructor.
"""
return input_column.map(self.dictionary)
return input_column.map(self.dictionary, na_action="ignore")
40 changes: 37 additions & 3 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,40 @@ def test_value_mapping_dataframe():
assert len(src_column_mapping["matches"]) == 3


# TODO
# def test_preview_value_mappings():
# pass
def test_end_to_end_api_integration():
    """Run the public API end to end on a tiny fruit-name dataset.

    The test chains column matching, value matching, mapping update and
    materialization, checking the intermediate result of every stage and
    finally the harmonized output column.
    """
    # given
    # NOTE(review): "Oorange" looks like a deliberate misspelling to exercise
    # fuzzy value matching -- confirm before "fixing" it.
    df_source = pd.DataFrame(
        {"src_column": ["Red Apple", "Banana", "Oorange", "Strawberry"]}
    )
    df_target = pd.DataFrame(
        {"tgt_column": ["apple", "banana", "orange", "kiwi", "grapes"]}
    )

    # when
    column_mappings = bdi.match_columns(df_source, df_target, method="coma")

    # then
    assert column_mappings is not None
    assert not column_mappings.empty
    assert "source" in column_mappings.columns
    assert "target" in column_mappings.columns

    # when
    value_mappings = bdi.match_values(
        df_source, df_target, column_mappings, method="tfidf"
    )

    # then
    assert value_mappings is not None
    assert "src_column" in value_mappings

    src_column_mapping = value_mappings["src_column"]
    assert src_column_mapping["matches"] is not None
    assert src_column_mapping["target"] == "tgt_column"
    # Three of the four source values are expected to find a target match.
    assert len(src_column_mapping["matches"]) == 3

    # when
    harmonization_spec = bdi.update_mappings(value_mappings, [])
    df_mapped = bdi.materialize_mapping(df_source, harmonization_spec)

    # then
    assert "tgt_column" in df_mapped.columns
    # The last source value ("Strawberry") is expected to come out as None.
    assert df_mapped["tgt_column"].tolist() == ["apple", "banana", "orange", None]

0 comments on commit 1e71758

Please sign in to comment.