Skip to content

Commit

Permalink
add scope_reducing cache
Browse files Browse the repository at this point in the history
  • Loading branch information
EdenWuyifan committed Jun 17, 2024
1 parent 9b0efe6 commit 8ecd044
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 2 deletions.
35 changes: 34 additions & 1 deletion bdikit/mapping_recommendation/scope_reducing_manager.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
import hashlib
import json
import os

import pandas as pd
from bdikit.download import BDIKIT_CACHE_DIR
from bdikit.mapping_algorithms.scope_reducing.algorithms import YurongReducer
from bdikit.visualization.scope_reducing import SRHeatMapManager

Expand All @@ -8,12 +14,39 @@ def __init__(self, dataset, target_domain):
self.target_domain = target_domain
self.best_method = YurongReducer()
self.visualization_manager = None
self.df_checksum = self._get_data_checksum()

def reduce(self):
    """Return the scope-reducing results for ``self.dataset``.

    A previously cached result for this dataset/method pair is reused when
    one is available; otherwise the reducer is run and its output is
    cached. In both cases a fresh ``SRHeatMapManager`` is built from the
    result and stored on ``self.visualization_manager``.

    Returns:
        The reducings, either loaded from the cache or freshly computed
        by ``self.best_method.reduce_scope``.
    """
    # Load once: calling the loader twice parses the file twice and can
    # return None on the second call if the file vanishes in between.
    reducings = self._load_cached_results()
    if reducings is None:
        reducings = self.best_method.reduce_scope(self.dataset)
        self._cache_results(reducings)
    self.visualization_manager = SRHeatMapManager(self.dataset, reducings)
    return reducings

def get_heatmap(self):
    """Build and return the heatmap plot for the latest reducings.

    Returns:
        Whatever ``self.visualization_manager.plot_heatmap()`` returns.

    Raises:
        RuntimeError: if no visualization manager has been created yet,
            i.e. ``reduce()`` has not been called on this instance.
    """
    manager = self.visualization_manager
    if manager is None:
        # Fail with an actionable message instead of an AttributeError
        # on NoneType.
        raise RuntimeError(
            "No scope-reducing results available; call reduce() before get_heatmap()."
        )
    manager.get_heatmap()
    return manager.plot_heatmap()

def _get_data_checksum(self):
    """Return a SHA-1 hex digest of ``self.dataset`` for use in cache keys.

    The digest covers the per-row hashes produced by
    ``pd.util.hash_pandas_object`` and the column labels.

    Returns:
        A 40-character lowercase hexadecimal string.
    """
    digest = hashlib.sha1()
    row_hashes = pd.util.hash_pandas_object(self.dataset)
    digest.update(row_hashes.to_numpy().tobytes())
    # hash_pandas_object does not take column labels into account, so two
    # frames with equal values but different column names would otherwise
    # share a cache entry.
    column_labels = [str(label) for label in self.dataset.columns]
    digest.update(repr(column_labels).encode("utf-8"))
    return digest.hexdigest()

def _cache_results(self, reducings):
    """Persist ``reducings`` as JSON in the bdikit cache directory.

    The file name combines the reducer's class name and the dataset
    checksum. An existing cache file is left untouched.

    Args:
        reducings: JSON-serializable output of the scope reducer.
    """
    cache_path = os.path.join(
        BDIKIT_CACHE_DIR,
        f"reducings_{self.best_method.__class__.__name__}_{self.df_checksum}.json",
    )
    if os.path.exists(cache_path):
        return

    os.makedirs(BDIKIT_CACHE_DIR, exist_ok=True)
    # Write to a process-unique temp file and rename it into place, so an
    # interrupted or failed dump never leaves a truncated cache file that
    # a later load would choke on.
    tmp_path = f"{cache_path}.{os.getpid()}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(reducings, f)
        os.replace(tmp_path, cache_path)
    finally:
        # After a successful replace the temp file is gone; this only
        # cleans up after a failure.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def _load_cached_results(self):
    """Load cached reducings for the current dataset and reducer.

    Returns:
        The decoded JSON content of the cache file, or ``None`` when no
        cache file exists or the file cannot be decoded.
    """
    cache_path = os.path.join(
        BDIKIT_CACHE_DIR,
        f"reducings_{self.best_method.__class__.__name__}_{self.df_checksum}.json",
    )
    try:
        with open(cache_path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return None
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses. Treat a corrupt or truncated entry as a cache miss
        # and discard it so it can be regenerated instead of failing
        # every call.
        try:
            os.remove(cache_path)
        except OSError:
            # Best effort: the entry may already be gone or be read-only.
            pass
        return None
2 changes: 1 addition & 1 deletion bdikit/visualization/scope_reducing.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ def _plot_heatmap_base(self, heatmap_rec_list, show_subschema):
tooltip=[
alt.Tooltip("Column", title="Column"),
alt.Tooltip("Recommendation", title="Recommendation"),
alt.Tooltip("Value", title="Correlation Score"),
alt.Tooltip("Value", title="Similarity"),
alt.Tooltip("Description", title="Description"),
alt.Tooltip("Values (sample)", title="Values (sample)"),
],
Expand Down

0 comments on commit 8ecd044

Please sign in to comment.