From d695cc9431eea83502dd41f2c7516f4131ddf3b0 Mon Sep 17 00:00:00 2001 From: nfelnlp Date: Sat, 3 Jul 2021 03:42:54 +0200 Subject: [PATCH] Relaxed dataset script path for pip package. --- README.md | 6 +++++- src/thermostat/data/dataset_utils.py | 6 +++++- src/thermostat/dataset.py | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 59cd3da..c74dfaf 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ __Thermostat: A Large Collection of NLP Model Explanations and Analysis Tools.__ ### With pip (coming soon!) ```bash -pip install thermostat +pip install thermostat-datasets ``` @@ -246,6 +246,10 @@ If you're successful, follow the official instructions for [sharing a community At first, all Thermostat contributions will have to be loaded via the code example above. Please notify us of existing explanation datasets by creating an [Issue](https://github.com/DFKI-NLP/thermostat/issues) with the tag [Contribution](https://github.com/DFKI-NLP/thermostat/labels/contribution) and a maintainer of this repository will add your dataset to the Thermostat configs s.t. it can be accessed by everyone via `thermostat.load()`. +## Disclaimer +We give no warranties for the correctness of the heatmaps or any other part of the data. This is evolving work and will be hot-patched continuously. + + ## Acknowledgements The majority of the codebase, especially regarding the combination of transformers and captum, stems from our other recent project [Empirical Explainers](https://github.com/DFKI-NLP/emp-exp). diff --git a/src/thermostat/data/dataset_utils.py b/src/thermostat/data/dataset_utils.py index e141819..20c6244 100644 --- a/src/thermostat/data/dataset_utils.py +++ b/src/thermostat/data/dataset_utils.py @@ -1,4 +1,5 @@ import numpy as np +import os from datasets import Dataset, load_dataset from itertools import groupby from overrides import overrides @@ -344,7 +345,10 @@ def load(config_str: str = None, **kwargs) -> Thermopack: print(f'Loading Thermostat configuration: {config_str}') if ld_kwargs: print(f'Additional parameters for loading: {ld_kwargs}') - data = load_dataset(path="src/thermostat/dataset.py", name=config_str, split="test", **ld_kwargs) + dataset_script_path = os.path.dirname(os.path.realpath(__file__)).replace('/thermostat/data', + '/thermostat/dataset.py') + data = load_dataset(path=dataset_script_path, + name=config_str, split="test", **ld_kwargs) return Thermopack(data) diff --git a/src/thermostat/dataset.py b/src/thermostat/dataset.py index 1e32e57..4ed28a8 100644 --- a/src/thermostat/dataset.py +++ b/src/thermostat/dataset.py @@ -31,7 +31,7 @@ _HOMEPAGE = 'https://github.com/DFKI-NLP/thermostat' # TODO: Add the licence for the dataset here if you can find it -_LICENSE = '' +_LICENSE = 'Apache 2.0' class Thermostat(datasets.GeneratorBasedBuilder):