diff --git a/.github/workflows/tox-gha.yml b/.github/workflows/tox-gha.yml
index 696456d..6cf0ec4 100644
--- a/.github/workflows/tox-gha.yml
+++ b/.github/workflows/tox-gha.yml
@@ -2,10 +2,12 @@ name: Python package
 on:
   push:
+    branches: [main]
   pull_request:
-  # release:
-  #   types: [published]
-
+    branches: [main]
+  release:
+    types: [published]
+
 jobs:
   build:
     runs-on: ubuntu-latest
diff --git a/acore/decomposition/__init__.py b/acore/decomposition/__init__.py
new file mode 100644
index 0000000..9f86d45
--- /dev/null
+++ b/acore/decomposition/__init__.py
@@ -0,0 +1,3 @@
+from . import pca
+
+__all__ = ["pca"]
diff --git a/acore/decomposition/pca.py b/acore/decomposition/pca.py
index 229661d..0ae8102 100644
--- a/acore/decomposition/pca.py
+++ b/acore/decomposition/pca.py
@@ -26,10 +26,10 @@ def run_pca(
     n_comp_max = min(df_wide.shape)
     n_comp_max = min(n_comp_max, n_components)
     pca = sklearn.decomposition.PCA(n_components=n_comp_max)
-    PCs = pca.fit_transform(df_wide)
+    pcs = pca.fit_transform(df_wide)
     cols = [
         f"principal component {i+1} ({var_explained*100:.2f} %)"
         for i, var_explained in enumerate(pca.explained_variance_ratio_)
     ]
-    PCs = pd.DataFrame(PCs, index=df_wide.index, columns=cols)
-    return PCs, pca
+    pcs = pd.DataFrame(pcs, index=df_wide.index, columns=cols)
+    return pcs, pca
diff --git a/acore/enrichment_analysis.py b/acore/enrichment_analysis.py
index a3f9082..6838d83 100644
--- a/acore/enrichment_analysis.py
+++ b/acore/enrichment_analysis.py
@@ -1,3 +1,12 @@
+"""Enrichment Analysis Module. Contains different functions to perform enrichment
+analysis.
+
+Most of the concepts in this module are covered in
+https://www.youtube.com/watch?v=2NC1QOXmc5o by Lars Juhl Jensen.
+"""
+
+from __future__ import annotations
+
 import os
 import re
 import uuid
@@ -9,17 +18,32 @@
 from acore.multiple_testing import apply_pvalue_correction

+TYPE_COLS_MSG = """
+columns: 'terms', 'identifiers', 'foreground',
+'background', 'foreground_pop', 'background_pop', 'pvalue', 'padj' and 'rejected'.
+"""
+
+
+def run_fisher(
+    group1: list[int],
+    group2: list[int],
+    alternative: str = "two-sided",
+) -> tuple[float, float]:
+    """Run Fisher's exact test on two groups using `scipy.stats.fisher_exact`_.

-def run_fisher(group1, group2, alternative="two-sided"):
-    """annotated       not-annotated
-    group1      a               b
-    group2      c               d
-    ------------------------------------
+    .. _scipy.stats.fisher_exact: https://docs.scipy.org/doc/scipy/reference/generated/\
+scipy.stats.fisher_exact.html
+
+    Example::
+
+        #          annotated   not-annotated
+        # group1       a             b
+        # group2       c             d

-    group1 = [a, b]
-    group2 = [c, d]
-    odds, pvalue = stats.fisher_exact([[a, b], [c, d]])
+        odds, pvalue = run_fisher(group1=[a, b],
+                                  group2=[c, d],
+                                  )
     """

     odds, pvalue = stats.fisher_exact([group1, group2], alternative)
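For concreteness, a minimal sketch of the new `run_fisher` wrapper on made-up counts (10 of 25 foreground features and 15 of 120 background features carry the annotation; each list is `[annotated, not-annotated]` for one group):

    from acore.enrichment_analysis import run_fisher

    odds, pvalue = run_fisher(group1=[10, 15], group2=[15, 105], alternative="greater")

@@ -29,15 +53,23 @@ def run_fisher(group1, group2, alternative="two-sided"):

 def run_kolmogorov_smirnov(dist1, dist2, alternative="two-sided"):
     """
-    Compute the Kolmogorov-Smirnov statistic on 2 samples. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ks_2samp.html
+    Compute the Kolmogorov-Smirnov statistic on 2 samples.
+    See `scipy.stats.ks_2samp`_
+
+    .. 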
_scipy.stats.ks_2samp: https://docs.scipy.org/doc/scipy/reference/generated/\ +scipy.stats.ks_2samp.html + - :param list dist1: sequence of 1-D ndarray (first distribution to compare) drawn from a continuous distribution - :param list dist2: sequence of 1-D ndarray (second distribution to compare) drawn from a continuous distribution + :param list dist1: sequence of 1-D ndarray (first distribution to compare) + drawn from a continuous distribution + :param list dist2: sequence of 1-D ndarray (second distribution to compare) + drawn from a continuous distribution :param str alternative: defines the alternative hypothesis (default is ‘two-sided’): * **'two-sided'** * **'less'** * **'greater'** :return: statistic float and KS statistic pvalue float Two-tailed p-value. + Example:: result = run_kolmogorov_smirnov(dist1, dist2, alternative='two-sided') @@ -49,193 +81,289 @@ def run_kolmogorov_smirnov(dist1, dist2, alternative="two-sided"): return result +# ! undocumented for now (find usage example) def run_site_regulation_enrichment( - regulation_data, - annotation, - identifier="identifier", - groups=["group1", "group2"], - annotation_col="annotation", - reject_col="rejected", - group_col="group", - method="fisher", - regex=r"(\w+~.+)_\w\d+\-\w+", - correction="fdr_bh", + regulation_data: pd.DataFrame, + annotation: pd.DataFrame, + identifier: str = "identifier", + groups: list[str] = ("group1", "group2"), + annotation_col: str = "annotation", + rejected_col: str = "rejected", + group_col: str = "group", + method: str = "fisher", + regex: str = "(\\w+~.+)_\\w\\d+\\-\\w+", + correction: str = "fdr_bh", + remove_duplicates: bool = False, ): - """ - This function runs a simple enrichment analysis for significantly regulated protein sites in a dataset. - - :param regulation_data: pandas dataframe resulting from differential regulation analysis. - :param annotation: pandas dataframe with annotations for features (columns: 'annotation', 'identifier' (feature identifiers), and 'source'). - :param str identifier: name of the column from annotation containing feature identifiers. - :param list groups: column names from regulation_data containing group identifiers. - :param str annotation_col: name of the column from annotation containing annotation terms. - :param str reject_col: name of the column from regulatio_data containing boolean for rejected null hypothesis. - :param str group_col: column name for new column in annotation dataframe determining if feature belongs to foreground or background. - :param str method: method used to compute enrichment (only 'fisher' is supported currently). + r""" + This function runs a simple enrichment analysis for significantly + regulated protein sites in a dataset. + + :param regulation_data: pandas.DataFrame resulting from differential + regulation analysis. + :param annotation: pandas.DataFrame with annotations for features + (columns: 'annotation', 'identifier' (feature identifiers), and 'source'). + :param str identifier: name of the column from annotation containing + feature identifiers. + :param list groups: column names from regulation_data containing + group identifiers. + :param str annotation_col: name of the column from annotation containing + annotation terms. + :param str rejected_col: name of the column from regulation_data containing + boolean for rejected null hypothesis. + :param str group_col: column name for new column in annotation dataframe + determining if feature belongs to foreground or background. 
+    :param str method: method used to compute enrichment
+        (only 'fisher' is supported currently).
     :param str regex: how to extract the annotated identifier from the site identifier
-    :return: Pandas dataframe with columns: 'terms', 'identifiers', 'foreground', 'background', 'pvalue', 'padj' and 'rejected'.
+    :return: pandas.DataFrame with columns: 'terms', 'identifiers', 'foreground',
+        'background', 'foreground_pop', 'background_pop', 'pvalue', 'padj' and 'rejected'.
+
+    :raises ValueError: if regulation_data is `None` or empty.

     Example::

-        result = run_site_regulation_enrichment(regulation_data, annotation, identifier='identifier', groups=['group1', 'group2'], annotation_col='annotation', reject_col='rejected', group_col='group', method='fisher', match="(\w+~.+)_\w\d+\-\w+")
+        result = run_site_regulation_enrichment(regulation_data,
+                                                annotation,
+                                                identifier='identifier',
+                                                groups=['group1', 'group2'],
+                                                annotation_col='annotation',
+                                                rejected_col='rejected',
+                                                group_col='group',
+                                                method='fisher',
+                                                regex="(\\w+~.+)_\\w\\d+\\-\\w+"
+                                                )
     """
     result = pd.DataFrame()
-    if regulation_data is not None:
-        if not regulation_data.empty:
-            new_ids = []
-            for ident in regulation_data[identifier].tolist():
-                match = re.search(regex, ident)
-                if match is not None:
-                    new_ids.append(match.group(1))
-                else:
-                    new_ids.append(ident)
-            regulation_data[identifier] = new_ids
-            regulation_data = regulation_data.drop_duplicates()
-            result = run_regulation_enrichment(
-                regulation_data,
-                annotation,
-                identifier,
-                groups,
-                annotation_col,
-                reject_col,
-                group_col,
-                method,
-                correction,
-            )
+    if regulation_data is None or regulation_data.empty:
+        raise ValueError("regulation_data is empty")
+
+    new_ids = []
+    # find any identifiers with a PTM and keep only the protein+gene identifier,
+    # e.g. 'P04637~TP53_S15-p' -> 'P04637~TP53' (site-identifier shape assumed by the regex)
+    for ident in regulation_data[identifier].tolist():
+        match = re.search(regex, ident)
+        if match is not None:
+            new_ids.append(
+                match.group(1)
+            )  # removes the PTM extension of the identifier of CKG
+        else:
+            new_ids.append(ident)
+    # this normalizes the identifiers to ignore the PTM extension
+    regulation_data[identifier] = new_ids  # matches are used as identifiers
+    if remove_duplicates:
+        regulation_data = regulation_data.drop_duplicates(subset=[identifier])
+    # run_regulation_enrichment no longer takes `groups`, so the remaining
+    # arguments are passed by keyword to avoid misaligned positional arguments
+    result = run_regulation_enrichment(
+        regulation_data,
+        annotation,
+        identifier=identifier,
+        annotation_col=annotation_col,
+        rejected_col=rejected_col,
+        group_col=group_col,
+        method=method,
+        correction=correction,
+    )
     return result


 def run_up_down_regulation_enrichment(
-    regulation_data,
-    annotation,
-    identifier="identifier",
-    groups=["group1", "group2"],
-    annotation_col="annotation",
-    reject_col="rejected",
-    group_col="group",
-    method="fisher",
-    correction="fdr_bh",
-    alpha=0.05,
-    lfc_cutoff=1,
-):
+    regulation_data: pd.DataFrame,
+    annotation: pd.DataFrame,
+    identifier: str = "identifier",
+    groups: list[str] = ("group1", "group2"),
+    annotation_col: str = "annotation",
+    # rejected_col:str="rejected",
+    group_col: str = "group",
+    method: str = "fisher",
+    min_detected_in_set: int = 2,
+    correction: str = "fdr_bh",
+    correction_alpha: float = 0.05,
+    lfc_cutoff: float = 1,
+) -> pd.DataFrame:
     """
-    This function runs a simple enrichment analysis for significantly regulated proteins distinguishing between up- and down-regulated.
+    This function runs a simple enrichment analysis for significantly regulated proteins,
+    distinguishing between up- and down-regulated.

-    :param regulation_data: pandas dataframe resulting from differential regulation analysis (CKG's regulation table).
-    :param annotation: pandas dataframe with annotations for features (columns: 'annotation', 'identifier' (feature identifiers), and 'source').
+    :param pandas.DataFrame regulation_data: pandas.DataFrame resulting from differential regulation
+        analysis (CKG's regulation table).
+    :param pandas.DataFrame annotation: pandas.DataFrame with annotations for features
+        (columns: 'annotation', 'identifier' (feature identifiers), and 'source').
     :param str identifier: name of the column from annotation containing feature identifiers.
-    :param list groups: column names from regulation_data containing group identifiers.
+    :param list[str] groups: column names from regulation_data containing group identifiers.
+        See `pandas.DataFrame.groupby`_ for more information.
+
+    .. _pandas.DataFrame.groupby: https://pandas.pydata.org/pandas-docs/stable/\
+reference/api/pandas.DataFrame.groupby.html
     :param str annotation_col: name of the column from annotation containing annotation terms.
-    :param str reject_col: name of the column from regulation_data containing boolean for rejected null hypothesis.
-    :param str group_col: column name for new column in annotation dataframe determining if feature belongs to foreground or background.
-    :param str method: method used to compute enrichment (only 'fisher' is supported currently).
+    :param str rejected_col: name of the column from regulation_data containing a boolean for
+        the rejected null hypothesis (currently determined internally per direction,
+        see the commented-out parameter in the signature).
+    :param str group_col: column name for new column in annotation dataframe determining
+        if feature belongs to foreground or background.
+    :param str method: method used to compute enrichment
+        (only 'fisher' is supported currently).
+    :param int min_detected_in_set: minimum number of foreground features annotated with
+        a term for the term to be tested.
     :param str correction: method to be used for multiple-testing correction
-    :param float alpha: adjusted p-value cutoff to define significance
+    :param float correction_alpha: adjusted p-value cutoff to define significance
     :param float lfc_cutoff: log fold-change cutoff to define practical significance
-    :return: Pandas dataframe with columns: 'terms', 'identifiers', 'foreground', 'background', 'pvalue', 'padj' and 'rejected'.
+    :return: pandas.DataFrame with columns: 'terms', 'identifiers', 'foreground',
+        'background', 'pvalue', 'padj', 'rejected', 'direction' and 'comparison'.
     Example::

-        result = run_up_down_regulation_enrichment(regulation_data, annotation, identifier='identifier', groups=['group1', 'group2'], annotation_col='annotation', reject_col='rejected', group_col='group', method='fisher', correction='fdr_bh', alpha=0.05, lfc_cutoff=1)
+        result = run_up_down_regulation_enrichment(
+            regulation_data,
+            annotation,
+            identifier='identifier',
+            groups=['group1', 'group2'],
+            annotation_col='annotation',
+            group_col='group',
+            method='fisher',
+            correction='fdr_bh',
+            correction_alpha=0.05,
+            lfc_cutoff=1,
+        )
     """
-    enrichment_results = {}
+    if isinstance(groups, str):
+        groups = [groups]
+    if isinstance(groups, tuple):
+        groups = list(groups)
+    if len(groups) != 2:
+        raise ValueError("groups should contain exactly two columns.")
+
+    ret = list()
+    # In case of multiple comparisons this is used to get all possible combinations
     for g1, g2 in regulation_data.groupby(groups).groups:
+        df = regulation_data.groupby(groups).get_group((g1, g2))
+
+        padj_name = "padj"
         if "posthoc padj" in df:
-            df["up_pairwise_regulation"] = (df["posthoc padj"] <= alpha) & (
-                df["log2FC"] >= lfc_cutoff
-            )
-            df["down_pairwise_regulation"] = (df["posthoc padj"] <= alpha) & (
-                df["log2FC"] <= -lfc_cutoff
-            )
-        else:
-            df["up_pairwise_regulation"] = (df["padj"] <= alpha) & (
-                df["log2FC"] >= lfc_cutoff
-            )
-            df["down_pairwise_regulation"] = (df["padj"] <= alpha) & (
-                df["log2FC"] <= -lfc_cutoff
-            )
+            padj_name = "posthoc padj"

-        enrichment = run_regulation_enrichment(
-            df,
-            annotation,
-            identifier=identifier,
-            groups=groups,
-            annotation_col=annotation_col,
-            reject_col="up_pairwise_regulation",
-            group_col=group_col,
-            method=method,
-            correction=correction,
-        )
-        enrichment["direction"] = "upregulated"
-        enrichment_results[g1 + "~" + g2] = enrichment
-        enrichment = run_regulation_enrichment(
-            df,
-            annotation,
-            identifier=identifier,
-            groups=groups,
-            annotation_col=annotation_col,
-            reject_col="down_pairwise_regulation",
-            group_col=group_col,
-            method=method,
-            correction=correction,
+        df["up_pairwise_regulation"] = (df[padj_name] <= correction_alpha) & (
+            df["log2FC"] >= lfc_cutoff
         )
-        enrichment["direction"] = "downregulated"
-        enrichment_results[g1 + "~" + g2] = enrichment_results[g1 + "~" + g2].append(
-            enrichment
+        df["down_pairwise_regulation"] = (df[padj_name] <= correction_alpha) & (
+            df["log2FC"] <= -lfc_cutoff
         )
+        comparison_tag = g1 + "~" + g2
+
+        for rej_col, direction in zip(
+            ("up_pairwise_regulation", "down_pairwise_regulation"),
+            ("upregulated", "downregulated"),
+        ):
+            _enrichment = run_regulation_enrichment(
+                df,
+                annotation,
+                identifier=identifier,
+                annotation_col=annotation_col,
+                rejected_col=rej_col,
+                group_col=group_col,
+                method=method,
+                min_detected_in_set=min_detected_in_set,
+                correction=correction,
+                correction_alpha=correction_alpha,
+            )
+            _enrichment["direction"] = direction
+            _enrichment["comparison"] = comparison_tag
+            ret.append(_enrichment)
+
+    ret = pd.concat(ret)
+
+    return ret

-    return enrichment_results
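A runnable toy sketch of the inputs `run_up_down_regulation_enrichment` expects (column names follow the defaults in the signature above; all values are invented):

    import pandas as pd
    from acore.enrichment_analysis import run_up_down_regulation_enrichment

    regulation_data = pd.DataFrame(
        {
            "identifier": ["P1", "P2", "P3", "P4"],
            "group1": ["A"] * 4,
            "group2": ["B"] * 4,
            "padj": [0.01, 0.20, 0.03, 0.04],
            "log2FC": [2.5, 0.1, -1.8, 1.2],
        }
    )
    annotation = pd.DataFrame(
        {
            "identifier": ["P1", "P2", "P3", "P4"],
            "annotation": ["pathway X", "pathway X", "pathway Y", "pathway X"],
            "source": ["toy"] * 4,
        }
    )
    # one comparison ("A~B"), enrichment run once per direction
    result = run_up_down_regulation_enrichment(
        regulation_data, annotation, min_detected_in_set=1, lfc_cutoff=1
    )

+
+
+# ! to move
+def _annotate_features(
+    features: pd.Series,
+    in_foreground: set[str] | list[str],
+    in_background: set[str] | list[str],
+) -> pd.Series:
+    """
+    Annotate features as foreground or background based on their presence in the
+    foreground and background lists.
+
+    :param features: pandas.Series with features and their annotations.
+    :param in_foreground: list of features identifiers in the foreground. 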
+ :type in_foreground: set or list-like + :param in_background: list of features identifiers in the background. + :type in_background: set or list-like + :return: pandas.Series containing 'foreground' or 'background'. + missing values are preserved. + + Example:: + + result = _annotate_features(features, in_foreground, in_background) + """ + in_either_or = features.isin(in_foreground) | features.isin(in_background) + res = ( + features.where(in_either_or, np.nan) + .mask(features.isin(in_foreground), "foreground") + .mask(features.isin(in_background), "background") + ) + return res def run_regulation_enrichment( - regulation_data, - annotation, - identifier="identifier", - groups=["group1", "group2"], - annotation_col="annotation", - reject_col="rejected", - group_col="group", - method="fisher", - correction="fdr_bh", -): + regulation_data: pd.DataFrame, + annotation: pd.DataFrame, + identifier: str = "identifier", + annotation_col: str = "annotation", + rejected_col: str = "rejected", + group_col: str = "group", + method: str = "fisher", + min_detected_in_set: int = 2, + correction: str = "fdr_bh", + correction_alpha: float = 0.05, +) -> pd.DataFrame: """ - This function runs a simple enrichment analysis for significantly regulated features in a dataset. + This function runs a simple enrichment analysis for significantly regulated features + in a dataset. - :param regulation_data: pandas dataframe resulting from differential regulation analysis. - :param annotation: pandas dataframe with annotations for features (columns: 'annotation', 'identifier' (feature identifiers), and 'source'). + :param regulation_data: pandas.DataFrame resulting from differential regulation analysis. + :param annotation: pandas.DataFrame with annotations for features + (columns: 'annotation', 'identifier' (feature identifiers), and 'source'). :param str identifier: name of the column from annotation containing feature identifiers. - :param list groups: column names from regulation_data containing group identifiers. + It should also be present in `regulation_data`. :param str annotation_col: name of the column from annotation containing annotation terms. - :param str reject_col: name of the column from regulatio_data containing boolean for rejected null hypothesis. - :param str group_col: column name for new column in annotation dataframe determining if feature belongs to foreground or background. + :param str rejected_col: name of the column from `regulation_data` containing boolean for + rejected null hypothesis. + :param str group_col: column name for new column in annotation dataframe determining + if feature belongs to foreground or background. :param str method: method used to compute enrichment (only 'fisher' is supported currently). :param str correction: method to be used for multiple-testing correction - :return: Pandas dataframe with columns: 'terms', 'identifiers', 'foreground', 'background', 'pvalue', 'padj' and 'rejected'. + :return: pandas.DataFrame with columns: 'terms', 'identifiers', 'foreground', + 'background', 'foreground_pop', 'background_pop', 'pvalue', 'padj' and 'rejected'. 
     Example::

-        result = run_regulation_enrichment(regulation_data, annotation, identifier='identifier', groups=['group1', 'group2'], annotation_col='annotation', reject_col='rejected', group_col='group', method='fisher')
+        result = run_regulation_enrichment(
+            regulation_data,
+            annotation,
+            identifier='identifier',
+            annotation_col='annotation',
+            rejected_col='rejected',
+            group_col='group',
+            method='fisher',
+            min_detected_in_set=2,
+            correction='fdr_bh',
+            correction_alpha=0.05,
+        )
     """
-    result = {}
-    foreground_list = (
-        regulation_data[regulation_data[reject_col]][identifier].unique().tolist()
-    )
-    background_list = (
-        regulation_data[~regulation_data[reject_col]][identifier].unique().tolist()
-    )
+    # ? can we remove NA features in that column?
+    mask_rejected = regulation_data[rejected_col].astype(bool)
+    foreground_list = regulation_data.loc[mask_rejected, identifier].unique()
+    background_list = regulation_data.loc[~mask_rejected, identifier].unique()
     foreground_pop = len(foreground_list)
-    background_pop = len(regulation_data[identifier].unique().tolist())
-    grouping = []
-    for i in annotation[identifier]:
-        if i in foreground_list:
-            grouping.append("foreground")
-        elif i in background_list:
-            grouping.append("background")
-        else:
-            grouping.append(np.nan)
-    annotation[group_col] = grouping
+    background_pop = len(regulation_data[identifier].unique())
+    # needs to allow for missing annotations
+    annotation[group_col] = _annotate_features(
+        features=annotation[identifier],
+        in_foreground=foreground_list,
+        in_background=background_list,
+    )
     annotation = annotation.dropna(subset=[group_col])

     result = run_enrichment(
@@ -249,66 +377,92 @@ def run_regulation_enrichment(
         identifier_col=identifier,
         method=method,
         correction=correction,
+        min_detected_in_set=min_detected_in_set,
+        correction_alpha=correction_alpha,
     )

     return result


 def run_enrichment(
-    data,
-    foreground_id,
-    background_id,
-    foreground_pop,
-    background_pop,
-    annotation_col="annotation",
-    group_col="group",
-    identifier_col="identifier",
-    method="fisher",
-    correction="fdr_bh",
-):
+    data: pd.DataFrame,
+    foreground_id: str,
+    background_id: str,
+    foreground_pop: int,
+    background_pop: int,
+    min_detected_in_set: int = 2,
+    annotation_col: str = "annotation",
+    group_col: str = "group",
+    identifier_col: str = "identifier",
+    method: str = "fisher",
+    correction: str = "fdr_bh",
+    correction_alpha: float = 0.05,
+) -> pd.DataFrame:
     """
-    Computes enrichment of the foreground relative to a given backgroung, using Fisher's exact test, and corrects for multiple hypothesis testing.
+    Computes enrichment of the foreground relative to a given background,
+    using Fisher's exact test, and corrects for multiple hypothesis testing.

-    :param data: pandas dataframe with annotations for dataset features (columns: 'annotation', 'identifier', 'source', 'group').
+    :param data: pandas.DataFrame with annotations for dataset features
+        (columns: 'annotation', 'identifier', 'group').
     :param str foreground_id: group identifier of features that belong to the foreground.
     :param str background_id: group identifier of features that belong to the background.
+    :param int foreground_pop: number of features in the foreground.
+    :param int background_pop: number of features in the total population
+        (foreground plus background).
     :param str annotation_col: name of the column containing annotation terms.
     :param str group_col: name of column containing the group identifiers.
     :param str identifier_col: name of column containing dependent variables identifiers.
     :param str method: method used to compute enrichment (only 'fisher' is supported currently).
-    :return: Pandas dataframe with annotation terms, features, number of foregroung/background features in each term, p-values and corrected p-values (columns: 'terms', 'identifiers', 'foreground', 'background', 'pvalue', 'padj' and 'rejected').
+    :param str correction: method to be used for multiple-testing correction.
+    :param float correction_alpha: adjusted p-value cutoff to define significance.
+    :return: pandas.DataFrame with columns: annotation terms, features,
+        number of foreground/background features in each term,
+        p-values and corrected p-values
+        (columns: 'terms', 'identifiers', 'foreground',
+        'background', 'pvalue', 'padj' and 'rejected').

     Example::

-        result = run_enrichment(data, foreground='foreground', background='background', foreground_pop=len(foreground_list), background_pop=len(background_list), annotation_col='annotation', group_col='group', identifier_col='identifier', method='fisher')
+        result = run_enrichment(
+            data,
+            foreground_id='foreground',
+            background_id='background',
+            foreground_pop=len(foreground_list),
+            background_pop=len(background_list),
+            annotation_col='annotation',
+            group_col='group',
+            identifier_col='identifier',
+            method='fisher',
+        )
     """
+    if method != "fisher":
+        raise ValueError("Only Fisher's exact test is supported at the moment.")
+
     result = pd.DataFrame()
-    df = data.copy()
     terms = []
     ids = []
     pvalues = []
     fnum = []
     bnum = []
     countsdf = (
-        df.groupby([annotation_col, group_col])
+        data.groupby([annotation_col, group_col])
         .agg(["count"])[(identifier_col, "count")]
         .reset_index()
     )
     countsdf.columns = [annotation_col, group_col, "count"]
-    for annotation in (
-        countsdf[countsdf[group_col] == foreground_id][annotation_col].unique().tolist()
-    ):
+    for annotation in countsdf.loc[
+        countsdf[group_col] == foreground_id, annotation_col
+    ].unique():
         counts = countsdf[countsdf[annotation_col] == annotation]
         num_foreground = counts.loc[counts[group_col] == foreground_id, "count"].values
         num_background = counts.loc[counts[group_col] == background_id, "count"].values
-
+        # ! counts should always be of length one; squeeze?
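+        # The 2x2 table passed to run_fisher below reads (as inferred from the call):
+        #                    annotated        not annotated
+        #   foreground   num_foreground   foreground_pop - num_foreground
+        #   background   num_background   background_pop - foreground_pop - num_background
+        # background_pop counts the whole population, so the background-only
+        # population is background_pop - foreground_pop.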
         if len(num_foreground) == 1:
             num_foreground = num_foreground[0]
         if len(num_background) == 1:
             num_background = num_background[0]
         else:
             num_background = 0
-        if method == "fisher" and num_foreground > 1:
+        if num_foreground >= min_detected_in_set:
             _, pvalue = run_fisher(
                 [num_foreground, foreground_pop - num_foreground],
                 [num_background, background_pop - foreground_pop - num_background],
@@ -319,15 +473,19 @@
             pvalues.append(pvalue)
             ids.append(
                 ",".join(
-                    df.loc[
-                        (df[annotation_col] == annotation)
-                        & (df[group_col] == foreground_id),
+                    data.loc[
+                        (data[annotation_col] == annotation)
+                        & (data[group_col] == foreground_id),
                         identifier_col,
-                    ].tolist()
+                    ]
                 )
             )
-    if len(pvalues) > 1:
-        rejected, padj = apply_pvalue_correction(pvalues, alpha=0.05, method=correction)
+    if len(pvalues) >= 1:
+        rejected, padj = apply_pvalue_correction(
+            pvalues,
+            alpha=correction_alpha,
+            method=correction,
+        )
         result = pd.DataFrame(
             {
                 "terms": terms,
@@ -338,7 +496,7 @@
                 "background_pop": background_pop,
                 "pvalue": pvalues,
                 "padj": padj,
-                "rejected": rejected,
+                "rejected": rejected.astype(bool),
             }
         )
         result = result.sort_values(by="padj", ascending=True)

     return result


 def run_ssgsea(
-    data,
-    annotation,
-    annotation_col="an notation",
-    identifier_col="identifier",
-    set_index=[],
-    outdir="tmp",
-    min_size=15,
-    max_size=500,
-    scale=False,
-    permutations=0,
-):
+    data: pd.DataFrame,
+    annotation: pd.DataFrame,
+    set_index: list[str] | None = None,
+    annotation_col: str = "annotation",
+    identifier_col: str = "identifier",
+    outdir: str = "tmp",
+    min_size: int = 15,
+    max_size: int = 500,
+    scale: bool = False,
+    permutations: int = 0,
+) -> pd.DataFrame:
     """
-    Project each sample within a data set onto a space of gene set enrichment scores using the ssGSEA projection methodology described in Barbie et al., 2009.
-
-    :param data: pandas dataframe with the quantified features (i.e. subject x proteins)
-    :param annotation: pandas dataframe with the annotation to be used in the enrichment (i.e. CKG pathway annotation file)
+    Project each sample within a data set onto a space of gene set enrichment scores using
+    the single sample gene set enrichment analysis (ssGSEA) projection methodology
+    described in Barbie et al., 2009:
+    https://www.nature.com/articles/nature08460#Sec3 (search "Single Sample" GSEA).
+
+    :param pandas.DataFrame data: pandas.DataFrame with the quantified features
+        (i.e. subject x proteins)
+    :param pandas.DataFrame annotation: pandas.DataFrame with the annotation to be used
+        in the enrichment (i.e. CKG pathway annotation file)
+    :param list[str] set_index: column/s to be used as index. Enrichment will be calculated
+        for these values (i.e. ["subject"] will return a subjects x pathways matrix of
+        enrichment scores)
     :param str annotation_col: name of the column containing annotation terms.
     :param str identifier_col: name of column containing dependent variables identifiers.
-    :param list set_index: column/s to be used as index. Enrichment will be calculated for these values (i.e ["subject"] will return subjects x pathways matrix of enrichment scores)
-    :param str out_dir: directory path where results will be stored (default None, tmp folder is used)
-    :param int min_size: minimum number of features (i.e. proteins) in enriched terms (i.e. pathways)
-    :param int max_size: maximum number of features (i.e. pathways)
+    :param str outdir: directory path where results will be stored
+        (default: "tmp")
+    :param int min_size: minimum number of features (i.e. proteins) in enriched terms
+        (i.e. pathways)
+    :param int max_size: maximum number of features (i.e. proteins) in enriched terms
+        (i.e. pathways)
     :param bool scale: whether or not to scale the data
     :param int permutations: number of permutations used in the ssgsea analysis
-    :return: dictionary with two dataframes: es - enrichment scores, and nes - normalized enrichment scores.
+    :return: pandas.DataFrame containing unnormalized enrichment scores (`ES`) for each sample,
+        and normalized enrichment scores (`NES`) with the enriched `Term` and sample `Name`.
+    :rtype: pandas.DataFrame

     Example::

+        stproject = "P0000008"
-        p = project.Project(stproject, datasets={}, knowledge=None, report={}, configuration_files=None)
+        p = project.Project(
+            stproject,
+            datasets={},
+            knowledge=None,
+            report={},
+            configuration_files=None,
+        )

         p.build_project(False)
         p.generate_report()

         annotations = proteomics_dataset.get_dataframe("pathway annotation")
         processed = proteomics_dataset.get_dataframe('processed')

-        result = run_ssgsea(processed, annotations, annotation_col='annotation', identifier_col='identifier', set_index=['group', 'sample','subject'], outdir=None, min_size=10, scale=False, permutations=0)
-
+        result = run_ssgsea(
+            processed,
+            annotations,
+            annotation_col='annotation',
+            identifier_col='identifier',
+            set_index=['group', 'sample', 'subject'],
+            outdir='tmp',
+            min_size=10,
+            scale=False,
+            permutations=0,
+        )
     """
-    result = {}
     df = data.copy()

     if not os.path.exists(outdir):
         os.makedirs(outdir)

+    # Combine columns to create a unique name for each set (?)
name = [] - index = data[set_index] - for i, row in data[set_index].iterrows(): - name.append("_".join(row[set_index].tolist())) + if set_index is None: + index = data.index.to_frame() + set_index = index.columns.tolist() + else: + index = data[set_index] + df = df.drop(set_index, axis=1) + + for _, row in index.iterrows(): + name.append( + "_".join(row[set_index].tolist()) + ) # this assumes strings as identifiers df["Name"] = name index.index = name - df = df.drop(set_index, axis=1).set_index("Name").transpose() + df = df.set_index("Name").transpose() - if annotation_col in annotation and identifier_col in annotation: - grouped_annotations = ( - annotation.groupby(annotation_col)[identifier_col].apply(list).reset_index() + if not annotation_col in annotation: + raise ValueError( + f"Missing Annotation Column: {annotation_col} as specified by `annotation_col`" ) - fid = uuid.uuid4() - file_path = os.path.join(outdir, str(fid) + ".gmt") - with open(file_path, "w") as out: - for i, row in grouped_annotations.iterrows(): - out.write( - row[annotation_col] - + "\t" - + "\t".join(list(filter(None, row[identifier_col]))) - + "\n" - ) - try: - enrichment = gp.ssgsea( - data=df, - gene_sets=str(file_path), - outdir=outdir, - min_size=min_size, - max_size=max_size, - scale=scale, - permutation_num=permutations, - no_plot=True, - processes=1, - seed=10, - format="png", - ) - - enrichment_es = pd.DataFrame(enrichment.resultsOnSamples).transpose() - enrichment_es = enrichment_es.join(index) - enrichment_nes = enrichment.res2d.transpose() - enrichment_nes = enrichment_nes.join(index) - result = {"es": enrichment_es, "nes": enrichment_nes} - except Exception as e: - print("Error in ssGSEA.", e) - df = None + if not identifier_col in annotation: + raise ValueError( + f"Missing Identifier Column: {identifier_col} as specified by `identifier_col`" + ) + grouped_annotations = ( + annotation.groupby(annotation_col)[identifier_col].apply(list).reset_index() + ) + fid = uuid.uuid4() + file_path = os.path.join(outdir, str(fid) + ".gmt") + with open(file_path, "w", encoding="utf8") as out: + for _, row in grouped_annotations.iterrows(): + out.write( + row[annotation_col] + + "\t" + + "\t".join(list(filter(None, row[identifier_col]))) + + "\n" + ) + enrichment = gp.ssgsea( + data=df, + gene_sets=str(file_path), + outdir=outdir, + min_size=min_size, + max_size=max_size, + scale=scale, + permutation_num=permutations, + no_plot=True, + processes=1, + seed=10, + format="png", + ) + result = pd.DataFrame(enrichment.res2d).set_index("Name") + # potentially return wide format in separate format + # result = {"es": enrichment_es, "nes": enrichment_nes} return result diff --git a/acore/exploratory_analysis.py b/acore/exploratory_analysis.py index b0a09da..356e4a9 100644 --- a/acore/exploratory_analysis.py +++ b/acore/exploratory_analysis.py @@ -6,13 +6,13 @@ from sklearn.manifold import TSNE -def calculate_coefficient_variation(values): +def calculate_coefficient_variation(values: np.ndarray) -> np.ndarray: """ Compute the coefficient of variation, the ratio of the biased standard deviation to the mean, in percentage. For more information visit https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.variation.html. - :param numpy.ndarray values: numpy array + :param numpy.ndarray values: numpy array of log2 transformed values :return: The calculated variation along rows. 
:rtype: numpy.ndarray @@ -29,13 +29,15 @@ def get_coefficient_variation(data, drop_columns, group, columns=["name", "y"]): """ Extracts the coefficients of variation in each group. - :param data: pandas dataframe with samples as rows and protein identifiers as columns (with additional columns 'group', 'sample' and 'subject'). + :param data: pandas dataframe with samples as rows and protein identifiers as columns + (with additional columns 'group', 'sample' and 'subject'). :param list drop_columns: column labels to be dropped from the dataframe :param str group: column label containing group identifiers. :param list columns: names to use for the variable column(s), and for the value column(s) - :return: Pandas dataframe with columns 'name' (protein identifier), 'x' (coefficient of variation), 'y' (mean) and 'group'. + :return: Pandas dataframe with columns 'name' (protein identifier), + 'x' (coefficient of variation), 'y' (mean) and 'group'. - Exmaple:: + Example:: result = get_coefficient_variation(data, drop_columns=['sample', 'subject'], group='group') """ @@ -46,24 +48,27 @@ def get_coefficient_variation(data, drop_columns, group, columns=["name", "y"]): cvs_df = pd.DataFrame() for i in cvs.index: gcvs = cvs[i].tolist() - ints = formated_df.set_index("group").mean().values.tolist() + ints = formated_df.set_index(group).mean().values.tolist() tdf = pd.DataFrame(data={"name": cols, "x": gcvs, "y": ints}) - tdf["group"] = i + tdf[group] = i if cvs_df.empty: cvs_df = tdf.copy() else: - cvs_df = cvs_df.append(tdf) + cvs_df = pd.concat([cvs_df, tdf]) return cvs_df def extract_number_missing(data, min_valid, drop_cols=["sample"], group="group"): """ - Counts how many valid values exist in each column and filters column labels with more valid values than the minimum threshold defined. + Counts how many valid values exist in each column and filters column labels with more + valid values than the minimum threshold defined. :param data: pandas DataFrame with group as rows and protein identifier as column. - :param str group: column label containing group identifiers. If None, number of valid values is counted across all samples, otherwise is counted per unique group identifier. + :param str group: column label containing group identifiers. + If None, number of valid values is counted across all samples, + otherwise is counted per unique group identifier. :param int min_valid: minimum number of valid values to be filtered. :param list drop_columns: column labels to be dropped. :return: List of column labels above the threshold. @@ -89,16 +94,21 @@ def extract_percentage_missing( data, missing_max, drop_cols=["sample"], group="group", how="all" ): """ - Extracts ratio of missing/valid values in each column and filters column labels with lower ratio than the minimum threshold defined. + Extracts ratio of missing/valid values in each column and filters column labels with + lower ratio than the minimum threshold defined. :param data: pandas dataframe with group as rows and protein identifier as column. - :param str group: column label containing group identifiers. If None, ratio is calculated across all samples, otherwise is calculated per unique group identifier. + :param str group: column label containing group identifiers. + If None, ratio is calculated across all samples, + otherwise is calculated per unique group identifier. :param float missing_max: maximum ratio of missing/valid values to be filtered. 
- :param str how: define if labels with a higher percentage of missing values than the threshold in any group ('any') or in all groups ('all') should be filtered + :param str how: define if labels with a higher percentage of missing values than the threshold + in any group ('any') or in all groups ('all') should be filtered :return: List of column labels below the threshold. Example:: - result = extract_percentage_missing(data, missing_max=0.3, drop_cols=['sample'], group='group') + result = extract_percentage_missing(data, missing_max=0.3, + drop_cols=['sample'], group='group') """ if group is None: groups = data.loc[:, data.isnull().mean() <= missing_max].columns @@ -128,81 +138,86 @@ def run_pca( dropna=True, ): """ - Performs principal component analysis and returns the values of each component for each sample and each protein, and the loadings for each protein. \ - For information visit https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html. + Performs principal component analysis and returns the values of each component for each sample + and each protein, and the loadings for each protein. + + For information visit + https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html. - :param data: pandas dataframe with samples as rows and protein identifiers as columns (with additional columns 'group', 'sample' and 'subject'). + :param data: pandas dataframe with samples as rows and protein identifiers as columns + (with additional columns 'group', 'sample' and 'subject'). :param list drop_cols: column labels to be dropped from the dataframe. :param str group: column label containing group identifiers. :param list annotation_cols: list of columns to be added in the scatter plot annotation :param int components: number of components to keep. :param bool dropna: if True removes all columns with any missing values. - :return: tuple: 1) three pandas dataframes: components, loadings and variance; 2) xaxis and yaxis titles with components loadings for plotly. + :return: tuple: 1) three pandas dataframes: components, loadings and variance; 2) + xaxis and yaxis titles with components loadings for plotly. 
Example:: - result = run_pca(data, drop_cols=['sample', 'subject'], group='group', components=2, dropna=True) + result = run_pca(data, drop_cols=['sample', 'subject'], group='group', + components=2, dropna=True) """ np.random.seed(112736) - resultDf = pd.DataFrame() - loadings = pd.DataFrame() var_exp = [] args = {} - if not data.empty: - df = data.copy() - annotations = pd.DataFrame() - if annotation_cols is not None: - if len(list(set(annotation_cols).intersection(data.columns))) > 0: - annotations = data.set_index(group)[annotation_cols] - drop_cols_int = list(set(drop_cols).intersection(df.columns)) - if len(drop_cols_int) > 0: - df = df.drop(drop_cols_int, axis=1) - - y = df[group].tolist() - df = df.set_index(group) - df = df.select_dtypes(["number"]) - if dropna: - df = df.dropna(axis=1) - X = df.values - - if X.size > 0 and X.shape[1] > components: - pca = PCA(n_components=components) - X = pca.fit_transform(X) - var_exp = pca.explained_variance_ratio_ - loadings = pd.DataFrame(pca.components_.transpose()) - loadings.index = df.columns - values = { - index: np.sqrt(np.power(row, 2).sum()) - for index, row in loadings.iterrows() - } - loadings["value"] = loadings.index.map(values.get) - loadings = loadings.sort_values(by="value", ascending=False) - args = { - "x_title": "PC1" + " ({0:.2f})".format(var_exp[0]), - "y_title": "PC2" + " ({0:.2f})".format(var_exp[1]), - "group": "group", - } - if components == 2: - resultDf = pd.DataFrame(X, index=y, columns=["x", "y"]) - resultDf = resultDf.assign(**annotations) - resultDf = resultDf.reset_index() - resultDf.columns = ["group", "x", "y"] + annotation_cols - - loadings.columns = ["x", "y", "value"] - if components > 2: - args.update({"z_title": "PC3" + " ({0:.2f})".format(var_exp[2])}) - resultDf = pd.DataFrame(X, index=y) - resultDf = resultDf.assign(**annotations) - resultDf = resultDf.reset_index() - pca_cols = [] - loading_cols = [] - if components > 3: - pca_cols = [str(i) for i in resultDf.columns[4:]] - loading_cols = [str(i) for i in loadings.columns[3:]] - - resultDf.columns = ["group", "x", "y", "z"] + pca_cols - loadings.columns = ["x", "y", "z"] + loading_cols + if data.empty: + raise ValueError("Dataframe is empty.") + + df = data.copy() + annotations = pd.DataFrame() + if annotation_cols is not None: + if len(list(set(annotation_cols).intersection(data.columns))) > 0: + annotations = data.set_index(group)[annotation_cols] + drop_cols_int = list(set(drop_cols).intersection(df.columns)) + if len(drop_cols_int) > 0: + df = df.drop(drop_cols_int, axis=1) + + y = df[group].tolist() + df = df.set_index(group) + df = df.select_dtypes(["number"]) + if dropna: + df = df.dropna(axis=1) + X = df.values + + if X.size > 0 and X.shape[1] > components: + pca = PCA(n_components=components) + X = pca.fit_transform(X) + var_exp = pca.explained_variance_ratio_ + loadings = pd.DataFrame(pca.components_.transpose()) + loadings.index = df.columns + values = { + index: np.sqrt(np.power(row, 2).sum()) for index, row in loadings.iterrows() + } + loadings["value"] = loadings.index.map(values.get) + loadings = loadings.sort_values(by="value", ascending=False) + args = { + "x_title": "PC1" + " ({0:.2f})".format(var_exp[0]), + "y_title": "PC2" + " ({0:.2f})".format(var_exp[1]), + "group": "group", + } + if components == 2: + resultDf = pd.DataFrame(X, index=y, columns=["x", "y"]) + resultDf = resultDf.assign(**annotations) + resultDf = resultDf.reset_index() + resultDf.columns = ["group", "x", "y"] + annotation_cols + + loadings.columns = ["x", "y", 
"value"] + if components > 2: + args.update({"z_title": "PC3" + " ({0:.2f})".format(var_exp[2])}) + resultDf = pd.DataFrame(X, index=y) + resultDf = resultDf.assign(**annotations) + resultDf = resultDf.reset_index() + pca_cols = [] + loading_cols = [] + if components > 3: + pca_cols = [str(i) for i in resultDf.columns[4:]] + loading_cols = [str(i) for i in loadings.columns[3:]] + + resultDf.columns = ["group", "x", "y", "z"] + pca_cols + loadings.columns = ["x", "y", "z"] + loading_cols return (resultDf, loadings, var_exp), args @@ -219,22 +234,39 @@ def run_tsne( dropna=True, ): """ - Performs t-distributed Stochastic Neighbor Embedding analysis. For more information visit https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html. + Performs t-distributed Stochastic Neighbor Embedding analysis. + + For more information visit + https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html. - :param data: pandas dataframe with samples as rows and protein identifiers as columns (with additional columns 'group', 'sample' and 'subject'). + :param data: pandas dataframe with samples as rows and protein identifiers as columns + (with additional columns 'group', 'sample' and 'subject'). :param list drop_cols: column labels to be dropped from the dataframe. :param str group: column label containing group identifiers. :param int components: dimension of the embedded space. :param list annotation_cols: list of columns to be added in the scatter plot annotation - :param int perplexity: related to the number of nearest neighbors that is used in other manifold learning algorithms. Consider selecting a value between 5 and 50. + :param int perplexity: related to the number of nearest neighbors that is used + in other manifold learning algorithms. + Consider selecting a value between 5 and 50. :param int n_iter: maximum number of iterations for the optimization (at least 250). - :param str init: initialization of embedding ('random', 'pca' or numpy array of shape n_samples x n_components). + :param str init: initialization of embedding ('random', 'pca' or + numpy array of shape n_samples x n_components). :param bool dropna: if True removes all columns with any missing values. - :return: Two dictionaries: 1) pandas dataframe with embedding vectors, 2) xaxis and yaxis titles for plotly. + :return: Two dictionaries: + 1) pandas dataframe with embedding vectors, + 2) xaxis and yaxis titles for plotly. Example:: - result = run_tsne(data, drop_cols=['sample', 'subject'], group='group', components=2, perplexity=40, n_iter=1000, init='pca', dropna=True) + result = run_tsne(data, + drop_cols=['sample', 'subject'], + group='group', + components=2, + perplexity=40, + n_iter=1000, + init='pca', + dropna=True + ) """ result = {} args = {} @@ -256,7 +288,7 @@ def run_tsne( n_components=components, verbose=0, perplexity=perplexity, - max_iter=n_iter, + n_iter=n_iter, init=init, ) X = tsne.fit_transform(X) @@ -289,21 +321,34 @@ def run_umap( dropna=True, ): """ - Performs Uniform Manifold Approximation and Projection. For more information vist https://umap-learn.readthedocs.io. + Performs Uniform Manifold Approximation and Projection. + + For more information vist https://umap-learn.readthedocs.io. - :param data: pandas dataframe with samples as rows and protein identifiers as columns (with additional columns 'group', 'sample' and 'subject'). 
+    :param data: pandas dataframe with samples as rows and protein identifiers as columns
+        (with additional columns 'group', 'sample' and 'subject').
     :param list drop_cols: column labels to be dropped from the dataframe.
     :param str group: column label containing group identifiers.
     :param list annotation_cols: list of columns to be added in the scatter plot annotation
-    :param int n_neighbors: number of neighboring points used in local approximations of manifold structure.
+    :param int n_neighbors: number of neighboring points used
+        in local approximations of manifold structure.
     :param float min_dist: controls how tightly the embedding is allowed to compress points together.
     :param str metric: metric used to measure distance in the input space.
     :param bool dropna: if True removes all columns with any missing values.
-    :return: Two dictionaries: 1) pandas dataframe with embedding of the training data in low-dimensional space, 2) xaxis and yaxis titles for plotly.
+    :return: Two dictionaries:
+        1) pandas dataframe with embedding of the training data in low-dimensional space,
+        2) xaxis and yaxis titles for plotly.

     Example::

-        result = run_umap(data, drop_cols=['sample', 'subject'], group='group', n_neighbors=10, min_dist=0.3, metric='cosine', dropna=True)
+        result = run_umap(data,
+                          drop_cols=['sample', 'subject'],
+                          group='group',
+                          n_neighbors=10,
+                          min_dist=0.3,
+                          metric='cosine',
+                          dropna=True
+                          )
     """
     np.random.seed(1145536)
     result = {}
diff --git a/acore/io/uniprot/__init__.py b/acore/io/uniprot/__init__.py
new file mode 100644
index 0000000..96e6002
--- /dev/null
+++ b/acore/io/uniprot/__init__.py
@@ -0,0 +1,48 @@
+"""Uniprot API user functions for fetching annotations for UniProt IDs and providing
+the results as a pandas.DataFrame."""
+
+import pandas as pd
+
+from .uniprot import (
+    check_id_mapping_results_ready,
+    get_id_mapping_results_link,
+    get_id_mapping_results_search,
+    submit_id_mapping,
+)
+
+
+# function for outside usage
+def fetch_annotations(
+    ids: pd.Index | list,
+    fields: str = "accession,go_p,go_c,go_f",
+) -> pd.DataFrame:
+    """Fetch annotations for UniProt IDs. Combines several calls to the API of UniProt's
+    knowledgebase (KB).
+
+    Parameters
+    ----------
+    ids : pd.Index | list
+        Iterable of UniProt IDs. Annotations are fetched for the specified fields.
+    fields : str, optional
+        Fields to fetch, by default "accession,go_p,go_c,go_f". See the available
+        fields here: https://www.uniprot.org/help/return_fields
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with annotations of the UniProt IDs.
+    """
+    job_id = submit_id_mapping(from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids=ids)
+    # tsv used here to return a DataFrame. Other formats may become available at some point.
+    _format = "tsv"
+    if check_id_mapping_results_ready(job_id):
+        link = get_id_mapping_results_link(job_id)
+        # add fields to the link to get more information
+        # From and Entry (accession) are the same for UniProt IDs.
+        results = get_id_mapping_results_search(
+            link + f"?fields={fields}&format={_format}"
+        )
+        header = results.pop(0).split("\t")
+        results = [line.split("\t") for line in results]
+        df = pd.DataFrame(results, columns=header)
+    return df
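A minimal usage sketch of the new helper (the two UniProt IDs are the ones used in the module's `__main__` example below; requires network access):

    from acore.io.uniprot import fetch_annotations

    # one row per input ID, one column per requested field
    df = fetch_annotations(["P05067", "P12345"], fields="accession,go_p,go_c,go_f")

diff --git a/acore/io/uniprot/uniprot.py b/acore/io/uniprot/uniprot.py
new file mode 100644
index 0000000..c9abf05
--- /dev/null
+++ b/acore/io/uniprot/uniprot.py
@@ -0,0 +1,325 @@
+"""Uniprot ID mapping using Python. 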
+ +Source: https://www.uniprot.org/help/id_mapping +""" + +import json +import re +import time +import zlib +from urllib.parse import parse_qs, urlencode, urlparse +from xml.etree import ElementTree + +import pandas as pd +import requests +from requests.adapters import HTTPAdapter, Retry + +POLLING_INTERVAL = 3 +API_URL = "https://rest.uniprot.org" + + +retries = Retry(total=5, backoff_factor=0.25, status_forcelist=[500, 502, 503, 504]) +session = requests.Session() +session.mount("https://", HTTPAdapter(max_retries=retries)) + + +def check_response(response): + try: + response.raise_for_status() + except requests.HTTPError: + print(response.json()) + raise + + +def submit_id_mapping(from_db, to_db, ids): + request = requests.post( + f"{API_URL}/idmapping/run", + data={"from": from_db, "to": to_db, "ids": ",".join(ids)}, + ) + check_response(request) + return request.json()["jobId"] + + +def get_next_link(headers): + re_next_link = re.compile(r'<(.+)>; rel="next"') + if "Link" in headers: + match = re_next_link.match(headers["Link"]) + if match: + return match.group(1) + + +def check_id_mapping_results_ready(job_id): + while True: + request = session.get(f"{API_URL}/idmapping/status/{job_id}") + check_response(request) + j = request.json() + if "jobStatus" in j: + if j["jobStatus"] in ("NEW", "RUNNING"): + print(f"Retrying in {POLLING_INTERVAL}s") + time.sleep(POLLING_INTERVAL) + else: + raise Exception(j["jobStatus"]) + else: + return bool(j["results"] or j["failedIds"]) + + +def get_batch(batch_response, file_format, compressed): + batch_url = get_next_link(batch_response.headers) + while batch_url: + batch_response = session.get(batch_url) + batch_response.raise_for_status() + yield decode_results(batch_response, file_format, compressed) + batch_url = get_next_link(batch_response.headers) + + +def combine_batches(all_results, batch_results, file_format): + if file_format == "json": + for key in ("results", "failedIds"): + if key in batch_results and batch_results[key]: + all_results[key] += batch_results[key] + elif file_format == "tsv": + return all_results + batch_results[1:] + else: + return all_results + batch_results + return all_results + + +def get_id_mapping_results_link(job_id): + url = f"{API_URL}/idmapping/details/{job_id}" + request = session.get(url) + check_response(request) + return request.json()["redirectURL"] + + +def decode_results(response, file_format, compressed): + if compressed: + decompressed = zlib.decompress(response.content, 16 + zlib.MAX_WBITS) + if file_format == "json": + j = json.loads(decompressed.decode("utf-8")) + return j + elif file_format == "tsv": + return [line for line in decompressed.decode("utf-8").split("\n") if line] + elif file_format == "xlsx": + return [decompressed] + elif file_format == "xml": + return [decompressed.decode("utf-8")] + else: + return decompressed.decode("utf-8") + elif file_format == "json": + return response.json() + elif file_format == "tsv": + return [line for line in response.text.split("\n") if line] + elif file_format == "xlsx": + return [response.content] + elif file_format == "xml": + return [response.text] + return response.text + + +def get_xml_namespace(element): + m = re.match(r"\{(.*)\}", element.tag) + return m.groups()[0] if m else "" + + +def merge_xml_results(xml_results): + merged_root = ElementTree.fromstring(xml_results[0]) + for result in xml_results[1:]: + root = ElementTree.fromstring(result) + for child in root.findall("{http://uniprot.org/uniprot}entry"): + merged_root.insert(-1, child) + 
ElementTree.register_namespace("", get_xml_namespace(merged_root[0]))
+    return ElementTree.tostring(merged_root, encoding="utf-8", xml_declaration=True)
+
+
+def print_progress_batches(batch_index, size, total):
+    n_fetched = min((batch_index + 1) * size, total)
+    print(f"Fetched: {n_fetched} / {total}")
+
+
+def get_id_mapping_results_search(url):
+    parsed = urlparse(url)
+    query = parse_qs(parsed.query)
+    file_format = query["format"][0] if "format" in query else "json"
+    if "size" in query:
+        size = int(query["size"][0])
+    else:
+        size = 500
+        query["size"] = size
+    compressed = (
+        query["compressed"][0].lower() == "true" if "compressed" in query else False
+    )
+    parsed = parsed._replace(query=urlencode(query, doseq=True))
+    url = parsed.geturl()
+    request = session.get(url)
+    check_response(request)
+    results = decode_results(request, file_format, compressed)
+    total = int(request.headers["x-total-results"])
+    print_progress_batches(0, size, total)
+    for i, batch in enumerate(get_batch(request, file_format, compressed), 1):
+        results = combine_batches(results, batch, file_format)
+        print_progress_batches(i, size, total)
+    if file_format == "xml":
+        return merge_xml_results(results)
+    return results
+
+
+def get_id_mapping_results_stream(url):
+    if "/stream/" not in url:
+        url = url.replace("/results/", "/results/stream/")
+    request = session.get(url)
+    check_response(request)
+    parsed = urlparse(url)
+    query = parse_qs(parsed.query)
+    file_format = query["format"][0] if "format" in query else "json"
+    compressed = (
+        query["compressed"][0].lower() == "true" if "compressed" in query else False
+    )
+    return decode_results(request, file_format, compressed)
+
+
+if __name__ == "__main__":
+    # id mapping is used to create a link to a query (you can see the json in the browser)
+    # UniProtKB is the knowledgebase integrating all kinds of other databases
+    import pandas as pd
+
+    job_id = submit_id_mapping(
+        from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids=["P05067", "P12345"]
+    )
+
+    if check_id_mapping_results_ready(job_id):
+        link = get_id_mapping_results_link(job_id)
+        # add fields to the link to get more information
+        # From and Entry (accession) are the same. 
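+        # Note: the same link can also be fetched in one go, without pagination,
+        # via get_id_mapping_results_stream(link) defined above.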
+ results = get_id_mapping_results_search( + link + "?fields=accession,go_p,go_c,go_f&format=tsv" + ) + # see the available fields you can add + # https://www.uniprot.org/help/return_fields + # and the available formats: json and tsv (for most endpoints) + # https://www.uniprot.org/help/api_queries#tips + print(results) + header = results.pop(0).split("\t") + results = [line.split("\t") for line in results] + df = pd.DataFrame(results, columns=header) + # result from: + # df.to_dict(orient="records") + records = [ + { + "From": "P05067", + "Gene Ontology (biological process)": ( + "adult locomotory behavior [GO:0008344]; amyloid fibril formation" + " [GO:1990000]; astrocyte activation [GO:0048143]; astrocyte activation" + " involved in immune response [GO:0002265]; axo-dendritic transport" + " [GO:0008088]; axon midline choice point recognition [GO:0016199];" + " axonogenesis [GO:0007409]; cell adhesion [GO:0007155]; cellular" + " response to amyloid-beta [GO:1904646]; central nervous system" + " development [GO:0007417]; cholesterol metabolic process [GO:0008203];" + " cognition [GO:0050890]; collateral sprouting in absence of injury" + " [GO:0048669]; cytosolic mRNA polyadenylation [GO:0180011]; dendrite" + " development [GO:0016358]; endocytosis [GO:0006897]; extracellular" + " matrix organization [GO:0030198]; forebrain development [GO:0030900];" + " G2/M transition of mitotic cell cycle [GO:0000086]; intracellular" + " copper ion homeostasis [GO:0006878]; ionotropic glutamate receptor" + " signaling pathway [GO:0035235]; learning [GO:0007612]; learning or" + " memory [GO:0007611]; locomotory behavior [GO:0007626]; mating" + " behavior [GO:0007617]; microglia development [GO:0014005]; microglial" + " cell activation [GO:0001774]; modulation of excitatory postsynaptic" + " potential [GO:0098815]; negative regulation of cell population" + " proliferation [GO:0008285]; negative regulation of gene expression" + " [GO:0010629]; negative regulation of long-term synaptic potentiation" + " [GO:1900272]; negative regulation of neuron differentiation" + " [GO:0045665]; neuromuscular process controlling balance [GO:0050885];" + " neuron apoptotic process [GO:0051402]; neuron cellular homeostasis" + " [GO:0070050]; neuron projection development [GO:0031175]; neuron" + " projection maintenance [GO:1990535]; neuron remodeling [GO:0016322];" + " NMDA selective glutamate receptor signaling pathway [GO:0098989];" + " Notch signaling pathway [GO:0007219]; positive regulation of amyloid" + " fibril formation [GO:1905908]; positive regulation of" + " calcium-mediated signaling [GO:0050850]; positive regulation of" + " chemokine production [GO:0032722]; positive regulation of ERK1 and" + " ERK2 cascade [GO:0070374]; positive regulation of G2/M transition of" + " mitotic cell cycle [GO:0010971]; positive regulation of gene" + " expression [GO:0010628]; positive regulation of glycolytic process" + " [GO:0045821]; positive regulation of inflammatory response" + " [GO:0050729]; positive regulation of interleukin-1 beta production" + " [GO:0032731]; positive regulation of interleukin-6 production" + " [GO:0032755]; positive regulation of JNK cascade [GO:0046330];" + " positive regulation of long-term synaptic potentiation [GO:1900273];" + " positive regulation of mitotic cell cycle [GO:0045931]; positive" + " regulation of non-canonical NF-kappaB signal transduction" + " [GO:1901224]; positive regulation of peptidyl-serine phosphorylation" + " [GO:0033138]; positive regulation of peptidyl-threonine" + " 
phosphorylation [GO:0010800]; positive regulation of protein" + " metabolic process [GO:0051247]; positive regulation of protein" + " phosphorylation [GO:0001934]; positive regulation of T cell migration" + " [GO:2000406]; positive regulation of transcription by RNA polymerase" + " II [GO:0045944]; positive regulation of tumor necrosis factor" + " production [GO:0032760]; regulation of gene expression [GO:0010468];" + " regulation of long-term neuronal synaptic plasticity [GO:0048169];" + " regulation of multicellular organism growth [GO:0040014]; regulation" + " of peptidyl-tyrosine phosphorylation [GO:0050730]; regulation of" + " presynapse assembly [GO:1905606]; regulation of spontaneous synaptic" + " transmission [GO:0150003]; regulation of synapse structure or" + " activity [GO:0050803]; regulation of translation [GO:0006417];" + " regulation of Wnt signaling pathway [GO:0030111]; response to" + " interleukin-1 [GO:0070555]; response to oxidative stress" + " [GO:0006979]; smooth endoplasmic reticulum calcium ion homeostasis" + " [GO:0051563]; suckling behavior [GO:0001967]; synapse organization" + " [GO:0050808]; synaptic assembly at neuromuscular junction" + " [GO:0051124]; visual learning [GO:0008542]" + ), + "Gene Ontology (cellular component)": ( + "apical part of cell [GO:0045177]; axon [GO:0030424]; cell surface" + " [GO:0009986]; cell-cell junction [GO:0005911]; ciliary rootlet" + " [GO:0035253]; clathrin-coated pit [GO:0005905]; COPII-coated ER to" + " Golgi transport vesicle [GO:0030134]; cytoplasm [GO:0005737]; cytosol" + " [GO:0005829]; dendrite [GO:0030425]; dendritic shaft [GO:0043198];" + " dendritic spine [GO:0043197]; early endosome [GO:0005769];" + " endoplasmic reticulum [GO:0005783]; endoplasmic reticulum lumen" + " [GO:0005788]; endosome [GO:0005768]; endosome lumen [GO:0031904];" + " extracellular exosome [GO:0070062]; extracellular region" + " [GO:0005576]; extracellular space [GO:0005615]; Golgi apparatus" + " [GO:0005794]; Golgi lumen [GO:0005796]; Golgi-associated vesicle" + " [GO:0005798]; growth cone [GO:0030426]; membrane [GO:0016020];" + " membrane raft [GO:0045121]; mitochondrial inner membrane" + " [GO:0005743]; neuromuscular junction [GO:0031594]; nuclear envelope" + " lumen [GO:0005641]; perikaryon [GO:0043204]; perinuclear region of" + " cytoplasm [GO:0048471]; plasma membrane [GO:0005886]; platelet alpha" + " granule lumen [GO:0031093]; presynaptic active zone [GO:0048786];" + " receptor complex [GO:0043235]; recycling endosome [GO:0055037];" + " smooth endoplasmic reticulum [GO:0005790]; spindle midzone" + " [GO:0051233]; synapse [GO:0045202]; synaptic vesicle [GO:0008021];" + " trans-Golgi network membrane [GO:0032588]" + ), + "Gene Ontology (molecular function)": ( + "DNA binding [GO:0003677]; enzyme binding [GO:0019899]; heparin binding" + " [GO:0008201]; identical protein binding [GO:0042802]; protein" + " serine/threonine kinase binding [GO:0120283]; PTB domain binding" + " [GO:0051425]; receptor ligand activity [GO:0048018]; RNA polymerase" + " II cis-regulatory region sequence-specific DNA binding [GO:0000978];" + " serine-type endopeptidase inhibitor activity [GO:0004867]; signaling" + " receptor activator activity [GO:0030546]; signaling receptor binding" + " [GO:0005102]; transition metal ion binding [GO:0046914]" + ), + }, + { + "From": "P12345", + "Gene Ontology (biological process)": ( + "2-oxoglutarate metabolic process [GO:0006103]; aspartate catabolic" + " process [GO:0006533]; aspartate metabolic process [GO:0006531];" + " glutamate 
metabolic process [GO:0006536]; lipid transport"
+            " [GO:0006869]; protein folding [GO:0006457]"
+        ),
+        "Gene Ontology (cellular component)": (
+            "mitochondrial matrix [GO:0005759]; mitochondrion [GO:0005739]; plasma"
+            " membrane [GO:0005886]"
+        ),
+        "Gene Ontology (molecular function)": (
+            "kynurenine-oxoglutarate transaminase activity [GO:0016212];"
+            " L-aspartate:2-oxoglutarate aminotransferase activity [GO:0004069];"
+            " pyridoxal phosphate binding [GO:0030170]"
+        ),
+    },
+]
diff --git a/acore/multiple_testing.py b/acore/multiple_testing.py
index 35fd5d6..5261b39 100644
--- a/acore/multiple_testing.py
+++ b/acore/multiple_testing.py
@@ -5,37 +5,52 @@
 from sklearn.utils import shuffle
 from statsmodels.stats import multitest

+# ? dictionary with available methods in statsmodels.stats.multitest.multipletests:
+# multitest.multitest_methods_names

-def apply_pvalue_correction(pvalues, alpha=0.05, method="bonferroni"):
+
+def apply_pvalue_correction(
+    pvalues: np.ndarray, alpha: float = 0.05, method: str = "bonferroni"
+) -> tuple[np.ndarray, np.ndarray]:
     """
-    Performs p-value correction using the specified method. For more information visit https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html.
+    Performs p-value correction using the specified method as in
+    statsmodels.stats.multitest.multipletests_.
+
+    .. _statsmodels.stats.multitest.multipletests: \
+https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html

     :param numpy.ndarray pvalues: set of p-values of the individual tests.
     :param float alpha: error rate.
     :param str method: method of p-value correction:
-        - bonferroni : one-step correction
-        - sidak : one-step correction
-        - holm-sidak : step down method using Sidak adjustments
-        - holm : step-down method using Bonferroni adjustments
-        - simes-hochberg : step-up method (independent)
-        - hommel : closed method based on Simes tests (non-negative)
-        - fdr_bh : Benjamini/Hochberg (non-negative)
-        - fdr_by : Benjamini/Yekutieli (negative)
-        - fdr_tsbh : two stage fdr correction (non-negative)
-        - fdr_tsbky : two stage fdr correction (non-negative)
-    :return: Tuple with two arrays, boolen for rejecting H0 hypothesis and float for adjusted p-value.
-    Exmaple::
+        - 'bonferroni' : one-step correction
+        - 'sidak' : one-step correction
+        - 'holm-sidak' : step down method using Sidak adjustments
+        - 'holm' : step-down method using Bonferroni adjustments
+        - 'simes-hochberg' : step-up method (independent)
+        - 'hommel' : closed method based on Simes tests (non-negative)
+        - 'fdr_bh' : Benjamini/Hochberg (non-negative)
+        - 'fdr_by' : Benjamini/Yekutieli (negative)
+        - 'fdr_tsbh' : two stage fdr correction (non-negative)
+        - 'fdr_tsbky' : two stage fdr correction (non-negative)
+
+    :return: Tuple with two `numpy.array`s, boolean for rejecting H0 hypothesis
+        and float for adjusted p-value. Can contain missing values if `pvalues`
+        contains missing values.
+
+    Example::

         result = apply_pvalue_correction(pvalues, alpha=0.05, method='bonferroni')
     """
     p = np.array(pvalues)
     mask = np.isfinite(p)
     pval_corrected = np.full(p.shape, np.nan)
-    pval_corrected[mask] = multitest.multipletests(p[mask], alpha, method)[1]
-    rejected = [p <= alpha for p in pval_corrected]
+    _rejected, _pvals_corrected, _, _ = multitest.multipletests(p[mask], alpha, method)
+    pval_corrected[mask] = _pvals_corrected
+    rejected = np.full(p.shape, np.nan)
+    rejected[mask] = _rejected

-    return (rejected, pval_corrected.tolist())
+    return (rejected, pval_corrected)


 def apply_pvalue_fdrcorrection(pvalues, alpha=0.05, method="indep"):
@@ -47,7 +62,7 @@
     :param str method: method of p-value correction ('indep', 'negcorr').
     :return: Tuple with two arrays, boolean for rejecting H0 hypothesis and float for adjusted p-value.

-    Exmaple::
+    Example::

         result = apply_pvalue_fdrcorrection(pvalues, alpha=0.05, method='indep')
     """
@@ -65,7 +80,7 @@
     :param str method: method of p-value correction ('bky', 'bh').
     :return: Tuple with two arrays, boolean for rejecting H0 hypothesis and float for adjusted p-value.

-    Exmaple::
+    Example::

         result = apply_pvalue_twostage_fdrcorrection(pvalues, alpha=0.05, method='bh')
     """
diff --git a/acore/network_analysis.py b/acore/network_analysis.py
index 6af038e..50f9376 100644
--- a/acore/network_analysis.py
+++ b/acore/network_analysis.py
@@ -2,10 +2,11 @@
 import networkx as nx
 import pandas as pd
 import snf
-import utils
 from sklearn import cluster
 from sklearn.cluster import AffinityPropagation
+
+from . import utils
+

 def get_network_communities(graph, args):
     """
diff --git a/docs/api_examples/enrichment_analysis.ipynb b/docs/api_examples/enrichment_analysis.ipynb
new file mode 100644
index 0000000..17e96d8
--- /dev/null
+++ b/docs/api_examples/enrichment_analysis.ipynb
@@ -0,0 +1,650 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "f79a8051",
+   "metadata": {
+    "lines_to_next_cell": 2
+   },
+   "source": [
+    "# Enrichment analysis\n",
+    "\n",
+    "- we need some groups of genes to compute clusters\n",
+    "- we need functional annotations, i.e. a category summarizing a set of genes.\n",
+    "\n",
+    "You can start by watching Lars Juhl Jensen's brief introduction to enrichment analysis\n",
+    "on [youtube](https://www.youtube.com/watch?v=2NC1QOXmc5o).\n",
+    "\n",
+    "Use example data for ovarian cancer\n",
+    "([PXD010372](https://github.com/Multiomics-Analytics-Group/acore/tree/main/example_data/PXD010372))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "956ed7b7",
+   "metadata": {
+    "lines_to_next_cell": 2,
+    "tags": [
+     "hide-output"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "%pip install acore vuecore"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a3030d08",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "\n",
+    "import dsp_pandas\n",
+    "import pandas as pd\n",
+    "\n",
+    "import acore\n",
+    "import acore.differential_regulation\n",
+    "import acore.enrichment_analysis\n",
+    "from acore.io.uniprot import fetch_annotations\n",
+    "\n",
+    "dsp_pandas.format.set_pandas_options(max_colwidth=60)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fddd607c",
+   "metadata": {},
+   "source": [
+    "Parameters of this notebook"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6af9349a",
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "base_path: str = (\n",
+    "    \"https://raw.githubusercontent.com/Multiomics-Analytics-Group/acore/refs/heads/main/\"\n",
+    "    \"example_data/PXD010372/processed\"\n",
+    ")\n",
+    "omics: str = f\"{base_path}/omics.csv\"\n",
+    "meta_pgs: str = f\"{base_path}/meta_pgs.csv\"\n",
+    "meta: str = f\"{base_path}/meta_patients.csv\"\n",
+    "features_to_sample: int = 100"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "10ed1830",
+   "metadata": {},
+   "source": [
+    "# Load processed data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d70ef4c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_omics = pd.read_csv(omics, index_col=0)\n",
+    "df_meta_pgs = pd.read_csv(meta_pgs, index_col=0)\n",
+    "df_meta = pd.read_csv(meta, index_col=0)\n",
+    "df_omics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b3897e1f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ax = df_omics.notna().sum().sort_values(ascending=True).plot()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8ce47108",
+   "metadata": {},
+   "source": [
+    "Keep only features with a certain amount of non-NaN values and select 100 of these\n",
+    "for illustration. Add the ones which were differentially regulated in the ANOVA using all\n",
+    "the protein groups."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f3a8ab49",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "idx_always_included = [\"Q5HYN5\", \"P39059\", \"O43432\", \"O43175\"]\n",
+    "df_omics[idx_always_included]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1145a2cd",
+   "metadata": {
+    "lines_to_next_cell": 2,
+    "tags": [
+     "hide-input"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "df_omics = (\n",
+    "    df_omics\n",
+    "    # .dropna(axis=1)\n",
+    "    .drop(idx_always_included, axis=1)\n",
+    "    .dropna(thresh=18, axis=1)\n",
+    "    .sample(\n",
+    "        features_to_sample - len(idx_always_included),\n",
+    "        axis=1,\n",
+    "        random_state=42,\n",
+    "    )\n",
+    "    .join(df_omics[idx_always_included])\n",
+    ")\n",
+    "df_omics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aea77e80",
+   "metadata": {
+    "lines_to_next_cell": 2
+   },
+   "outputs": [],
+   "source": [
+    "df_meta"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4bbf5dc4",
+   "metadata": {},
+   "source": [
+    "## Compute up and downregulated genes\n",
+    "These will be used to find enrichments in the sets of up- and downregulated genes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "231bb6da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "group = \"Status\"\n",
+    "covariates = [\"PlatinumValue\"]\n",
+    "diff_reg = acore.differential_regulation.run_anova(\n",
+    "    df_omics.join(df_meta[[group]]),\n",
+    "    drop_cols=[],\n",
+    "    subject=None,\n",
+    "    group=group,\n",
+    ")\n",
+    "diff_reg.describe(exclude=[\"float\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1e347b06",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "diff_reg[\"rejected\"] = diff_reg[\"rejected\"].astype(bool) # ! needs to be fixed in anova\n",
+    "diff_reg.query(\"rejected\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d6c0a225",
+   "metadata": {},
+   "source": [
+    "## Find functional annotations, here pathways\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d2668415",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fname_annotations = f\"downloaded/annotations_{features_to_sample}.csv\"\n",
+    "fname = Path(fname_annotations)\n",
+    "try:\n",
+    "    annotations = pd.read_csv(fname, index_col=0)\n",
+    "    print(f\"Loaded annotations from {fname}\")\n",
+    "except FileNotFoundError:\n",
+    "    print(f\"Fetching annotations for {df_omics.columns.size} UniProt IDs.\")\n",
+    "    fields = \"go_p,go_c,go_f\"\n",
+    "    annotations = fetch_annotations(df_omics.columns, fields=fields)\n",
+    "    # First column (`From`) is additional to specified fields\n",
+    "    d_fields_to_col = {k: v for k, v in zip(fields.split(\",\"), annotations.columns[1:])}\n",
+    "\n",
+    "    # expand go terms\n",
+    "    to_expand = list()\n",
+    "    for field in d_fields_to_col:\n",
+    "        if \"go_\" in field:\n",
+    "            col = d_fields_to_col[field]\n",
+    "            annotations[col] = annotations[col].str.split(\";\")\n",
+    "            to_expand.append(col)\n",
+    "    for col in to_expand:\n",
+    "        # this is a bit wasteful. Processing to stack format should be done here.\n",
+    "        annotations = annotations.explode(col, ignore_index=True)\n",
+    "    # process other than go term columns\n",
+    "    annotations = (\n",
+    "        annotations.set_index(\"From\")\n",
+    "        .rename_axis(\"identifier\")\n",
+    "        # .drop(\"Entry\", axis=1)\n",
+    "        .rename_axis(\"source\", axis=1)\n",
+    "        .stack()\n",
+    "        .to_frame(\"annotation\")\n",
+    "        .reset_index()\n",
+    "        .drop_duplicates(ignore_index=True)\n",
+    "    )\n",
+    "    fname.parent.mkdir(exist_ok=True, parents=True)\n",
+    "    annotations.to_csv(fname, index=True)\n",
+    "\n",
+    "annotations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d4734452",
+   "metadata": {},
+   "source": [
+    "See how many protein groups are associated with each annotation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "57fddefb",
+   "metadata": {
+    "tags": [
+     "hide-input"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "_ = (\n",
+    "    annotations.groupby(\"annotation\")\n",
+    "    .size()\n",
+    "    .value_counts()\n",
+    "    .sort_index()\n",
+    "    .plot(kind=\"bar\")\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4165bc94",
+   "metadata": {},
+   "source": [
+    "## Enrichment analysis\n",
+    "This is done separately for up- and downregulated genes, as it is assumed that biological\n",
+    "processes are regulated in one direction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1a631495",
+   "metadata": {
+    "tags": [
+     "hide-input"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "diff_reg.query(\"rejected\")[\n",
+    "    [\n",
+    "        \"identifier\",\n",
+    "        \"group1\",\n",
+    "        \"group2\",\n",
+    "        \"pvalue\",\n",
+    "        \"padj\",\n",
+    "        \"rejected\",\n",
+    "        \"log2FC\",\n",
+    "        \"FC\",\n",
+    "    ]\n",
+    "].sort_values(\"log2FC\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7380a528",
+   "metadata": {},
+   "source": [
+    "- this additionally sets a fold change cutoff\n",
+    "- and the foreground and background populations change due to the separation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2a73ed6e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ret = acore.enrichment_analysis.run_up_down_regulation_enrichment(\n",
+    "    regulation_data=diff_reg,\n",
+    "    annotation=annotations,\n",
+    "    min_detected_in_set=2, # ! the default is 2 (more conservative than 1)\n",
+    "    lfc_cutoff=0.5, # ! the default is 1\n",
+    ")\n",
+    "ret"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5cb036be",
+   "metadata": {},
+   "source": [
+    "Here we see differences for the same set of differentially regulated protein groups;\n",
+    "the cutoff can be disabled using lfc_cutoff=0."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e5e2b61",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ret = acore.enrichment_analysis.run_up_down_regulation_enrichment(\n",
+    "    regulation_data=diff_reg,\n",
+    "    annotation=annotations,\n",
+    "    min_detected_in_set=1, # ! the default is 2, which is more conservative\n",
+    "    lfc_cutoff=0.1, # ! the default is 1\n",
+    ")\n",
+    "ret"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e3530547",
+   "metadata": {},
+   "source": [
+    "Or restrict the analysis to functional annotations for which at least 2\n",
+    "protein groups were found to be upregulated."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3ea367ca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ret = acore.enrichment_analysis.run_up_down_regulation_enrichment(\n",
+    "    regulation_data=diff_reg,\n",
+    "    annotation=annotations,\n",
+    "    min_detected_in_set=2,\n",
+    "    lfc_cutoff=0.5, # ! the default is 1\n",
+    ")\n",
+    "ret"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ecf75e7c",
+   "metadata": {},
+   "source": [
+    "### Site-specific enrichment analysis"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0ee3a7c8",
+   "metadata": {},
+   "source": [
+    "The basic example uses a modified peptide sequence to\n",
+    "demonstrate the enrichment analysis.\n",
+    "> TODO: The example on how to do that needs a PTM focused dataset.\n",
+    "\n",
+    "The details of how site-specific enrichment analysis is done will depend on the\n",
+    "dataset and the question at hand.\n",
+    "\n",
+    "If the identifiers contain PTMs, this information is removed to match them to the annotation\n",
+    "using a regular expression (inside the function). For example:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "88b05e0c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "\n",
+    "regex = \"(\\\\w+~.+)_\\\\w\\\\d+\\\\-\\\\w+\"\n",
+    "identifier_ckg = \"gnd~P00350_T10-WW\"\n",
+    "match = re.search(regex, identifier_ckg)\n",
+    "match.group(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b9e2fddf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ToDo: Add example for site-specific enrichment analysis\n",
+    "# acore.enrichment_analysis.run_up_down_regulation_enrichment"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "dbc34b7b",
+   "metadata": {},
+   "source": [
+    "## Single sample GSEA (ssGSEA)\n",
+    "Run a gene set enrichment analysis (GSEA) for each sample,\n",
+    "see the [article](https://www.nature.com/articles/nature08460#Sec3) and\n",
+    "the package [`gseapy`](https://gseapy.readthedocs.io/en/latest/run.html#gseapy.ssgsea)\n",
+    "for more details."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e211bf65",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "enrichments = acore.enrichment_analysis.run_ssgsea(\n",
+    "    data=df_omics,\n",
+    "    annotation=annotations,\n",
+    "    min_size=1,\n",
+    ")\n",
+    "enrichments"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0fd08e79",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "enrichments.iloc[0].to_dict()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ca9e4908",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "enrichments[\"NES\"].plot.hist()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6b9a50cc",
+   "metadata": {},
+   "source": [
+    "The normalised enrichment score (NES) can be used in a PCA plot to see if the samples\n",
+    "cluster according to the enrichment of the gene sets."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d917326",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "nes = enrichments.set_index(\"Term\", append=True).unstack()[\"NES\"].convert_dtypes()\n",
+    "nes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "02dbec64",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import acore.exploratory_analysis as ea\n",
+    "\n",
+    "pca_result, pca_annotation = ea.run_pca(\n",
+    "    data=nes.join(df_meta[[group]]),\n",
+    "    drop_cols=[],\n",
+    "    annotation_cols=[],\n",
+    "    group=group,\n",
+    "    components=2,\n",
+    "    dropna=False,\n",
+    ")\n",
+    "resultDf, loadings, var_exp = pca_result\n",
+    "resultDf"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "acd530d6",
+   "metadata": {},
+   "source": [
+    "The loadings show how the variables are correlated with the principal components."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28464b86", + "metadata": {}, + "outputs": [], + "source": [ + "loadings" + ] + }, + { + "cell_type": "markdown", + "id": "5d0c1eba", + "metadata": {}, + "source": [ + "We will plot both on the sample plot (samples on the first two principal components and\n", + "loadings of variables). We use the\n", + "[`vuecore` package](https://github.com/Multiomics-Analytics-Group/vuecore)\n", + "for this, which is also developed by the Multiomics Analytics Group." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c66fcef", + "metadata": {}, + "outputs": [], + "source": [ + "from plotly.offline import iplot\n", + "from vuecore import viz\n", + "\n", + "args = {\"factor\": 1, \"loadings\": 10}\n", + "#! pca_results has three items, but docstring requests only two -> double check\n", + "figure = viz.get_pca_plot(data=pca_result, identifier=\"PCA enrichment\", args=args)\n", + "iplot(figure)" + ] + }, + { + "cell_type": "markdown", + "id": "4be5a8c8", + "metadata": {}, + "source": [ + "## Compare two distributions - KS test\n", + "The Kolmogorov-Smirnov test is a non-parametric test that compares two distributions.\n", + "- we compare the distributions of the two differently upregulated protein groups\n", + "This is not the best example for comparing distributions, but it shows how to use the\n", + "KS test." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dd57b99", + "metadata": {}, + "outputs": [], + "source": [ + "# plot two histograms of intensity values here\n", + "sel_pgs = [\"O43175\", \"P39059\"]\n", + "view = df_omics[sel_pgs].sub(df_omics[sel_pgs].mean())\n", + "ax = view.plot.hist(bins=20, alpha=0.5)" + ] + }, + { + "cell_type": "markdown", + "id": "a480b65b", + "metadata": {}, + "source": [ + "Let us compare the two centered distributions using the KS test." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0f8e6a8", + "metadata": {}, + "outputs": [], + "source": [ + "acore.enrichment_analysis.run_kolmogorov_smirnov(view[sel_pgs[0]], view[sel_pgs[1]])" + ] + }, + { + "cell_type": "markdown", + "id": "385b0ceb", + "metadata": {}, + "source": [ + "The result suggests that the two distributions are from the same distribution." + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "tags,-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/api_examples/enrichment_analysis.py b/docs/api_examples/enrichment_analysis.py new file mode 100644 index 0000000..5f09cd0 --- /dev/null +++ b/docs/api_examples/enrichment_analysis.py @@ -0,0 +1,326 @@ +# %% [markdown] +# # Enrichment analysis +# +# - we need some groups of genes to compute clusters +# - we need functional annotations, i.e. a category summarizing a set of genes. +# - +# You can start with watching Lars Juhl Jensen's brief introduction to enrichment analysis +# on [youtube](https://www.youtube.com/watch?v=2NC1QOXmc5o). 
+#
+# Use example data for ovarian cancer
+# ([PXD010372](https://github.com/Multiomics-Analytics-Group/acore/tree/main/example_data/PXD010372))
+
+
+# %% tags=["hide-output"]
+# %pip install acore vuecore
+
+
+# %%
+from pathlib import Path
+
+import dsp_pandas
+import pandas as pd
+
+import acore
+import acore.differential_regulation
+import acore.enrichment_analysis
+from acore.io.uniprot import fetch_annotations
+
+dsp_pandas.format.set_pandas_options(max_colwidth=60)
+
+# %% [markdown]
+# Parameters of this notebook
+
+# %% tags=["parameters"]
+base_path: str = (
+    "https://raw.githubusercontent.com/Multiomics-Analytics-Group/acore/refs/heads/main/"
+    "example_data/PXD010372/processed"
+)
+omics: str = f"{base_path}/omics.csv"
+meta_pgs: str = f"{base_path}/meta_pgs.csv"
+meta: str = f"{base_path}/meta_patients.csv"
+features_to_sample: int = 100
+
+# %% [markdown]
+# # Load processed data
+
+# %%
+df_omics = pd.read_csv(omics, index_col=0)
+df_meta_pgs = pd.read_csv(meta_pgs, index_col=0)
+df_meta = pd.read_csv(meta, index_col=0)
+df_omics
+
+# %%
+ax = df_omics.notna().sum().sort_values(ascending=True).plot()
+
+# %% [markdown]
+# Keep only features with a certain amount of non-NaN values and select 100 of these
+# for illustration. Add the ones which were differentially regulated in the ANOVA using all
+# the protein groups.
+
+# %%
+idx_always_included = ["Q5HYN5", "P39059", "O43432", "O43175"]
+df_omics[idx_always_included]
+
+# %% tags=["hide-input"]
+df_omics = (
+    df_omics
+    # .dropna(axis=1)
+    .drop(idx_always_included, axis=1)
+    .dropna(thresh=18, axis=1)
+    .sample(
+        features_to_sample - len(idx_always_included),
+        axis=1,
+        random_state=42,
+    )
+    .join(df_omics[idx_always_included])
+)
+df_omics
+
+
+# %%
+df_meta
+
+
+# %% [markdown]
+# ## Compute up and downregulated genes
+# These will be used to find enrichments in the sets of up- and downregulated genes.
+
+# %%
+group = "Status"
+covariates = ["PlatinumValue"]
+diff_reg = acore.differential_regulation.run_anova(
+    df_omics.join(df_meta[[group]]),
+    drop_cols=[],
+    subject=None,
+    group=group,
+)
+diff_reg.describe(exclude=["float"])
+
+# %%
+diff_reg["rejected"] = diff_reg["rejected"].astype(bool)  # ! needs to be fixed in anova
+diff_reg.query("rejected")
+
+# %% [markdown]
+# ## Find functional annotations, here pathways
+#
+
+# %%
+fname_annotations = f"downloaded/annotations_{features_to_sample}.csv"
+fname = Path(fname_annotations)
+try:
+    annotations = pd.read_csv(fname, index_col=0)
+    print(f"Loaded annotations from {fname}")
+except FileNotFoundError:
+    print(f"Fetching annotations for {df_omics.columns.size} UniProt IDs.")
+    fields = "go_p,go_c,go_f"
+    annotations = fetch_annotations(df_omics.columns, fields=fields)
+    # First column (`From`) is additional to specified fields
+    d_fields_to_col = {k: v for k, v in zip(fields.split(","), annotations.columns[1:])}
+
+    # expand go terms
+    to_expand = list()
+    for field in d_fields_to_col:
+        if "go_" in field:
+            col = d_fields_to_col[field]
+            annotations[col] = annotations[col].str.split(";")
+            to_expand.append(col)
+    for col in to_expand:
+        # this is a bit wasteful. Processing to stack format should be done here.
+        annotations = annotations.explode(col, ignore_index=True)
+    # process other than go term columns
+    annotations = (
+        annotations.set_index("From")
+        .rename_axis("identifier")
+        # .drop("Entry", axis=1)
+        .rename_axis("source", axis=1)
+        .stack()
+        .to_frame("annotation")
+        .reset_index()
+        .drop_duplicates(ignore_index=True)
+    )
+    fname.parent.mkdir(exist_ok=True, parents=True)
+    annotations.to_csv(fname, index=True)
+
+annotations
+
+# %% [markdown]
+# See how many protein groups are associated with each annotation.
+
+# %% tags=["hide-input"]
+_ = (
+    annotations.groupby("annotation")
+    .size()
+    .value_counts()
+    .sort_index()
+    .plot(kind="bar")
+)
+
+# %% [markdown]
+# ## Enrichment analysis
+# This is done separately for up- and downregulated genes, as it is assumed that biological
+# processes are regulated in one direction.
+
+# %% tags=["hide-input"]
+diff_reg.query("rejected")[
+    [
+        "identifier",
+        "group1",
+        "group2",
+        "pvalue",
+        "padj",
+        "rejected",
+        "log2FC",
+        "FC",
+    ]
+].sort_values("log2FC")
+
+# %% [markdown]
+# - this additionally sets a fold change cutoff
+# - and the foreground and background populations change due to the separation
+
+# %%
+ret = acore.enrichment_analysis.run_up_down_regulation_enrichment(
+    regulation_data=diff_reg,
+    annotation=annotations,
+    min_detected_in_set=2,  # ! the default is 2 (more conservative than 1)
+    lfc_cutoff=0.5,  # ! the default is 1
+)
+ret
+
+# %% [markdown]
+# Here we see differences for the same set of differentially regulated protein groups;
+# the cutoff can be disabled using lfc_cutoff=0.
+
+# %%
+ret = acore.enrichment_analysis.run_up_down_regulation_enrichment(
+    regulation_data=diff_reg,
+    annotation=annotations,
+    min_detected_in_set=1,  # ! the default is 2, which is more conservative
+    lfc_cutoff=0.1,  # ! the default is 1
+)
+ret
+
+# %% [markdown]
+# Or restrict the analysis to functional annotations for which at least 2
+# protein groups were found to be upregulated.
+
+# %%
+ret = acore.enrichment_analysis.run_up_down_regulation_enrichment(
+    regulation_data=diff_reg,
+    annotation=annotations,
+    min_detected_in_set=2,
+    lfc_cutoff=0.5,  # ! the default is 1
+)
+ret
+
+# %% [markdown]
+# ### Site-specific enrichment analysis
+
+# %% [markdown]
+# The basic example uses a modified peptide sequence to
+# demonstrate the enrichment analysis.
+# > TODO: The example on how to do that needs a PTM focused dataset.
+#
+# The details of how site-specific enrichment analysis is done will depend on the
+# dataset and the question at hand.
+#
+# If the identifiers contain PTMs, this information is removed to match them to the annotation
+# using a regular expression (inside the function). For example:
+
+# %%
+import re
+
+regex = "(\\w+~.+)_\\w\\d+\\-\\w+"
+identifier_ckg = "gnd~P00350_T10-WW"
+match = re.search(regex, identifier_ckg)
+match.group(1)
+
+# %%
+# ToDo: Add example for site-specific enrichment analysis
+# acore.enrichment_analysis.run_up_down_regulation_enrichment
+
+# %% [markdown]
+# ## Single sample GSEA (ssGSEA)
+# Run a gene set enrichment analysis (GSEA) for each sample,
+# see the [article](https://www.nature.com/articles/nature08460#Sec3) and
+# the package [`gseapy`](https://gseapy.readthedocs.io/en/latest/run.html#gseapy.ssgsea)
+# for more details.
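+
+# %% [markdown]
+# As a minimal sketch of the input (an illustration based on the `annotations` frame
+# built above, not a required schema): `annotation` is a long-format table mapping
+# each protein group identifier to one annotation term per row, for example:
+
+# %%
+pd.DataFrame(
+    {
+        "identifier": ["P05067", "P05067", "P12345"],
+        "annotation": [
+            "cell adhesion [GO:0007155]",
+            "endocytosis [GO:0006897]",
+            "protein folding [GO:0006457]",
+        ],
+        "source": ["go_p", "go_p", "go_p"],  # source labels are illustrative
+    }
+)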
+
+# %%
+enrichments = acore.enrichment_analysis.run_ssgsea(
+    data=df_omics,
+    annotation=annotations,
+    min_size=1,
+)
+enrichments
+
+# %%
+enrichments.iloc[0].to_dict()
+
+# %%
+enrichments["NES"].plot.hist()
+
+# %% [markdown]
+# The normalised enrichment score (NES) can be used in a PCA plot to see if the samples
+# cluster according to the enrichment of the gene sets.
+
+# %%
+nes = enrichments.set_index("Term", append=True).unstack()["NES"].convert_dtypes()
+nes
+
+# %%
+import acore.exploratory_analysis as ea
+
+pca_result, pca_annotation = ea.run_pca(
+    data=nes.join(df_meta[[group]]),
+    drop_cols=[],
+    annotation_cols=[],
+    group=group,
+    components=2,
+    dropna=False,
+)
+resultDf, loadings, var_exp = pca_result
+resultDf
+
+# %% [markdown]
+# The loadings show how the variables are correlated with the principal components.
+
+# %%
+loadings
+
+# %% [markdown]
+# We will plot both on the sample plot (samples on the first two principal components and
+# loadings of variables). We use the
+# [`vuecore` package](https://github.com/Multiomics-Analytics-Group/vuecore)
+# for this, which is also developed by the Multiomics Analytics Group.
+
+# %%
+from plotly.offline import iplot
+from vuecore import viz
+
+args = {"factor": 1, "loadings": 10}
+# #! pca_results has three items, but docstring requests only two -> double check
+figure = viz.get_pca_plot(data=pca_result, identifier="PCA enrichment", args=args)
+iplot(figure)
+
+# %% [markdown]
+# ## Compare two distributions - KS test
+# The Kolmogorov-Smirnov test is a non-parametric test that compares two distributions.
+# We compare the distributions of two differentially upregulated protein groups.
+# This is not the best example for comparing distributions, but it shows how to use the
+# KS test.
+
+# %%
+# plot two histograms of intensity values here
+sel_pgs = ["O43175", "P39059"]
+view = df_omics[sel_pgs].sub(df_omics[sel_pgs].mean())
+ax = view.plot.hist(bins=20, alpha=0.5)
+
+# %% [markdown]
+# Let us compare the two centered distributions using the KS test.
+
+# %%
+acore.enrichment_analysis.run_kolmogorov_smirnov(view[sel_pgs[0]], view[sel_pgs[1]])
+
+# %% [markdown]
+# The result suggests that the two samples could stem from the same distribution
+# (the null hypothesis is not rejected).
diff --git a/docs/index.rst b/docs/index.rst
index 91a92e6..eba7982 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -16,6 +16,7 @@

    api_examples/exploratory_analysis
    api_examples/normalization_analysis
+   api_examples/enrichment_analysis

 ..
toctree:: :maxdepth: 1 diff --git a/tests/test_differential_regulation.py b/tests/test_differential_regulation.py index 2e13f92..169bafa 100644 --- a/tests/test_differential_regulation.py +++ b/tests/test_differential_regulation.py @@ -7,56 +7,85 @@ class TestCalculateTtest(unittest.TestCase): def setUp(self): self.data = { - 'subject': [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4], - 'sample': ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10', 'S11', 'S12'], - 'group': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'], - 'protein': [1.4, 6.2, 9.03, 2.3, 7.4, 14.01, 4.5, 9.6, 10.4, 7.5, 11.6, 16.4], - 'group2': ['a', 'a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b', 'b'], + "subject": [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4], + "sample": [ + "S1", + "S2", + "S3", + "S4", + "S5", + "S6", + "S7", + "S8", + "S9", + "S10", + "S11", + "S12", + ], + "group": ["A", "B", "C", "A", "B", "C", "A", "B", "C", "A", "B", "C"], + "protein": [ + 1.4, + 6.2, + 9.03, + 2.3, + 7.4, + 14.01, + 4.5, + 9.6, + 10.4, + 7.5, + 11.6, + 16.4, + ], + "group2": ["a", "a", "a", "a", "a", "a", "b", "b", "b", "b", "b", "b"], } self.df = pd.DataFrame(self.data) def test_ttest_ind(self): data = { - 'group1': [1.3, 1.2, 0.2, 2.2, 3.3, 8.43], - 'group2': [1.4, 6.2, 7.3, 0.4, 1.5, 0.6] + "group1": [1.3, 1.2, 0.2, 2.2, 3.3, 8.43], + "group2": [1.4, 6.2, 7.3, 0.4, 1.5, 0.6], } df = pd.DataFrame(data) - condition1 = 'group1' - condition2 = 'group2' + condition1 = "group1" + condition2 = "group2" is_logged = True non_par = False expected_result = pg.ttest(df[condition1], df[condition2], paired=False) - result = dr.calculate_ttest(df, condition1, condition2, is_logged=is_logged, non_par=non_par) + result = dr.calculate_ttest( + df, condition1, condition2, is_logged=is_logged, non_par=non_par + ) self.assertAlmostEqual(result[0], expected_result["T"].values[0]) self.assertAlmostEqual(result[1], expected_result["p-val"].values[0]) def test_mann_whitney(self): - data = { - 'group1': [1, 2, 3, 4, 5], - 'group2': [2, 4, 6, 8, 10] - } + data = {"group1": [1, 2, 3, 4, 5], "group2": [2, 4, 6, 8, 10]} df = pd.DataFrame(data) - condition1 = 'group1' - condition2 = 'group2' + condition1 = "group1" + condition2 = "group2" is_logged = True non_par = True expected_result = pg.mwu(df[condition1], df[condition2]) - result = dr.calculate_ttest(df, condition1, condition2, is_logged=is_logged, non_par=non_par) + result = dr.calculate_ttest( + df, condition1, condition2, is_logged=is_logged, non_par=non_par + ) self.assertAlmostEqual(result[0], expected_result["U-val"].values[0]) self.assertAlmostEqual(result[1], expected_result["p-val"].values[0]) def test_calculate_anova(self): - column = 'protein' - group = 'group' + column = "protein" + group = "group" expected_result = pg.anova(data=self.df, dv=column, between=group) - expected_t, expected_df1, expected_df2, expected_pvalue = expected_result[['F', 'ddof1', 'ddof2', 'p-unc']].values[0] + expected_t, expected_df1, expected_df2, expected_pvalue = expected_result[ + ["F", "ddof1", "ddof2", "p-unc"] + ].values[0] result = dr.calculate_anova(self.df, column, group=group) @@ -66,14 +95,20 @@ def test_calculate_anova(self): self.assertEqual(result[4], expected_pvalue) def test_calculate_ancova(self): - column = 'protein' - group = 'group' + column = "protein" + group = "group" covariates = [] - expected_result = pg.ancova(data=self.df, dv=column, between=group, covar=covariates) - expected_t, expected_df, expected_pvalue = expected_result.loc[expected_result['Source'] == group, ['F', 'DF', 
'p-unc']].values[0] + expected_result = pg.ancova( + data=self.df, dv=column, between=group, covar=covariates + ) + expected_t, expected_df, expected_pvalue = expected_result.loc[ + expected_result["Source"] == group, ["F", "DF", "p-unc"] + ].values[0] - result = dr.calculate_ancova(self.df, column, group=group, covariates=covariates) + result = dr.calculate_ancova( + self.df, column, group=group, covariates=covariates + ) self.assertEqual(result[1], expected_df) self.assertEqual(result[2], expected_df) @@ -82,16 +117,27 @@ def test_calculate_ancova(self): def test_calculate_repeated_measures_anova(self): """Source SS DF MS F p-unc ng2 eps -0 group 1.50 1 1.500 0.087642 0.795106 0.032609 1.0 -1 Error 34.23 2 17.115 NaN NaN NaN NaN""" - column = 'protein' - subject = 'subject' - within = 'group' - - expected_result = pg.rm_anova(data=self.df, dv=column, within=within, subject=subject, detailed=True, correction=True) - expected_t, expected_pvalue = expected_result.loc[0, ["F", "p-unc"]].values.tolist() + 0 group 1.50 1 1.500 0.087642 0.795106 0.032609 1.0 + 1 Error 34.23 2 17.115 NaN NaN NaN NaN""" + column = "protein" + subject = "subject" + within = "group" + + expected_result = pg.rm_anova( + data=self.df, + dv=column, + within=within, + subject=subject, + detailed=True, + correction=True, + ) + expected_t, expected_pvalue = expected_result.loc[ + 0, ["F", "p-unc"] + ].values.tolist() expected_df1, expected_df2 = expected_result["DF"] - result = dr.calculate_repeated_measures_anova(self.df, column, subject=subject, within=within) + result = dr.calculate_repeated_measures_anova( + self.df, column, subject=subject, within=within + ) self.assertEqual(result[1], expected_df1) self.assertEqual(result[2], expected_df2) @@ -99,19 +145,30 @@ def test_calculate_repeated_measures_anova(self): self.assertEqual(result[4], expected_pvalue) def test_calculate_mixed_anova(self): - column = 'protein' - subject = 'subject' - within = 'group' - between = 'group2' - - expected_result = pg.mixed_anova(data=self.df, dv=column, within=within, between=between, subject=subject, correction=True) - expected_result['identifier'] = column - expected_result = expected_result[['identifier', 'DF1', 'DF2', 'F', 'p-unc', 'Source']] - - result = dr.calculate_mixed_anova(self.df, column, subject=subject, within=within, between=between) + column = "protein" + subject = "subject" + within = "group" + between = "group2" + + expected_result = pg.mixed_anova( + data=self.df, + dv=column, + within=within, + between=between, + subject=subject, + correction=True, + ) + expected_result["identifier"] = column + expected_result = expected_result[ + ["identifier", "DF1", "DF2", "F", "p-unc", "Source"] + ] + + result = dr.calculate_mixed_anova( + self.df, column, subject=subject, within=within, between=between + ) self.assertEqual(result.values.tolist(), expected_result.values.tolist()) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/tests/test_enrichment.py b/tests/test_enrichment.py index cfd2e03..6c0930a 100644 --- a/tests/test_enrichment.py +++ b/tests/test_enrichment.py @@ -1,5 +1,9 @@ import unittest + +import numpy as np +import pandas as pd from scipy import stats + import acore.enrichment_analysis as ea @@ -7,9 +11,11 @@ class TestRunFisher(unittest.TestCase): def test_run_fisher(self): group1 = [10, 5] group2 = [8, 12] - alternative = 'two-sided' + alternative = "two-sided" - expected_odds, expected_pvalue = stats.fisher_exact([[10, 5], [8, 12]], alternative) + expected_odds, 
expected_pvalue = stats.fisher_exact( + [[10, 5], [8, 12]], alternative + ) result = ea.run_fisher(group1, group2, alternative=alternative) @@ -21,9 +27,11 @@ class TestRunKolmogorovSmirnov(unittest.TestCase): def test_run_kolmogorov_smirnov(self): dist1 = [1, 2, 3, 4, 5] dist2 = [1, 2, 3, 4, 6] - alternative = 'two-sided' + alternative = "two-sided" - expected_result = stats.ks_2samp(dist1, dist2, alternative=alternative, mode='auto') + expected_result = stats.ks_2samp( + dist1, dist2, alternative=alternative, mode="auto" + ) result = ea.run_kolmogorov_smirnov(dist1, dist2, alternative=alternative) @@ -31,5 +39,62 @@ def test_run_kolmogorov_smirnov(self): self.assertEqual(result[1], expected_result.pvalue) -if __name__ == '__main__': +def test__annotate_features(): + expected = pd.Series( + [ + "foreground", + "foreground", + "background", + "foreground", + "background", + "background", + np.nan, + ] + ) + + features = pd.Series(["G1", "G2", "G3", "G4", "G5", "G6", "G9"]) + in_foreground = ["G1", "G2", "G4"] + in_background = ["G3", "G5", "G6"] + actual = ea._annotate_features(features, in_foreground, in_background) + pd.testing.assert_series_equal(expected, actual) + + +def test_run_regulation_enrichment(): + """Integration test for run_regulation_enrichment. Indirectly tests + run_enrichment from enrichment_analysis module.""" + annotation = { + "annotation": ["path1", "path1", "path1", "path2", "path2", "path3", "path3"], + "identifier": ["gene1", "gene2", "gene3", "gene1", "gene5", "gene6", "gene9"], + "source": ["GO", "GO", "GO", "GO_P", "GO_P", "GO_P", "GO_P"], + } + annotation = pd.DataFrame(annotation) + regulation_res = { + "identifier": ["gene1", "gene2", "gene3", "gene4", "gene5", "gene6"], + "rejected": [True, True, False, False, True, True], + } + regulation_res = pd.DataFrame(regulation_res) + + actual = ea.run_regulation_enrichment( + regulation_data=regulation_res, + annotation=annotation, + min_detected_in_set=1, + ) + + expected = pd.DataFrame( + { + "terms": ["path1", "path2", "path3"], + "identifiers": ["gene1,gene2", "gene1,gene5", "gene6"], + "foreground": [2, 2, 1], + "background": [1, 0, 0], + "foreground_pop": [4, 4, 4], + "background_pop": [6, 6, 6], + "pvalue": [1.0, 0.4666666666666667, 1.0], + "padj": [1.0, 1.0, 1.0], + "rejected": [False, False, False], + } + ) + assert expected.equals(actual) + + +if __name__ == "__main__": unittest.main() diff --git a/tests/test_exploratory.py b/tests/test_exploratory.py index ef84e3a..97dd673 100644 --- a/tests/test_exploratory.py +++ b/tests/test_exploratory.py @@ -1,94 +1,152 @@ -import unittest import platform +import unittest + import pandas as pd + import acore.exploratory_analysis as ea class TestExtractMissing(unittest.TestCase): def setUp(self): - self.data = pd.DataFrame({ - 'group': ['A', 'A', 'B', 'B'], - 'sample': ['S1', 'S2', 'S3', 'S4'], - 'protein1': [1.4, 2.2, None, 4.2], - 'protein2': [5.6, None, None, 8.1], - 'protein3': [9.1, 10.01, 11.2, 12.9] - }) + self.data = pd.DataFrame( + { + "group": ["A", "A", "B", "B"], + "sample": ["S1", "S2", "S3", "S4"], + "protein1": [1.4, 2.2, None, 4.2], + "protein2": [5.6, None, None, 8.1], + "protein3": [9.1, 10.01, 11.2, 12.9], + } + ) def test_extract_number_missing(self): min_valid = 2 - expected_result = ['protein1', 'protein3'] + expected_result = ["protein1", "protein3"] - result = ea.extract_number_missing(self.data, min_valid, drop_cols=['sample'], group='group') + result = ea.extract_number_missing( + self.data, min_valid, drop_cols=["sample"], group="group" + ) 
self.assertEqual(result, expected_result) def test_extract_percentage_missing(self): missing_max = 0.2 - expected_result = ['protein1', 'protein3'] + expected_result = ["protein1", "protein3"] - result = ea.extract_percentage_missing(self.data, missing_max, drop_cols=['sample'], group='group') + result = ea.extract_percentage_missing( + self.data, missing_max, drop_cols=["sample"], group="group" + ) self.assertEqual(result, expected_result) class TestDimensionalityReduction(unittest.TestCase): def setUp(self): - self.data = pd.DataFrame({ - 'group': ['A', 'A', 'B', 'B'], - 'protein1': [1.4, 2.2, 5.3, 4.2], - 'protein2': [5.6, 0.3, 2.1, 8.1], - 'protein3': [9.1, 10.01, 11.2, 12.9] - }) + self.data = pd.DataFrame( + { + "group": ["A", "A", "B", "B"], + "protein1": [1.4, 2.2, 5.3, 4.2], + "protein2": [5.6, 0.3, 2.1, 8.1], + "protein3": [9.1, 10.01, 11.2, 12.9], + } + ) def test_run_pca(self): components = 2 expected_result = ( - pd.DataFrame({ - 'group': ['A', 'A', 'B', 'B'], - 'x': [0.877154, -3.883458, -1.550791, 4.557095], - 'y': [-2.815737, -0.420979, 2.278209, 0.958507], - }), - pd.DataFrame({ - 'x': [0.958489, 0.092382, 0.269749], - 'y': [-0.235412, 0.790179, 0.565861], - 'value': [0.986975, 0.795561, 0.626868] - }, - index=['protein2', 'protein1', 'protein3']), - pd.Series([0.71760534, 0.26139782]) + pd.DataFrame( + { + "group": ["A", "A", "B", "B"], + "x": [0.877154, -3.883458, -1.550791, 4.557095], + "y": [-2.815737, -0.420979, 2.278209, 0.958507], + } + ), + pd.DataFrame( + { + "x": [0.958489, 0.092382, 0.269749], + "y": [-0.235412, 0.790179, 0.565861], + "value": [0.986975, 0.795561, 0.626868], + }, + index=["protein2", "protein1", "protein3"], + ), + pd.Series([0.71760534, 0.26139782]), + ) + exp_dict = {"x_title": "PC1 (0.72)", "y_title": "PC2 (0.26)", "group": "group"} + result, annotation = ea.run_pca( + self.data, + drop_cols=[], + annotation_cols=[], + group="group", + components=components, + dropna=True, ) - exp_dict = {'x_title': 'PC1 (0.72)', - 'y_title': 'PC2 (0.26)', 'group': 'group'} - result, annotation = ea.run_pca(self.data, drop_cols=[], annotation_cols=[], group='group', components=components, dropna=True) assert annotation == exp_dict # results: tuple of 3 DFs: components, loadings, explained_variance pd.testing.assert_frame_equal(result[0], expected_result[0], check_exact=False) pd.testing.assert_frame_equal(result[1], expected_result[1], check_exact=False) - pd.testing.assert_series_equal(pd.Series(result[2]), expected_result[2], check_exact=False, check_dtype=False) - + pd.testing.assert_series_equal( + pd.Series(result[2]), + expected_result[2], + check_exact=False, + check_dtype=False, + ) + def test_run_tsne(self): components = 2 # ! MAC ARM64 yields these results (using the similar installation) - expected_result = pd.DataFrame({ - 'group': ['A', 'A', 'B', 'B'], - 'x': [-113.341728, 36.020717, 57.2992514, -134.729141], - 'y': [131.169082, -59.616630, 110.601203, -39.086254] - }) + expected_result = pd.DataFrame( + { + "group": ["A", "A", "B", "B"], + "x": [-113.341728, 36.020717, 57.2992514, -134.729141], + "y": [131.169082, -59.616630, 110.601203, -39.086254], + } + ) # ! 
Ubuntu x64 yields these results:
-        if not platform.machine() == 'arm64':
-            expected_result = pd.DataFrame({
-                'group': ['A', 'A', 'B', 'B'],
-                'x': [-27.375560760498047, -7.526909828186035, -28.731882095336914, -8.872751235961914],
-                'y': [1.547648310661316, 0.17019487917423248, -18.321491241455078, -19.713821411132812]
-            })
-        result, _ = ea.run_tsne(self.data, drop_cols=['sample', 'subject'], group='group', components=components, perplexity=3, n_iter=1000, init='pca', dropna=True)
-        result = result['tsne']
-        pd.testing.assert_frame_equal(result, expected_result, check_exact=False, check_dtype=False)
+        if not platform.machine() == "arm64":
+            expected_result = pd.DataFrame(
+                {
+                    "group": ["A", "A", "B", "B"],
+                    "x": [
+                        -27.375560760498047,
+                        -7.526909828186035,
+                        -28.731882095336914,
+                        -8.872751235961914,
+                    ],
+                    "y": [
+                        1.547648310661316,
+                        0.17019487917423248,
+                        -18.321491241455078,
+                        -19.713821411132812,
+                    ],
+                }
+            )
+        result, _ = ea.run_tsne(
+            self.data,
+            drop_cols=["sample", "subject"],
+            group="group",
+            components=components,
+            perplexity=3,
+            n_iter=1000,
+            init="pca",
+            dropna=True,
+        )
+        result = result["tsne"]
+        pd.testing.assert_frame_equal(
+            result, expected_result, check_exact=False, check_dtype=False
+        )

     def test_run_umap(self):
-        _ = ea.run_umap(self.data, drop_cols=['sample', 'subject'], group='group', n_neighbors=10, min_dist=0.3, metric='cosine', dropna=True)
+        _ = ea.run_umap(
+            self.data,
+            drop_cols=["sample", "subject"],
+            group="group",
+            n_neighbors=10,
+            min_dist=0.3,
+            metric="cosine",
+            dropna=True,
+        )
         self.assertEqual(1, 1)


-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_multiple_testing.py b/tests/test_multiple_testing.py
new file mode 100644
index 0000000..0f339bc
--- /dev/null
+++ b/tests/test_multiple_testing.py
@@ -0,0 +1,45 @@
+import numpy as np
+import pytest
+
+from acore.multiple_testing import apply_pvalue_correction
+
+# bonferroni correction
+array = [
+    0.1,
+    0.01,
+    0.2,
+    0.4,
+    np.nan,
+    0.5,
+]  # 0.04, 0.02, 0.0001]
+
+test_res = [
+    (
+        "b",
+        np.array([0.0, 1.0, 0.0, 0.0, np.nan, 0.0]),
+        np.array([0.5, 0.05, 1.0, 1.0, np.nan, 1.0]),
+    ),
+    (
+        "fdr_bh",
+        np.array(
+            [
+                0.0,
+                1.0,
+                0.0,
+                0.0,
+                np.nan,
+                0.0,
+            ]
+        ),
+        np.array([0.25, 0.05, 0.33333333, 0.5, np.nan, 0.5]),
+    ),
+]
+
+
+@pytest.mark.parametrize("method,exp_rejected,exp_pvalues", test_res)
+def test_apply_pvalue_correction_alpha_5_percent(method, exp_rejected, exp_pvalues):
+    act_rejected, act_pvalues = apply_pvalue_correction(
+        array, alpha=0.05, method=method
+    )
+    np.testing.assert_array_equal(act_rejected, exp_rejected)
+    np.testing.assert_array_almost_equal(act_pvalues, exp_pvalues)
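+
+
+def test_apply_pvalue_correction_matches_manual_bonferroni():
+    # A minimal sketch added for illustration (not part of the original suite):
+    # for Bonferroni, the adjusted p-value is min(1, p * n) over the n finite inputs,
+    # which matches the expected values parametrized above for method "b".
+    finite = np.array([0.1, 0.01, 0.2, 0.4, 0.5])
+    _, corrected = apply_pvalue_correction(finite, alpha=0.05, method="b")
+    np.testing.assert_array_almost_equal(
+        corrected, np.minimum(finite * finite.size, 1.0)
+    )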