Source code for athena.metrics.heterogeneity.metrics

# %%
from .base_metrics import _shannon, _richness, _simpson, _shannon_evenness, _hill_number, \
    _simpson_evenness, _gini_simpson, _renyi, _abundance, _quadratic_entropy

from ...utils.general import is_categorical, make_iterable, is_numeric

import numpy as np
import pandas as pd
from collections import Counter

from sklearn.preprocessing import StandardScaler


# %%

[docs]def richness(so, spl: str, attr: str, *, local=True, key_added=None, graph_key='knn', inplace=True) -> None: """Computes the richness on the observation or the sample level Args: so: SpatialOmics instance spl: Spl for which to compute the metric attr: Categorical feature in SpatialOmics.obs to use for the grouping local: Whether to compute the metric on the observation or the sample level key_added: Key added to either obs or spl depending on the choice of `local` graph_key: Specifies the graph representation to use in so.G[spl] if `local=True`. inplace: Whether to add the metric to the current SpatialOmics instance or to return a new one. Examples: .. code-block:: python so = sh.dataset.imc() spl = so.spl.index[0] sh.metrics.richness(so, spl, 'meta_id', local=False) sh.metrics.richness(so, spl, 'meta_id', local=True) """ if key_added is None: key_added = 'richness' key_added = f'{key_added}_{attr}' if local: key_added += f'_{graph_key}' metric = _richness kwargs_metric = {} return _compute_metric(so=so, spl=spl, attr=attr, key_added=key_added, graph_key=graph_key, metric=metric, kwargs_metric=kwargs_metric, local=local, inplace=inplace)
[docs]def shannon(so, spl: str, attr: str, *, local=True, key_added=None, graph_key='knn', base=2, inplace=True) -> None: """Computes the Shannon Index on the observation or the sample level Args: so: SpatialOmics instance spl: Spl for which to compute the metric attr: Categorical feature in SpatialOmics.obs to use for the grouping local: Whether to compute the metric on the observation or the sample level key_added: Key added to either obs or spl depending on the choice of `local` graph_key: Specifies the graph representation to use in so.G[spl] if `local=True`. inplace: Whether to add the metric to the current SpatialOmics instance or to return a new one. Examples: .. code-block:: python so = sh.dataset.imc() spl = so.spl.index[0] sh.metrics.shannon(so, spl, 'meta_id', local=False) sh.metrics.shannon(so, spl, 'meta_id', local=True) """ if key_added is None: key_added = 'shannon' key_added = f'{key_added}_{attr}' if local: key_added += f'_{graph_key}' metric = _shannon kwargs_metric = {'base': base} return _compute_metric(so=so, spl=spl, attr=attr, key_added=key_added, graph_key=graph_key, metric=metric, kwargs_metric=kwargs_metric, local=local, inplace=inplace)
[docs]def simpson(so, spl: str, attr: str, *, local=True, key_added=None, graph_key='knn', inplace=True) -> None: """Computes the Simpson Index on the observation or the sample level Args: so: SpatialOmics instance spl: Spl for which to compute the metric attr: Categorical feature in SpatialOmics.obs to use for the grouping local: Whether to compute the metric on the observation or the sample level key_added: Key added to either obs or spl depending on the choice of `local` graph_key: Specifies the graph representation to use in so.G[spl] if `local=True`. inplace: Whether to add the metric to the current SpatialOmics instance or to return a new one. Examples: .. code-block:: python so = sh.dataset.imc() spl = so.spl.index[0] sh.metrics.simpson(so, spl, 'meta_id', local=False) sh.metrics.simpson(so, spl, 'meta_id', local=True) """ if key_added is None: key_added = 'simpson' key_added = f'{key_added}_{attr}' if local: key_added += f'_{graph_key}' metric = _simpson kwargs_metric = {} return _compute_metric(so=so, spl=spl, attr=attr, key_added=key_added, graph_key=graph_key, metric=metric, kwargs_metric=kwargs_metric, local=local, inplace=inplace)
[docs]def hill_number(so, spl: str, attr: str, q: float, *, local=True, key_added=None, graph_key='knn', inplace=True): """Computes the Hill Numbers on the observation or the sample level Args: so: SpatialOmics instance spl: Spl for which to compute the metric attr: Categorical feature in SpatialOmics.obs to use for the grouping q: The hill coefficient as defined here_. local: Whether to compute the metric on the observation or the sample level key_added: Key added to either obs or spl depending on the choice of `local` graph_key: Specifies the graph representation to use in so.G[spl] if `local=True`. inplace: Whether to add the metric to the current SpatialOmics instance or to return a new one. Examples: .. code-block:: python so = sh.dataset.imc() spl = so.spl.index[0] sh.metrics.hill_number(so, spl, 'meta_id', q=2, local=False) sh.metrics.hill_number(so, spl, 'meta_id', q=2, local=True) """ if key_added is None: key_added = 'hill_number' key_added = f'{key_added}_{attr}_q{q}' if local: key_added += f'_{graph_key}' metric = _hill_number kwargs_metric = {'q': q} return _compute_metric(so=so, spl=spl, attr=attr, key_added=key_added, graph_key=graph_key, metric=metric, kwargs_metric=kwargs_metric, local=local, inplace=inplace)
[docs]def renyi_entropy(so, spl: str, attr: str, q: float, *, local=True, key_added=None, graph_key='knn', base=2, inplace=True): """Computes the Renyi-Entropy. Args: so: SpatialOmics instance spl: Spl for which to compute the metric attr: Categorical feature in SpatialOmics.obs to use for the grouping q: The renyi coefficient as defined here_ local: Whether to compute the metric on the observation or the sample level key_added: Key added to either obs or spl depending on the choice of `local` graph_key: Specifies the graph representation to use in so.G[spl] if `local=True`. inplace: Whether to add the metric to the current SpatialOmics instance or to return a new one. Examples: .. code-block:: python so = sh.dataset.imc() spl = so.spl.index[0] sh.metrics.renyi_entropy(so, spl, 'meta_id', q=2, local=False) sh.metrics.renyi_entropy(so, spl, 'meta_id', q=2, local=True) .. _here: https://ai4scr.github.io/ATHENA/source/methodology.html """ if key_added is None: key_added = 'renyi' key_added = f'{key_added}_{attr}_q{q}' if local: key_added += f'_{graph_key}' metric = _renyi kwargs_metric = {'q': q, 'base': base} return _compute_metric(so=so, spl=spl, attr=attr, key_added=key_added, graph_key=graph_key, metric=metric, kwargs_metric=kwargs_metric, local=local, inplace=inplace)
[docs]def quadratic_entropy(so, spl: str, attr: str, *, metric='minkowski', metric_kwargs={}, scale: bool = True, local=True, key_added=None, graph_key='knn', inplace=True): """Computes the quadratic entropy, taking relative abundance and similarity between observations into account. Args: so: SpatialOmics instance spl: Spl for which to compute the metric attr: Categorical feature in SpatialOmics.obs to use for the grouping metric: metric used to compute distance of observations in the features space so.X[spl] metric_kwargs: key word arguments for metric scale: whether to scale features of observations to unit variance and 0 mean local: whether to compute the metric on the observation or the sample level key_added: Key added to either obs or spl depending on the choice of `local` graph_key: Specifies the graph representation to use in so.G[spl] if `local=True`. inplace: Whether to add the metric to the current SpatialOmics instance or to return a new one. Notes: The implementation computes an average feature vector for each group in attr based on all observations in the sample. Thus, if staining biases across samples exists this will directly distort this metric. Examples: .. code-block:: python so = sh.dataset.imc() spl = so.spl.index[0] sh.metrics.quadratic_entropy(so, spl, 'meta_id', local=False) sh.metrics.quadratic_entropy(so, spl, 'meta_id', local=True) """ if key_added is None: key_added = 'quadratic' key_added = f'{key_added}_{attr}' if local: key_added += f'_{graph_key}' # collect feature vectors of all observations and add attr grouping features: pd.DataFrame = so.X[spl] features = features.merge(so.obs[spl][attr], right_index=True, left_index=True) assert len(features) == len(so.X[spl]), 'inner merge resulted in dropped index ids' # compute average feature vector for each attr group and standardise features = features.groupby(attr).mean() if scale: tmp = StandardScaler().fit_transform(features) features = pd.DataFrame(tmp, index=features.index, columns=features.columns) base_metric = _quadratic_entropy kwargs_metric = {'features': features, 'metric': metric, 'metric_kwargs': metric_kwargs, 'scale': False} # we scaled already return _compute_metric(so=so, spl=spl, attr=attr, key_added=key_added, graph_key=graph_key, metric=base_metric, kwargs_metric=kwargs_metric, local=local, inplace=inplace)
[docs]def abundance(so, spl: str, attr: str, *, mode='proportion', key_added: str = None, graph_key='knn', local=False, inplace: bool = True): """Computes the abundance of species on the observation or the sample level. Args: so: SpatialOmics instance spl: Spl for which to compute the metric attr: Categorical feature in SpatialOmics.obs to use for the grouping local: Whether to compute the metric on the observation or the sample level key_added: Key added to either uns[spl] or obs depending on the choice of `local` graph_key: Specifies the graph representation to use in so.G[spl] if `local=True`. inplace: Whether to add the metric to the current SpatialOmics instance or to return a new one. Examples: .. code-block:: python so = sh.dataset.imc() spl = so.spl.index[0] sh.metrics.abundance(so, spl, 'meta_id', local=False) sh.metrics.abundance(so, spl, 'meta_id', local=True) """ if key_added is None: key_added = f'{mode}' if local: key_added += f'_{graph_key}' event_space = so.obs[spl][attr] if is_categorical(event_space): event_space = event_space.dtypes.categories else: raise TypeError(f'{attr} is not categorical') metric = _abundance kwargs_metric = {'event_space': event_space, 'mode': mode} return _compute_metric(so=so, spl=spl, attr=attr, key_added=key_added, metric=metric, graph_key=graph_key, kwargs_metric=kwargs_metric, local=local, inplace=inplace)
def _compute_metric(so, spl: str, attr, key_added, graph_key, metric, kwargs_metric, local, inplace=True): """Computes the given metric for each observation or the sample """ # generate a copy if necessary so = so if inplace else so.copy() # extract relevant categorisation data = so.obs[spl][attr] if not is_categorical(data): raise TypeError('`attr` needs to be categorical') if local: # get graph g = so.G[spl][graph_key] # compute metric for each observation res = [] observation_ids = so.obs[spl].index for observation_id in observation_ids: n = list(g.neighbors(observation_id)) if len(n) == 0: res.append(0) continue counts = Counter(data.loc[n].values) res.append(metric(counts, **kwargs_metric)) if np.ndim(res[0]) > 0: res = pd.DataFrame(res, index=observation_ids) if spl not in so.obsm: so.obsm[spl] = {} so.obsm[spl][key_added] = res else: res = pd.DataFrame({key_added: res}, index=observation_ids) if key_added in so.obs[spl]: # drop previous computation of metric so.obs[spl].drop(key_added, axis=1, inplace=True) so.obs[spl] = pd.concat((so.obs[spl], res), axis=1) else: res = metric(Counter(data), **kwargs_metric) if np.ndim(res) > 0: if spl not in so.uns: so.uns[spl] = {} so.uns[spl][key_added] = res else: so.spl.loc[spl, key_added] = res if not inplace: return so