Source code for tdm.preprocess.ki67

import pandas as pd
import numpy as np
from tdm.preprocess.single_cell_df import CELL_TYPE_COL, KI67_COL


def _transform_ki67_series(x: np.ndarray, typical_noise=0.5, drop_values_below_noise: bool = False) -> np.ndarray:
    """Shift Ki67 values by the noise level and rescale them by the spread of the above-noise values.

    The noise level is subtracted from every value, then all values are divided by the
    standard deviation of the strictly positive shifted values (computed with the
    ``.std()`` method of ``x``'s own type).

    Args:
        x (np.ndarray): ki67 values
        typical_noise (float, optional): typical magnitude of noise. Defaults to 0.5.
        drop_values_below_noise (bool, optional): if True, only the strictly positive
            transformed values are returned; if False, the output keeps the length of
            ``x`` and negative transformed values are set to 0. Defaults to False.

    Returns:
        np.ndarray: transformed ki67 values
    """
    shifted = x - typical_noise

    # The scale is estimated only from values that exceed the noise level.
    scale = shifted[shifted > 0].std()
    scaled = shifted / scale

    if drop_values_below_noise:
        return scaled[scaled > 0]

    # `scaled` is a fresh object produced by the arithmetic above, so writing into it
    # does not modify the caller's input.
    scaled[scaled < 0] = 0
    return scaled


def transform_ki67(
    single_cell_df: pd.DataFrame,
    typical_noise: float = 0.5,
    ki67_col: str = KI67_COL,
    cell_type_col: str = CELL_TYPE_COL,
) -> pd.DataFrame:
    """Return a single-cell dataframe with standardized Ki67 values above noise,
    the transformed distributions should have similar shapes.

    Args:
        single_cell_df (pd.DataFrame): dataframe with row per cell, columns for cell type and Ki67 values.
        typical_noise (float, optional): magnitude of typical noise in the data.
            See example plot for finding the typical noise in :ref:`tutorial 01<tutorials>`.
        ki67_col (str): name of the column with Ki67 values.
        cell_type_col (str): name of the column with cell types.

    Note:
        The transformed values should have similar distributions across different cell types.
        To plot the transformed values:

        .. code-block:: python

            from tdm.preprocess.ki67 import transform_ki67, plot_marker_distributions

            transformed_ki67_single_cell_df = transform_ki67(single_cell_df)
            plot_marker_distributions(transformed_ki67_single_cell_df, ki67_col)

    Returns:
        pd.DataFrame: one row per cell whose Ki67 value is above ``typical_noise``, with two
        columns, ``cell_type_col`` and ``ki67_col`` (the transformed value), and a fresh
        integer index. Rows are ordered by cell type, then by their order in ``single_cell_df``.
    """
    # The per-type frames are assembled explicitly instead of going through
    # ``groupby.apply(...).reset_index().drop(columns="level_1")``: that chain depends on the
    # input index being unnamed and on pandas concatenating the per-group results, which is
    # not what it does when every group returns an identically indexed Series (e.g. a single
    # cell type).
    pieces = []
    for cell_type, cells in single_cell_df.groupby(cell_type_col):
        transformed = _transform_ki67_series(
            cells[ki67_col], typical_noise=typical_noise, drop_values_below_noise=True
        )
        if len(transformed) == 0:
            # No cell of this type is above the noise level; it contributes no rows.
            continue
        piece = pd.DataFrame({ki67_col: np.asarray(transformed)})
        piece.insert(0, cell_type_col, cell_type)
        pieces.append(piece)

    if not pieces:
        return pd.DataFrame(columns=[cell_type_col, ki67_col])

    return pd.concat(pieces, ignore_index=True)
def _compute_ki67_division_cutoff_series(
    single_cell_df: pd.Series, ki67_col: str, typical_noise: float, ki67_threshold: float
):
    """Computes the ki67 cutoff for division.

    Args:
        single_cell_df (pd.Series): single-cell dataframe for one cell type.
        ki67_col (str): column with the non-transformed ki67 values
        typical_noise (float): magnitude of typical noise in the data.
            See example plot for finding the typical noise in :ref:`tutorial 01<tutorials>`.
        ki67_threshold (float): multiplier applied to the standard deviation of the
            above-noise values.

    Returns:
        The cutoff ``ki67_threshold * std + typical_noise``, where ``std`` is ``np.std`` of the
        values greater than ``typical_noise`` after subtracting ``typical_noise``.
    """
    values = single_cell_df[ki67_col]
    above_noise = values[values > typical_noise] - typical_noise
    spread = np.std(above_noise)
    return typical_noise + (ki67_threshold * spread)


def _compute_ki67_division_cutoffs(
    single_cell_df: pd.DataFrame,
    typical_noise: float,
    ki67_col: str,
    ki67_threshold: float = 0.5,
    cell_type_col: str = "cell_type",
) -> pd.DataFrame:
    """Computes the ki67 division cutoff separately for every cell type.

    Args:
        single_cell_df (pd.DataFrame): dataframe with row per cell, columns for cell type and Ki67 values.
        typical_noise (float): magnitude of typical noise in the data.
        ki67_col (str): name of the column with the non-transformed Ki67 values.
        ki67_threshold (float, optional): multiplier applied to the per-type standard deviation.
            Defaults to 0.5.
        cell_type_col (str, optional): name of the column with cell types. Defaults to "cell_type".

    Returns:
        The result of the grouped ``apply``: one cutoff per value of ``cell_type_col``.
    """
    per_type = single_cell_df.groupby(cell_type_col)
    return per_type.apply(
        lambda cells: _compute_ki67_division_cutoff_series(
            cells,
            ki67_col=ki67_col,
            typical_noise=typical_noise,
            ki67_threshold=ki67_threshold,
        ),
        include_groups=False,
    )
def is_dividing(
    single_cell_df: pd.DataFrame,
    typical_noise: float,
    ki67_threshold: float,
    ki67_col: str,
    cell_type_col: str = "cell_type",
) -> pd.Series:
    r"""Compute a binary division label for each cell.

    Computes a cutoff for each cell-type:

    .. math::

        \text{cutoff} = K \cdot \sigma + N

    Where :math:`K` is the ki67 threshold, :math:`N` is the typical noise level and
    :math:`\sigma` is the standard deviation of ki67 values above typical noise, after
    subtracting the noise level.

    Args:
        single_cell_df (pd.DataFrame): dataframe with row per cell, columns for cell type and Ki67 values.
        typical_noise (float): magnitude of typical noise in the data.
            See example plot for finding the typical noise in :ref:`tutorial 01<tutorials>`.
        ki67_threshold (float): fraction of standard deviation above noise to use as the cutoff for division.
        ki67_col (str): name of the column with Ki67 values.
        cell_type_col (str, optional): name of the column with cell types. Defaults to "cell_type".

    Returns:
        pd.Series: boolean series with True for dividing cells.
    """
    ki67_threshold_per_type = _compute_ki67_division_cutoffs(
        single_cell_df=single_cell_df,
        typical_noise=typical_noise,
        ki67_threshold=ki67_threshold,
        ki67_col=ki67_col,
        cell_type_col=cell_type_col,
    )

    # Look up each cell's own per-type cutoff, then compare element-wise.
    cutoff_per_cell = single_cell_df[cell_type_col].map(ki67_threshold_per_type)
    return single_cell_df[ki67_col] > cutoff_per_cell