"""Source code for tdm.preprocess.ki67: helpers for transforming and thresholding Ki67 values."""
import pandas as pd
import numpy as np
from tdm.preprocess.single_cell_df import CELL_TYPE_COL, KI67_COL
def _transform_ki67_series(x: np.ndarray, typical_noise=0.5, drop_values_below_noise: bool = False) -> np.ndarray:
"""Selects values greater than typical_noise, subtracts typical_noise and divides by the standard deviation of
the shifted selected values.
Args:
x (np.ndarray): ki67 values
typical_noise (float, optional): typical magnitude of noise. Defaults to 0.5.
Returns:
np.ndarray: transformed ki67 values
"""
x = x - typical_noise
std = x[x > 0].std() # standard deviation of values above noise
x = x / std
if drop_values_below_noise:
x = x[x > 0]
else:
x[x < 0] = 0
return x
def _compute_ki67_division_cutoff_series(
single_cell_df: pd.Series, ki67_col: str, typical_noise: float, ki67_threshold: float
):
"""Computes the ki67 cutoff for division.
Args:
single_cell_df (pd.Series): single-cell dataframe for one cell type.
ki67_col (str): column with the non-transformed ki67 values
typical_noise (float): magnitude of typical noise in the data. See example plot for finding the typical noise
in :ref:`tutorial 01<tutorials>`.
ki67_threshold (float): _description_
Returns:
_type_: _description_
"""
x = single_cell_df[ki67_col]
std = np.std(x[x > typical_noise] - typical_noise)
return (ki67_threshold * std) + typical_noise
def _compute_ki67_division_cutoffs(
    single_cell_df: pd.DataFrame,
    typical_noise: float,
    ki67_col: str,
    ki67_threshold: float = 0.5,
    cell_type_col: str = "cell_type",
) -> pd.Series:
    """Computes the ki67 division cutoff separately for every cell type.

    The dataframe is grouped by ``cell_type_col`` and
    :func:`_compute_ki67_division_cutoff_series` is applied to each group.

    Args:
        single_cell_df (pd.DataFrame): dataframe with row per cell, columns for cell type and Ki67 values.
        typical_noise (float): magnitude of typical noise in the data.
        ki67_col (str): column with the non-transformed ki67 values.
        ki67_threshold (float, optional): multiplier passed on to the per-type cutoff computation.
            Defaults to 0.5.
        cell_type_col (str, optional): column holding the cell type of each cell. Defaults to "cell_type".

    Returns:
        pd.Series: one cutoff per cell type, indexed by the values of ``cell_type_col``.
    """

    def cutoff_for_group(cells_of_type: pd.DataFrame) -> float:
        return _compute_ki67_division_cutoff_series(
            cells_of_type, ki67_col=ki67_col, typical_noise=typical_noise, ki67_threshold=ki67_threshold
        )

    # include_groups=False: the grouping column is not passed to the applied function.
    return single_cell_df.groupby(cell_type_col).apply(cutoff_for_group, include_groups=False)
# [docs] (documentation link marker from the rendered HTML page; not part of the module code)
def is_dividing(
    single_cell_df: pd.DataFrame,
    typical_noise: float,
    ki67_threshold: float,
    ki67_col: str,
    cell_type_col: str = "cell_type",
) -> pd.Series:
    r"""Compute a binary division label for each cell.

    Computes a cutoff for each cell-type:

    .. math::

        \text{cutoff} = K \cdot \sigma + N

    Where :math:`K` is the ki67 threshold, :math:`N` is the typical noise level and :math:`\sigma` is the standard
    deviation of ki67 values above typical noise, after subtracting the noise level.

    A cell is labelled as dividing when its Ki67 value is strictly greater than the cutoff of its cell type.

    Args:
        single_cell_df (pd.DataFrame): dataframe with row per cell, columns for cell type and Ki67 values.
        typical_noise (float): magnitude of typical noise in the data. See example plot for finding the typical noise
            in :ref:`tutorial 01<tutorials>`.
        ki67_threshold (float): fraction of standard deviation above noise to use as the cutoff for division.
        ki67_col (str): name of the column with Ki67 values.
        cell_type_col (str, optional): name of the column with the cell type of each cell.
            Defaults to "cell_type".

    Returns:
        pd.Series: boolean series with True for dividing cells.
    """
    cutoff_per_type = _compute_ki67_division_cutoffs(
        single_cell_df=single_cell_df,
        typical_noise=typical_noise,
        ki67_threshold=ki67_threshold,
        ki67_col=ki67_col,
        cell_type_col=cell_type_col,
    )
    # Look up each cell's cutoff through its cell type, then compare row by row.
    cutoff_per_cell = single_cell_df[cell_type_col].map(cutoff_per_type)
    return single_cell_df[ki67_col] > cutoff_per_cell