{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Cross Validation" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "\n", "\n", "from tdm.raw.breast_mibi import read_single_cell_df\n", "from tdm.model.selection import cross_validation2, plot_cross_validation_result\n", "from tdm.cell_types import FIBROBLAST, MACROPHAGE, TUMOR, ENDOTHELIAL\n", "\n", "single_cell_df = read_single_cell_df()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define the settings to compare by constructing a dictionary that maps each setting name to the keyword arguments for the model and for the feature transforms:" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "setting_dicts = {\n", " 'degree 2': {\n", " 'model_kwargs': {}, # see: LogisticRegressionModel for more options such as regularization\n", " 'polynomial_dataset_kwargs': {'degree': 2} # see: PolynomialDataset for more feature transformations\n", " },\n", " 'degree 1': {\n", " 'model_kwargs': {},\n", " 'polynomial_dataset_kwargs': {'degree': 1},\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the cross-validation (note: the dataset is partitioned at the level of tissues, not cells):" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 3/3 [00:03<00:00, 1.29s/it]\n" ] } ], "source": [ "res = cross_validation2(\n", " single_cell_df, \n", " model_dicts=setting_dicts,\n", " cell_types_to_model=[FIBROBLAST, MACROPHAGE],\n", " allowed_neighbor_types=[FIBROBLAST, MACROPHAGE, TUMOR, ENDOTHELIAL],\n", " neighborhood_mode='extrapolate',\n", " n_splits=3\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Plot the result:" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_cross_validation_result(res)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There are no major differences between the settings above. For simplicity, we can take the model that performs best on average over all cell types (the table below is sorted by mean loss, lowest first):" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "setting\n", "degree 2 0.081324\n", "degree 1 0.081365\n", "Name: loss, dtype: float64" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res.groupby('setting').loss.mean().sort_values()" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.1" } }, "nbformat": 4, "nbformat_minor": 2 }