Figure 3c, d#

[1]:
%load_ext nb_black

import ABCParse
import anndata
import cellplots as cp
import larry
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import pandas as pd
import scdiffeq as sdq
import scdiffeq_analyses as sdq_an
import scipy.stats
import seaborn as sns
import torch

from typing import Any, Dict, List, Optional
[2]:
h5ad_path = (
    "/home/mvinyard/data/adata.reprocessed_19OCT2023.more_feature_inclusive.h5ad"
)
adata = sdq.io.read_h5ad(h5ad_path)
larry_cmap = larry.pl.InVitroColorMap()._dict

PCA = sdq.io.read_pickle("/home/mvinyard/data/pca_model.pkl")
UMAP = sdq.io.read_pickle("/home/mvinyard/data/umap_model.pkl")
AnnData object with n_obs × n_vars = 130887 × 2492
    obs: 'Library', 'Cell barcode', 'Time point', 'Starting population', 'Cell type annotation', 'Well', 'SPRING-x', 'SPRING-y', 'clone_idx', 'fate_observed', 't0_fated', 'train'
    var: 'gene_ids', 'hv_gene', 'must_include', 'exclude', 'use_genes'
    uns: 'fate_counts', 'h5ad_path', 'time_occupance'
    obsm: 'X_clone', 'X_pca', 'X_umap', 'cell_fate_df'
    layers: 'X_scaled'
/home/mvinyard/.anaconda3/envs/sdq-dev/lib/python3.9/site-packages/sklearn/base.py:318: UserWarning: Trying to unpickle estimator PCA from version 1.0.2 when using version 1.2.2. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  warnings.warn(
[3]:
project_path = "/home/mvinyard/experiments/LARRY.full_dataset/v1/LightningSDE-FixedPotential-RegularizedVelocityRatio/"
project = sdq.io.Project(path=project_path)
best_ckpts = sdq_an.parsers.best_checkpoints(project)
best_ckpts
[3]:
train test ckpt_path epoch
version_0 0.571656 0.551804 /home/mvinyard/experiments/LARRY.full_dataset/... 2500
version_1 0.541401 0.465658 /home/mvinyard/experiments/LARRY.full_dataset/... 1706
version_2 0.547771 0.499418 /home/mvinyard/experiments/LARRY.full_dataset/... 1238
version_3 0.496815 0.504075 /home/mvinyard/experiments/LARRY.full_dataset/... 1245
version_4 0.562102 0.522701 /home/mvinyard/experiments/LARRY.full_dataset/... 1662
[4]:
Perturbed = {}
for version, row in best_ckpts.iterrows():
    model = sdq.io.load_model(adata=adata, ckpt_path=row["ckpt_path"])
    break
 - [INFO] | Input data configured.
 - [INFO] | Bulding Annoy kNN Graph on adata.obsm['train']
Seed set to 0
 - [INFO] | Using the specified parameters, LightningSDE-FixedPotential-RegularizedVelocityRatio has been called.
[5]:
result = sdq.tl.perturb_scan_z_range(
    adata=adata,
    model=model,
    obs_key="Cell type annotation",
    subset_key="Cell type annotation",
    subset_val="Undifferentiated",
    genes=["Cebpe", "Dach1", "Lmo4", "Mxd1"],
    t_sim=torch.linspace(2, 6, 41),
    z_range=np.linspace(-10, 10, 9),
    PCA=PCA,
    N=200,
    replicates=10,
    seed=0,
)
[6]:
mon = {key: val.prtb.loc["Monocyte"].values.tolist() for key, val in result.items()}
neu = {key: val.prtb.loc["Neutrophil"].values.tolist() for key, val in result.items()}
[7]:
class StylishBoxPlot(ABCParse.ABCParse):
    def __init__(
        self,
        colors: Optional[List[str]] = None,
        widths: Optional[float] = None,
        scatter_kw={
            "alpha": 0.8,
            "s": 25,
        },
        *args,
        **kwargs
    ):
        self.__parse__(locals())

    @property
    def colors(self):
        if not hasattr(self, "_colors") or self._colors is None:
            self._colors = list(cm.tab20.colors)
        return self._colors

    def _background_scatter(self, ax, data):
        for en, (key, val) in enumerate(data.items()):

            x = [key] * len(val)

            if len(x) > 1:
                x_vals = en + 1 + (np.random.random(len(x)) - 0.5) / 5
            else:
                x_vals = en + 1

            ax.scatter(
                x_vals,
                val,
                color=self.colors[en],
                zorder=0,
                ec="None",
                rasterized=False,
                **self._scatter_kw,
            )

    def _background_boxplot(self, ax, data):

        x = list(data.keys())
        y = list(data.values())

        x = np.arange(len(y)) + 1

        bp = ax.boxplot(
            y,
            positions=x,
            patch_artist=True,
            showmeans=True,
            showfliers=False,
            meanline=True,
            zorder=1,
            widths=self._widths,
        )
        for median in bp["medians"]:
            median.set_visible(False)
        for en, mean in enumerate(bp["means"]):
            mean.set_c(self.colors[en])

        for en, box in enumerate(bp["boxes"]):
            box.set_facecolor(self.colors[en])
            box.set_alpha(0.2)

        for en, whisker in enumerate(bp["whiskers"]):
            whisker.set_c("None")

        for en, cap in enumerate(bp["caps"]):
            cap.set_c("None")

    def _foreground_boxplot(self, ax, data):

        y = list(data.values())
        x = list(data.keys())
        x = np.arange(len(y)) + 1
        bp = ax.boxplot(
            y,
            positions=x,
            patch_artist=True,
            showmeans=True,
            showfliers=False,
            meanline=True,
            zorder=2,
            widths=self._widths,
        )
        for en, box in enumerate(bp["boxes"]):
            box.set_facecolor(self.colors[en])
            box.set_edgecolor("k")
            box.set_linewidth(0.5)

        colors_ = np.repeat(
            np.array(self.colors), 2, axis=0
        )  # list(np.repeat(self.colors, 2))
        for en, whisker in enumerate(bp["whiskers"]):
            whisker.set_c("k")

        for en, cap in enumerate(bp["caps"]):
            cap.set_c("k")
            cap.set_linewidth(0.5)

        for median in bp["medians"]:
            median.set_visible(False)
        for en, mean in enumerate(bp["means"]):
            mean.set_c("k")
            mean.set_linewidth(0.5)

    def __call__(self, ax, data, *args, **kwargs):

        self.__update__(locals())

        #         try:
        #             self._background_scatter(ax, data)
        #         except:
        #             print(data)
        #         self._background_boxplot(ax, data)
        self._foreground_boxplot(ax, data)
[8]:
x = [0, 2.5, 5, 7.5, 10, -2.5, -5, -7.5, -10]
mon_x = {xi: mon[xi] for xi in x}
neu_x = {xi: neu[xi] for xi in x}
[9]:
colors = [
    "#f5f6f6",
    "#96d2b1",
    "#4FB587",
    "#377D5D",
    "#1E4433",
    "#f48da2",
    "#C81F4C",
    "#911738",
    "#5A0F24",
]
[10]:
fig, axes = cp.plot(
    nplots=2,
    ncols=2,
    wspace=0.1,
    y_label=["Fraction Neutrophils", "Fraction Monocytes"],
    title=[
        "Ensemble perturbation: Lmo4/Cepbe/Mxd1/Dach1",
    ],
    x_label=["Z-score"],
)
bp = StylishBoxPlot(colors=colors)
bp(ax=axes[0], data=neu_x)
bp = StylishBoxPlot(colors=colors)
bp(ax=axes[1], data=mon_x)
for ax in axes:
    ax.set_ylim(0, 0.6)
    ax.set_xticks(range(1, len(x) + 1))
    ax.set_xticklabels([str(xi) for xi in x])

plt.savefig("Figure3CD.LARRY.z_scan.tf_perturbation.svg", dpi=500)
../_images/_analyses_Figure3CD_10_0.png

Statistics#

Stats: Z=-10.0#

[11]:
result[-10].stats
[11]:
lfc lfc_std pval
Baso -0.344163 0.464065 6.334807e-02
Ccr7_DC -2.225350 7.037174 3.434364e-01
Eos 0.100000 10.728173 6.619521e-01
Lymphoid -2.225350 12.632113 5.565650e-01
Mast 0.049185 0.542898 7.071420e-01
Meg -0.388254 0.996793 2.465738e-01
Monocyte 0.503497 0.165400 7.803902e-07
Neutrophil -0.716182 0.219733 2.822856e-08
Undifferentiated 0.213196 0.294746 5.978327e-02

Stats: Z=-7.5#

[12]:
result[-7.5].stats
[12]:
lfc lfc_std pval
Baso -0.357732 0.426074 0.063345
Ccr7_DC -2.225350 7.037174 0.343436
Eos 2.325350 12.810971 0.411768
Lymphoid 0.000000 14.835665 1.000000
Mast -0.101994 0.642909 0.666921
Meg -0.293650 0.895389 0.322376
Monocyte 0.388769 0.169799 0.000030
Neutrophil -0.507302 0.225406 0.000010
Undifferentiated 0.257598 0.283954 0.006670

Stats: Z=-5.0#

[13]:
result[-5].stats
[13]:
lfc lfc_std pval
Baso -0.298958 0.320706 0.050857
Ccr7_DC -2.225350 7.037174 0.343436
Eos 2.325350 12.810971 0.411768
Lymphoid -2.225350 12.632113 0.556565
Mast 0.051255 0.516998 0.729008
Meg -0.242112 0.808106 0.291466
Monocyte 0.290306 0.201228 0.000859
Neutrophil -0.250560 0.172126 0.000836
Undifferentiated 0.088532 0.272041 0.335660

Stats: Z=-2.5#

[14]:
result[-2.5].stats
[14]:
lfc lfc_std pval
Baso -0.285827 0.335098 0.069205
Ccr7_DC -2.225350 7.037174 0.343436
Eos 4.550699 14.217715 0.239034
Lymphoid -2.225350 12.632113 0.556565
Mast 0.078241 0.510552 0.565435
Meg -0.134875 0.656334 0.495795
Monocyte 0.127975 0.194121 0.177587
Neutrophil -0.173706 0.161797 0.014792
Undifferentiated 0.183716 0.282854 0.045200

Stats: Z=0 (negative control)#

[15]:
result[0].stats
[15]:
lfc lfc_std pval
Baso -0.329532 0.350264 0.062290
Ccr7_DC -2.225350 7.037174 0.343436
Eos 4.550699 14.217715 0.239034
Lymphoid -2.225350 12.632113 0.556565
Mast 0.123756 0.447023 0.393449
Meg -0.093901 0.579420 0.562036
Monocyte 0.001702 0.232710 1.000000
Neutrophil 0.043065 0.150576 0.484823
Undifferentiated 0.058939 0.267663 0.521007

Stats: Z=+2.5#

[16]:
result[2.5].stats
[16]:
lfc lfc_std pval
Baso -0.403819 0.333990 0.009848
Ccr7_DC -2.225350 7.037174 0.343436
Eos 9.159895 15.814315 0.065820
Lymphoid -2.225350 12.632113 0.556565
Mast 0.054175 0.527476 0.693671
Meg -0.179211 0.692902 0.425182
Monocyte -0.166468 0.163012 0.073626
Neutrophil 0.239194 0.105876 0.001354
Undifferentiated 0.001625 0.289195 1.000000

Stats: Z=+5.0#

[17]:
result[5].stats
[17]:
lfc lfc_std pval
Baso -0.267582 0.372348 0.124908
Ccr7_DC -2.225350 7.037174 0.343436
Eos 2.383846 12.918037 0.370888
Lymphoid -2.225350 12.632113 0.556565
Mast 0.004420 0.582175 0.900783
Meg 0.036657 0.728058 0.769546
Monocyte -0.418199 0.278190 0.000333
Neutrophil 0.353356 0.106797 0.000005
Undifferentiated -0.026492 0.247384 0.851797

Stats: Z=+7.5#

[18]:
result[7.5].stats
[18]:
lfc lfc_std pval
Baso -0.407182 0.371657 1.167101e-02
Ccr7_DC -2.225350 7.037174 3.434364e-01
Eos 2.325350 12.810971 4.117681e-01
Lymphoid -2.225350 12.632113 5.565650e-01
Mast -0.005319 0.627175 9.086113e-01
Meg 0.103583 0.735823 5.163128e-01
Monocyte -0.611540 0.285579 8.386557e-06
Neutrophil 0.495526 0.076474 2.386503e-07
Undifferentiated -0.089762 0.348114 3.469466e-01

Stats: Z=+10.0#

[19]:
result[10].stats
[19]:
lfc lfc_std pval
Baso -0.474214 0.246406 3.394814e-04
Ccr7_DC -2.225350 7.037174 3.434364e-01
Eos 6.776049 10.913870 1.274668e-01
Lymphoid -2.225350 12.632113 5.565650e-01
Mast 0.047206 0.632534 6.553308e-01
Meg 0.110495 0.555091 5.748016e-01
Monocyte -0.988180 0.285764 7.556143e-08
Neutrophil 0.687548 0.100663 1.406881e-10
Undifferentiated -0.218872 0.199491 2.798487e-02