Figure 3H, I#

[1]:
%load_ext nb_black

import scdiffeq as sdq
import pathlib
import matplotlib.pyplot as plt
import cellplots as cp
import ABCParse

print(sdq.__version__, sdq.__path__)
0.1.2rc0 ['/home/mvinyard/github/scDiffEq/scdiffeq']

Load data, dimension reduction models#

[2]:
data_dir = pathlib.Path("/home/mvinyard/data/scdiffeq_data/human_hematopoiesis/")
h5ad_path = data_dir.joinpath("human_hematopoiesis.preprocessed.h5ad")
pca_model_path = data_dir.joinpath("human_hematopoiesis.pca_model.pkl")
umap_model_path = data_dir.joinpath("human_hematopoiesis.umap_model.pkl")

adata = sdq.io.read_h5ad(h5ad_path)
pca = sdq.io.read_pickle(pca_model_path)
umap = sdq.io.read_pickle(umap_model_path)
AnnData object with n_obs × n_vars = 1947 × 2477
    obs: 'batch', '_time', 'nGenes', 'UMI', 't', 'n_genes', 'cell_type'
    var: 'gene_name', 'n_cells', 'mean', 'std'
    uns: 'log1p', 'h5ad_path'
    obsm: 'X_pca', 'X_umap'
    layers: 'X_l_TC', 'X_n_TC', 'X_orig', 'ambiguous', 'labeled_TC', 'sl_TC', 'sn_TC', 'spliced', 'total', 'ul_TC', 'un_TC', 'unlabeled_TC', 'unspliced'

Define some helper functions#

[3]:
class ManuscriptFigurePlot(ABCParse.ABCParse):
    def __init__(self, cmap=None, color_quantile_cutoff: float = 0.01, *args, **kwargs):

        self.__parse__(locals())

    @property
    def cmap(self):
        if self._cmap is None:
            self._cmap = {
                "HSC": "#023047",
                "MEP-like": "#fb8500",
                "GMP-like": "#126782",
                "Bas": "#219ebc",
                "Mon": "#58b4d1",
                "Neu": "#8ecae6",
                "Meg": "#ffb703",
                "Ery": "#fd9e02",
            }
        return self._cmap

    @property
    def scatter_kwargs(self):
        return {
            "s": 80,
            "ec": "None",
            "alpha": 0.4,
            "vmin": self._adata.obs[self._c].quantile(self._color_quantile_cutoff),
            "vmax": self._adata.obs[self._c].quantile(1 - self._color_quantile_cutoff),
        }

    def plot_manifold(
        self,
        adata,
        groupby: str = "cell_type",
        s_background=350,
        s_cover=200,
        *args,
        **kwargs
    ):
        fig, axes = cp.plot(
            1, 1, height=1, width=1.2, del_xy_ticks=[True], delete="all"
        )
        axes = cp.umap_manifold(
            adata,
            groupby=groupby,
            c_background=self.cmap,
            s_background=s_background,
            s_cover=s_cover,
            ax=axes[0],
            *args,
            **kwargs,
        )

        self.axes = axes
        self.axes[0].legend(
            facecolor="None", edgecolor="None", markerscale=0.4, loc=(1.2, 0)
        )

    def plot_velocity(
        self,
        adata,
        c="diffusion",
        scatter_zorder=101,
        stream_zorder=110,
        return_axes=True,
        png_dpi=500,
        save=True,
        svg_dpi=500,
        scatter_kwargs={},
        *args,
        **kwargs
    ):

        scatter_kw = self.scatter_kwargs.copy()
        scatter_kw.update(scatter_kwargs)

        self.axes[0] = sdq.pl.velocity_stream(
            adata,
            c=c,
            ax=self.axes[0],
            scatter_kwargs=scatter_kw,
            scatter_zorder=scatter_zorder,
            stream_zorder=stream_zorder,
            return_axes=return_axes,
            png_dpi=png_dpi,
            save=save,
            svg_dpi=svg_dpi,
            *args,
            **kwargs,
        )

    def __call__(
        self,
        adata,
        groupby: str = "cell_type",
        s_background=350,
        s_cover=200,
        c="diffusion",
        scatter_zorder=101,
        stream_zorder=110,
        return_axes=True,
        png_dpi=500,
        save=True,
        svg_dpi=500,
        scatter_kwargs={},
        *args,
        **kwargs
    ):
        self.__update__(locals())

        self.plot_manifold(
            adata=adata,
            groupby=groupby,
            s_background=s_background,
            s_cover=s_cover,
        )
        self.plot_velocity(
            adata=adata,
            c=c,
            scatter_zorder=scatter_zorder,
            stream_zorder=stream_zorder,
            return_axes=return_axes,
            png_dpi=png_dpi,
            save=save,
            svg_dpi=svg_dpi,
            scatter_kwargs=scatter_kwargs,
        )
        plt.show()

Figure 3H: plot the dataset UMAP manifold#

[4]:
cmap = {
    "HSC": "#023047",
    "MEP-like": "#fb8500",
    "GMP-like": "#126782",
    "Bas": "#219ebc",
    "Mon": "#58b4d1",
    "Neu": "#8ecae6",
    "Meg": "#ffb703",
    "Ery": "#fd9e02",
}
zorder = {
    "HSC": 105,
    "MEP-like": 106,
    "GMP-like": 107,
    "Bas": 104,
    "Mon": 104,
    "Neu": 103,
    "Meg": 102,
    "Ery": 101,
}
axes = cp.umap_manifold(
    adata, c_background="k", s_background=400, c_cover="white", s_cover=350
)
axes = cp.umap(
    adata, groupby="cell_type", cmap=cmap, s=250, ax=axes[0], force_zorder=zorder
)
axes[0].legend(facecolor="None", edgecolor="None", markerscale=0.4, loc=(1.2, 0))
plt.savefig("human_hematopoiesis.dynamo.svg", dpi=500)
../_images/_analyses_Figure3HI_7_0.png

Load project#

This “project” consists of five “versions”. In this case, each version is identical, wherein we run model training over five seeds (0 through 4).

[5]:
project = sdq.io.Project("./LightningSDE-FixedPotential-RegularizedVelocityRatio/")

Define helper functions#

[7]:
def aggr(df):
    return df.filter(regex="sinkhorn").filter(regex="validation").dropna().sum(1).mean()


import pandas as pd


def aggr_total_loss(group_df, key="training"):

    SD_loss = group_df.filter(regex="sinkhorn").filter(regex=key).dropna().sum(1).mean()

    VR_loss = (
        group_df.filter(regex="velo_ratio").filter(regex=key).dropna().sum(1).mean()
    )
    total_loss = VR_loss + SD_loss
    return pd.Series({"total": total_loss, "VR": VR_loss, "SD": SD_loss})


def get_best_saved_ckpt(version):
    grouped = version.metrics_df.groupby("epoch")
    val = grouped.apply(aggr_total_loss)
    saved_ckpts = list(version.ckpts.keys())
    saved_ckpts.remove("last")
    saved_ckpts.append(2499)
    best_epoch = val["total"].loc[saved_ckpts].idxmin()
    return best_epoch

Figure 3I: scDiffEq velocity stream plots: human hematopoiesis#

[8]:
for v, version_path in project._VERSION_PATHS.items():
    version = sdq.io.Version(version_path)
    best_ckpt_epoch = get_best_saved_ckpt(version)
    ckpt_path = version.ckpts[best_ckpt_epoch].path
    model = sdq.io.load_model(adata=adata, ckpt_path=ckpt_path)
    adata.uns["sdq_info"] = (
        f"human_hematopoiesis.{v}.epoch_{best_ckpt_epoch}"  # used for saving plot to a unique fname
    )
    model.drift(adata, silent=True)
    model.diffusion(adata, silent=True)
    sdq.tl.velocity_graph(adata, n_pcs=10)
    plot = ManuscriptFigurePlot()
    plot(adata)
Seed set to 0
 - [INFO] | Input data configured.
 - [INFO] | Bulding Annoy kNN Graph on adata.obsm['train']
 - [INFO] | Using the specified parameters, LightningSDE-FixedPotential-RegularizedVelocityRatio has been called.
 - [INFO] | Added: adata.obsp['distances']
 - [INFO] | Added: adata.obsp['connectivities']
 - [INFO] | Added: adata.uns['neighbors']
 - [INFO] | Added: adata.obsp['velocity_graph']
 - [INFO] | Added: adata.obsp['velocity_graph_neg']
 - [INFO] | Saved to:
  scdiffeq_figures/velocity_stream.human_hematopoiesis.version_1.epoch_1446.svg
  scdiffeq_figures/velocity_stream.human_hematopoiesis.version_1.epoch_1446.png
../_images/_analyses_Figure3HI_13_2.png
Seed set to 0
 - [INFO] | Input data configured.
 - [INFO] | Bulding Annoy kNN Graph on adata.obsm['train']
 - [INFO] | Using the specified parameters, LightningSDE-FixedPotential-RegularizedVelocityRatio has been called.
 - [INFO] | Updated: adata.obsp['velocity_graph']
 - [INFO] | Updated: adata.obsp['velocity_graph_neg']
 - [INFO] | Saved to:
  scdiffeq_figures/velocity_stream.human_hematopoiesis.version_2.epoch_1464.svg
  scdiffeq_figures/velocity_stream.human_hematopoiesis.version_2.epoch_1464.png
../_images/_analyses_Figure3HI_13_5.png
Seed set to 0
 - [INFO] | Input data configured.
 - [INFO] | Bulding Annoy kNN Graph on adata.obsm['train']
 - [INFO] | Using the specified parameters, LightningSDE-FixedPotential-RegularizedVelocityRatio has been called.
 - [INFO] | Updated: adata.obsp['velocity_graph']
 - [INFO] | Updated: adata.obsp['velocity_graph_neg']
 - [INFO] | Saved to:
  scdiffeq_figures/velocity_stream.human_hematopoiesis.version_3.epoch_1543.svg
  scdiffeq_figures/velocity_stream.human_hematopoiesis.version_3.epoch_1543.png
../_images/_analyses_Figure3HI_13_8.png
Seed set to 0
 - [INFO] | Input data configured.
 - [INFO] | Bulding Annoy kNN Graph on adata.obsm['train']
 - [INFO] | Using the specified parameters, LightningSDE-FixedPotential-RegularizedVelocityRatio has been called.
 - [INFO] | Updated: adata.obsp['velocity_graph']
 - [INFO] | Updated: adata.obsp['velocity_graph_neg']
 - [INFO] | Saved to:
  scdiffeq_figures/velocity_stream.human_hematopoiesis.version_4.epoch_1951.svg
  scdiffeq_figures/velocity_stream.human_hematopoiesis.version_4.epoch_1951.png
../_images/_analyses_Figure3HI_13_11.png
Seed set to 0
 - [INFO] | Input data configured.
 - [INFO] | Bulding Annoy kNN Graph on adata.obsm['train']
 - [INFO] | Using the specified parameters, LightningSDE-FixedPotential-RegularizedVelocityRatio has been called.
 - [INFO] | Updated: adata.obsp['velocity_graph']
 - [INFO] | Updated: adata.obsp['velocity_graph_neg']
 - [INFO] | Saved to:
  scdiffeq_figures/velocity_stream.human_hematopoiesis.version_5.epoch_2264.svg
  scdiffeq_figures/velocity_stream.human_hematopoiesis.version_5.epoch_2264.png
../_images/_analyses_Figure3HI_13_14.png