Figure S2a#

Import libraries#

[1]:

import cellplots as cp
import larry
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import seaborn as sns

Read data#

This version of the LARRY dataset has not been split into training and test sets.

[2]:

# Instantiate the dataset handle (presumably the in vitro LARRY data, per the
# class name), then compose it into `adata`, which every later cell reads.
# compose_adata() logs the annotations it attaches (fate counts, time
# occupance, fate labels) in this cell's output.
in_vitro = larry.datasets.inVitroData()
adata = in_vitro.compose_adata()

/home/mvinyard/.anaconda3/envs/sdq/lib/python3.9/site-packages/anndata/_core/anndata.py:522: FutureWarning: The dtype argument is deprecated and will be removed in late 2024.
  warnings.warn(

- [ INFO ] | Added lineage x fate counts to: adata.uns['fate_counts']
- [ INFO ] | Added lineage-time occupance to: adata.uns['time_occupance']
- [ INFO ] | Fated cells annotated at: adata.obs['fate_observed']
- [ INFO ] | Fated cells (t=t0) annotated at: adata.obs['t0_fated']
- [ INFO ] | Added cell x fate counts to: adata.obsm['cell_fate_df']

Compute Pearson correlation between per-group mean gene expression profiles#

[3]:

# One group per (well, time point) combination; each group yields one mean profile.
grouped = adata.obs.groupby(['Well', 'Time point'])

def _apply(df):
    """Return the column-wise mean of ``adata.X`` over the cells listed in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One group's slice of ``adata.obs``. Only ``df.index`` is used, to select
        the matching observations from ``adata``.

    Returns
    -------
    numpy.ndarray
        1-D array holding the mean of each column of ``X`` for the selected cells.
    """
    # `.mean(0)` yields an np.matrix for scipy sparse matrices but a plain
    # ndarray for dense input or scipy sparse arrays. np.asarray normalises all
    # of these, whereas the previous `.A` accessor only existed on np.matrix and
    # raised AttributeError otherwise.
    return np.asarray(adata[df.index].X.mean(0)).flatten()

# Keys are (well, time point) tuples; values are the 1-D mean vectors.
gene_means = grouped.apply(_apply).to_dict()

[4]:

# Nested mapping: Corr[group_a][group_b] -> Pearson r between the two groups'
# mean vectors. A group is never paired with itself, so the diagonal is absent.
Corr = {
    key_a: {
        key_b: scipy.stats.pearsonr(means_a, means_b)[0]  # [0] = r; the p-value is discarded
        for key_b, means_b in gene_means.items()
        if key_b != key_a
    }
    for key_a, means_a in gene_means.items()
}

[5]:

# Outer keys of Corr become columns, inner keys become rows; both are
# (well, time point) tuples, so the first index level on each axis is the well.
corr_df = (
    pd.DataFrame(Corr)
    .sort_index()[[0, 2]]  # columns: groups from wells 0 and 2
    .loc[[0, 1]]           # rows: groups from wells 0 and 1
    # Self-correlations were skipped when building Corr and show up as NaN;
    # set them to 1. Note this would equally mask a NaN returned by pearsonr.
    .fillna(1)
)
corr_df

[5]:

		0	2
		2.0	4.0	6.0
0	2.0	1.000000	0.964734	0.833761
1	4.0	0.972180	0.996787	0.925579
1	6.0	0.815005	0.928177	0.997465

Plot correlation matrix#

[9]:

# cp.plot is a project helper; it is unpacked as (figure, indexable axes).
fig, axes = cp.plot(height=0.5, width=0.5)

# Annotated heatmap of the correlation table, drawn on the first axes.
sns.heatmap(
    corr_df,
    ax=axes[0],
    cmap="Blues",
    vmin=0.8,               # colour scale spans 0.8 .. 1
    vmax=1,
    annot=True,
    fmt=".3f",              # print each coefficient with three decimals
    annot_kws={"size": 6},
)

# Write the current figure out in both vector and raster form.
plt.savefig("larry.figure_s2a.svg")
plt.savefig("larry.figure_s2a.png")