Figure 4G#
Import packages#
[1]:
import anndata
import ezplot
import matplotlib.pyplot as plt
import pandas as pd
import glob
import pathlib
import seaborn as sns
import sklearn.cluster
import numpy as np
import matplotlib.cm as cm
Define helper functions#
[3]:
def read_gene_velocity_corr(idx: int, version: int) -> dict:
    """Load the per-fate gene/velocity correlation tables for one index.

    Every file matching ``./version_{version}_gene_velocity_corr/{idx}.*.csv``
    (relative to the current working directory) is read with
    ``pd.read_csv(path, index_col=0)``. The fate label is taken from the
    second dot-separated field of the file name, i.e. ``{idx}.{fate}.csv``.

    Parameters
    ----------
    idx : int
        Leading field of the file names to load.
    version : int
        Selects the ``version_{version}_gene_velocity_corr`` directory.

    Returns
    -------
    dict
        Maps fate label -> DataFrame. Files whose fate label is
        ``"Undifferentiated"`` are skipped. The dict is empty when no file
        matches the pattern.
    """
    pattern = f"./version_{version}_gene_velocity_corr/{idx}.*.csv"
    tables = {}
    # glob.glob returns matches in arbitrary (filesystem-dependent) order;
    # sorting keeps the key order of the result reproducible across machines.
    for path in sorted(glob.glob(pattern)):
        fate = pathlib.Path(path).name.split(".")[1]
        if fate != "Undifferentiated":
            tables[fate] = pd.read_csv(path, index_col=0)
    return tables
def _adjust_colnames(data, fate: str = "Monocyte"):
    """Return a copy of ``data[fate]`` with every column label prefixed by ``"{fate}."``.

    ``data`` is indexed with ``fate`` and the selected table is copied before
    relabelling, so the object stored in ``data`` keeps its original columns.
    """
    prefixed = data[fate].copy()
    relabelled = []
    for column in prefixed.columns:
        relabelled.append("{}.{}".format(fate, column))
    prefixed.columns = relabelled
    return prefixed
Two fates#
For the two-fate example, we use idx=19977 from version 3 (v3).
[4]:
# Load the per-fate tables for idx 19977 (version 3), then place the
# fate-prefixed Monocyte and Neutrophil tables side by side.
data = read_gene_velocity_corr(idx=19977, version=3)
df = pd.concat(
    [_adjust_colnames(data, fate) for fate in ("Monocyte", "Neutrophil")],
    axis=1,
)
df
[4]:
| Monocyte.f.corr | Monocyte.f.pval | Monocyte.g.corr | Monocyte.g.pval | Neutrophil.f.corr | Neutrophil.f.pval | Neutrophil.g.corr | Neutrophil.g.pval | |
|---|---|---|---|---|---|---|---|---|
| gene_ids | ||||||||
| 1110002J07Rik | 0.246731 | 0.119912 | 0.594733 | 4.121254e-05 | 0.025470 | 8.744020e-01 | 0.406625 | 8.333975e-03 |
| 1110032F04Rik | 0.537274 | 0.000292 | 0.322823 | 3.952964e-02 | 0.533234 | 3.310762e-04 | 0.298061 | 5.838780e-02 |
| 1500002F19Rik | 0.199835 | 0.210322 | -0.192597 | 2.276558e-01 | -0.679190 | 1.050207e-06 | -0.932045 | 8.675594e-19 |
| 1500026H17Rik | -0.144147 | 0.368571 | -0.669251 | 1.718679e-06 | -0.656420 | 3.160551e-06 | -0.927592 | 2.872365e-18 |
| 1600010M07Rik | -0.666030 | 0.000002 | -0.851748 | 1.666314e-12 | -0.297194 | 5.915940e-02 | -0.664837 | 2.126375e-06 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Zmpste24 | 0.667209 | 0.000002 | 0.913492 | 8.110006e-17 | -0.876496 | 5.967734e-14 | -0.819266 | 5.826618e-11 |
| Zmynd15 | -0.038569 | 0.810783 | 0.464953 | 2.192460e-03 | 0.329221 | 3.556725e-02 | 0.801130 | 3.157972e-10 |
| Znfx1 | -0.624361 | 0.000013 | -0.976683 | 1.131776e-27 | -0.771510 | 3.547780e-09 | -0.961632 | 1.633703e-23 |
| Zscan2 | 0.462753 | 0.002316 | 0.499486 | 8.850821e-04 | 0.349633 | 2.504204e-02 | 0.112174 | 4.850126e-01 |
| Zyx | -0.538328 | 0.000283 | -0.969048 | 2.649156e-25 | -0.642644 | 5.891798e-06 | -0.944852 | 1.659873e-20 |
2492 rows × 8 columns
Read table of transcription factors (from Weinreb2020)#
[5]:
def get_tf_table(df, tf_list_path="/Users/michaelvinyard/tf_list.txt"):
    """Keep only the rows of ``df`` whose index label is a listed transcription factor.

    The list is read with ``pd.read_table`` from a header-less file; the
    columns at positions 1, 2 and 3 are loaded and named ``idx``,
    ``gene_name`` and ``description``. Only ``gene_name`` is used here.

    Parameters
    ----------
    df : pd.DataFrame
        Table whose index labels are compared against ``gene_name``.
    tf_list_path : str or path-like, optional
        Location of the transcription-factor list. Defaults to the path the
        notebook was originally run with; pass your own copy of the file to
        run this on another machine.

    Returns
    -------
    pd.DataFrame
        The rows of ``df`` whose index label appears in ``gene_name``, in
        their original order.
    """
    tf_df = pd.read_table(
        tf_list_path,
        usecols=[1, 2, 3],
        index_col=0,
        header=None,
        names=["idx", "gene_name", "description"],
    )
    is_listed_tf = df.index.isin(tf_df["gene_name"])
    return df.loc[is_listed_tf]
[6]:
# Restrict to transcription-factor rows, then keep only the columns whose
# label matches the regex "corr" (as an independent copy).
tf_df = get_tf_table(df).filter(regex="corr").copy()
Plot heatmap for Figure 4G#
[7]:
# Clustered heatmap of the transcription-factor correlation columns.
# The colour scale is pinned to [-1, 1] and every row is labelled with its
# index entry.
cg = sns.clustermap(
    data=tf_df,
    cmap=cm.RdYlBu_r,
    vmin=-1,
    vmax=1,
    yticklabels=tf_df.index,
    figsize=(4, 10),
    rasterized=True,
)
# Writes the current pyplot figure; the clustermap figure created above is
# expected to be the current one at this point.
plt.savefig("Figure4G.svg")
[8]:
# Write the table plotted in Figure 4G to a CSV file.
tf_df.to_csv(path_or_buf="scdiffeq.SupplementaryTable6.csv")
[ ]: