Tutorial 1 SpatialEx Translates Histology to Omics at Single-Cell Resolution

[1]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt

import SpatialEx as se

device = 'cuda:6'

1. Prepare the dataset

Choice 1: Download the Xenium Human Breast Cancer tissue dataset.

We provide H&E patch representations generated by different H&E foundation models in the following links, and the expression data is preprocessed:

UNI (Default): Slice 1 and Slice 2

CONCH: Slice 1 and Slice 2

Gigapath: Slice 1 and Slice 2

Phikon: Slice 1 and Slice 2

ResNet50: Slice 1 and Slice 2

[ ]:
image_encoder = 'UNI'

if image_encoder == 'CONCH':
    adata1 = sc.read_h5ad('./datasets/Human_Breast_Cancer_Rep1/Human_Breast_Cancer_Rep1_conch_resolution64_full.h5ad')
    adata2 = sc.read_h5ad('./datasets/Human_Breast_Cancer_Rep2/Human_Breast_Cancer_Rep2_conch_resolution64_full.h5ad')
elif image_encoder == 'Gigapath':
    adata1 = sc.read_h5ad('./datasets/Human_Breast_Cancer_Rep1/Human_Breast_Cancer_Rep1_gigapath_resolution64_full.h5ad')
    adata2 = sc.read_h5ad('./datasets/Human_Breast_Cancer_Rep2/Human_Breast_Cancer_Rep2_gigapath_resolution64_full.h5ad')
elif image_encoder == 'Phikon':
    adata1 = sc.read_h5ad('./datasets/Human_Breast_Cancer_Rep1/Human_Breast_Cancer_Rep1_phikon_resolution64_full.h5ad')
    adata2 = sc.read_h5ad('./datasets/Human_Breast_Cancer_Rep2/Human_Breast_Cancer_Rep2_phikon_resolution64_full.h5ad')
elif image_encoder == 'Resnet50':
    adata1 = sc.read_h5ad('./datasets/Human_Breast_Cancer_Rep1/Human_Breast_Cancer_Rep1_resnet50_resolution64_full.h5ad')
    adata2 = sc.read_h5ad('./datasets/Human_Breast_Cancer_Rep2/Human_Breast_Cancer_Rep2_resnet50_resolution64_full.h5ad')
else:
    adata1 = sc.read_h5ad('./datasets/Human_Breast_Cancer_Rep1/Human_Breast_Cancer_Rep1_uni_resolution64_full.h5ad')
    adata2 = sc.read_h5ad('./datasets/Human_Breast_Cancer_Rep2/Human_Breast_Cancer_Rep2_uni_resolution64_full.h5ad')

Choice 2: Preprocess your own data from scratch.

datasets/
│
├── Human_Breast_Cancer_Rep1/                  # The 1st slice
│   ├── cell_feature_matrix.h5
│   ├── cells.csv
│   ├── Xenium_FFPE_Human_Breast_Cancer_Rep1_he_image.ome.tif
│   ├── Xenium_FFPE_Human_Breast_Cancer_Rep1_he_imagealignment.csv
│   ├── HBRC_Rep1_cell_coor.csv               # Cell segmentation result on H&E image
│   ├── HBRC_Rep1_Out_uni.npy
│
├── Human_Breast_Cancer_Rep2/                  # The 2nd slice
│   ├── cell_feature_matrix.h5
│   ├── cells.csv
│   ├── Xenium_FFPE_Human_Breast_Cancer_Rep2_he_image.ome.tif
│   ├── Xenium_FFPE_Human_Breast_Cancer_Rep2_he_imagealignment.csv
│   ├── HBRC_Rep2_cell_coor.csv
│   ├── HBRC_Rep2_Out_uni.npy

1.1 Preprocess Slice 1

[2]:
save_root1 = '/home/wcy/code/datasets/Xenium/Human_Breast_Cancer_Rep1/'
save_root2 = '/home/wcy/code/datasets/Xenium/Human_Breast_Cancer_Rep2/'
resolution = 64
image_encoder = 'uni'
[3]:
file_path1 = save_root1 + 'cell_feature_matrix.h5'
obs_path1 = save_root1 + 'cells.csv'
img_path1 = save_root1 + 'Xenium_FFPE_Human_Breast_Cancer_Rep1_he_image.ome.tif'
transform_mtx_path1 = save_root1 + 'Xenium_FFPE_Human_Breast_Cancer_Rep1_he_imagealignment.csv'
adata1 = se.pp.Read_Xenium(file_path1, obs_path1)
adata1 = se.pp.Preprocess_adata(adata1)

img, scale = se.pp.Read_HE_image(img_path1)
transform_mtx = pd.read_csv(transform_mtx_path1, header=None).values
adata1 = se.pp.Register_physical_to_pixel(adata1, transform_mtx, scale=scale)
he_patches, adata1 = se.pp.Tiling_HE_patches(resolution, adata1, img)
adata1 = se.pp.Extract_HE_patches_representaion(he_patches, adata=adata1, image_encoder=image_encoder, device=device, store_key='he')
======================== Tiling HE patches for each single cells ===========================
patch radius is  32
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 164000/164000 [00:03<00:00, 43116.35it/s]
====================== Extracting HE representations for each cell =========================
The image encoder is uni
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2563/2563 [21:10<00:00,  2.02it/s]

1.2 Preprocess Slice 2

[4]:
file_path2 = save_root2 + 'cell_feature_matrix.h5'
obs_path2 = save_root2 + 'cells.csv'
img_path2 = save_root2 + 'Xenium_FFPE_Human_Breast_Cancer_Rep2_he_image.ome.tif'
transform_mtx_path2 = save_root2 + 'Xenium_FFPE_Human_Breast_Cancer_Rep2_he_imagealignment.csv'
adata2 = se.pp.Read_Xenium(file_path2, obs_path2)
adata2 = se.pp.Preprocess_adata(adata2)

img, scale = se.pp.Read_HE_image(img_path2)
transform_mtx = pd.read_csv(transform_mtx_path2, header=None).values
adata2 = se.pp.Register_physical_to_pixel(adata2, transform_mtx, scale=scale)
he_patches, adata2 = se.pp.Tiling_HE_patches(resolution, adata2, img)
adata2 = se.pp.Extract_HE_patches_representaion(he_patches, adata=adata2, image_encoder=image_encoder, store_key='he', device=device)
======================== Tiling HE patches for each single cells ===========================
patch radius is  32
Remove the outlier cells, and Anndata file was reduced!
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 111555/111555 [00:01<00:00, 69620.54it/s]
====================== Extracting HE representations for each cell =========================
The image encoder is uni
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1744/1744 [14:23<00:00,  2.02it/s]

2. Train SpatialEx

2.1 Within the squencing area

[5]:
num_neighbors = 7
epochs = 500

graph1 = se.pp.Build_hypergraph_spatial_and_HE(adata1, num_neighbors, graph_kind='spatial', return_type='crs')
graph2 = se.pp.Build_hypergraph_spatial_and_HE(adata2, num_neighbors, graph_kind='spatial', return_type='crs')

spatialex = se.SpatialEx(adata1, adata2, graph1, graph2, epochs=epochs, device=device)
spatialex.train()
panelB1, panelA2 = spatialex.auto_inference()
164000  cells are included in its nearest spot!
111555  cells are included in its nearest spot!


=================================== Start training =========================================
#Epoch: 499: train_loss: 10.68: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [06:01<00:00,  1.38it/s]

2.2 Outside the squencing area

[6]:
out_spatial1 = pd.read_csv(save_root1 + '/HBRC_Rep1_cell_coor.csv', index_col=0)
out_spatial2 = pd.read_csv(save_root2 + '/HBRC_Rep2_cell_coor.csv', index_col=0)
out_he1 = np.load(save_root1 + '/HBRC_Rep1_Out_uni.npy')
out_he2 = np.load(save_root2 + '/HBRC_Rep2_Out_uni.npy')

graph1 = se.pp.Build_hypergraph(out_spatial1.values, num_neighbors=num_neighbors, normalize=True)
graph2 = se.pp.Build_hypergraph(out_spatial2.values, num_neighbors=num_neighbors, normalize=True)

panelA1_out = spatialex.inference(out_he1, graph1, panel='panelA')
panelA2_out = spatialex.inference(out_he2, graph2, panel='panelA')
panelB1_out = spatialex.inference(out_he1, graph1, panel='panelB')
panelB2_out = spatialex.inference(out_he2, graph2, panel='panelB')

3. Evalulation

3.1 Quantatitive metrics

3.1.1 Training on Slice 1, test on Slice 2

[7]:
graph = se.pp.Build_graph(adata1.obsm['spatial'], graph_type='knn', weighted='gaussian', apply_normalize='row', return_type='coo')
ssim, ssim_reduce = se.utils.Compute_metrics(adata1.X.copy(), panelB1.copy(), metric='ssim', graph=graph, reduce='mean')
pcc, pcc_reduce = se.utils.Compute_metrics(adata1.X.copy(), panelB1.copy(), metric='pcc', reduce='mean')
cmd, cmd_reduce = se.utils.Compute_metrics(adata1.X.copy(), panelB1.copy(), metric='cmd', reduce='mean')
print('Evaluation of the predicted gene expression on Slice 2, PCC: ', pcc_reduce, ' SSIM: ', ssim_reduce, ' CMD: ', cmd_reduce)
x shape is  164000
cell number is less than 200000
Evaluation of the predicted gene expression on Slice 2, PCC:  0.25761247  SSIM:  0.3653508153801421  CMD:  0.21490466849032452

3.1.2 Training on Slice 2, test on Slice 1

[8]:
graph = se.pp.Build_graph(adata2.obsm['spatial'], graph_type='knn', weighted='gaussian', apply_normalize='row', return_type='coo')
ssim, ssim_reduce = se.utils.Compute_metrics(adata2.X.copy(), panelA2.copy(), metric='ssim', graph=graph)
pcc, pcc_reduce = se.utils.Compute_metrics(adata2.X.copy(), panelA2.copy(), metric='pcc', reduce='mean')
cmd, cmd_reduce = se.utils.Compute_metrics(adata2.X.copy(), panelA2.copy(), metric='cmd', reduce='mean')
print('Evaluation of the predicted gene expression on Slice 1, PCC: ', pcc_reduce, ' SSIM: ', ssim_reduce, ' CMD: ', cmd_reduce)
x shape is  111555
cell number is less than 200000
Evaluation of the predicted gene expression on Slice 1, PCC:  0.2732997  SSIM:  0.380874110038334  CMD:  0.2032631360221221

3.2 Visualization

3.2.1 Training on Slice 1, test on Slice 2

[9]:
col_min, col_max = adata2.obsm['image_coor'][:, 1].min(), adata2.obsm['image_coor'][:, 1].max()
row_min, row_max = adata2.obsm['image_coor'][:, 0].min(), adata2.obsm['image_coor'][:, 0].max()
selection = (out_spatial2['image_row'] > row_min) & (out_spatial2['image_row'] < row_max) & (out_spatial2['image_col'] > col_min) & (out_spatial2['image_col'] < col_max)
obs_inner = out_spatial2[selection]
var_names = adata1.var_names
[10]:
gene_name = 'EPCAM'
gene_idx = np.where(adata1.var_names == gene_name)[0]

value = panelA2[:, gene_idx]
vmax = value.max()
x, y = adata2.obsm['image_coor'][:, 0], adata2.obsm['image_coor'][:, 1]
plt.scatter(x, y, c=value, vmax=vmax, s=0.01)

x = [row_min, row_min, row_max, row_max, row_min]
y = [col_min, col_max, col_max, col_min, col_min]
plt.plot(x, y, color='red', linestyle='--')

value = panelA2_out[:, gene_idx]
x, y = out_spatial2['image_col'], out_spatial2['image_row']
plt.scatter(y[~selection], x[~selection], c=value[~selection], s=0.01, vmin=0, vmax=vmax)

plt.title(gene_name)
plt.ylim((0,x.max()))
plt.xlim((0,y.max()))
plt.axis('off')
ax = plt.gca()
ax.set_aspect(1)
plt.show()
_images/Tutorial_1_SpatialEx_Translates_Histology_to_Omics_at_Single-Cell_Resolution_v2_25_0.png
[11]:
gene_name = 'ESR1'
gene_idx = np.where(adata1.var_names == gene_name)[0]

value = panelA2[:, gene_idx]
vmax = value.max()
x, y = adata2.obsm['image_coor'][:, 0], adata2.obsm['image_coor'][:, 1]
plt.scatter(x, y, c=value, vmax=vmax, s=0.01)

x = [row_min, row_min, row_max, row_max, row_min]
y = [col_min, col_max, col_max, col_min, col_min]
plt.plot(x, y, color='red', linestyle='--')

value = panelA2_out[:, gene_idx]
x, y = out_spatial2['image_col'], out_spatial2['image_row']
plt.scatter(y[~selection], x[~selection], c=value[~selection], s=0.01, vmin=0, vmax=vmax)

plt.title(gene_name)
plt.ylim((0,x.max()))
plt.xlim((0,y.max()))
plt.axis('off')
ax = plt.gca()
ax.set_aspect(1)
plt.show()
_images/Tutorial_1_SpatialEx_Translates_Histology_to_Omics_at_Single-Cell_Resolution_v2_26_0.png
[12]:
gene_name = 'PGR'
gene_idx = np.where(adata1.var_names == gene_name)[0]

value = panelA2[:, gene_idx]
vmax = value.max()
x, y = adata2.obsm['image_coor'][:, 0], adata2.obsm['image_coor'][:, 1]
plt.scatter(x, y, c=value, vmin=0.05, s=0.01)

x = [row_min, row_min, row_max, row_max, row_min]
y = [col_min, col_max, col_max, col_min, col_min]
plt.plot(x, y, color='red', linestyle='--')

value = panelA2_out[:, gene_idx]
x, y = out_spatial2['image_col'], out_spatial2['image_row']
plt.scatter(y[~selection], x[~selection], c=value[~selection], s=0.01, vmin=0, vmax=vmax)

plt.title(gene_name)
plt.ylim((0,x.max()))
plt.xlim((0,y.max()))
plt.axis('off')
ax = plt.gca()
ax.set_aspect(1)
plt.show()
_images/Tutorial_1_SpatialEx_Translates_Histology_to_Omics_at_Single-Cell_Resolution_v2_27_0.png
[ ]: