The procedures in this step aim to determine the enrichment of differentially expressed cell type gene markers in each cell cluster. Thus, the overall assumption is that the cells relationship in each cluster group also determine the cell type that characterize each cluster.
To start the procedures we consider:
For this tutorial the following clusters were considered
DimPlot(aux.seurat, reduction="umap")
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
The differential expression analysis is performed using MAST approach. It may take few hours depending on the number of cells and clusters.
markers <- FindAllMarkers(aux.seurat, test.use = "MAST")
write.table(markers, file = "DEG_seurat_clusters_res3.csv", quote = F, sep = ",")
Let’s take a look at the general cell type markers considered for the assigments. Other genes marker can be included to the list.
table.gene.markers <- data.frame(Gene=c("EPCAM", "KRT8", "KRT18", "KRT19", "KRT10", "KRT7", "FOXJ1"), Cell="Epithelial_cells")
table.gene.markers <- rbind(table.gene.markers, data.frame(Gene=c("ACTA2"), Cell="Smooth_Muscle_cells"))
table.gene.markers <- rbind(table.gene.markers, data.frame(Gene=c("DCN", "COL11A2", "FAP", "PDGFRA", "COL11A1", "COL1A1", "PDGFRB"), Cell="Mesenchymal_cells"))
table.gene.markers <- rbind(table.gene.markers, data.frame(Gene=c("LYZ", "CD14", "MME", "C1QA", "CLEC10A"), Cell="Myeloid_cells"))
table.gene.markers <- rbind(table.gene.markers, data.frame(Gene=c("CLDN5", "PECAM1", "CD34", "ESAM"), Cell="Endothelial_cells"))
table.gene.markers <- rbind(table.gene.markers, data.frame(Gene=c("CD79A", "JCHAIN"), Cell="B/Plasma_cells"))
#table.gene.markers <- rbind(table.gene.markers, data.frame(Gene=c("JCHAIN"), Cell="B Cells"))
#table.gene.markers <- rbind(table.gene.markers, data.frame(Gene=c("TYROBP", "FCGR3A"), Cell="Natural Killer"))
table.gene.markers <- rbind(table.gene.markers, data.frame(Gene=c("TPSB2"), Cell="Mast_cells"))
table.gene.markers <- rbind(table.gene.markers, data.frame(Gene=c("HBB", "GYPA"), Cell="Erythrocytes"))
table.gene.markers <- rbind(table.gene.markers, data.frame(Gene=c("CD2", "CD3D", "CD3E", "CD3G", 'CD8A', "CCL5", "PTPRC", "TYROBP", "FCGR3A"), Cell="T/NK Cells"))
grid::grid.newpage()
gridExtra::grid.table(table.gene.markers, rows=NULL)
Bellow you can find auxiliary functions to count the number of cell marker genes that are differentialy expressed in each cluster considering fold change >= 0.2 and p-value <= 0.05
markers = read.delim("files/DEG_seurat_clusters_res3.csv", sep = ",")
get_markers <- function(cell) {
return(table.gene.markers$Gene[which(table.gene.markers$Cell == cell)])
}
count_marker_DEG <- function(table.gene.markers, deg, fc=0.2, pval=0.05) {
clusters = unique(deg$cluster)
counts = list()
for (x in (unique(table.gene.markers$Cell))) {
table = NULL
for (i in clusters) {
filt = deg[which(deg$cluster == i),]
if (nrow(filt) > 0) {
res = sum(filt$avg_log2FC >= fc & filt$p_val_adj <= pval & filt$gene %in% get_markers(cell = x))
}else
res = 0
table = rbind(table, data.frame(Cluster=i, MarkesDEG=res))
}
counts[[x]] <- table
}
return(counts)
}
Lets now use the functions above to count the DEG marks using our previous gene table list.
counts_deg = count_marker_DEG(table.gene.markers, markers, fc = 0.1)
counts_deg = do.call(cbind, counts_deg)
rownames(counts_deg) = counts_deg$Epithelial_cells.Cluster
In this step we normalize the counts by the number of the total markers for each corresponding cell type.
row.bar = data.frame(table(Idents(aux.seurat)))
row_ha = rowAnnotation(ClusterSize = anno_barplot(row.bar$Freq))
markers.count = data.frame(table(table.gene.markers$Cell))
markers.count = markers.count[match(colnames(counts_deg[,seq(2, ncol(counts_deg), by = 2)]), paste0(markers.count$Var1, ".MarkesDEG")),]
markers.count$ColumnName = paste0(markers.count$Var1, " (n=", markers.count$Freq, ")")
head(markers.count)
## Var1 Freq ColumnName
## 3 Epithelial_cells 7 Epithelial_cells (n=7)
## 8 Smooth_Muscle_cells 1 Smooth_Muscle_cells (n=1)
## 6 Mesenchymal_cells 7 Mesenchymal_cells (n=7)
## 7 Myeloid_cells 5 Myeloid_cells (n=5)
## 2 Endothelial_cells 4 Endothelial_cells (n=4)
## 1 B/Plasma_cells 2 B/Plasma_cells (n=2)
y = counts_deg[,seq(2, ncol(counts_deg), by = 2)]
x = markers.count$Freq
z = (t(t(y) / x))
rownames(z) = rownames(counts_deg)
colnames(z) = markers.count$ColumnName
head(z)
## Epithelial_cells (n=7) Smooth_Muscle_cells (n=1) Mesenchymal_cells (n=7)
## 0 0 0 0.1428571
## 1 0 0 0.0000000
## 2 0 0 0.2857143
## 3 0 0 0.0000000
## 4 0 0 0.0000000
## 5 0 0 0.0000000
## Myeloid_cells (n=5) Endothelial_cells (n=4) B/Plasma_cells (n=2)
## 0 0 0 0
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 1 0
## Mast_cells (n=1) Erythrocytes (n=2) T/NK Cells (n=9)
## 0 0 0 0.0000000
## 1 0 0 0.0000000
## 2 0 0 0.0000000
## 3 0 0 0.7777778
## 4 0 0 0.6666667
## 5 0 0 0.0000000
Next, let’s visualize the enrichment of marks (DEG counts proportion) as a heatmap plot.
ht1 = Heatmap(z,
na_col = "white",
cluster_rows = T,
cluster_columns = F,
show_row_names = T,
row_names_gp = gpar(fontsize = 12),
row_names_side = "left",
rect_gp = gpar(col = "black", lwd = 0.6),
show_column_names = T,
right_annotation = row_ha,
col = colorRamp2(c(0, mean(as.matrix(z)), max(z)), c("#FFFFFF" , "#d8b365", "#01665e"))
)
draw(ht1, heatmap_legend_side = "right")
Next, the expression of the markers can also be visualized and
evaluated using Seurat DotPlot
function. We first change
the order of the rows to match the order of the non-supervised
clustering obtained, obtained previously from the marker count
proportion.
aux.seurat@active.ident <- factor(aux.seurat@active.ident,
levels=rownames(z)[rev(row_order(ht1))]
)
DotPlot(object = aux.seurat, features = rev(unique(table.gene.markers$Gene))) +
scale_colour_gradient2(low = "#2166ac", mid = "#f7f7f7", high = "#b2182b") +
RotatedAxis() +
theme(axis.text.x = element_text(size = 12))
In this section we defined some rules to systematically assign the
cell type for each cluster. The code corresponds to the workflow image
in the main
tab at the navigation menu.
expr.markers = DotPlot(object = aux.seurat, features = rev(unique(table.gene.markers$Gene)))
expr.markers.data = expr.markers$data
krt.genes = table.gene.markers$Gene[grep("KRT", table.gene.markers$Gene)]
my_cluster_col <- data.frame(row.names = rownames(z))
my_cluster_col$CellType = NA
for (r in 1:nrow(z)) {
c = r -1
nmarkers = which(z[r,] > 0)
pct.KRT = expr.markers.data[expr.markers.data$id == c & expr.markers.data$features.plot %in% krt.genes,]
if ( (sum(pct.KRT$pct.exp > 35) > 0 & sum(pct.KRT$avg.exp.scaled > 1) > 0) ) {
my_cluster_col$CellType[r] = "Epithelial_cells"
} else if ( length(nmarkers) > 1 ){
nmarkers = z[r,which(z[r,] > 0)]
if ( length(grep("Smooth_Muscle", names(nmarkers))) > 0 ){
pos.fibro = grep("Mesenchymal", names(nmarkers))
if ( length(pos.fibro) > 0 ) {
if (nmarkers[pos.fibro] > 0.25) {
my_cluster_col$CellType[r] = "Mesenchymal_cells"
}else {
my_cluster_col$CellType[r] = "Smooth_Muscle_cells"
}
} else {
marker = which.max(nmarkers)
my_cluster_col$CellType[r] = gsub(" .*", "", names(marker))
}
} else {
nmarkers = z[r,which(z[r,] > 0)] * markers.count$Freq[which(markers.count$ColumnName %in% names(nmarkers))]
if (length(unique(nmarkers)) == 1) {
nmarkers = z[r,which(z[r,] > 0)]
marker = which.max(nmarkers)
} else {
marker = which.max(nmarkers)
}
my_cluster_col$CellType[r] = gsub(" .*", "", names(marker))
}
} else if (length(nmarkers) == 1) {
my_cluster_col$CellType[r] = gsub(" .*", "", names(nmarkers))
}
}
Let’s now check the table of cell assignments and write the output as
a .cvs file and go to the Step 2
my_cluster_col
## CellType
## 0 Mesenchymal_cells
## 1 <NA>
## 2 Mesenchymal_cells
## 3 T/NK
## 4 T/NK
## 5 Endothelial_cells
## 6 Endothelial_cells
## 7 T/NK
## 8 Mesenchymal_cells
## 9 <NA>
## 10 Mesenchymal_cells
## 11 T/NK
## 12 Mesenchymal_cells
## 13 Mesenchymal_cells
## 14 T/NK
## 15 <NA>
## 16 Smooth_Muscle_cells
## 17 Mesenchymal_cells
## 18 T/NK
## 19 Mesenchymal_cells
## 20 Myeloid_cells
## 21 Mesenchymal_cells
## 22 T/NK
## 23 Mesenchymal_cells
## 24 Myeloid_cells
## 25 T/NK
## 26 Mesenchymal_cells
## 27 Epithelial_cells
## 28 T/NK
## 29 B/Plasma_cells
## 30 Mesenchymal_cells
## 31 Mesenchymal_cells
## 32 T/NK
## 33 T/NK
## 34 Mesenchymal_cells
## 35 Mesenchymal_cells
## 36 Smooth_Muscle_cells
## 37 T/NK
## 38 Erythrocytes
## 39 Myeloid_cells
## 40 Mesenchymal_cells
## 41 B/Plasma_cells
## 42 Myeloid_cells
## 43 Epithelial_cells
## 44 Mesenchymal_cells
## 45 Mesenchymal_cells
## 46 <NA>
## 47 Mesenchymal_cells
## 48 Mesenchymal_cells
## 49 Endothelial_cells
## 50 Myeloid_cells
## 51 Smooth_Muscle_cells
## 52 Epithelial_cells
## 53 Epithelial_cells
## 54 Epithelial_cells
## 55 Epithelial_cells
## 56 T/NK
## 57 Epithelial_cells
## 58 Mesenchymal_cells
## 59 Mesenchymal_cells
## 60 Myeloid_cells
## 61 T/NK
## 62 Epithelial_cells
## 63 Epithelial_cells
## 64 Mesenchymal_cells
## 65 Epithelial_cells
## 66 Smooth_Muscle_cells
## 67 Epithelial_cells
## 68 Mast_cells
## 69 Erythrocytes
## 70 Myeloid_cells
## 71 Epithelial_cells
## 72 T/NK
## 73 T/NK
## 74 Mesenchymal_cells
## 75 Mesenchymal_cells
## 76 Mesenchymal_cells
## 77 T/NK
## 78 T/NK
## 79 Epithelial_cells
## 80 T/NK
## 81 T/NK
## 82 Erythrocytes
## 83 Mesenchymal_cells
## 84 Myeloid_cells
## 85 B/Plasma_cells
## 86 Smooth_Muscle_cells
## 87 Mesenchymal_cells
## 88 Epithelial_cells
## 89 T/NK
## 90 Endothelial_cells
## 91 Myeloid_cells
## 92 T/NK
## 93 T/NK
## 94 T/NK
## 95 Endothelial_cells
## 96 T/NK
## 97 Erythrocytes
## 98 Epithelial_cells
## 99 Endothelial_cells
## 100 Epithelial_cells
## 101 <NA>
## 102 Myeloid_cells
## 103 Mesenchymal_cells
## 104 T/NK
## 105 T/NK
## 106 Epithelial_cells
## 107 T/NK
## 108 Myeloid_cells
## 109 Mesenchymal_cells
## 110 Endothelial_cells
## 111 Epithelial_cells
## 112 Smooth_Muscle_cells
## 113 <NA>
write.table(my_cluster_col,
"files/reference_panel_clusters_assigned.cvs",
sep = ",",
quote = FALSE)