SuperCAT - Step 2

Introduction

The procedures in this step aim to assign a cell type to the clusters that do not show differentially expressed cell marker genes.

Pre-requirements

To start the procedures we consider:

Reference cell type assignment panel from step 1.
DEG (one vs all) for each Seurat cluster.

DEG signatures for each cluster

After calculating the DEGs per cluster, let's take a look at the expression signature profile, using up to the top 100 genes per cluster.

# Build the per-cluster DEG expression signature and plot it as a heatmap.
# Reads the one-vs-all DEG table from disk and uses the Seurat object
# `aux.seurat`, which is created outside this section.
# Leaves `data.avg.exp.scaled` (genes in rows, clusters in columns) in the
# workspace for the correlation step that follows.
markers <- read.delim("files/DEG_seurat_clusters_res3.csv", sep = ",")

ng <- 100    # number of top genes requested per cluster
p.v <- 0.05  # adjusted p-value cutoff
fc <- 0      # minimum average log2 fold change (keeps up-regulated genes)

# Rank explicitly by fold change: without `wt`, top_n() ranks by the last
# column of the table, whatever that column happens to be. Ties are kept, so
# a cluster may contribute slightly more than `ng` genes.
features.deg <- markers %>%
  filter(p_val_adj < p.v & avg_log2FC > fc) %>%
  group_by(cluster) %>%
  top_n(n = ng, wt = avg_log2FC) %>%
  ungroup()

deg.genes <- unique(features.deg$gene)

# DotPlot() is used only for the table behind the plot (`$data`), which holds
# the scaled average expression of each gene in each cluster.
aux <- DotPlot(aux.seurat, features = deg.genes)
aux.data <- aux$data

# Keep only the genes DotPlot() actually returned, so that no all-NA row
# reaches the clustering / correlation steps.
deg.genes <- deg.genes[deg.genes %in% aux.data$features.plot]
cluster.ids <- as.character(unique(aux.data$id))

# Preallocate the genes x clusters matrix and fill one column per cluster,
# aligning rows by gene name instead of relying on row order.
data.avg.exp.scaled <- matrix(
  NA_real_,
  nrow = length(deg.genes),
  ncol = length(cluster.ids),
  dimnames = list(deg.genes, cluster.ids)
)

for (o in cluster.ids) {
  aux.data.filt <- aux.data[which(aux.data$id == o), ]
  gene.pos <- match(deg.genes, aux.data.filt$features.plot)
  data.avg.exp.scaled[, o] <- aux.data.filt$avg.exp.scaled[gene.pos]
}

data.avg.exp.scaled <- as.data.frame(data.avg.exp.scaled)

# Label each column with its cluster id and its number of selected DEGs.
# Counts are looked up by cluster id, so a cluster without any DEG is shown
# as n=0 instead of shifting the counts of the other clusters.
n.deg <- table(features.deg$cluster)
n.per.cluster <- as.integer(n.deg[cluster.ids])
n.per.cluster[is.na(n.per.cluster)] <- 0L
colnames(data.avg.exp.scaled) <- paste0(cluster.ids, " (n=", n.per.cluster, ")")

pheatmap(
  data.avg.exp.scaled,
  show_rownames = FALSE,
  main = paste0(
    "DEG expression signature clusters (n=", nrow(data.avg.exp.scaled), ")"
  ),
  cutree_cols = 10
)

Let’s read the file containing the reference cell type assignment panel from step 1 and identify the clusters that have no assignment. For those cases, we assign the cell type of the most correlated cluster, based on the gene expression signature. The correlation is computed as Pearson’s correlation.

# Assign a cell type to every cluster left unassigned (NA `CellType`) in the
# step 1 reference panel, by copying the cell type of its most correlated
# cluster (Pearson correlation of the DEG expression signatures).
# Adds two columns to `my_cluster_col`:
#   cor.val  - correlation with the chosen reference cluster
#   cor.clus - 0-based id of the chosen reference cluster
# NOTE(review): the file extension below is ".cvs", not ".csv" - confirm that
# this is the real file name on disk.
my_cluster_col <- read.delim("files/reference_panel_clusters_assigned.cvs", sep = ",")

my_cluster_col$cor.val <- NA
my_cluster_col$cor.clus <- NA

cor.mat <- cor(data.avg.exp.scaled)

# Row r of `my_cluster_col` and row/column r of `cor.mat` are taken to
# describe cluster r - 1 (0-based cluster numbering); the column names are
# derived from the matrix size instead of a hard-coded cluster count.
stopifnot(nrow(my_cluster_col) == ncol(cor.mat))
colnames(cor.mat) <- as.character(seq_len(ncol(cor.mat)) - 1L)

# Only clusters that already carry a cell type are valid references.
# Otherwise the best match could itself be unassigned, leaving the cell type
# NA or making the result depend on the order of the loop.
ref.idx <- which(!is.na(my_cluster_col$CellType))

for (r in which(is.na(my_cluster_col$CellType))) {
  cluster.id <- r - 1
  print(cluster.id)

  cor.ref <- cor.mat[r, ref.idx]
  best <- ref.idx[which.max(cor.ref)]
  if (length(best) == 0) {
    # No reference cluster with a usable correlation: leave this one as NA.
    next
  }

  pos.clus <- as.numeric(colnames(cor.mat)[best])
  my_cluster_col$cor.val[r] <- cor.mat[r, best]
  my_cluster_col$cor.clus[r] <- pos.clus

  my_cluster_col$CellType[r] <- my_cluster_col$CellType[pos.clus + 1]
}

## [1] 1
## [1] 9
## [1] 15
## [1] 46
## [1] 101
## [1] 113

The correlation values are plotted as a heatmap, and the column annotations indicate the assigned cluster number.

# Heatmap of the cluster-to-cluster correlation matrix; the columns are
# annotated with the variables held in `my_cluster_col`.
pheatmap(
  mat = cor.mat,
  annotation_col = my_cluster_col
)

Let’s now annotate the Seurat object with the cell type assignments and plot a UMAP.

# Replace each cell's active identity with the cell type recorded for it in
# `my_cluster_col` (row names are mapped onto the `CellType` column), and keep
# a copy of the new identities in the `active.cluster` metadata column.
aux.seurat@active.ident <- plyr::mapvalues(
  x = aux.seurat@active.ident,
  from = rownames(my_cluster_col),
  to = my_cluster_col$CellType
)
aux.seurat[["active.cluster"]] <- aux.seurat@active.ident

# Fixed colour palette passed to the plot.
cluster.cols <- c(
  "#9e0142", "#abd9e9", "#abdda4", "#d53e4f", "#f46d43", "#5e4fa2",
  "#fdae61", "#ffffbf", "#e6f598", "#66c2a5", "#3288bd",
  "#6DCD59FF", "#35B779FF", "#1F9E89FF", "#26828EFF", "#31688EFF",
  "#3E4A89FF", "#482878FF", "#440154FF"
)

# Labelled dimensional-reduction plot coloured by the new identities.
DimPlot(
  object = aux.seurat,
  pt.size = 0.1,
  cols = cluster.cols,
  label = TRUE
)