Load cohort mutation information
# Read the per-case mutation summary and reshape it to long format.
# Rows are sorted by Major.Class and then Case, both in decreasing order; the
# resulting row order defines the order of the Case factor levels below.
data.table <- data.frame(readxl::read_xlsx("files/Mutation Summary Updated_2.0.xlsx"))
data.table <- data.table[order(data.table$Major.Class, data.table$Case, decreasing = TRUE), ]

# Append the sample type to the case identifier to build the display label.
data.table$Case <- paste0(data.table$Case, " ", data.table$Type)

# unique() keeps factor() from failing with a "factor level is duplicated"
# error when two rows share the same label; first-appearance order is kept.
data.table$Case <- factor(data.table$Case, levels = unique(data.table$Case))

# Long format: one row per (Case, variable) with variable in
# ARID1A / KRAS / Major.Class.
data.table.m <- melt(data.table[, c("Case", "ARID1A", "KRAS", "Major.Class")], id.vars = "Case")

# Draw the long table as a graphical table without row names.
gridExtra::grid.table(data.table.m, rows = NULL)
# Dot matrix of cases (y axis) against the melted columns (x axis). Each dot is
# coloured by the cell value, using one shared palette for every value that can
# appear in the ARID1A, KRAS and Major.Class columns.
.class.colours <- c(
  "Endometrioma" = "#7b3294",
  "Eutopic Endometrium" = "#c2a5cf",
  "Extra-ovarian endometriosis" = "#d9f0d3",
  "No endometriosis detected" = "#a6dba0",
  "Unaffected ovary" = "#008837",
  "NP" = "#f7f7f7",
  "Positive" = "#bd0026",
  "Negative" = "#ffffb2",
  "Heterogenous" = "#fd8d3c",
  "WT" = "#3182bd",
  "Mut" = "#31a354"
)

ggplot(
  data.table.m,
  aes(x = variable, y = Case, color = as.factor(value))
) +
  geom_point(size = 7) +
  scale_color_manual(name = "Class", values = .class.colours) +
  theme_bw(base_size = 15) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
# Volcano plot for the "Mut" group of the KRAS-status marker table.
# The table is expected to provide the columns used below:
# avg_logFC, p_val_adj, cluster and gene.
markers.sel.sc <- read.delim("files/DEG.endometrial.type.epi.KRAS_Status")
p.v <- 0.05 # adjusted p-value cutoff
fc <- 0.6   # absolute avg_logFC cutoff

# Classify every row as Down / Up / NOT.SIG.
# Assigning through which() on the column (instead of `df[mask, ]$STATUS <-`)
# works when no row passes the cutoffs and when the mask contains NA; the
# data-frame row-assignment form errors in both situations.
.is.sig <- markers.sel.sc$p_val_adj < p.v
markers.sel.sc$STATUS <- "NOT.SIG"
markers.sel.sc$STATUS[which(.is.sig & markers.sel.sc$avg_logFC < -fc)] <- "Down"
markers.sel.sc$STATUS[which(.is.sig & markers.sel.sc$avg_logFC > fc)] <- "Up"

# Restrict to the group being plotted; which() drops rows with NA cluster.
c <- "Mut"
sc.markers.filt <- markers.sel.sc[which(markers.sel.sc$cluster == c), ]

# NOTE(review): nDEG counts every row of this group, not only the significant
# ones -- confirm this is the intended "n" shown in the title.
nDEG <- nrow(sc.markers.filt)

# Genes that get a text label: everything that passed the cutoffs.
genes <- sc.markers.filt$gene[sc.markers.filt$STATUS != "NOT.SIG"]

vp <- ggplot(sc.markers.filt, aes(x = avg_logFC, y = -log10(p_val_adj))) +
  geom_point(aes(color = STATUS), cex = 1.45) +
  # Named values keep each status on its colour even when a level is absent.
  scale_color_manual(values = c("Down" = "darkgreen", "NOT.SIG" = "grey", "Up" = "red")) +
  theme_bw(base_size = 15) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  ) +
  # Label rows are selected by gene symbol (not by row name, which produced
  # NA rows) and the label is mapped from the layer's own data.
  ggrepel::geom_text_repel(
    data = sc.markers.filt[sc.markers.filt$gene %in% genes, ],
    aes(label = gene),
    size = 5,
    box.padding = unit(0.35, "lines"),
    point.padding = unit(0.3, "lines")
  ) +
  labs(title = paste0("Cluster ", c, " (n=", nDEG, ")"))
## Warning: Removed 1 rows containing missing values (geom_text_repel).
# Volcano plot for the "Heterogenous" group of the ARID1A-status marker table.
# The table is expected to provide the columns used below:
# avg_logFC, p_val_adj, cluster and gene.
markers.sel.sc <- read.delim("files/DEG.endometrial.type.epi.ARID1A_Status")
p.v <- 0.05 # adjusted p-value cutoff
fc <- 0.5   # absolute avg_logFC cutoff

# Classify every row as Down / Up / NOT.SIG.
# Assigning through which() on the column (instead of `df[mask, ]$STATUS <-`)
# works when no row passes the cutoffs and when the mask contains NA; the
# data-frame row-assignment form errors in both situations.
.is.sig <- markers.sel.sc$p_val_adj < p.v
markers.sel.sc$STATUS <- "NOT.SIG"
markers.sel.sc$STATUS[which(.is.sig & markers.sel.sc$avg_logFC < -fc)] <- "Down"
markers.sel.sc$STATUS[which(.is.sig & markers.sel.sc$avg_logFC > fc)] <- "Up"

# Restrict to the group being plotted; which() drops rows with NA cluster.
c <- "Heterogenous"
sc.markers.filt <- markers.sel.sc[which(markers.sel.sc$cluster == c), ]

# NOTE(review): nDEG counts every row of this group, not only the significant
# ones -- confirm this is the intended "n" shown in the title.
nDEG <- nrow(sc.markers.filt)

# Genes that get a text label: everything that passed the cutoffs.
genes <- sc.markers.filt$gene[sc.markers.filt$STATUS != "NOT.SIG"]

vp <- ggplot(sc.markers.filt, aes(x = avg_logFC, y = -log10(p_val_adj))) +
  geom_point(aes(color = STATUS), cex = 1.45) +
  # Named values keep each status on its colour even when a level is absent.
  scale_color_manual(values = c("Down" = "darkgreen", "NOT.SIG" = "grey", "Up" = "red")) +
  theme_bw(base_size = 15) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  ) +
  # Label rows are selected by gene symbol (not by row name, which can produce
  # NA rows) and the label is mapped from the layer's own data.
  ggrepel::geom_text_repel(
    data = sc.markers.filt[sc.markers.filt$gene %in% genes, ],
    aes(label = gene),
    size = 5,
    box.padding = unit(0.35, "lines"),
    point.padding = unit(0.3, "lines")
  ) +
  labs(title = paste0("Cluster ", c, " (n=", nDEG, ")"))
Loading Seurat object and selecting Endothelial cells
# Read the Seurat object and add three bookkeeping columns to its metadata:
#   id.cells        - copy of the metadata row names
#   selected.cells  - "No" for every cell (switched to "Yes" further down)
#   subcluster.name - NA for every cell (filled in further down)
aux.seurat <- readRDS("rds/aux.seurat.rds")
aux.seurat@meta.data[["id.cells"]] <- rownames(aux.seurat@meta.data)
aux.seurat@meta.data[["selected.cells"]] <- "No"
aux.seurat@meta.data[["subcluster.name"]] <- NA

# Keep the cells whose active.cluster is "Endothelial cells", then rebuild the
# neighbour graph on dimensions 1 to 20 and cluster at resolution 0.5.
aux.seurat.endo <- subset(
  aux.seurat,
  subset = active.cluster %in% "Endothelial cells"
)
aux.seurat.endo <- FindNeighbors(aux.seurat.endo, dims = seq_len(20))
aux.seurat.endo <- FindClusters(aux.seurat.endo, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## Number of nodes: 23226
## Number of edges: 762844
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8940
## Number of communities: 18
## Elapsed time: 4 seconds
# Compute a UMAP embedding of the endothelial subset from dimensions 1 to 20
# and plot it with a text label on each cluster.
aux.seurat.endo <- RunUMAP(aux.seurat.endo, dims = 1:20)
# TRUE instead of T: T is an ordinary variable that can be reassigned.
DimPlot(aux.seurat.endo, reduction = "umap", label = TRUE)
# Tabulate the number of cells in each endothelial subcluster and draw the
# counts as a bar chart with the count printed above every bar.
.cluster.sizes <- table(aux.seurat.endo@meta.data$seurat_clusters)
prop.cells <- data.frame(.cluster.sizes)
# Fix the level order to the order in which the clusters appear in the table.
prop.cells$Var1 <- factor(prop.cells$Var1, levels = prop.cells$Var1)

px <- ggplot(prop.cells, aes(x = Var1, y = Freq)) +
  geom_col() +
  geom_text(
    aes(label = Freq),
    position = position_dodge(width = 0.9),
    vjust = -0.25
  ) +
  labs(
    title = "Number of cells in each Class",
    x = "Class",
    y = "Number of Cells"
  ) +
  theme_minimal(base_size = 15) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
# Build sc.selected: the reclustered endothelial cells plus every cell of the
# listed non-endothelial cell types, with subcluster.name holding
#   - the endothelial subcluster id for endothelial cells, and
#   - the active.cluster label for all other selected cells.

# Endothelial cells: flag them and copy their subcluster id over by cell id.
id.cells.annot <- rownames(aux.seurat.endo@meta.data)
aux.seurat@meta.data$selected.cells[aux.seurat@meta.data$id.cells %in% id.cells.annot] <- "Yes"
aux.seurat@meta.data$subcluster.name[match(id.cells.annot, aux.seurat@meta.data$id.cells)] <-
  as.character(aux.seurat.endo@meta.data$seurat_clusters)

# Remaining cell types are kept whole. One %in% mask replaces the previous
# per-type copy-pasted assignments and yields the same result.
.other.cell.types <- c(
  "Epithelial cells",
  "Mesenchymal cells",
  "Mast cells",
  "Myeloid cells",
  "T/NK cells",
  "B/Plasma cells"
)
aux.seurat@meta.data$selected.cells[aux.seurat@meta.data$active.cluster %in% .other.cell.types] <- "Yes"

sc.selected <- subset(aux.seurat, subset = selected.cells == "Yes")

# For the non-endothelial cells the subcluster name is the cell type itself.
# as.character() covers the case where active.cluster is stored as a factor.
.is.other <- sc.selected@meta.data$active.cluster %in% .other.cell.types
sc.selected@meta.data$subcluster.name[.is.other] <-
  as.character(sc.selected@meta.data$active.cluster[.is.other])
# Keep the cases whose ARID1A call is "Positive" or "Heterogenous", restrict
# the endothelial object to the matching samples, and copy each sample's
# ARID1A call onto its cells as ARID1A_status.
.data.table <- data.table[data.table$ARID1A %in% c("Positive", "Heterogenous"), ]
sel.endo <- subset(aux.seurat.endo, subset = SampleName %in% .data.table$SampleName)
sel.endo@meta.data$ARID1A_status <- NA
# seq_len() makes the loop a no-op when no case passes the filter; 1:nrow()
# would iterate over c(1, 0) in that situation.
for (i in seq_len(nrow(.data.table))) {
  sample <- .data.table$SampleName[i]
  sel.endo@meta.data$ARID1A_status[sel.endo@meta.data$SampleName == sample] <- .data.table$ARID1A[i]
}
# Cross-tabulate endothelial subcluster (Var1) against ARID1A status (Var2),
# convert the counts to within-cluster fractions, and append a "Total"
# pseudo-cluster that holds the overall fraction of each status.
.status.by.cluster <- table(
  sel.endo@meta.data$seurat_clusters,
  sel.endo@meta.data$ARID1A_status
)
prop <- data.frame(.status.by.cluster)

# Fraction of each status inside every cluster.
counts.prop.perc <- prop %>%
  group_by(Var1) %>%
  mutate(percent = Freq / sum(Freq))

# Overall counts per status, relabelled to line up with the columns above.
aux <- aggregate(prop$Freq, by = list(Category = prop$Var2), FUN = sum)
aux <- setNames(aux, c("Var2", "Freq"))
aux$Var1 <- "Total"
aux$percent <- aux$Freq / sum(aux$Freq)

counts.prop.perc <- rbind(as.data.frame(counts.prop.perc), aux)
# Stacked bar chart of the ARID1A-status fraction in every endothelial
# subcluster plus the "Total" bar; each non-empty segment carries its
# rounded fraction as a white label.
p1 <- ggplot(
  counts.prop.perc,
  aes(
    x = reorder(Var1, percent),
    y = percent,
    fill = Var2,
    label = signif(round(percent, digits = 3), digits = 2)
  )
) +
  geom_bar(position = "stack", stat = "identity", width = 0.8) +
  scale_fill_manual(
    name = "ARID1A status",
    values = c("Positive" = "#4daf4a", "Heterogenous" = "#666666")
  ) +
  # Bars are laid out in reverse factor-level order of Var1.
  scale_x_discrete(limits = rev(levels(counts.prop.perc$Var1))) +
  # Segments with a zero count get no label.
  geom_text(
    data = subset(counts.prop.perc, Freq != 0),
    position = position_stack(vjust = 0.5),
    size = 4,
    color = "#ffffff"
  ) +
  # Add the theme itself. theme_set() returns the *previous* default theme and
  # changes the session-wide default, so `+ theme_set(...)` styled this plot
  # with the old theme and silently restyled every later plot.
  theme_gray(base_size = 16) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 270, hjust = 1),
    axis.text.y = element_text(angle = 270, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "right"
  )
# Reshape the long count table into a status-by-cluster table (one row per
# ARID1A status, one column per subcluster), run a chi-square test on it and
# plot the test's residuals as a colour grid.
table.counts <- spread(prop, key = Var1, value = Freq)

# Move the status labels from the first column into the row names.
rownames(table.counts) <- table.counts$Var2
table.counts <- table.counts[, -1]

test <- chisq.test(table.counts)
corrplot(test$residuals, is.cor = FALSE, method = "color")
# Dot plot of four marker genes across endothelial subclusters 8, 12 and
# all remaining subclusters pooled together as "Remaining".
gene.feat <- c("PROX1", "PDPN", "LYVE1", "FLT4")

# Grouping column: keep the ids "8" and "12", collapse everything else.
.cluster.id <- as.character(aux.seurat.endo@meta.data$seurat_clusters)
aux.seurat.endo@meta.data$plot.clusters <- ifelse(
  .cluster.id %in% c("8", "12"),
  .cluster.id,
  "Remaining"
)

p1 <- DotPlot(
  aux.seurat.endo,
  assay = "RNA",
  features = unique(gene.feat),
  group.by = "plot.clusters"
) +
  theme(axis.text.x = element_text(angle = 90)) +
  coord_flip() +
  # Replaces the colour scale that DotPlot() adds by default.
  scale_colour_gradient2(low = "#2166ac", mid = "#f7f7f7", high = "#b2182b")
## Warning: Scaling data with a low number of groups may produce misleading results
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
# Dot plot of six genes in epithelial cells, grouped by the ARID1A status of
# the sample each cell comes from.
.sc.epi <- readRDS(file = "rds/epithelial.annotated.rds")

# Status starts as NA and is filled from the per-sample ARID1A call.
.sc.epi <- AddMetaData(.sc.epi, metadata = NA, col.name = "Status")
.positive.samples <- .data.table$SampleName[.data.table$ARID1A == "Positive"]
.heterogenous.samples <- .data.table$SampleName[.data.table$ARID1A %in% c("Heterogenous")]
.sc.epi@meta.data$Status[.sc.epi@meta.data$SampleName %in% .positive.samples] <- "Positive"
.sc.epi@meta.data$Status[.sc.epi@meta.data$SampleName %in% .heterogenous.samples] <- "Heterogenous"

# Cells from the samples listed in .data.table: subset once to collect their
# ids, flag those ids on the full object, then subset on the flag.
sc.sel.filt <- subset(.sc.epi, subset = SampleName %in% .data.table$SampleName)
.sc.epi@meta.data$selected.cells <- "No"
.sc.epi@meta.data$id.cells <- rownames(.sc.epi@meta.data)
id.cells.annot <- rownames(sc.sel.filt@meta.data)
.flagged.rows <- match(id.cells.annot, .sc.epi@meta.data$id.cells)
.sc.epi@meta.data$selected.cells[.flagged.rows] <- "Yes"

sc.selected <- subset(.sc.epi, subset = selected.cells == "Yes")
# Remove factor levels that no longer occur after subsetting.
sc.selected@meta.data <- droplevels(sc.selected@meta.data)

gene.feat <- c("VEGFA", "VEGFC", "VEGFD", "CCBE1", "FGF2", "SEMA3A")
p1 <- DotPlot(
  sc.selected,
  assay = "RNA",
  features = unique(gene.feat),
  group.by = "Status"
) +
  theme(axis.text.x = element_text(angle = 90)) +
  coord_flip() +
  # Replaces the colour scale that DotPlot() adds by default.
  scale_colour_gradient2(low = "#2166ac", mid = "#f7f7f7", high = "#b2182b")
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.