Load cohort mutation information

Figure 4-a

# Read the cohort mutation summary sheet and convert the result of
# read_xlsx() into a base data frame.
data.table <- data.frame(
  readxl::read_xlsx("files/Mutation Summary Updated_2.0.xlsx")
)

# Sort by major class, then case, both descending. TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
data.table <- data.table[
  order(data.table$Major.Class, data.table$Case, decreasing = TRUE),
]

# Build the axis label "<Case> <Type>" and freeze the sorted order as the
# factor level order. unique() keeps factor() from failing if two rows end
# up with the same label.
data.table$Case <- paste0(data.table$Case, " ", data.table$Type)
data.table$Case <- factor(data.table$Case, levels = unique(data.table$Case))

# Long format: one row per (Case, variable) pair for the three annotation
# columns.
data.table.m <- melt(
  data.table[, c("Case", "ARID1A", "KRAS", "Major.Class")],
  id.vars = "Case"
)

# Render the long table on a fresh graphics page.
grid.newpage()
gridExtra::grid.table(data.table.m, rows = NULL)

# Dot matrix of the melted table: melted variable on the x axis, case on the
# y axis, dot colour taken from the cell value via the fixed palette below.
ggplot(
  data.table.m,
  aes(x = variable, y = Case, color = as.factor(value))
) +
  theme_bw(base_size = 15) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_blank()
  ) +
  geom_point(size = 7) +
  scale_color_manual(
    name = "Class",
    values = c(
      # values coloured with the purple/green ramp
      "Endometrioma" = "#7b3294",
      "Eutopic Endometrium" = "#c2a5cf",
      "Extra-ovarian endometriosis" = "#d9f0d3",
      "No endometriosis detected" = "#a6dba0",
      "Unaffected ovary" = "#008837",
      # values coloured with the yellow/orange/red ramp
      "NP" = "#f7f7f7",
      "Positive" = "#bd0026",
      "Negative" = "#ffffb2",
      "Heterogenous" = "#fd8d3c",
      # WT / Mut values
      "WT" = "#3182bd",
      "Mut" = "#31a354"
    )
  )

Figure 4-d

# Differential-expression table for the KRAS status comparison.
markers.sel.sc <- read.delim("files/DEG.endometrial.type.epi.KRAS_Status")

# Thresholds: adjusted p-value cutoff and absolute log fold-change cutoff.
p.v <- 0.05
fc <- 0.6

# Classify every gene as Down / Up / NOT.SIG.
# Assigning through `$STATUS[which(...)]` is a no-op when nothing passes the
# thresholds and skips rows whose comparison is NA. The earlier
# `df[mask, ]$STATUS <- value` form raises an error in both situations.
markers.sel.sc$STATUS <- "NOT.SIG"
is.sig <- markers.sel.sc$p_val_adj < p.v
markers.sel.sc$STATUS[which(is.sig & markers.sel.sc$avg_logFC < -fc)] <- "Down"
markers.sel.sc$STATUS[which(is.sig & markers.sel.sc$avg_logFC > fc)] <- "Up"

# Keep only the rows of the cluster of interest. %in% never returns NA, so
# rows with a missing cluster are dropped instead of becoming all-NA rows.
c <- "Mut"
sc.markers.filt <- markers.sel.sc[markers.sel.sc$cluster %in% c, ]

# Number of rows kept for this cluster (shown in the plot title).
nDEG <- nrow(sc.markers.filt)

# Genes to label: everything in this cluster that passed the thresholds.
genes <- sc.markers.filt$gene[sc.markers.filt$STATUS != "NOT.SIG"]

# Volcano plot for the selected cluster: log fold change against
# -log10(adjusted p-value), coloured by STATUS, with the genes in `genes`
# labelled.
vp <- ggplot(sc.markers.filt, aes(x = avg_logFC, y = -log10(p_val_adj))) +
  geom_point(aes(color = STATUS), size = 1.45) +
  # Named values pin each colour to its STATUS level, so the mapping stays
  # correct even when one of the levels is absent from the data.
  scale_color_manual(
    values = c("Down" = "darkgreen", "NOT.SIG" = "grey", "Up" = "red")
  ) +
  theme_bw(base_size = 15) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  ) +
  geom_text_repel(
    # Select the labelled rows by the gene column, not by row name: row-name
    # lookup returns an all-NA row whenever a row name is not exactly the
    # gene symbol, and that label is then dropped with a warning.
    data = sc.markers.filt[sc.markers.filt$gene %in% genes, ],
    # Read the label from the layer's own data so text and coordinates
    # always come from the same row.
    aes(label = gene),
    size = 5,
    box.padding = unit(0.35, "lines"),
    point.padding = unit(0.3, "lines")
  ) +
  labs(title = paste0("Cluster ", c, " (n=", nDEG, ")"))
vp
## Warning: Removed 1 rows containing missing values (geom_text_repel).

Figure 4-e

# Differential-expression table for the ARID1A status comparison.
markers.sel.sc <- read.delim("files/DEG.endometrial.type.epi.ARID1A_Status")

# Thresholds: adjusted p-value cutoff and absolute log fold-change cutoff.
p.v <- 0.05
fc <- 0.5

# Classify every gene as Down / Up / NOT.SIG.
# Assigning through `$STATUS[which(...)]` is a no-op when nothing passes the
# thresholds and skips rows whose comparison is NA. The earlier
# `df[mask, ]$STATUS <- value` form raises an error in both situations.
markers.sel.sc$STATUS <- "NOT.SIG"
is.sig <- markers.sel.sc$p_val_adj < p.v
markers.sel.sc$STATUS[which(is.sig & markers.sel.sc$avg_logFC < -fc)] <- "Down"
markers.sel.sc$STATUS[which(is.sig & markers.sel.sc$avg_logFC > fc)] <- "Up"

# Keep only the rows of the cluster of interest. %in% never returns NA, so
# rows with a missing cluster are dropped instead of becoming all-NA rows.
c <- "Heterogenous"
sc.markers.filt <- markers.sel.sc[markers.sel.sc$cluster %in% c, ]

# Number of rows kept for this cluster (shown in the plot title).
nDEG <- nrow(sc.markers.filt)

# Genes to label: everything in this cluster that passed the thresholds.
genes <- sc.markers.filt$gene[sc.markers.filt$STATUS != "NOT.SIG"]

# Volcano plot for the selected cluster: log fold change against
# -log10(adjusted p-value), coloured by STATUS, with the genes in `genes`
# labelled.
vp <- ggplot(sc.markers.filt, aes(x = avg_logFC, y = -log10(p_val_adj))) +
  geom_point(aes(color = STATUS), size = 1.45) +
  # Named values pin each colour to its STATUS level, so the mapping stays
  # correct even when one of the levels is absent from the data.
  scale_color_manual(
    values = c("Down" = "darkgreen", "NOT.SIG" = "grey", "Up" = "red")
  ) +
  theme_bw(base_size = 15) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  ) +
  geom_text_repel(
    # Select the labelled rows by the gene column, not by row name: row-name
    # lookup returns an all-NA row whenever a row name is not exactly the
    # gene symbol, and that label is then dropped with a warning.
    data = sc.markers.filt[sc.markers.filt$gene %in% genes, ],
    # Read the label from the layer's own data so text and coordinates
    # always come from the same row.
    aes(label = gene),
    size = 5,
    box.padding = unit(0.35, "lines"),
    point.padding = unit(0.3, "lines")
  ) +
  labs(title = paste0("Cluster ", c, " (n=", nDEG, ")"))
vp

Figure 4-h

Loading Seurat object and selecting Endothelial cells

# Load the Seurat object from disk.
aux.seurat <- readRDS("rds/aux.seurat.rds")

# Bookkeeping columns on the full object: the cell id (copied from the
# metadata row names), a selection flag defaulting to "No", and an empty
# sub-cluster label that is filled in further down.
aux.seurat@meta.data$id.cells <- rownames(aux.seurat@meta.data)
aux.seurat@meta.data$selected.cells <- "No"
aux.seurat@meta.data$subcluster.name <- NA

# Restrict to cells annotated as endothelial, then build the neighbour graph
# on the first 20 dimensions and cluster it at resolution 0.5.
aux.seurat.endo <- subset(
  aux.seurat,
  subset = active.cluster %in% "Endothelial cells"
)
aux.seurat.endo <- FindClusters(
  FindNeighbors(aux.seurat.endo, dims = 1:20),
  resolution = 0.5
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 23226
## Number of edges: 762844
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8940
## Number of communities: 18
## Elapsed time: 4 seconds
# UMAP embedding of the endothelial subset on the same 20 dimensions used
# for clustering.
aux.seurat.endo <- RunUMAP(aux.seurat.endo, dims = 1:20)

# Cluster-labelled UMAP. TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned.
DimPlot(aux.seurat.endo, reduction = "umap", label = TRUE)

# Count the endothelial cells in each cluster (columns Var1 = cluster,
# Freq = number of cells) and fix the cluster order as it appears in the
# table.
prop.cells <- as.data.frame(table(aux.seurat.endo@meta.data$seurat_clusters))
prop.cells$Var1 <- factor(prop.cells$Var1, levels = prop.cells$Var1)

# Bar chart of cluster sizes with the count printed above each bar.
px <- ggplot(prop.cells, aes(Var1, Freq)) +
  labs(
    title = "Number of cells in each Class",
    x = "Class",
    y = "Number of Cells"
  ) +
  theme_minimal(base_size = 15) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.x = element_blank()
  ) +
  geom_col() +
  geom_text(
    aes(label = Freq),
    position = position_dodge(width = 0.9),
    vjust = -0.25
  )
px

# Flag every endothelial cell on the full object and copy its sub-cluster
# number over as the sub-cluster label.
id.cells.annot <- rownames(aux.seurat.endo@meta.data)
is.endo <- aux.seurat@meta.data$id.cells %in% id.cells.annot
aux.seurat@meta.data$selected.cells[is.endo] <- "Yes"
endo.rows <- match(
  rownames(aux.seurat.endo@meta.data),
  aux.seurat@meta.data$id.cells
)
aux.seurat@meta.data$subcluster.name[endo.rows] <-
  as.character(aux.seurat.endo@meta.data$seurat_clusters)

# The remaining cell types are kept as a whole, without sub-clustering.
kept.types <- c(
  "Epithelial cells", "Mesenchymal cells", "Mast cells",
  "Myeloid cells", "T/NK cells", "B/Plasma cells"
)
for (cell.type in kept.types) {
  of.type <- aux.seurat@meta.data$active.cluster == cell.type
  aux.seurat@meta.data$selected.cells[of.type] <- "Yes"
}

# Subset to the flagged cells; for the non-endothelial types the sub-cluster
# label is simply the cell-type name.
sc.selected <- subset(aux.seurat, subset = selected.cells == "Yes")
for (cell.type in kept.types) {
  of.type <- sc.selected@meta.data$active.cluster == cell.type
  sc.selected@meta.data$subcluster.name[of.type] <- cell.type
}

# Rows of the mutation table whose ARID1A value is Positive or Heterogenous.
.data.table <- data.table[data.table$ARID1A %in% c("Positive", "Heterogenous"), ]

# Endothelial cells belonging to those samples.
sel.endo <- subset(aux.seurat.endo, subset = SampleName %in% .data.table$SampleName)

# Copy each sample's ARID1A value onto its cells. If a sample name occurs
# more than once in the table, the last row wins.
# seq_len() gives an empty loop for an empty table, where 1:nrow() would
# iterate over c(1, 0). The loop variable is not called `sample`, so it does
# not mask base::sample().
sel.endo@meta.data$ARID1A_status <- NA
for (i in seq_len(nrow(.data.table))) {
  sample.name <- .data.table$SampleName[i]
  is.sample <- sel.endo@meta.data$SampleName == sample.name
  sel.endo@meta.data$ARID1A_status[is.sample] <- .data.table$ARID1A[i]
}

# Cross-tabulate cluster (Var1) against ARID1A status (Var2) for the selected
# endothelial cells.
prop <- data.frame(table(
  sel.endo@meta.data$seurat_clusters,
  sel.endo@meta.data$ARID1A_status
))

# Within each cluster, the fraction of cells carrying each status. The
# grouping is dropped afterwards so it cannot leak into later operations.
counts.prop.perc <- prop %>%
  group_by(Var1) %>%
  mutate(percent = Freq / sum(Freq)) %>%
  ungroup()

# Extra "Total" bar: status fractions over all clusters combined.
aux <- aggregate(prop$Freq, by = list(Category = prop$Var2), FUN = sum)
aux$Var1 <- "Total"
colnames(aux) <- c("Var2", "Freq", "Var1")
aux$percent <- aux$Freq / sum(aux$Freq)
counts.prop.perc <- rbind(as.data.frame(counts.prop.perc), aux)

# Stacked bar per cluster, filled by ARID1A status, with the fraction printed
# inside every non-empty segment.
p1 <- ggplot(
  counts.prop.perc,
  aes(
    x = reorder(Var1, percent),
    y = percent,
    fill = Var2,
    label = signif(round(percent, digits = 3), digits = 2)
  )
) +
  geom_bar(position = "stack", stat = "identity", width = 0.8) +
  scale_fill_manual(
    name = "ARID1A status",
    values = c("Positive" = "#4daf4a", "Heterogenous" = "#666666")
  ) +
  scale_x_discrete(limits = rev(levels(counts.prop.perc$Var1))) +
  geom_text(
    data = subset(counts.prop.perc, Freq != 0),
    position = position_stack(vjust = 0.5),
    size = 4,
    color = "#ffffff"
  ) +
  # Add the theme object itself. theme_set() returns the *previous* global
  # theme (so that one was being added to the plot) and, as a side effect,
  # changes the default theme for every later plot in the session.
  theme_gray(base_size = 16) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 270, hjust = 1),
    axis.text.y = element_text(angle = 270, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "right"
  )
p1

Figure 4-i

# Reshape the long count table to wide form: one row per ARID1A status (Var2)
# and one column per cluster (Var1).
table.counts <- spread(prop, Var1, Freq)

# Move the status into the row names and drop its column (the first one), so
# that only the counts remain.
rownames(table.counts) <- table.counts$Var2
table.counts <- table.counts[, -1]

# Chi-square test on the status-by-cluster counts, then plot the residuals
# of the test as a colour grid.
test <- chisq.test(table.counts)
corrplot(test$residuals, is.cor = FALSE, method = "color")

Figure 4-j

# Genes shown in the dot plot.
gene.feat <- c("PROX1", "PDPN", "LYVE1", "FLT4")

# Three plotting groups: clusters 8 and 12 keep their own label, every other
# cluster is pooled as "Remaining".
aux.seurat.endo@meta.data$plot.clusters <- "Remaining"
for (cluster.id in c("8", "12")) {
  in.cluster <- aux.seurat.endo@meta.data$seurat_clusters == cluster.id
  aux.seurat.endo@meta.data$plot.clusters[in.cluster] <- cluster.id
}

# Dot plot of the genes per plotting group, with flipped axes and a
# blue-white-red colour gradient.
p1 <- DotPlot(
  aux.seurat.endo,
  assay = "RNA",
  features = unique(gene.feat),
  group.by = "plot.clusters"
) +
  scale_colour_gradient2(low = "#2166ac", mid = "#f7f7f7", high = "#b2182b") +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 90))
## Warning: Scaling data with a low number of groups may produce misleading results
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p1

Figure 4-k

# Load the annotated epithelial Seurat object and add an empty Status column.
.sc.epi <- readRDS(file = "rds/epithelial.annotated.rds")
.sc.epi <- AddMetaData(.sc.epi, metadata = NA, col.name = "Status")

# Label each cell with the ARID1A value of its sample.
pos.samples <- .data.table$SampleName[.data.table$ARID1A == "Positive"]
het.samples <- .data.table$SampleName[.data.table$ARID1A %in% c("Heterogenous")]
.sc.epi@meta.data$Status[.sc.epi@meta.data$SampleName %in% pos.samples] <- "Positive"
.sc.epi@meta.data$Status[.sc.epi@meta.data$SampleName %in% het.samples] <- "Heterogenous"

# Cells that belong to any sample listed in the filtered mutation table.
sc.sel.filt <- subset(.sc.epi, subset = SampleName %in% .data.table$SampleName)

# Flag exactly those cells on the full epithelial object, subset on the flag,
# and drop factor levels that are no longer used in the metadata.
.sc.epi@meta.data$selected.cells <- "No"
.sc.epi@meta.data$id.cells <- rownames(.sc.epi@meta.data)

id.cells.annot <- rownames(sc.sel.filt@meta.data)
flag.rows <- match(id.cells.annot, .sc.epi@meta.data$id.cells)
.sc.epi@meta.data$selected.cells[flag.rows] <- "Yes"
sc.selected <- subset(.sc.epi, subset = selected.cells == "Yes")
sc.selected@meta.data <- droplevels(sc.selected@meta.data)

# Genes shown in the dot plot.
gene.feat <- c("VEGFA", "VEGFC", "VEGFD", "CCBE1", "FGF2", "SEMA3A")

# Dot plot of the genes grouped by the Status column, with flipped axes and
# a blue-white-red colour gradient.
p1 <- DotPlot(
  sc.selected,
  assay = "RNA",
  features = unique(gene.feat),
  group.by = "Status"
) +
  scale_colour_gradient2(low = "#2166ac", mid = "#f7f7f7", high = "#b2182b") +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 90))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
p1