Load cohort information and Seurat object containing all identified cell types.

Extended Figure 2-a

# Estimated number of cells per sample, grouped by major class and filled by
# fresh/frozen status. Relies on `.anno` and `tissue` defined earlier.
my_data <- .anno[
  .anno$Fresh.Frozen %in% tissue,
  c("Fresh.Frozen", "Estimated.Number.of.Cells", "Major.Class")
]
# gsub() (not sub()) so that every thousands separator is removed; with sub()
# a value such as "1,234,567" would keep a comma and be parsed as NA.
my_data$Estimated.Number.of.Cells <- as.numeric(
  gsub(",", "", my_data$Estimated.Number.of.Cells, fixed = TRUE)
)

# One-way ANOVA of the cell counts across major classes. "Pr(>F)" holds one
# entry per row of the ANOVA table; the first is the Major.Class term.
res.aov <- aov(Estimated.Number.of.Cells ~ Major.Class, data = my_data)
pval <- summary(res.aov)[[1]][["Pr(>F)"]]

ggplot(
  my_data,
  aes(x = Major.Class, y = Estimated.Number.of.Cells, fill = Fresh.Frozen)
) +
  # Hide boxplot outliers: every observation is already drawn by geom_point(),
  # and outlier.size = 0 still renders the outlier stroke.
  geom_boxplot(outlier.shape = NA) +
  geom_point(pch = 21, position = position_jitterdodge()) +
  theme_classic(base_size = 15) +
  rotate_x_text(45) +
  # annotate() draws the label once; geom_text() with constant x/y/label
  # would overplot it once per data row.
  annotate("text", x = 5, y = 20000, label = paste0("p-value = ", pval[1]))

Extended Figure 2-b

# Targeted vs. estimated number of cells per sample, coloured by fresh/frozen
# status, with the correlation test result printed on the plot.
# Relies on `.anno` and `tissue` defined earlier.
my_data <- .anno[
  .anno$Fresh.Frozen %in% tissue,
  c("Fresh.Frozen", "Estimated.Number.of.Cells", "No..Cells.Targeted",
    "Major.Class")
]
# gsub() (not sub()) so that every thousands separator is removed; with sub()
# a value such as "1,234,567" would keep a comma and be parsed as NA.
my_data$Estimated.Number.of.Cells <- as.numeric(
  gsub(",", "", my_data$Estimated.Number.of.Cells, fixed = TRUE)
)
my_data$No..Cells.Targeted <- as.numeric(
  gsub(",", "", my_data$No..Cells.Targeted, fixed = TRUE)
)

# cor.test() with default arguments (Pearson correlation).
cor.value <- cor.test(
  my_data$Estimated.Number.of.Cells,
  my_data$No..Cells.Targeted
)

ggplot(my_data, aes(No..Cells.Targeted, Estimated.Number.of.Cells)) +
  geom_point(aes(colour = factor(Fresh.Frozen)), size = 3) +
  theme_classic(base_size = 15) +
  labs(y = "Estimated", x = "Targeted") +
  # annotate() draws the label once; geom_text() with constant x/y/label
  # would overplot it once per data row.
  annotate(
    "text",
    x = 5000,
    y = 20000,
    label = paste0(
      "r = ", cor.value$estimate, "\n", "p-value = ", cor.value$p.value
    )
  )
## Warning: Removed 1 rows containing missing values (geom_point).

Extended Figure 2-c

# Median genes per cell for each sample, grouped by major class and filled by
# fresh/frozen status. Relies on `.anno` and `tissue` defined earlier.
my_data <- .anno[
  .anno$Fresh.Frozen %in% tissue,
  c("Fresh.Frozen", "Median.Genes.per.Cell", "Major.Class")
]
# gsub() (not sub()) so that every thousands separator is removed; with sub()
# a value such as "1,234,567" would keep a comma and be parsed as NA.
my_data$Median.Genes.per.Cell <- as.numeric(
  gsub(",", "", my_data$Median.Genes.per.Cell, fixed = TRUE)
)

# One-way ANOVA across major classes. "Pr(>F)" holds one entry per row of the
# ANOVA table; the first is the Major.Class term.
res.aov <- aov(Median.Genes.per.Cell ~ Major.Class, data = my_data)
pval <- summary(res.aov)[[1]][["Pr(>F)"]]

ggplot(
  my_data,
  aes(x = Major.Class, y = Median.Genes.per.Cell, fill = Fresh.Frozen)
) +
  # Hide boxplot outliers: every observation is already drawn by geom_point(),
  # and outlier.size = 0 still renders the outlier stroke.
  geom_boxplot(outlier.shape = NA) +
  geom_point(pch = 21, position = position_jitterdodge()) +
  theme_classic(base_size = 15) +
  rotate_x_text(45) +
  labs(y = "Median of genes per cells", x = "") +
  # annotate() draws the label once; geom_text() with constant x/y/label
  # would overplot it once per data row.
  annotate("text", x = 5, y = 1500, label = paste0("p-value = ", pval[1]))

Extended Figure 2-d

# Number of reads for each sample, grouped by major class and filled by
# fresh/frozen status. Relies on `.anno` and `tissue` defined earlier.
my_data <- .anno[
  .anno$Fresh.Frozen %in% tissue,
  c("Fresh.Frozen", "Number.of.Reads", "Major.Class")
]
# Remove all thousands separators before converting to numeric.
my_data$Number.of.Reads <- as.numeric(
  gsub(",", "", my_data$Number.of.Reads, fixed = TRUE)
)

# One-way ANOVA across major classes. "Pr(>F)" holds one entry per row of the
# ANOVA table; the first is the Major.Class term.
res.aov <- aov(Number.of.Reads ~ Major.Class, data = my_data)
pval <- summary(res.aov)[[1]][["Pr(>F)"]]

ggplot(
  my_data,
  aes(x = Major.Class, y = Number.of.Reads, fill = Fresh.Frozen)
) +
  # Hide boxplot outliers: every observation is already drawn by geom_point(),
  # and outlier.size = 0 still renders the outlier stroke.
  geom_boxplot(outlier.shape = NA) +
  geom_point(pch = 21, position = position_jitterdodge()) +
  theme_classic(base_size = 15) +
  rotate_x_text(45) +
  # NOTE(review): the axis label says "Median of reads per cells" but the
  # plotted column is Number.of.Reads -- confirm which one is intended.
  labs(y = "Median of reads per cells", x = "") +
  # annotate() draws the label once; geom_text() with constant x/y/label
  # would overplot it once per data row.
  annotate(
    "text",
    x = 5,
    y = 500000000,
    label = paste0("p-value = ", pval[1])
  ) +
  scale_y_continuous(labels = comma)

Extended Figure 2-h

# Dimensional-reduction plot of all cells in `aux.seurat`, grouped and
# labelled by the `seurat_clusters` metadata column. Rasterisation is turned
# off explicitly.
DimPlot(
  object = aux.seurat,
  pt.size = 0.1,
  raster = FALSE,
  group.by = "seurat_clusters",
  label = TRUE  # TRUE rather than T: T is an ordinary, reassignable variable
)

Extended Figure 2-k

# Use the `active.cluster` metadata column as cell identities, then tidy the
# labels: remove the text " cells" and replace spaces and slashes with
# underscores.
Idents(aux.seurat) <- aux.seurat@meta.data$active.cluster
Idents(aux.seurat) <- gsub(" cells", "", Idents(aux.seurat))
Idents(aux.seurat) <- gsub(" ", "_", Idents(aux.seurat))
Idents(aux.seurat) <- gsub("/", "_", Idents(aux.seurat))

# Average expression per identity, additionally split by the Fresh.Frozen
# metadata column. `group.by = c("ident", ...)` is the replacement for the
# deprecated `add.ident` argument.
cluster.averages <- AverageExpression(
  aux.seurat,
  return.seurat = TRUE,
  group.by = c("ident", "Fresh.Frozen")
)
## Centering and scaling data matrix

# The averaged columns are consumed two at a time (columns 1-2, 3-4, ...).
# NOTE(review): this presumes every cluster contributes exactly two adjacent
# columns (one per Fresh.Frozen level) -- confirm against
# colnames(cluster.averages).
n_avg <- ncol(cluster.averages)
if (n_avg %% 2 != 0) {
  stop(
    "Expected an even number of averaged columns (two per cluster), got ",
    n_avg,
    call. = FALSE
  )
}

# Start index of each pair; yields an empty vector when there are no columns.
pair_starts <- seq_len(n_avg / 2) * 2 - 1
mylist <- lapply(pair_starts, function(i) {
  pair <- cluster.averages[, c(i, i + 1)]
  CellScatter(pair, cell1 = colnames(pair)[1], cell2 = colnames(pair)[2])
})

plot_grid(plotlist = mylist, ncol = 5)

Extended Figure 2-i

# PCA of per-sample cluster composition, plotted as PC1 vs PC2 and PC2 vs PC3.
# The PCA is computed once and shared by both plots.

# Cell counts per sample (rows) and Seurat cluster (columns).
sample_cluster_counts <- table(
  aux.seurat@meta.data$SampleName,
  aux.seurat@meta.data$seurat_clusters
)

# Clusters x samples count matrix, and the same matrix with every sample
# (column) scaled to sum to 1.
data.m <- t(unclass(sample_cluster_counts))
data.m.prob <- prop.table(data.m, margin = 2)

# PCA with samples as observations and cluster proportions as variables.
df_pca <- prcomp(t(data.m.prob))
df_out <- as.data.frame(df_pca$x)

# Annotate each sample with its major class, matched by row name. The label is
# taken from df_out itself so that it is always aligned with the scores.
df_out$Class <- my_cluster_col$Major.Class[
  match(rownames(df_out), rownames(my_cluster_col))
]
df_out$label <- rownames(df_out)

# Percentage of variance explained per component: variance is sdev^2, so the
# ratio must be taken on squared standard deviations.
pct_var <- round(df_pca$sdev^2 / sum(df_pca$sdev^2) * 100, 2)
# Labels are built from the PC names only (not the annotation columns added
# to df_out above), so names and percentages line up one-to-one.
percentage <- paste(colnames(df_pca$x), "(", paste0(pct_var, "%", ")"))

# Layers shared by both PCA scatter plots.
pca_layers <- list(
  geom_point(size = 2),
  scale_colour_manual(
    name = "Major Class",
    values = c(
      "Endometrioma" = "#7b3294",
      "Eutopic Endometrium" = "#c2a5cf",
      "Endometriosis" = "#d9f0d3",
      "No endometriosis detected" = "#a6dba0",
      "Unaffected ovary" = "#008837"
    )
  ),
  theme_bw(base_size = 12),
  theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_blank(),
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black"),
    axis.ticks = element_line(colour = "black"),
    plot.margin = unit(c(1, 1, 1, 1), "line")
  )
)

# PC1 vs PC2
p <- ggplot(data = df_out, aes(x = PC1, y = PC2, color = Class)) +
  pca_layers +
  xlab(percentage[1]) +
  ylab(percentage[2])
p

# PC2 vs PC3
p <- ggplot(data = df_out, aes(x = PC2, y = PC3, color = Class)) +
  pca_layers +
  xlab(percentage[2]) +
  ylab(percentage[3])
p