Load Seurat object containing Endometrial-type epithelial cells
sc = readRDS("rds/EnEpi_cells.rds")
prop.cells = data.frame(table(sc@meta.data$active.cluster))
prop.cells <- prop.cells[order(prop.cells$Freq),]
prop.cells$Var1 <- factor(prop.cells$Var1, levels = prop.cells$Var1)
ggplot(prop.cells, aes(Var1, Freq)) +
geom_col() +
theme_minimal(base_size = 15) +
geom_text(aes(label=Freq), position=position_dodge(width=0.9), vjust=-0.25) +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.x = element_blank()) +
labs(title="Number of cells in each cell type", x ="Cell type", y = "Number of Cells")
prop.cells = data.frame(table(sc@meta.data$Major.Class))
prop.cells <- prop.cells[order(prop.cells$Freq),]
prop.cells$Var1 <- factor(prop.cells$Var1, levels = prop.cells$Var1)
ggplot(prop.cells, aes(Var1, Freq)) +
geom_col() +
theme_minimal(base_size = 15) +
geom_text(aes(label=Freq), position=position_dodge(width=0.9), vjust=-0.25) +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.x = element_blank()) +
labs(title="", x ="Major class", y = "Number of Cells")
sc.sel = subset(sc, subset = Major.Class %in% c("Endometrioma", "Eutopic Endometrium", "Endometriosis"))
single.cell.expression.set <- SeuratToExpressionSet(sc.sel, delimiter='-', position=2, version="v3")
phenoData(single.cell.expression.set) <- AnnotatedDataFrame(sc.sel@meta.data)
prop.cells = data.frame(table(sc.sel@meta.data$active.cluster))
prop.cells <- prop.cells[order(prop.cells$Freq),]
prop.cells$Var1 <- factor(prop.cells$Var1, levels = prop.cells$Var1)
ggplot(prop.cells, aes(Var1, Freq)) +
geom_col() +
theme_minimal(base_size = 15) +
geom_text(aes(label=Freq), position=position_dodge(width=0.9), vjust=-0.25) +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.x = element_blank()) +
labs(title="Number of cells in each cell type", x ="Cell type", y = "Number of Cells")
prop.cells = data.frame(table(sc.sel@meta.data$Major.Class))
prop.cells <- prop.cells[order(prop.cells$Freq),]
prop.cells$Var1 <- factor(prop.cells$Var1, levels = prop.cells$Var1)
ggplot(prop.cells, aes(Var1, Freq)) +
geom_col() +
theme_minimal(base_size = 15) +
geom_text(aes(label=Freq), position=position_dodge(width=0.9), vjust=-0.25) +
theme(plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.x = element_blank()) +
labs(title="", x ="Major class", y = "Number of Cells")
ensembl <- useMart("ensembl", dataset="hsapiens_gene_ensembl")
bm <- getBM(attributes=c('ensembl_gene_id', 'external_gene_name', 'gene_biotype', 'ensembl_transcript_id'), mart = ensembl)
title = "25 CCOC"
load("data/ccoc_GSE129617.expr_25_samples.rda")
dim(GSE129617.expr)
df <- data.frame(histotype = c(rep('CCOC', 25)))
rownames(df) <- colnames(GSE129617.expr)
# Heapmap 0: plot top variable genes
matrix <- GSE129617.expr
matrix <- matrix[!duplicated(rownames(matrix)) & rownames(matrix) %in%
bm$external_gene_name[!is.na(bm$gene_biotype) &
bm$gene_biotype == 'protein_coding'],]
sd <- apply(X = matrix, MARGIN = 1, FUN = sd)
rank <- (length(sd) - rank(sd) + 1)
select <- rank <= 500
pheatmap(matrix[select,], cluster_rows=T, show_rownames=F, cluster_cols=T, annotation_col = df,
main = paste0('GSE129617 - 25 CCOC Top ',sum(select),' protein coding genes (n=', nrow(matrix), ")"))
matrix.eset = ExpressionSet(assayData=matrix, phenoData = new("AnnotatedDataFrame",data=df))
Est.prop = music_prop(bulk.eset = matrix.eset,
sc.eset = single.cell.expression.set,
clusters = 'active.cluster',
samples = 'SampleName',
verbose = F)
bp.esti.pro = read.delim("files/estimation.25-CCOC.decon.cvs", sep = ",")
df <- data.frame(histotype = c(rep('CCOC', 25)))
rownames(df) <- rownames(bp.esti.pro)
bp.esti.pro = merge(bp.esti.pro, df, by=0)
bp.esti.pro = bp.esti.pro[match(rownames(df), bp.esti.pro$Row.names), ]
collorder = c("#8dd3c7", "#ffffb3", "#bebada", "#fb8072", "#80b1d3", "#fdb462", "#b3de69", "#fccde5", "#d9d9d9")
ht_list = rowAnnotation(text = anno_text(bp.esti.pro$Row.names, location = unit(1, "npc"), just = "right",
gp = gpar(fontsize = 12)))
anno_width = unit(3, "cm")
.cols = c(2: (ncol(bp.esti.pro)-1))
ht_list = ht_list + rowAnnotation("dist_tss" = anno_barplot(bp.esti.pro[, .cols], bar_width = 1, gp = gpar(fill = collorder, fontsize = 14),
width = anno_width), show_annotation_name = FALSE) +
rowAnnotation(Histotype = anno_simple(bp.esti.pro$histotype), gp = gpar(fontsize = 20))
draw(ht_list, heatmap_legend_list = Legend(title = "Epithelial clusters", labels = colnames(bp.esti.pro[, .cols]), legend_gp = gpar(fill = collorder)))
title = "GSE73614 - OvTuCC = 24 OvTuEndo = 35"
five.word <- function(my.string){
elems = unlist(strsplit(as.character(my.string), " "))
elems[length(elems)]
}
GSE73614.anno.clinical <- read.delim("/media/Data01/public_data/CCOC/GSE73614/Sample_type.csv", sep = ",")
GSE73614.anno.clinical$histotype <- sapply(GSE73614.anno.clinical$histotype, five.word)
GSE73614.anno.clinical$histotype = gsub("Mayo", "", GSE73614.anno.clinical$histotype)
GSE73614 <- getGEO('GSE73614',GSEMatrix=TRUE)
GSE73614.expr <- exprs(GSE73614$GSE73614_series_matrix.txt.gz)
head(GSE73614.expr)
GSE73614.expr = read.table("/media/Data01/public_data/CCOC/GSE73614/GSE73614_RAW.tar")
annot.BM <- getBM(mart=ensembl,
filters="agilent_wholegenome_4x44k_v2",
attributes=c("agilent_wholegenome_4x44k_v2","external_gene_name","ensembl_gene_id","gene_biotype"),
values=rownames(GSE73614.expr),
uniqueRows=TRUE)
head(annot.BM)
common = match(rownames(GSE73614.expr), annot.BM$agilent_wholegenome_4x44k_v2)
rownames(GSE73614.expr)[!is.na(common)] = annot.BM$external_gene_name[na.omit(common)]
head(GSE73614.expr)
df <- GSE73614.anno.clinical
rownames(df) <- df$SampleName
df = df[,-1, drop=F]
matrix <- GSE73614.expr
matrix = matrix[complete.cases(matrix),]
delta1 = max(matrix, na.rm = T) - min(matrix, na.rm = T)
delta2 = max(matrix, na.rm = T) - 0
matrix = (matrix - min(matrix, na.rm = T)) * delta1 / delta2
matrix[1:5, 1:4]
matrix <- matrix[!duplicated(rownames(matrix)) & rownames(matrix) %in%
bm$external_gene_name[!is.na(bm$gene_biotype) &
bm$gene_biotype == 'protein_coding'],]
sd <- apply(X = matrix, MARGIN = 1, FUN = sd)
rank <- (length(sd) - rank(sd) + 1)
select <- rank <= 50
df = df[df$histotype %in% c("OvTuEndo", "OvTuCC"), , drop=F]
df = df[order(df$histotype), , drop=F]
matrix = matrix[, match(rownames(df), colnames(matrix))]
matrix = matrix[-which(rownames(matrix)==""),]
matrix.eset = ExpressionSet(assayData=matrix, phenoData = new("AnnotatedDataFrame",data=df))
Est.prop = music_prop(bulk.eset = matrix.eset,
sc.eset = single.cell.expression.set,
clusters = 'active.cluster',
samples = 'SampleName',
verbose = F)
five.word <- function(my.string){
elems = unlist(strsplit(as.character(my.string), " "))
elems[length(elems)]
}
bp.esti.pro = read.delim("files/estimation.GSE73614 - OvTuCC = 24 OvTuEndo = 35.cvs", sep = ",")
GSE73614.anno.clinical <- read.delim("files/Sample_type.csv", sep = ",")
GSE73614.anno.clinical$histotype <- sapply(GSE73614.anno.clinical$histotype, five.word)
GSE73614.anno.clinical$histotype = gsub("Mayo", "", GSE73614.anno.clinical$histotype)
df <- GSE73614.anno.clinical
rownames(df) <- df$SampleName
df = df[,-1, drop=F]
df = df[df$histotype %in% c("OvTuEndo", "OvTuCC"), , drop=F]
rownames(df) <- rownames(bp.esti.pro)
bp.esti.pro = merge(bp.esti.pro, df, by=0)
bp.esti.pro = bp.esti.pro[match(rownames(df), bp.esti.pro$Row.names), ]
collorder = c("#8dd3c7", "#ffffb3", "#bebada", "#fb8072", "#80b1d3", "#fdb462", "#b3de69", "#fccde5", "#d9d9d9")
ht_list = rowAnnotation(text = anno_text(bp.esti.pro$Row.names, location = unit(1, "npc"), just = "right",
gp = gpar(fontsize = 12)))
anno_width = unit(3, "cm")
.cols = c(2: (ncol(bp.esti.pro)-1))
ht_list = ht_list + rowAnnotation("dist_tss" = anno_barplot(bp.esti.pro[, .cols], bar_width = 1, gp = gpar(fill = collorder, fontsize = 14),
width = anno_width), show_annotation_name = FALSE) +
rowAnnotation(Histotype = anno_simple(bp.esti.pro$histotype), gp = gpar(fontsize = 20))
draw(ht_list, heatmap_legend_list = Legend(title = "Epithelial clusters", labels = colnames(bp.esti.pro[, .cols]), legend_gp = gpar(fill = collorder)))
GSE9899.anno.clinical <- read.delim("/media/Data01/public_data/CCOC/GSE9899/GSE9899_clinical_anns.csv", sep = ",")
GSE9899.anno.clinical = merge(GSE9899.anno.clinical, read.delim("/media/Data01/public_data/CCOC/GSE9899/AOCSID_GSM_IDs_clinical_anns.csv", sep = ","), by="AOCSID")
title = "GSE9899 - 20 Endometrioid"
GSE9899 <- getGEO('GSE9899',GSEMatrix=TRUE)
GSE9899.expr <- exprs(GSE9899$GSE9899_series_matrix.txt.gz)
head(GSE9899.expr)
annot.BM <- getBM(mart=ensembl,
attributes=c("affy_hg_u133_plus_2", "ensembl_gene_id", "gene_biotype", "external_gene_name"),
filter="affy_hg_u133_plus_2",
values=rownames(GSE9899.expr),
uniqueRows=TRUE)
common = match(rownames(GSE9899.expr), annot.BM$affy_hg_u133_plus_2)
rownames(GSE9899.expr)[!is.na(common)] = annot.BM$external_gene_name[na.omit(common)]
df <- GSE9899.anno.clinical
rownames(df) <- df$GSMID
df <- df[order(df$Primary.Site),]
matrix <- GSE9899.expr[, df$GSMID]
matrix <- matrix[complete.cases(matrix),]
matrix <- matrix[!duplicated(rownames(matrix)) & rownames(matrix) %in%
bm$external_gene_name[!is.na(bm$gene_biotype) &
bm$gene_biotype == 'protein_coding'],]
sd <- apply(X = matrix, MARGIN = 1, FUN = sd)
rank <- (length(sd) - rank(sd) + 1)
select <- rank <= 50
df = df[,-c(1, 7)]
df = df[df$Subtype == "Endo", , drop=F]
matrix = matrix[, which(colnames(matrix) %in% rownames(df))]
df = df[,3, drop=F]
colnames(df) <- "histotype"
matrix = matrix[-which(rownames(matrix)==""),]
matrix.eset = ExpressionSet(assayData=matrix, phenoData = new("AnnotatedDataFrame",data=df))
Est.prop = music_prop(bulk.eset = matrix.eset,
sc.eset = single.cell.expression.set,
clusters = 'active.cluster',
samples = 'SampleName',
verbose = F)
bp.esti.pro = read.delim("files/estimation.GSE9899 - 20 Endometrioid.cvs", sep = ",")
df <- data.frame(histotype = c(rep('EnOC', 20)))
rownames(df) <- rownames(bp.esti.pro)
bp.esti.pro = merge(bp.esti.pro, df, by=0)
bp.esti.pro = bp.esti.pro[match(rownames(df), bp.esti.pro$Row.names), ]
collorder = c("#8dd3c7", "#ffffb3", "#bebada", "#fb8072", "#80b1d3", "#fdb462", "#b3de69", "#fccde5", "#d9d9d9")
ht_list = rowAnnotation(text = anno_text(bp.esti.pro$Row.names, location = unit(1, "npc"), just = "right",
gp = gpar(fontsize = 12)))
anno_width = unit(3, "cm")
.cols = c(2: (ncol(bp.esti.pro)-1))
ht_list = ht_list + rowAnnotation("dist_tss" = anno_barplot(bp.esti.pro[, .cols], bar_width = 1, gp = gpar(fill = collorder, fontsize = 14),
width = anno_width), show_annotation_name = FALSE) +
rowAnnotation(Histotype = anno_simple(bp.esti.pro$histotype), gp = gpar(fontsize = 20))
draw(ht_list, heatmap_legend_list = Legend(title = "Epithelial clusters", labels = colnames(bp.esti.pro[, .cols]), legend_gp = gpar(fill = collorder)))
title = "269 TCGA HGSOC"
ucec.ID = subset(pd.mRNA.prim, cancer.type == "OV")
dim(ucec.ID)
UCEC.RNA <- TCGA.RNA[,c(1, which(colnames(TCGA.RNA) %in% ucec.ID$TCGAlong.id))]
dim(UCEC.RNA)
df <- data.frame(histotype = c(rep('HGSOC', 269)))
colnames(UCEC.RNA) <- gsub("^(.*?-.*?-.*?)-.*", "\\1", colnames(UCEC.RNA))
rownames(df) <- colnames(UCEC.RNA)[-1]
# Heapmap 0: plot top variable genes
matrix <- UCEC.RNA
matrix <- matrix[!duplicated(TCGA.RNA$gene_symbol) & TCGA.RNA$gene_symbol %in%
bm$external_gene_name[!is.na(bm$gene_biotype) &
bm$gene_biotype == 'protein_coding'],]
matrix[1:4, 1:4]
rownames(matrix) <- matrix$gene_symbol
matrix = matrix[, -1]
matrix = matrix[complete.cases(matrix),]
sd <- apply(X = matrix, MARGIN = 1, FUN = sd)
rank <- (length(sd) - rank(sd) + 1)
select <- rank <= 500
delta1 = max(matrix, na.rm = T) - min(matrix, na.rm = T)
delta2 = max(matrix, na.rm = T) - 0
matrix = (matrix - min(matrix, na.rm = T)) * delta1 / delta2
pheatmap(log2(matrix[select,] + 1), cluster_rows=T, show_rownames=F, show_colnames = F, cluster_cols=T, annotation_col = df,
main = paste0(title, ' Top ',sum(select),' protein coding genes (n=', nrow(matrix), ")"))
matrix.eset = new("ExpressionSet", exprs=as.matrix(matrix), phenoData = new("AnnotatedDataFrame",data=df))
Est.prop = music_prop(bulk.eset = matrix.eset,
sc.eset = single.cell.expression.set,
clusters = 'active.cluster',
samples = 'SampleName',
verbose = F)
bp.esti.pro = read.delim("files/estimation.HGSOC.decon.cvs", sep = ",")
df <- data.frame(histotype = c(rep('HGSOC', 269)))
rownames(df) <- rownames(bp.esti.pro)
bp.esti.pro = merge(bp.esti.pro, df, by=0)
bp.esti.pro = bp.esti.pro[match(rownames(df), bp.esti.pro$Row.names), ]
collorder = c("#8dd3c7", "#ffffb3", "#bebada", "#fb8072", "#80b1d3", "#fdb462", "#b3de69", "#fccde5", "#d9d9d9")
ht_list = rowAnnotation(text = anno_text(bp.esti.pro$Row.names, location = unit(1, "npc"), just = "right",
gp = gpar(fontsize = 12)))
anno_width = unit(3, "cm")
.cols = c(2: (ncol(bp.esti.pro)-1))
ht_list = ht_list + rowAnnotation("dist_tss" = anno_barplot(bp.esti.pro[, .cols], bar_width = 1, gp = gpar(fill = collorder, fontsize = 14),
width = anno_width), show_annotation_name = FALSE) +
rowAnnotation(Histotype = anno_simple(bp.esti.pro$histotype), gp = gpar(fontsize = 20))
draw(ht_list, heatmap_legend_list = Legend(title = "Epithelial clusters", labels = colnames(bp.esti.pro[, .cols]), legend_gp = gpar(fill = collorder)))