Introduction

The following script describes the process of weghing chromMAGMA gene-level results from 02 with Mullerian RNAseq data from Corona et al. 2020 & GEOXXXXX. This breaks the ties of chromMAGMA results (due to one enhancer being assigned to multiple genes) for further gene-set enrichment analysis.

Libraries

library(tidyverse)

Sourcecode

source('Rscripts/Utils.R')
## Warning: package 'valr' was built under R version 4.0.5

Reading mullerian RNAseq samples

#Getting Corona et al. 2020 
corona2020 <- read_corona2020_FTSEC()

#Getting Average Gene Expression across all samples
corona2020$average_expression <- rowMeans(corona2020[,c(1:24)])

corona2020_avg_expression <- corona2020 %>%
  dplyr::select(average_expression,ensembl_gene_id)

Weighing NEGLOG10P chromMAGMA with mullerian RNAseq samples

## Reading chromMAGMA Results
Gene_Level <- read_gene_level()
Gene_Level_chromMAGMA <- Gene_Level %>%
  filter(FEATURE == 'chromMAGMA')

## Adding average expression column from corona et al.
Gene_Level_chromMAGMA_avg <- merge(Gene_Level_chromMAGMA,corona2020_avg_expression, by = 'ensembl_gene_id')
length(unique(Gene_Level_chromMAGMA_avg$ensembl_gene_id))
## [1] 17424
#27 genes are not in expression data, therefore not in analysis
n <- Gene_Level_chromMAGMA[!Gene_Level_chromMAGMA$ensembl_gene_id %in% Gene_Level_chromMAGMA_avg$ensembl_gene_id,]
length(unique(n$ensembl_gene_id))
## [1] 27
## Weighted Final List chromMAGMA
weighted_final_list_chromMAGMA <- Gene_Level_chromMAGMA_avg  %>%
  filter(FEATURE == 'chromMAGMA') %>%
  mutate(weighted_P = NEG_LOG10P*average_expression) %>%
  dplyr::group_by(GWAS_TYPE) %>%
  dplyr::arrange(desc(weighted_P), .by_group = TRUE) %>%
  group_by(FEATURE,GWAS_TYPE) %>%
  filter(!duplicated(ensembl_gene_id))
  
head(weighted_final_list_chromMAGMA)
## # A tibble: 6 x 20
## # Groups:   FEATURE, GWAS_TYPE [1]
##   ensembl_gene_id external_gene_n~ FEATURE GWAS_TYPE originalfeature
##   <chr>           <chr>            <chr>   <chr>     <chr>          
## 1 ENSG00000141141 DDX52            chromM~ CCOC      17:36100871-36~
## 2 ENSG00000132142 ACACA            chromM~ CCOC      17:36100871-36~
## 3 ENSG00000108753 HNF1B            chromM~ CCOC      17:36098905-36~
## 4 ENSG00000181163 NPM1             chromM~ CCOC      5:170209218-17~
## 5 ENSG00000102804 TSC22D1          chromM~ CCOC      13:44874244-44~
## 6 ENSG00000149257 SERPINH1         chromM~ CCOC      11:75400410-75~
## # ... with 15 more variables: originalfeature_CHR <chr>,
## #   originalfeature_START <dbl>, originalfeature_STOP <dbl>, NSNPS <int>,
## #   NPARAM <int>, N <int>, ZSTAT <dbl>, P <dbl>,
## #   gene_body_chromosome_name <chr>, gene_body_start_position <int>,
## #   gene_body_end_position <dbl>, min_Pvalue <dbl>, NEG_LOG10P <dbl>,
## #   average_expression <dbl>, weighted_P <dbl>
#saveRDS(weighted_final_list_chromMAGMA, file = 'Data/Gene_Level_Weighted_4.9.21.rds')