Robbin Nameki, Anamay Shetty et al. August 2021
The following workflow describes assigning linkage disequilibrium bins to chromMAGMA and MAGMA results. Linkage disequiliberium information is from https://bitbucket.org/nygcresearch/ldetect-data/src/master/.
This data will later be used to identify unique linkage disequiliberium regions identified by chromMAGMA, in comparison to MAGMA in 04.
library(biomaRt)
library(tidyverse)
library(valr)
source('Rscripts/Utils.R')
.ensembl <- useEnsembl_GRCh37()
gene_file <- read_gene_level()
ld_bins <- read_ld_bins() %>%
mutate(ID = seq(nrow(.)))
used for the assignments of MAGMA and chromMAGMA results to linkage disequiliberium bins
# This looks at the MAGMA and chromMAGMA referenced genes ----
run_intersection <- function(df, old_col_names, suffix) {
colnames(df)[colnames(df) %in% old_col_names] <- c("chrom", "start", "end")
df <- df %>%
mutate(chrom = paste0("chr", chrom)) %>%
bed_intersect(ld_bins, suffix = c("", suffix)) %>%
dplyr::select(-.overlap) %>%
mutate(chrom = str_remove(chrom, "chr"))
colnames(df)[colnames(df) %in% c("chrom", "start", "end")] <- old_col_names
return(df)
}
subset_by_significant_gene <- function(df) {
df %>%
group_by(FEATURE, GWAS_TYPE) %>%
filter(P < 0.05 / n())
}
ld_annotation_originalfeature<- run_intersection(
gene_file,
c("originalfeature_CHR", "originalfeature_START", "originalfeature_STOP"),
".LDBin.originalfeature"
)
ld_annotation_genebody <- run_intersection(
gene_file,
c("gene_body_chromosome_name", "gene_body_start_position", "gene_body_end_position"),
".LDBin.gene_body"
)
ld_annotated_gene_file <- full_join(
ld_annotation_genebody,
ld_annotation_originalfeature,
c("ensembl_gene_id", "external_gene_name", "FEATURE", "GWAS_TYPE", "originalfeature", "originalfeature_CHR", "originalfeature_START", "originalfeature_STOP", "NSNPS", "NPARAM", "N", "ZSTAT", "P", "gene_body_chromosome_name", "gene_body_start_position", "gene_body_end_position", "min_Pvalue", "NEG_LOG10P")
)
rm(ld_annotation_genebody, ld_annotation_originalfeature)
head(ld_annotated_gene_file)
## # A tibble: 6 x 24
## ensembl_gene_id external_gene_n~ FEATURE GWAS_TYPE originalfeature
## <chr> <chr> <chr> <chr> <chr>
## 1 ENSG00000117419 ERI3 chromM~ CCOC 1:44820025-448~
## 2 ENSG00000132773 TOE1 chromM~ CCOC 1:44820025-448~
## 3 ENSG00000142945 KIF2C chromM~ CCOC 1:44820025-448~
## 4 ENSG00000178028 DMAP1 chromM~ CCOC 1:44820025-448~
## 5 ENSG00000198198 SZT2 chromM~ CCOC 1:44820025-448~
## 6 ENSG00000090686 USP48 chromM~ CCOC 1:22281643-222~
## # ... with 19 more variables: originalfeature_CHR <chr>,
## # originalfeature_START <dbl>, originalfeature_STOP <dbl>, NSNPS <int>,
## # NPARAM <int>, N <int>, ZSTAT <dbl>, P <dbl>,
## # gene_body_chromosome_name <chr>, gene_body_start_position <int>,
## # gene_body_end_position <dbl>, min_Pvalue <dbl>, NEG_LOG10P <dbl>,
## # start.LDBin.gene_body <int>, end.LDBin.gene_body <int>,
## # ID.LDBin.gene_body <int>, start.LDBin.originalfeature <int>,
## # end.LDBin.originalfeature <int>, ID.LDBin.originalfeature <int>
#Writing table of Gene_level results that are now binned into LD bins
#saveRDS(ld_annotated_gene_file, "Data/Gene_Level_Bin.4.9.21.rds")