Robbin Nameki, Anamay Shetty et al. August 2021

Introduction

The following workflow describes assigning linkage disequilibrium bins to chromMAGMA and MAGMA results. Linkage disequiliberium information is from https://bitbucket.org/nygcresearch/ldetect-data/src/master/.

This data will later be used to identify unique linkage disequiliberium regions identified by chromMAGMA, in comparison to MAGMA in 04.

Libraries

library(biomaRt)
library(tidyverse)
library(valr)

Sourcecode

source('Rscripts/Utils.R')

Reading files

.ensembl <- useEnsembl_GRCh37()

gene_file <- read_gene_level()

ld_bins <- read_ld_bins()  %>%
    mutate(ID = seq(nrow(.)))

Functions

used for the assignments of MAGMA and chromMAGMA results to linkage disequiliberium bins

# This looks at the MAGMA and chromMAGMA referenced genes ----
run_intersection <- function(df, old_col_names, suffix) {
  
  colnames(df)[colnames(df) %in% old_col_names] <- c("chrom", "start", "end")
  
  df <- df %>%
    mutate(chrom = paste0("chr", chrom)) %>%
    bed_intersect(ld_bins, suffix = c("", suffix)) %>%
    dplyr::select(-.overlap) %>%
    mutate(chrom = str_remove(chrom, "chr"))
  
  
  colnames(df)[colnames(df) %in% c("chrom", "start", "end")] <- old_col_names
  
  return(df)

}

subset_by_significant_gene <- function(df) {
  df %>%
    group_by(FEATURE, GWAS_TYPE) %>%
    filter(P < 0.05 / n())
}

Binning genes (MAGMA) or enhancers (chromMAGMA) to linkage disequilibrium bins

ld_annotation_originalfeature<- run_intersection(
  gene_file, 
  c("originalfeature_CHR", "originalfeature_START", "originalfeature_STOP"),
  ".LDBin.originalfeature"
)

ld_annotation_genebody <- run_intersection(
  gene_file, 
  c("gene_body_chromosome_name", "gene_body_start_position", "gene_body_end_position"),
  ".LDBin.gene_body"
)

ld_annotated_gene_file <- full_join(
  ld_annotation_genebody, 
  ld_annotation_originalfeature,
  c("ensembl_gene_id", "external_gene_name", "FEATURE", "GWAS_TYPE", "originalfeature", "originalfeature_CHR", "originalfeature_START", "originalfeature_STOP", "NSNPS", "NPARAM", "N", "ZSTAT", "P", "gene_body_chromosome_name", "gene_body_start_position", "gene_body_end_position", "min_Pvalue", "NEG_LOG10P")
)

rm(ld_annotation_genebody, ld_annotation_originalfeature)

head(ld_annotated_gene_file)
## # A tibble: 6 x 24
##   ensembl_gene_id external_gene_n~ FEATURE GWAS_TYPE originalfeature
##   <chr>           <chr>            <chr>   <chr>     <chr>          
## 1 ENSG00000117419 ERI3             chromM~ CCOC      1:44820025-448~
## 2 ENSG00000132773 TOE1             chromM~ CCOC      1:44820025-448~
## 3 ENSG00000142945 KIF2C            chromM~ CCOC      1:44820025-448~
## 4 ENSG00000178028 DMAP1            chromM~ CCOC      1:44820025-448~
## 5 ENSG00000198198 SZT2             chromM~ CCOC      1:44820025-448~
## 6 ENSG00000090686 USP48            chromM~ CCOC      1:22281643-222~
## # ... with 19 more variables: originalfeature_CHR <chr>,
## #   originalfeature_START <dbl>, originalfeature_STOP <dbl>, NSNPS <int>,
## #   NPARAM <int>, N <int>, ZSTAT <dbl>, P <dbl>,
## #   gene_body_chromosome_name <chr>, gene_body_start_position <int>,
## #   gene_body_end_position <dbl>, min_Pvalue <dbl>, NEG_LOG10P <dbl>,
## #   start.LDBin.gene_body <int>, end.LDBin.gene_body <int>,
## #   ID.LDBin.gene_body <int>, start.LDBin.originalfeature <int>,
## #   end.LDBin.originalfeature <int>, ID.LDBin.originalfeature <int>
#Writing table of Gene_level results that are now binned into LD bins
#saveRDS(ld_annotated_gene_file, "Data/Gene_Level_Bin.4.9.21.rds")