Introduction

This section contains the complete ELMER code for the following analysis:

Vignette example
BRCA Supervised analysis
BRCA Unsupervised analysis

Vignette example

Below is the complete code that was explained in the other sections.

library(MultiAssayExperiment)
library(ELMER.data)
library(ELMER)
# get distal probes that are 2kb away from TSS on chromosome 1
distal.probes <- get.feature.probe(genome = "hg19", 
                                   met.platform = "450K", 
                                   rm.chr = paste0("chr",c(2:22,"X","Y")))
data(LUSC_RNA_refined,package = "ELMER.data") # GeneExp
data(LUSC_meth_refined,package = "ELMER.data") # Meth

mae <- createMAE(exp = GeneExp, 
                 met = Meth,
                 save = TRUE,
                 linearize.exp = TRUE,
                 save.filename = "mae.rda",
                 filter.probes = distal.probes,
                 met.platform = "450K",
                 genome = "hg19",
                 TCGA = TRUE)

group.col <- "definition"
group1 <-  "Primary solid Tumor"
group2 <- "Solid Tissue Normal"
dir.out <- "result"
diff.dir <-  "hypo" # Search for hypomethylated probes in group 1

sig.diff <- get.diff.meth(data = mae, 
                          group.col = group.col,
                          group1 = group1,
                          group2 = group2,
                          minSubgroupFrac = 0.2,
                          sig.dif = 0.3,
                          diff.dir = diff.dir,
                          cores = 1, 
                          dir.out = dir.out,
                          pvalue = 0.01)


nearGenes <- GetNearGenes(data = mae, 
                          probes = sig.diff$probe, 
                          numFlankingGenes = 20, # 10 upstream and 10 dowstream genes
                          cores = 1)

pair <- get.pair(data = mae,
                 group.col = group.col,
                 group1 = group1,
                 mode = "unsupervised",
                 group2 = group2,
                 nearGenes = nearGenes,
                 diff.dir = diff.dir,
                 minSubgroupFrac = 0.4, # % of samples to use in to create groups U/M
                 permu.dir = file.path(dir.out,"permu"),
                 permu.size = 100, # Please set to 100000 to get significant results
                 raw.pvalue = 0.05,   
                 Pe = 0.01, # Please set to 0.001 to get significant results
                 filter.probes = TRUE, # See preAssociationProbeFiltering function
                 filter.percentage = 0.05,
                 filter.portion = 0.3,
                 dir.out = dir.out,
                 cores = 1,
                 label = diff.dir)

# Identify enriched motif for significantly hypomethylated probes which 
# have putative target genes.
enriched.motif <- get.enriched.motif(data = mae,
                                     probes = pair$Probe, 
                                     dir.out = dir.out, 
                                     label = diff.dir,
                                     min.incidence = 10,
                                     lower.OR = 1.1)

TF <- get.TFs(data = mae, 
              mode = "unsupervised",
              group.col = group.col,
              group1 = group1,
              group2 = group2,
              enriched.motif = enriched.motif,
              dir.out = dir.out,
              cores = 1, 
              label = diff.dir)

BRCA Supervised analysis

library(stringr)
library(TCGAbiolinks)
library(dplyr)
library(ELMER)
library(MultiAssayExperiment)
library(parallel)
library(readr)
dir.create("~/paper_elmer/",showWarnings = FALSE)
setwd("~/paper_elmer/")

file <- "mae_BRCA_hg38_450K_no_ffpe.rda"
if(file.exists(file)) {
  mae <- get(load(file))
} else {
  getTCGA(disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
          basedir = "DATA", # Where data will be downloaded
          genome  = "hg38") # Genome of refenrece "hg38" or "hg19"
  
  distal.probes <- get.feature.probe(feature = NULL,
                                     genome = "hg38", 
                                     met.platform = "450K") 
  
  mae <- createMAE(exp = "~/paper_elmer/Data/BRCA/BRCA_RNA_hg38.rda", 
                   met = "~/paper_elmer/Data/BRCA/BRCA_meth_hg38.rda", 
                   met.platform = "450K",
                   genome = "hg38",
                   linearize.exp = TRUE,
                   filter.probes = distal.probes,
                   met.na.cut = 0.2,
                   save = FALSE,
                   TCGA = TRUE) 
  # Remove FFPE samples from the analysis
  mae <- mae[,!mae$is_ffpe]
  
  # Get molecular subytpe information from cell paper and more metadata (purity etc...)
  # https://doi.org/10.1016/j.cell.2015.09.033
  file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
  downloader::download(file, basename(file))
  subtypes <- readxl::read_excel(basename(file), skip = 2)
  
  subtypes$sample <- substr(subtypes$Methylation,1,16)
  meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T)
  meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
  meta.data <- S4Vectors::DataFrame(meta.data)
  rownames(meta.data) <- meta.data$sample
  stopifnot(all(meta.data$patient == colData(mae)$patient))
  colData(mae) <- meta.data
  save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
}
dir.out <- "BRCA_unsupervised_hg38/hypo"
cores <- 10
diff.probes <- get.diff.meth(data = mae, 
                             group.col = "definition",
                             group1 = "Primary solid Tumor",
                             group2 = "Solid Tissue Normal",
                             diff.dir = "hypo", # Get probes hypometh. in group 1 
                             cores = cores,
                             minSubgroupFrac = 0.2, # % group samples  used. 
                             pvalue = 0.01, 
                             sig.dif = 0.3,
                             dir.out = dir.out,
                             save = TRUE)

# For each differently methylated probes we will get the 
# 20 nearby genes (10 downstream and 10 upstream)
nearGenes <- GetNearGenes(data = mae, 
                          probes =  diff.probes$probe, 
                          numFlankingGenes = 20,
                          cores = cores)

# This step is the most time consuming. Depending on the size of the groups
# and the number of probes found previously it migh take hours
Hypo.pair <- get.pair(data = mae,
                      nearGenes = nearGenes,
                      group.col = "definition",
                      group1 = "Primary solid Tumor",
                      group2 = "Solid Tissue Normal",
                      permu.dir = paste0(dir.out,"/permu"),
                      permu.size = 10000, 
                      mode = "unsupervised",
                      minSubgroupFrac = 0.4, # 40% of samples to create U and M
                      raw.pvalue = 0.001,   
                      Pe = 0.001, 
                      filter.probes = TRUE,
                      filter.percentage = 0.05,
                      filter.portion = 0.3,
                      dir.out = dir.out,
                      cores = cores,
                      label = "hypo")
# Number of pairs: 2950 


enriched.motif <- get.enriched.motif(data = mae,
                                     min.motif.quality = "DS",
                                     probes = unique(Hypo.pair$Probe), 
                                     dir.out = dir.out, 
                                     label = "hypo",
                                     min.incidence = 10,
                                     lower.OR = 1.1)
TF <- get.TFs(data = mae, 
              group.col = "definition",
              group1 = "Primary solid Tumor",
              group2 = "Solid Tissue Normal",
              minSubgroupFrac = 0.4, # Set to 1 if supervised mode
              enriched.motif = enriched.motif,
              dir.out = dir.out, 
              cores = cores, 
              label = "hypo")

BRCA Unsupervised analysis

library(stringr)
library(TCGAbiolinks)
library(dplyr)
library(ELMER)
library(MultiAssayExperiment)
library(parallel)
library(readr)
#-----------------------------------
# 1 - Samples
# ----------------------------------
dir.create("~/paper_elmer/",showWarnings = FALSE)
setwd("~/paper_elmer/")

file <- "mae_BRCA_hg38_450K_no_ffpe.rda"
if(file.exists(file)) {
  mae <- get(load(file))
} else {
  getTCGA(disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
          basedir = "DATA", # Where data will be downloaded
          genome  = "hg38") # Genome of refenrece "hg38" or "hg19"
  
  distal.probes <- get.feature.probe(feature = NULL,
                                     genome = "hg38", 
                                     met.platform = "450K") 
  
  mae <- createMAE(exp = "DATA/BRCA/BRCA_RNA_hg38.rda", 
                   met = "DATA/BRCA/BRCA_meth_hg38.rda", 
                   met.platform = "450K",
                   genome = "hg38",
                   linearize.exp = TRUE,
                   filter.probes = distal.probes,
                   met.na.cut = 0.2,
                   save = FALSE,
                   TCGA = TRUE) 
  # Remove FFPE samples from the analysis
  mae <- mae[,!mae$is_ffpe]
  
  # Get molecular subytpe information from cell paper and more metadata (purity etc...)
  # https://doi.org/10.1016/j.cell.2015.09.033
  file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
  downloader::download(file, basename(file))
  subtypes <- readxl::read_excel(basename(file), skip = 2)
  
  subtypes$sample <- substr(subtypes$Methylation,1,16)
  meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T)
  meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
  meta.data <- S4Vectors::DataFrame(meta.data)
  rownames(meta.data) <- meta.data$sample
  stopifnot(all(meta.data$patient == colData(mae)$patient))
  colData(mae) <- meta.data
  save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
}

cores <- 6
direction <- c( "hypo","hyper")
genome <- "hg38"
group.col  <- "PAM50"
groups <- t(combn(na.omit(unique(colData(mae)[,group.col])),2))
for(g in 1:nrow(groups)) {
  group1 <- groups[g,1]
  group2 <- groups[g,2]
  for (j in direction){
    tryCatch({
      message("Analysing probes ",j, "methylated in ", group1, " vs ", group2)
      dir.out <- paste0("BRCA_supervised_",genome,"/",group1,"_",group2,"/",j)
      dir.create(dir.out, recursive = TRUE)
      #--------------------------------------
      # STEP 3: Analysis                     |
      #--------------------------------------
      # Step 3.1: Get diff methylated probes |
      #--------------------------------------
      Sig.probes <- get.diff.meth(data       = mae,
                                  group.col  = group.col,
                                  group1     = group1,
                                  group2     = group2,
                                  sig.dif    = 0.3,
                                  minSubgroupFrac = 1,
                                  cores      = cores,
                                  dir.out    = dir.out,
                                  diff.dir   = j,
                                  pvalue     = 0.01)
      if(nrow(Sig.probes) == 0) next
      #-------------------------------------------------------------
      # Step 3.2: Identify significant probe-gene pairs            |
      #-------------------------------------------------------------
      # Collect nearby 20 genes for Sig.probes
      nearGenes <- GetNearGenes(data  = mae,
                                probe = Sig.probes$probe,
                                cores = cores)
      
      pair <- get.pair(data       = mae,
                       nearGenes  = nearGenes,
                       group.col  = group.col,
                       group1     = group1,
                       group2     = group2,
                       permu.dir  = paste0(dir.out,"/permu"),
                       dir.out    = dir.out,
                       mode       = "supervised", 
                       diff.dir   = j,
                       cores      = cores,
                       label      = j,
                       permu.size = 10000,
                       raw.pvalue = 0.001)
      
      Sig.probes.paired <- read.csv(paste0(dir.out,
                                           "/getPair.",j,
                                           ".pairs.significant.csv"),
                                    stringsAsFactors=FALSE)[,1]
      
      
      #-------------------------------------------------------------
      # Step 3.3: Motif enrichment analysis on the selected probes |
      #-------------------------------------------------------------
      if(length(Sig.probes.paired) > 0 ){
        #-------------------------------------------------------------
        # Step 3.3: Motif enrichment analysis on the selected probes |
        #-------------------------------------------------------------
        enriched.motif <- get.enriched.motif(probes  = Sig.probes.paired,
                                             dir.out = dir.out,
                                             data    = mae,
                                             label   = j,
                                             plot.title =  paste0("BRCA: OR for paired probes ",
                                                                  j, "methylated in ",
                                                                  group1, " vs ",group2))
        motif.enrichment <- read.csv(paste0(dir.out,
                                            "/getMotif.",j,
                                            ".motif.enrichment.csv"),
                                     stringsAsFactors=FALSE)
        if(length(enriched.motif) > 0){
          #-------------------------------------------------------------
          # Step 3.4: Identifying regulatory TFs                        |
          #-------------------------------------------------------------
          print("get.TFs")
          
          TF <- get.TFs(data           = mae,
                        enriched.motif = enriched.motif,
                        dir.out        = dir.out,
                        mode           = "supervised",
                        group.col      = group.col,
                        group1         = group1,
                        diff.dir       = j,
                        group2         = group2,
                        cores          = cores,
                        label          = j)
          TF.meth.cor <- get(load(paste0(dir.out,
                                         "/getTF.",j,
                                         ".TFs.with.motif.pvalue.rda")))
          save(mae,TF, enriched.motif, Sig.probes.paired,
               pair, nearGenes, Sig.probes, motif.enrichment,
               TF.meth.cor,
               file=paste0(dir.out,"/ELMER_results_",j,".rda"))
        }
      }
    }, error = function(e){
      message(e)
    })
  }
}