## ---- fig.height=6,echo=FALSE, message=FALSE, warning=FALSE, include=TRUE----- library(ELMER.data) library(ELMER) data(elmer.data.example) data(LUSC_meth_refined) data(LUSC_RNA_refined) library(DT) ## ----------------------------------------------------------------------------- # Example of DNA methylation data input datatable(Meth[1:10, 1:10], options = list(scrollX = TRUE, keys = TRUE, pageLength = 5), rownames = TRUE) ## ----------------------------------------------------------------------------- # Example of Gene expression data input datatable(GeneExp[1:10, 1:2], options = list(scrollX = TRUE, keys = TRUE, pageLength = 5), rownames = TRUE) ## ---- message=FALSE----------------------------------------------------------- library(MultiAssayExperiment) data <- createMAE(exp = GeneExp, met = Meth, met.platform = "450K", genome = "hg19", save = FALSE, TCGA = TRUE) data as.data.frame(colData(data)[,c("patient","definition","TN")]) %>% datatable(options = list(scrollX = TRUE,pageLength = 5)) # Adding sample information for non TCGA samples # You should have two objects with one for DNA methylation and # one for gene expression. They should have the same number of samples and the names of the # sample in the gene expression object and in hte DNA methylation matrix # should be the same not.tcga.exp <- GeneExp # 234 samples colnames(not.tcga.exp) <- substr(colnames(not.tcga.exp),1,15) not.tcga.met <- Meth # 268 samples colnames(not.tcga.met) <- substr(colnames(not.tcga.met),1,15) # Number of samples in both objects (234) table(colnames(not.tcga.met) %in% colnames(not.tcga.exp)) # Our sample information must have as row names the samples information phenotype.data <- data.frame(row.names = colnames(not.tcga.exp), primary = colnames(not.tcga.exp), group = c(rep("group1", ncol(GeneExp)/2), rep("group2", ncol(GeneExp)/2))) data.hg19 <- createMAE(exp = not.tcga.exp, met = not.tcga.met, TCGA = FALSE, met.platform = "450K", genome = "hg19", colData = phenotype.data) data.hg19 # The samples that does not have data for both DNA methylation and Gene exprssion will be removed even for the phenotype data phenotype.data <- data.frame(row.names = colnames(not.tcga.met), primary = colnames(not.tcga.met), group = c(rep("group1", ncol(Meth)/4), rep("group2", ncol(Meth)/4), rep("group3", ncol(Meth)/4), rep("group4", ncol(Meth)/4))) data.hg38 <- createMAE(exp = not.tcga.exp, met = not.tcga.met, TCGA = FALSE, save = FALSE, met.platform = "450K", genome = "hg38", colData = phenotype.data) data.hg38 as.data.frame(colData(data.hg38)[1:20,]) %>% datatable(options = list(scrollX = TRUE,pageLength = 5)) ## ---- message=FALSE----------------------------------------------------------- library(SummarizedExperiment, quietly = TRUE) rowRanges(getMet(data))[1:3,1:8] ## ----------------------------------------------------------------------------- rowRanges(getExp(data)) ## ---- message=FALSE----------------------------------------------------------- data <- createMAE(exp = GeneExp, met = Meth, genome = "hg19", save = FALSE, met.platform = "450K", TCGA = TRUE) # For TGCA data 1-12 represents the patient and 1-15 represents the sample ID (i.e. primary solid tumor samples ) all(substr(colnames(getExp(data)),1,15) == substr(colnames(getMet(data)),1,15)) # See sample information for data as.data.frame(colData(data)) %>% datatable(options = list(scrollX = TRUE)) # See sample names for each experiment as.data.frame(sampleMap(data)) %>% datatable(options = list(scrollX = TRUE)) ## ---- message=FALSE----------------------------------------------------------- # NON TCGA example: matrices has different column names gene.exp <- S4Vectors::DataFrame(sample1.exp = c("ENSG00000141510"=2.3,"ENSG00000171862"=5.4), sample2.exp = c("ENSG00000141510"=1.6,"ENSG00000171862"=2.3)) dna.met <- S4Vectors::DataFrame(sample1.met = c("cg14324200"=0.5,"cg23867494"=0.1), sample2.met = c("cg14324200"=0.3,"cg23867494"=0.9)) sample.info <- S4Vectors::DataFrame(primary = c("sample1","sample2"), sample.type = c("Normal", "Tumor")) sampleMap <- S4Vectors::DataFrame( assay = c("Gene expression","DNA methylation","Gene expression","DNA methylation"), primary = c("sample1","sample1","sample2","sample2"), colname = c("sample1.exp","sample1.met","sample2.exp","sample2.met")) mae <- createMAE(exp = gene.exp, met = dna.met, sampleMap = sampleMap, met.platform ="450K", colData = sample.info, genome = "hg38") # You can also use sample Mapping and Sample information tables from a tsv file # You can use the createTSVTemplates function to create the tsv files readr::write_tsv(as.data.frame(sampleMap), path = "sampleMap.tsv") readr::write_tsv(as.data.frame(sample.info), path = "sample.info.tsv") mae <- createMAE(exp = gene.exp, met = dna.met, sampleMap = "sampleMap.tsv", met.platform ="450K", colData = "sample.info.tsv", genome = "hg38") mae # NON TCGA example: matrices has same column names gene.exp <- S4Vectors::DataFrame(sample1 = c("ENSG00000141510"=2.3,"ENSG00000171862"=5.4), sample2 = c("ENSG00000141510"=1.6,"ENSG00000171862"=2.3)) dna.met <- S4Vectors::DataFrame(sample1 = c("cg14324200"=0.5,"cg23867494"=0.1), sample2= c("cg14324200"=0.3,"cg23867494"=0.9)) sample.info <- S4Vectors::DataFrame(primary = c("sample1","sample2"), sample.type = c("Normal", "Tumor")) sampleMap <- S4Vectors::DataFrame( assay = c("Gene expression","DNA methylation","Gene expression","DNA methylation"), primary = c("sample1","sample1","sample2","sample2"), colname = c("sample1","sample1","sample2","sample2") ) mae <- createMAE(exp = gene.exp, met = dna.met, sampleMap = sampleMap, met.platform ="450K", colData = sample.info, genome = "hg38") mae