## ----load-libs, message=FALSE---------------------------------------------- library(missMethyl) library(limma) library(minfi) ## ----reading-data, message=FALSE------------------------------------------- library(minfiData) baseDir <- system.file("extdata", package = "minfiData") targets <- read.metharray.sheet(baseDir) targets[,1:9] targets[,10:12] rgSet <- read.metharray.exp(targets = targets) ## ----ppraw----------------------------------------------------------------- mSet <- preprocessRaw(rgSet) ## ----swan------------------------------------------------------------------ mSetSw <- SWAN(mSet,verbose=TRUE) ## ----betasByType,fig.height=5,fig.width=10,fig.retina=1,fig.cap="Density distributions of $\beta$ values before and after using SWAN."---- par(mfrow=c(1,2), cex=1.25) densityByProbeType(mSet[,1], main = "Raw") densityByProbeType(mSetSw[,1], main = "SWAN") ## ----filtering------------------------------------------------------------- detP <- detectionP(rgSet) keep <- rowSums(detP < 0.01) == ncol(rgSet) mSetSw <- mSetSw[keep,] ## ----extraction------------------------------------------------------------ mset_reduced <- mSetSw[sample(1:nrow(mSetSw), 20000),] meth <- getMeth(mset_reduced) unmeth <- getUnmeth(mset_reduced) Mval <- log2((meth + 100)/(unmeth + 100)) beta <- getBeta(mset_reduced) dim(Mval) ## ----mdsplot,fig.height=5,fig.width=5,fig.cap="A multi-dimensional scaling (MDS) plot of cancer and normal samples."---- par(mfrow=c(1,1)) plotMDS(Mval, labels=targets$Sample_Name, col=as.integer(factor(targets$status))) legend("topleft",legend=c("Cancer","Normal"),pch=16,cex=1.2,col=1:2) ## ----design---------------------------------------------------------------- group <- factor(targets$status,levels=c("normal","cancer")) id <- factor(targets$person) design <- model.matrix(~id + group) design ## ----diffmeth-------------------------------------------------------------- fit.reduced <- lmFit(Mval,design) fit.reduced <- eBayes(fit.reduced) ## ----diffmeth-results------------------------------------------------------ summary(decideTests(fit.reduced)) top<-topTable(fit.reduced,coef=4) top ## ----top4,fig.width=10,fig.height=10,fig.cap="The $\beta$ values for the top 4 differentially methylated CpGs."---- cpgs <- rownames(top) par(mfrow=c(2,2)) for(i in 1:4){ stripchart(beta[rownames(beta)==cpgs[i],]~design[,4],method="jitter", group.names=c("Normal","Cancer"),pch=16,cex=1.5,col=c(4,2),ylab="Beta values", vertical=TRUE,cex.axis=1.5,cex.lab=1.5) title(cpgs[i],cex.main=1.5) } ## ----diffmeth2------------------------------------------------------------- # get M-values for ALL probes meth <- getMeth(mSet) unmeth <- getUnmeth(mSet) M <- log2((meth + 100)/(unmeth + 100)) grp <- factor(targets$status,levels=c("normal","cancer")) des <- model.matrix(~grp) des INCs <- getINCs(rgSet) head(INCs) Mc <- rbind(M,INCs) ctl <- rownames(Mc) %in% rownames(INCs) table(ctl) rfit1 <- RUVfit(data=Mc, design=des, coef=2, ctl=ctl) # Stage 1 analysis rfit2 <- RUVadj(rfit1) ## ----ruv1------------------------------------------------------------------ top1 <- topRUV(rfit2, num=Inf) head(top1) ctl <- rownames(M) %in% rownames(top1[top1$p.ebayes.BH > 0.5,]) table(ctl) ## ----ruv2------------------------------------------------------------------ # Perform RUV adjustment and fit rfit1 <- RUVfit(data=M, design=des, coef=2, ctl=ctl) # Stage 2 analysis rfit2 <- RUVadj(rfit1) # Look at table of top results topRUV(rfit2) ## ----diffvar--------------------------------------------------------------- fitvar <- varFit(Mval, design = design, coef = c(1,4)) ## ----diffvar-results------------------------------------------------------- summary(decideTests(fitvar)) topDV <- topVar(fitvar, coef=4) topDV ## ----alternative----------------------------------------------------------- design2 <- model.matrix(~0+group+id) fitvar.contr <- varFit(Mval, design=design2, coef=c(1,2)) contr <- makeContrasts(groupcancer-groupnormal,levels=colnames(design2)) fitvar.contr <- contrasts.varFit(fitvar.contr,contrasts=contr) ## ----altresults------------------------------------------------------------ summary(decideTests(fitvar.contr)) topVar(fitvar.contr,coef=1) ## ----top4DV,fig.width=10,fig.height=10,fig.cap="The $\beta$ values for the top 4 differentially variable CpGs."---- cpgsDV <- rownames(topDV) par(mfrow=c(2,2)) for(i in 1:4){ stripchart(beta[rownames(beta)==cpgsDV[i],]~design[,4],method="jitter", group.names=c("Normal","Cancer"),pch=16,cex=1.5,col=c(4,2),ylab="Beta values", vertical=TRUE,cex.axis=1.5,cex.lab=1.5) title(cpgsDV[i],cex.main=1.5) } ## ----loadingdata----------------------------------------------------------- library(tweeDEseqCountData) data(pickrell1) counts<-exprs(pickrell1.eset) dim(counts) gender <- pickrell1.eset$gender table(gender) rm(pickrell1.eset) data(genderGenes) data(annotEnsembl63) annot <- annotEnsembl63[,c("Symbol","Chr")] rm(annotEnsembl63) ## ----dgelist--------------------------------------------------------------- library(edgeR) y <- DGEList(counts=counts, genes=annot[rownames(counts),]) ## ----dgelist-filtering----------------------------------------------------- isexpr <- rowSums(cpm(y)>1) >= 20 hasannot <- rowSums(is.na(y$genes))==0 y <- y[isexpr & hasannot,,keep.lib.sizes=FALSE] dim(y) y <- calcNormFactors(y) ## ----testhapmap------------------------------------------------------------ design.hapmap <- model.matrix(~gender) fitvar.hapmap <- varFit(y, design = design.hapmap) fitvar.hapmap$genes <- y$genes ## ----resultshapmap--------------------------------------------------------- summary(decideTests(fitvar.hapmap)) topDV.hapmap <- topVar(fitvar.hapmap,coef=ncol(design.hapmap)) topDV.hapmap ## ----top4DVhapmap,fig.width=10,fig.height=10,fig.cap="The log counts per million for the top 4 differentially variably expressed genes."---- genesDV <- rownames(topDV.hapmap) par(mfrow=c(2,2)) for(i in 1:4){ stripchart(cpm(y,log=TRUE)[rownames(y)==genesDV[i],]~design.hapmap[,ncol(design.hapmap)],method="jitter", group.names=c("Female","Male"),pch=16,cex=1.5,col=c(4,2),ylab="Log counts per million", vertical=TRUE,cex.axis=1.5,cex.lab=1.5) title(genesDV[i],cex.main=1.5) } ## ----gometh1--------------------------------------------------------------- topRUV(rfit2) table(rfit2$p.ebayes.BH < 0.01) ## ----gometh2--------------------------------------------------------------- beta <- getBeta(mSet) beta_norm <- rowMeans(beta[,des[,2]==0]) beta_can <- rowMeans(beta[,des[,2]==1]) Delta_beta <- beta_can - beta_norm sigDM <- rfit2$p.ebayes.BH < 0.01 & abs(Delta_beta) > 0.25 table(sigDM) ## ----gometh3--------------------------------------------------------------- topCpGs<-topRUV(rfit2,number=10000) sigCpGs <- rownames(topCpGs) sigCpGs[1:10] ## ----gometh4--------------------------------------------------------------- library(IlluminaHumanMethylation450kanno.ilmn12.hg19) gst <- gometh(sig.cpg=sigCpGs, all.cpg=rownames(rfit2), collection="GO") topGO(gst) ## ----gsameth--------------------------------------------------------------- library(org.Hs.eg.db) genes <- toTable(org.Hs.egSYMBOL2EG) set1 <- sample(genes$gene_id,size=80) set2 <- sample(genes$gene_id,size=100) set3 <- sample(genes$gene_id,size=30) genesets <- list(set1,set2,set3) gsa <- gsameth(sig.cpg=sigCpGs, all.cpg=rownames(rfit2), collection=genesets) topGSA(gsa) ## ----sessionInfo, eval=TRUE, results='asis'-------------------------------- sessionInfo()