## ----style, eval=TRUE, echo = FALSE, results = 'asis'------------------------- BiocStyle::markdown() ## ---- echo=FALSE, warning=FALSE----------------------------------------------- library(knitr) library(RColorBrewer) ## ----------------------------------------------------------------------------- library(Harman) library(HarmanData) data(OLF) ## ----------------------------------------------------------------------------- table(olf.info) ## ----------------------------------------------------------------------------- olf.info[1:7,] ## ----------------------------------------------------------------------------- head(olf.data) ## ----runharman---------------------------------------------------------------- olf.harman <- harman(olf.data, expt=olf.info$Treatment, batch=olf.info$Batch, limit=0.95) ## ----------------------------------------------------------------------------- str(olf.harman) ## ---- fig.height=4, fig.width=7----------------------------------------------- plot(olf.harman) ## ---- fig.height=5, fig.width=5----------------------------------------------- arrowPlot(olf.harman, main='Arrowplot of correction') ## ----------------------------------------------------------------------------- summary(olf.harman) ## ---- echo=FALSE-------------------------------------------------------------- kable(olf.harman$stats) ## ---- fig.height=5, fig.width=5----------------------------------------------- arrowPlot(olf.harman, 1, 8) ## ---- fig.height=5, fig.width=5----------------------------------------------- arrowPlot(olf.harman, 8, 9) ## ---- fig.height=4, fig.width=7----------------------------------------------- par(mfrow=c(1,2)) pcaPlot(olf.harman, colBy='batch', pchBy='expt', main='Col by Batch') pcaPlot(olf.harman, colBy='expt', pchBy='batch', main='Col by Expt') ## ----------------------------------------------------------------------------- olf.data.corrected <- reconstructData(olf.harman) ## ----------------------------------------------------------------------------- olf.data.remade <- reconstructData(olf.harman, this='original') all.equal(as.matrix(olf.data), olf.data.remade) ## ---- fig.height=9, fig.width=7----------------------------------------------- olf.data.diff <- abs(as.matrix(olf.data) - olf.data.corrected) diff_rowvar <- apply(olf.data.diff, 1, var) by_rowvar <- order(diff_rowvar) par(mfrow=c(1,1)) image(x=1:ncol(olf.data.diff), y=1:nrow(olf.data.diff), z=t(olf.data.diff[by_rowvar, ]), xlab='Sample', ylab='Probeset', main='Harman probe adjustments (ordered by variance)', cex.main=0.7, col=brewer.pal(9, 'Reds')) ## ----cleanup1, echo=FALSE----------------------------------------------------- rm(list=ls()[grep("olf", ls())]) ## ----------------------------------------------------------------------------- require(Harman) require(HarmanData) data(IMR90) ## ---- echo=FALSE-------------------------------------------------------------- kable(imr90.info) ## ----------------------------------------------------------------------------- table(expt=imr90.info$Treatment, batch=imr90.info$Batch) ## ---- fig.height=4, fig.width=7----------------------------------------------- par(mfrow=c(1,2), mar=c(4, 4, 4, 2) + 0.1) imr90.pca <- prcomp(t(imr90.data), scale. = TRUE) prcompPlot(imr90.pca, pc_x=1, pc_y=2, colFactor=imr90.info$Batch, pchFactor=imr90.info$Treatment, main='IMR90, PC1 v PC2') prcompPlot(imr90.pca, pc_x=1, pc_y=3, colFactor=imr90.info$Batch, pchFactor=imr90.info$Treatment, main='IMR90, PC1 v PC3') ## ----------------------------------------------------------------------------- imr90.hm <- harman(as.matrix(imr90.data), expt=imr90.info$Treatment, batch=imr90.info$Batch) ## ---- echo=FALSE-------------------------------------------------------------- kable(imr90.hm$stats) ## ---- fig.height=4, fig.width=7----------------------------------------------- plot(imr90.hm, pc_x=1, pc_y=2) plot(imr90.hm, pc_x=1, pc_y=3) ## ---- fig.height=5, fig.width=5----------------------------------------------- arrowPlot(imr90.hm, pc_x=1, pc_y=3) ## ---- fig.height=4, fig.width=7----------------------------------------------- imr90.hm <- harman(as.matrix(imr90.data), expt=imr90.info$Treatment, batch=imr90.info$Batch, limit=0.9) plot(imr90.hm, pc_x=1, pc_y=3) ## ---- echo=FALSE-------------------------------------------------------------- kable(imr90.hm$stats) ## ---- fig.height=4, fig.width=7----------------------------------------------- imr90.data.corrected <- reconstructData(imr90.hm) par(mfrow=c(1,2)) prcompPlot(imr90.data, 1, 3, colFactor=imr90.hm$factors$batch, cex=1.5, main='PCA, Original') prcompPlot(imr90.data.corrected, 1, 3, colFactor=imr90.hm$factors$batch, cex=1.5, main='PCA, Corrected') ## ----cleanup2, echo=FALSE----------------------------------------------------- rm(list=ls()[grep("imr90", ls())]) ## ----affydata, warning=FALSE, message=FALSE, comment=FALSE-------------------- library(affydata, quietly = TRUE, warn.conflicts = FALSE) data(Dilution) Dilution.log <- Dilution intensity(Dilution.log) <- log(intensity(Dilution)) cols <- brewer.pal(nrow(pData(Dilution)), 'Paired') ## ---- echo=FALSE-------------------------------------------------------------- kable(pData(Dilution)) ## ---- fig.height=4, fig.width=4----------------------------------------------- boxplot(Dilution, col=cols) ## ----normalize, fig.height=7, fig.width=9------------------------------------- Dilution.loess <- normalize(Dilution.log, method="loess") Dilution.qnt <- normalize(Dilution.log, method="quantiles") # define a quick PCA and plot function pca <- function(exprs, pc_x=1, pc_y=2, cols, ...) { pca <- prcomp(t(exprs), retx=TRUE, center=TRUE) if(is.factor(cols)) { factor_names <- levels(cols) num_levels <- length(factor_names) palette <- rainbow(num_levels) mycols <- palette[cols] } else { mycols <- cols } plot(pca$x[, pc_x], pca$x[, pc_y], xlab=paste('PC', pc_x, sep=''), ylab=paste('PC', pc_y, sep=''), col=mycols, ...) if(is.factor(cols)) { legend(x=min(pca$x[, pc_x]), y=max(pca$x[, pc_y]), legend=factor_names, fill=palette, cex=0.5) } } par(mfrow=c(1,2), mar=c(4, 4, 4, 2) + 0.1) boxplot(Dilution.loess, col=cols, main='Loess') boxplot(Dilution.qnt, col=cols, main='Quantile') pca(exprs(Dilution.loess), cols=cols, cex=1.5, pch=16, main='PCA Loess') pca(exprs(Dilution.qnt), cols=cols, cex=1.5, pch=16, main='PCA Quantile') ## ----affydata_harman1--------------------------------------------------------- library(Harman) loess.hm <- harman(exprs(Dilution.loess), expt=pData(Dilution.loess)$liver, batch=pData(Dilution.loess)$scanner) qnt.hm <- harman(exprs(Dilution.qnt), expt=pData(Dilution.qnt)$liver, batch=pData(Dilution.qnt)$scanner) ## ---- echo=FALSE, fig.align='left'-------------------------------------------- kable(loess.hm$stats) ## ---- echo=FALSE, fig.align='left'-------------------------------------------- kable(qnt.hm$stats) ## ----affydata_harman2--------------------------------------------------------- loess.hm <- harman(exprs(Dilution.loess), expt=pData(Dilution.loess)$liver, batch=pData(Dilution.loess)$scanner, limit=0.75) qnt.hm <- harman(exprs(Dilution.qnt), expt=pData(Dilution.qnt)$liver, batch=pData(Dilution.qnt)$scanner, limit=0.75) ## ---- echo=FALSE-------------------------------------------------------------- kable(loess.hm$stats) ## ---- echo=FALSE-------------------------------------------------------------- kable(qnt.hm$stats) ## ---- fig.height=4, fig.width=7----------------------------------------------- par(mfrow=c(2,2), mar=c(4, 4, 4, 2) + 0.1) plot(loess.hm, 1, 2, pch=17, col=cols) plot(qnt.hm, 1, 2, pch=17, col=cols) ## ----affydata_reconstruct----------------------------------------------------- Dilution.loess.hm <- Dilution.loess Dilution.qnt.hm <- Dilution.qnt exprs(Dilution.loess.hm) <- reconstructData(loess.hm) exprs(Dilution.qnt.hm) <- reconstructData(qnt.hm) ## ----------------------------------------------------------------------------- detach('package:affydata') ## ---- message=FALSE, warning=FALSE-------------------------------------------- library(bladderbatch) library(Harman) # This loads an ExpressionSet object called bladderEset data(bladderdata) bladderEset ## ---- echo=FALSE-------------------------------------------------------------- kable(pData(bladderEset)) ## ----------------------------------------------------------------------------- table(batch=pData(bladderEset)$batch, expt=pData(bladderEset)$outcome) ## ---- fig.height=4, fig.width=7----------------------------------------------- par(mfrow=c(1,2)) prcompPlot(exprs(bladderEset), colFactor=pData(bladderEset)$batch, pchFactor=pData(bladderEset)$outcome, main='Col by Batch') prcompPlot(exprs(bladderEset), colFactor=pData(bladderEset)$outcome, pchFactor=pData(bladderEset)$batch, main='Col by Expt') ## ---- cache=FALSE------------------------------------------------------------- expt <- pData(bladderEset)$outcome batch <- pData(bladderEset)$batch bladder_harman <- harman(exprs(bladderEset), expt=expt, batch=batch) sum(bladder_harman$stats$correction < 1) ## ----------------------------------------------------------------------------- summary(bladder_harman) ## ---- fig.height=4, fig.width=7----------------------------------------------- par(mfrow=c(1,1)) plot(bladder_harman) ## ---- fig.height=4, fig.width=7----------------------------------------------- plot(bladder_harman, 1, 3) ## ---- fig.height=5, fig.width=5----------------------------------------------- arrowPlot(bladder_harman, 1, 3) ## ----------------------------------------------------------------------------- CorrectedBladderEset <- bladderEset exprs(CorrectedBladderEset) <- reconstructData(bladder_harman) comment(bladderEset) <- 'Original' comment(CorrectedBladderEset) <- 'Corrected' ## ---- message=FALSE, warning=FALSE-------------------------------------------- library(limma, quietly=TRUE) design <- model.matrix(~0 + expt) colnames(design) <- c("Biopsy", "mTCC", "Normal", "sTCC", "sTCCwCIS") contrast_matrix <- makeContrasts(sTCCwCIS - sTCC, sTCCwCIS - Normal, Biopsy - Normal, levels=design) fit <- lmFit(exprs(bladderEset), design) fit2 <- contrasts.fit(fit, contrast_matrix) fit2 <- eBayes(fit2) summary(decideTests(fit2)) ## ----------------------------------------------------------------------------- fit_hm <- lmFit(exprs(CorrectedBladderEset), design) fit2_hm <- contrasts.fit(fit_hm, contrast_matrix) fit2_hm <- eBayes(fit2_hm) summary(decideTests(fit2_hm)) ## ----cleanup_bladderbatch, echo=FALSE----------------------------------------- rm(list=ls()[!grepl("^pca$", ls())]) detach('package:bladderbatch') ## ---- message=FALSE, warning=FALSE-------------------------------------------- library(minfi, quietly = TRUE) logit2 ilogit2 ilogit2(Inf) ilogit2(-Inf) ## ----------------------------------------------------------------------------- betas <- seq(0, 1, by=0.05) betas newBetas <- shiftBetas(betas, shiftBy=1e-4) newBetas range(betas) range(newBetas) logit2(betas) logit2(newBetas) ## ----loadminfidata, message=FALSE, warning=FALSE------------------------------ library(minfiData, quietly = TRUE) baseDir <- system.file("extdata", package="minfiData") targets <- read.metharray.sheet(baseDir) ## ----read450K----------------------------------------------------------------- rgSet <- read.metharray.exp(targets=targets) mSetSw <- preprocessSWAN(rgSet) mSet <- preprocessIllumina(rgSet, bg.correct=TRUE, normalize="controls", reference=2) detP <- detectionP(rgSet) keep <- rowSums(detP < 0.01) == ncol(rgSet) mSetIl <- mSet[keep,] mSetSw <- mSetSw[keep,] ## ---- echo=FALSE-------------------------------------------------------------- kable(pData(mSetSw)[,c('Sample_Group', 'person', 'sex', 'status', 'Array', 'Slide')]) ## ----mdsplots, fig.height=5, fig.width=7-------------------------------------- par(mfrow=c(1, 1)) cancer_by_gender_factor <- as.factor(paste(pData(mSetSw)$sex, pData(mSetSw)$status) ) mdsPlot(mSetSw, sampGroups=cancer_by_gender_factor, pch=16) mdsPlot(mSetSw, sampGroups=as.factor(pData(mSet)$Slide), pch=16) ## ----------------------------------------------------------------------------- table(gender=pData(mSetSw)$sex, slide=pData(mSetSw)$Slide) ## ----------------------------------------------------------------------------- cancer_by_gender_factor <- as.factor(paste(pData(mSetSw)$sex, pData(mSetSw)$status) ) table(expt=cancer_by_gender_factor, batch=pData(mSetSw)$Slide) ## ----------------------------------------------------------------------------- table(expt=pData(mSetSw)$status, batch=pData(mSetSw)$Slide) ## ----shift, fig.height=7, fig.width=7----------------------------------------- par(mfrow=c(2,2)) library(lumi, quietly = TRUE) shifted_betas <- shiftBetas(betas=getBeta(mSetIl), shiftBy=1e-7) shifted_ms <- beta2m(shifted_betas) # same as logit2() from package minfi plot(density(shifted_ms, 0.05), main="Shifted M-values, shiftBy = 1e-7", cex.main=0.7) shifted_betas <- shiftBetas(betas=getBeta(mSetIl), shiftBy=1e-4) shifted_ms <- beta2m(shifted_betas) # same as logit2() from package minfi plot(density(shifted_ms, 0.05), main="Shifted M-values, shiftBy = 1e-4", cex.main=0.7) plot(density(beta2m(getBeta(mSetSw)), 0.05), main="SWAN normalised M-values", cex.main=0.7) ## ---- fig.height=4, fig.width=7----------------------------------------------- par(mfrow=c(1,2)) plot(density(shifted_betas, 0.1), main="Beta-values, shiftBy = 1e-4", cex.main=0.7) plot(density(shifted_ms, 0.1), main="M-values, shiftBy = 1e-4", cex.main=0.7) ## ---- fig.height=4, fig.width=7----------------------------------------------- par(mfrow=c(1,2)) plot(density(shifted_ms, 0.1), main="GenomeStudio-like M-values, shiftBy = 1e-4", cex.main=0.7) plot(density(getM(mSetSw), 0.1), main="SWAN M-values", cex.main=0.7) ## ---- fig.height=4, fig.width=7----------------------------------------------- methHarman <- harman(getM(mSetSw), expt=pData(mSetSw)$status, batch=pData(mSetSw)$Slide, limit=0.65) summary(methHarman) plot(methHarman, 2, 5) ## ----------------------------------------------------------------------------- ms_hm <- reconstructData(methHarman) betas_hm <- m2beta(ms_hm) ## ----------------------------------------------------------------------------- library(HarmanData) data(Infinium5) lvr.harman[1:2, ] md.harman[1:2, ] ## ----------------------------------------------------------------------------- betas_sml <- getBeta(mSetSw)[1:1000, ] betas_hm_sml <- betas_hm[1:1000, ] myK <- discoverClusteredMethylation(betas_sml, min_cluster_size = 3, cores=2, printInfo = TRUE) mykClust = kClusterMethylation(betas_sml, row_ks=myK, cores=2) res <- clusterStats(pre_betas=betas_sml, post_betas=betas_hm_sml, kClusters = mykClust) res[1:2, ] ## ----------------------------------------------------------------------------- library(HarmanData) data(episcope) bad_batches <- c(1, 5, 9, 17, 25) is_bad_sample <- episcope$pd$array_num %in% bad_batches myK <- discoverClusteredMethylation(episcope$original[, !is_bad_sample]) mykClust <- kClusterMethylation(episcope$original, row_ks=myK) res <- clusterStats(pre_betas=episcope$original, post_betas=episcope$harman, kClusters = mykClust) res[1:2, ] ## ----clust_plot, fig.height=7, fig.width=7------------------------------------ slide_cols <- rep(brewer.pal(9, "Set1"), times=5)[as.factor(episcope$pd$array_num)] myplot <- function(probe) { plot(episcope$original[probe, ], main=paste(probe, ". Pre-var: ", round(res[probe, "pre_withinvar"], 5), sep=""), xlab="", ylab="Beta", cex=0.7, cex.main=0.9, ylim=c(0, 1), col=slide_cols) plot(episcope$harman[probe, ], main=paste(probe, ". Post-var: ", round(res[probe, "post_withinvar"], 5), ", LVR: ", round(res[probe, "log2_var_ratio"], 2), sep=""), xlab="", ylab="Beta", cex=0.7, cex.main=0.9, ylim=c(0, 1), col=slide_cols) } par(mfrow=c(2,2)) myplot("cg04294190") myplot("cg25465065") ## ----------------------------------------------------------------------------- # Erroneously corrected res$probe_id[res$log2_var_ratio > log2(1.5) & res$meandiffs > 0.01] # Batch-effect susceptible res$probe_id[res$log2_var_ratio < log2(1/1.5) & res$meandiffs > 0.01] ## ----------------------------------------------------------------------------- is_epic <- grepl("BodyFatness|NOVI|URECA", colnames(lvr.harman)) epic_lvr <- na.omit(lvr.harman[, is_epic]) epic_md <- na.omit(md.harman[, is_epic]) sum_batchy_lvr <- apply(epic_lvr, 1, function(p) sum(p < log2(1/1.5))) sum_geno_lvr <- apply(epic_lvr, 1, function(p) sum(p > log2(1.5))) sum_md <- apply(epic_md, 1, function(p) sum(p > 0.01)) table(sum_batchy_lvr & sum_md) table(sum_geno_lvr & sum_md) ## ----limma-------------------------------------------------------------------- library(limma) group <- factor(pData(mSetSw)$status, levels=c("normal", "cancer")) id <- factor(pData(mSetSw)$person) design <- model.matrix(~id + group) design ## ----fit---------------------------------------------------------------------- fit_hm <- lmFit(ms_hm, design) fit_hm <- eBayes(fit_hm) fit <- lmFit(getM(mSetSw), design) fit <- eBayes(fit) ## ----limma_output------------------------------------------------------------- summary_fit_hm <- summary(decideTests(fit_hm)) summary_fit <- summary(decideTests(fit)) summary_fit_hm summary_fit ## ---- echo=FALSE, warning=FALSE, message=FALSE-------------------------------- rm(list=ls()[!grepl("^pca$", ls())]) #detach('package:minfiData', force = TRUE) #detach('package:lumi', force = TRUE) #detach('package:minfi', force = TRUE) #detach('package:IlluminaHumanMethylation450kanno.ilmn12.hg19', force = TRUE) ## ----msms, message=FALSE, warning=FALSE--------------------------------------- # Call dependencies library(msmsEDA) library(Harman) data(msms.dataset) msms.dataset ## ----msms_pp------------------------------------------------------------------ # Preprocess to remove rows which are all 0 and replace NA values with 0. msms_pp <- pp.msms.data(msms.dataset) ## ---- echo=FALSE-------------------------------------------------------------- kable(pData(msms_pp)) ## ----------------------------------------------------------------------------- # Create experimental and batch variables expt <- pData(msms_pp)$treat batch <- pData(msms_pp)$batch table(expt, batch) ## ----msms_harman-------------------------------------------------------------- # Log2 transform data, add 1 to avoid infinite values log_ms_exprs <- log(exprs(msms_pp) + 1, 2) # Correct data with Harman hm <- harman(log_ms_exprs, expt=expt, batch=batch) summary(hm) ## ---- fig.height=4, fig.width=7----------------------------------------------- plot(hm) ## ----------------------------------------------------------------------------- # Reconstruct data and convert from Log2, removing 1 as we added it before. log_ms_exprs_hm <- reconstructData(hm) ms_exprs_hm <- 2^log_ms_exprs_hm - 1 # Place corrected data into a new 'MSnSet' instance msms_pp_hm <- msms_pp exprs(msms_pp_hm) <- ms_exprs_hm ## ---- echo=FALSE-------------------------------------------------------------- rm(list=ls()[!grepl("^pca$", ls())]) detach('package:msmsEDA') ## ----------------------------------------------------------------------------- table(imr90.info) ## ---- message=FALSE, warning=FALSE-------------------------------------------- library(HarmanData) library(sva) modcombat <- model.matrix(~1, data=imr90.info) imr90.data.cb <- ComBat(dat=as.matrix(imr90.data), batch=imr90.info$Batch, mod=modcombat, par.prior=TRUE, prior.plots=FALSE) imr90.hr <- harman(imr90.data, expt = imr90.info$Treatment, batch = imr90.info$Batch) imr90.data.hr <- reconstructData(imr90.hr) prcompPlot(imr90.data, pc_x = 1, pc_y = 3, colFactor = imr90.info$Batch, pchFactor = imr90.info$Treatment, main='PC1 versus PC3') arrowPlot(imr90.hr, pc_x = 1, pc_y = 3, main='PC1 versus PC3') ## ----------------------------------------------------------------------------- library(RColorBrewer) diffMap <- function(a, b, feature_order=1:nrow(a), batch, ...) { image(t(abs(a[feature_order, ] - b[feature_order, ])), col = brewer.pal(9, "Greys"), xaxt='n', yaxt='n', xlab='Batch', ylab='Ordered feature', ...) axis(1, at=seq(0, 1, length.out=length(batch)), labels=batch) } ## ---- fig.height=7, fig.width=7----------------------------------------------- probe_diffs_hr <- order(rowSums(abs(as.matrix(imr90.data - imr90.data.hr)))) probe_diffs_cb <- order(rowSums(abs(as.matrix(imr90.data - imr90.data.cb)))) diffMap(a=imr90.data, b=imr90.data.hr, feature_order=probe_diffs_hr, batch=imr90.info$Batch, main='Original - Harman') ## ---- fig.height=7, fig.width=7----------------------------------------------- diffMap(a=imr90.data, b=imr90.data.cb, feature_order=probe_diffs_hr, batch=imr90.info$Batch, main='Original - ComBat') ## ---- fig.height=7, fig.width=7----------------------------------------------- diffMap(a=imr90.data.hr, b=imr90.data.cb, feature_order=probe_diffs_hr, batch=imr90.info$Batch, main='Harman - ComBat')