IHWpaper 1.6.0
In this vignette we will reproduce Figure 1 of the paper, which includes applications of IHW to RNASeq, proteomics and hQTL data.
library("IHW")
library("dplyr")
library("ggplot2")
library("grid")
library("tidyr")
library("cowplot")
library("RColorBrewer")
library("scales")
library("IHWpaper")
Define colors and names for methods we will be using throughout this vignette.
pretty_colors <- scales::hue_pal(h = c(0, 360) + 15, c = 100, l = 65, h.start = 0,direction = 1)(5)
pretty_names <- c("IHW", "BH", "Indep. Filt. \n 10 kb", "Indep. Filt. \n 200 kb", "Indep. Filt. \n 1 Mb")
rnaseq_file <- system.file("real_data_examples/result_files", "RNAseq_benchmark.Rds", package = "IHWpaper")
rnaseq_data <- readRDS(file=rnaseq_file)
panel_a_data <- group_by(rnaseq_data$alpha_df, alpha) %>% summarize(BH = max(bh_rejections), IHW=max(rejections)) %>%
gather(method, rejections, BH, IHW) %>%
mutate(method = factor(as.character(method), levels=pretty_names))
last_vals_a <- group_by(panel_a_data, method) %>%
summarize(last_vals = max(rejections)) %>%
mutate(label = method,
colour = pretty_colors[match(label, pretty_names)])
panel_a <- ggplot(panel_a_data, aes(x=alpha,y=rejections,col=method)) +
geom_line(size=1.2) +
xlab(expression(bold(paste("Nominal ",alpha)))) +
ylab("Discoveries") +
scale_color_manual(values=pretty_colors)+
scale_x_continuous(expand=c(0,0)) +
theme(plot.margin = unit(c(1, 7.5, 2, 1), "lines"))+
theme(axis.title = element_text(face="bold" ))
panel_a <- pretty_legend(panel_a, last_vals_a, 0.102)
panel_a
breaks <- rnaseq_data$breaks
break_min <- rnaseq_data$break_min
breaks_left <- c(break_min,breaks[-length(breaks)])
step_df <- mutate(rnaseq_data$alpha_df, baseMean_left = breaks_left[stratum], baseMean_right = breaks[stratum],
baseMean_ratio = baseMean_right/baseMean_left ,
baseMean_left = baseMean_left * baseMean_ratio^.2,
baseMean_right = baseMean_right *baseMean_ratio^(-.2))
stratum_fun <- function(df){
stratum <- df$stratum
weight <- df$weight
stratum_left <- stratum[stratum != length(stratum)]
weight_left <- weight[stratum_left]
baseMean_left <- df$baseMean_right[stratum_left]
stratum_right <- stratum[stratum != 1]
weight_right <- weight[stratum_right]
baseMean_right <- df$baseMean_left[stratum_right]
data.frame(stratum_left= stratum_left, weight_left= weight_left,
stratum_right = stratum_right, weight_right = weight_right,
baseMean_left = baseMean_left, baseMean_right = baseMean_right)
}
connect_df <- filter(step_df, alpha==0.1) %>% group_by(fold) %>%
do(stratum_fun(.)) %>%
mutate(dashed = factor(ifelse(abs(weight_left - weight_right) > 10^(-4) , TRUE, FALSE),
levels=c(FALSE,TRUE)))
panel_b <- ggplot(filter(step_df, alpha==0.1),
aes(x=baseMean_left,
xend=baseMean_right,
y=weight, yend=weight, col=fold)) +
geom_segment(size=0.8)+
geom_segment(data= connect_df, aes(x=baseMean_left, xend=baseMean_right,
y=weight_left, yend=weight_right,
linetype=dashed),
size=0.8)+
scale_x_log10(breaks=c(1,10,100,1000,10000))+
xlab("Mean of normalized counts")+
ylab("Weight")+
theme(legend.position=c(0.8,0.4)) +
theme(plot.margin = unit(c(1, 1, 2, 1), "lines")) +
guides(linetype=FALSE)+
theme(axis.title = element_text(face="bold" ))
panel_b
proteomics_file <- system.file("real_data_examples/result_files", "proteomics_benchmark.Rds", package = "IHWpaper")
proteomics_data <- readRDS(file=proteomics_file)
panel_c_data <- group_by(proteomics_data$alpha_df, alpha) %>% summarize(BH = max(bh_rejections), IHW=max(rejections)) %>%
gather(method, rejections, BH, IHW) %>%
mutate(method = factor(as.character(method), levels=pretty_names))
last_vals_c <- group_by(panel_c_data, method) %>%
summarize(last_vals = max(rejections)) %>%
mutate(label = method,
colour = pretty_colors[match(label, pretty_names)])
panel_c <- ggplot(panel_c_data, aes(x=alpha,y=rejections,col=method)) +
geom_line(size=1.2) +
xlab(expression(bold(paste("Nominal ",alpha)))) +
ylab("Discoveries") +
scale_color_manual(values=pretty_colors)+
scale_x_continuous(expand=c(0,0))+
theme(plot.margin = unit(c(1, 7.5, 2, 1), "lines"))+
theme(axis.title = element_text(face="bold" ))
panel_c <- pretty_legend(panel_c, last_vals_c, 0.102)
panel_c
breaks <- proteomics_data$breaks
break_min <- proteomics_data$break_min
breaks_left <- c(break_min,breaks[-length(breaks)])
step_df <- mutate(filter(proteomics_data$alpha_df , alpha==0.1), break_left = breaks_left[stratum],
break_right = breaks[stratum],
break_ratio = break_right/break_left ,
break_left =break_left * break_ratio^.1,
break_right = break_right *break_ratio^(-.1))
stratum_fun <- function(df){
stratum <- df$stratum
weight <- df$weight
stratum_left <- stratum[stratum != length(stratum)]
weight_left <- weight[stratum_left]
break_left <- df$break_right[stratum_left]
stratum_right <- stratum[stratum != 1]
weight_right <- weight[stratum_right]
break_right <- df$break_left[stratum_right]
data.frame(stratum_left= stratum_left, weight_left= weight_left,
stratum_right = stratum_right, weight_right = weight_right,
break_left = break_left, break_right = break_right)
}
connecting_df <- step_df %>% group_by(fold) %>%
do(stratum_fun(.)) %>%
mutate(dashed = factor(ifelse(abs(weight_left - weight_right) > 10^(-5) , TRUE, FALSE),
levels=c(FALSE,TRUE)))
panel_d <- ggplot(step_df, aes(x=break_left, xend=break_right,y=weight, yend=weight, col=fold)) +
geom_segment(size=0.8)+
geom_segment(data= connecting_df, aes(x=break_left, xend=break_right,
y=weight_left, yend=weight_right,
linetype=dashed),
size=0.8)+
scale_x_log10(breaks=c(1, 10,100,400)) +
xlab("Number of peptides quantified")+
ylab("Weight")+
theme(legend.position=c(0.8,0.4)) +
theme(plot.margin = unit(c(1, 1, 2, 1), "lines"))+
guides(linetype=FALSE)+
theme(axis.title = element_text(face="bold" ))
panel_d
hqtl_file <- system.file("real_data_examples/result_files", "hQTL_benchmark.Rds", package = "IHWpaper")
hqtl_data <- readRDS(file=hqtl_file)
hqtl_summary <- group_by(hqtl_data$alpha_df, alpha) %>% gather(method, rejections, 6:10) %>%
select(alpha, method, rejections)
# will use below later to convert to nicer names for our plot
# see http://stackoverflow.com/questions/7547597/dictionary-style-replace-multiple-items-in-r
map <- setNames(pretty_names,
c("rejections","bh_rejections","threshold:10000","threshold:2e+05", "threshold:1e+06"))
hqtl_summary <- mutate(hqtl_summary, method = map[method],
method= factor(method, levels = pretty_names))
last_vals_e <- group_by(hqtl_summary, method) %>%
summarize(last_vals = max(rejections)) %>%
mutate(last_vals = last_vals + c(0,0, 250, 0, -350), # offset to make it look nice
label = method,
colour = pretty_colors[match(label, pretty_names)])
panel_e <- ggplot(hqtl_summary, aes(x=alpha,y=rejections,col=method)) +
geom_line(size=1.2) +
xlab(expression(bold(paste("Nominal ",alpha)))) +
ylab("Discoveries") +
scale_x_continuous(expand=c(0,0))+
scale_color_manual(values=pretty_colors)+
theme(plot.margin = unit(c(1, 7.5, 2, 1), "lines"))+
theme(axis.title = element_text(face="bold" ))
panel_e <- pretty_legend(panel_e, last_vals_e, 0.102)
panel_e
breaks <- hqtl_data$breaks
breaks <- breaks[-1]
break_min <- hqtl_data$break_min
breaks_left <- c(break_min,breaks[-length(breaks)])
step_df <- mutate(filter(hqtl_data$alpha_df,alpha==0.1), break_left = breaks_left[stratum],
break_right = breaks[stratum],
break_ratio = break_right/break_left ,
break_left =break_left * break_ratio^.2,
break_right = break_right *break_ratio^(-.2))
stratum_fun <- function(df){
stratum <- df$stratum
weight <- df$weight
stratum_left <- stratum[stratum != length(stratum)]
weight_left <- weight[stratum_left]
break_left <- df$break_right[stratum_left]
stratum_right <- stratum[stratum != 1]
weight_right <- weight[stratum_right]
break_right <- df$break_left[stratum_right]
data.frame(stratum_left= stratum_left, weight_left= weight_left,
stratum_right = stratum_right, weight_right = weight_right,
break_left = break_left, break_right = break_right)
}
connecting_df <- step_df %>% group_by(fold) %>%
do(stratum_fun(.)) %>%
mutate(dashed = factor(ifelse(abs(weight_left - weight_right) > 2 , TRUE, FALSE),
levels=c(FALSE,TRUE)))
panel_f <- ggplot(step_df, aes(x=break_left, xend=break_right,y=weight, yend=weight, col=fold)) +
geom_segment(size=0.8)+
geom_segment(data= connecting_df, aes(x=break_left, xend=break_right,
y=weight_left, yend=weight_right,
linetype=dashed),
size=0.8)+
scale_x_log10(breaks=c(10^4, 10^5,10^6,10^7),
labels = trans_format("log10", math_format(10^.x))) +
xlab("Genomic distance (bp)")+
ylab("Weight")+
theme(legend.position=c(0.8,0.4)) +
theme(plot.margin = unit(c(1, 1, 2, 1), "lines"))+
theme(axis.title = element_text(face="bold" ))+
guides(linetype=FALSE)
panel_f
full_fig <- plot_grid(panel_a,
panel_c,
panel_e, panel_f,
labels= c("a)", "b)", "c)",
"d)"),
nrow=2)
full_fig
ggsave(plot=full_fig, file="data_examples.pdf", width=12, height=10)
suppl_fig <- plot_grid(panel_b,
panel_d,
labels= c("a)", "b)"),
nrow=1)
suppl_fig
ggsave(plot=suppl_fig, file="data_examples_suppl.pdf", width=12, height=5)