## ---- echo=FALSE, include=FALSE----------------------------------------------- library(knitr) knitr::opts_chunk$set(cache = TRUE, warning = FALSE, message = FALSE, cache.lazy = FALSE) library(dplyr) library(tidyr) library(tibble) library(magrittr) library(ggplot2) library(ggrepel) library(tidybulk) library(tidySummarizedExperiment) my_theme = theme_bw() + theme( panel.border = element_blank(), axis.line = element_line(), panel.grid.major = element_line(size = 0.2), panel.grid.minor = element_line(size = 0.1), text = element_text(size=12), legend.position="bottom", aspect.ratio=1, strip.background = element_blank(), axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)), axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)) ) tibble_counts = tidybulk::counts_SE %>% tidybulk() %>% as_tibble() ## ----------------------------------------------------------------------------- tt = se_mini ## ----aggregate, cache=TRUE, message=FALSE, warning=FALSE, results='hide', class.source='yellow'---- rowData(tt)$gene_name = rownames(tt) tt.aggr = tt %>% aggregate_duplicates(.transcript = gene_name) ## ----aggregate long, eval=FALSE----------------------------------------------- # temp = data.frame( # symbol = dge_list$genes$symbol, # dge_list$counts # ) # dge_list.nr <- by(temp, temp$symbol, # function(df) # if(length(df[1,1])>0) # matrixStats:::colSums(as.matrix(df[,-1])) # ) # dge_list.nr <- do.call("rbind", dge_list.nr) # colnames(dge_list.nr) <- colnames(dge_list) ## ----normalise, cache=TRUE---------------------------------------------------- tt.norm = tt.aggr %>% identify_abundant(factor_of_interest = condition) %>% scale_abundance() ## ----normalise long, eval=FALSE----------------------------------------------- # library(edgeR) # # dgList <- DGEList(count_m=x,group=group) # keep <- filterByExpr(dgList) # dgList <- dgList[keep,,keep.lib.sizes=FALSE] # [...] # dgList <- calcNormFactors(dgList, method="TMM") # norm_counts.table <- cpm(dgList) ## ----filter variable, cache=TRUE---------------------------------------------- tt.norm.variable = tt.norm %>% keep_variable() ## ----filter variable long, eval=FALSE----------------------------------------- # library(edgeR) # # x = norm_counts.table # # s <- rowMeans((x-rowMeans(x))^2) # o <- order(s,decreasing=TRUE) # x <- x[o[1L:top],,drop=FALSE] # # norm_counts.table = norm_counts.table[rownames(x)] # # norm_counts.table$cell_type = tibble_counts[ # match( # tibble_counts$sample, # rownames(norm_counts.table) # ), # "Cell type" # ] ## ----mds, cache=TRUE---------------------------------------------------------- tt.norm.MDS = tt.norm %>% reduce_dimensions(method="MDS", .dims = 2) ## ---- eval = FALSE------------------------------------------------------------ # library(limma) # # count_m_log = log(count_m + 1) # cmds = limma::plotMDS(ndim = .dims, plot = FALSE) # # cmds = cmds %$% # cmdscale.out %>% # setNames(sprintf("Dim%s", 1:6)) # # cmds$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(cmds)), # "Cell type" # ] ## ----pca, cache=TRUE, message=FALSE, warning=FALSE, results='hide'------------ tt.norm.PCA = tt.norm %>% reduce_dimensions(method="PCA", .dims = 2) ## ----eval=FALSE--------------------------------------------------------------- # count_m_log = log(count_m + 1) # pc = count_m_log %>% prcomp(scale = TRUE) # variance = pc$sdev^2 # variance = (variance / sum(variance))[1:6] # pc$cell_type = counts[ # match(counts$sample, rownames(pc)), # "Cell type" # ] ## ----tsne, cache=TRUE, message=FALSE, warning=FALSE, results='hide'----------- tt.norm.tSNE = breast_tcga_mini_SE %>% tidybulk( sample, ens, count_scaled) %>% identify_abundant() %>% reduce_dimensions( method = "tSNE", perplexity=10, pca_scale =TRUE ) ## ---- eval=FALSE-------------------------------------------------------------- # count_m_log = log(count_m + 1) # # tsne = Rtsne::Rtsne( # t(count_m_log), # perplexity=10, # pca_scale =TRUE # )$Y # tsne$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(tsne)), # "Cell type" # ] ## ----rotate, cache=TRUE------------------------------------------------------- tt.norm.MDS.rotated = tt.norm.MDS %>% rotate_dimensions(`Dim1`, `Dim2`, rotation_degrees = 45, action="get") ## ---- eval=FALSE-------------------------------------------------------------- # rotation = function(m, d) { # r = d * pi / 180 # ((bind_rows( # c(`1` = cos(r), `2` = -sin(r)), # c(`1` = sin(r), `2` = cos(r)) # ) %>% as_matrix) %*% m) # } # mds_r = pca %>% rotation(rotation_degrees) # mds_r$cell_type = counts[ # match(counts$sample, rownames(mds_r)), # "Cell type" # ] ## ----de, cache=TRUE, message=FALSE, warning=FALSE, results='hide'------------- tt.de = tt %>% test_differential_abundance( ~ condition, action="get") tt.de ## ---- eval=FALSE-------------------------------------------------------------- # library(edgeR) # # dgList <- DGEList(counts=counts_m,group=group) # keep <- filterByExpr(dgList) # dgList <- dgList[keep,,keep.lib.sizes=FALSE] # dgList <- calcNormFactors(dgList) # design <- model.matrix(~group) # dgList <- estimateDisp(dgList,design) # fit <- glmQLFit(dgList,design) # qlf <- glmQLFTest(fit,coef=2) # topTags(qlf, n=Inf) ## ----adjust, cache=TRUE, message=FALSE, warning=FALSE, results='hide'--------- tt.norm.adj = tt.norm %>% adjust_abundance( ~ condition + time) ## ---- eval=FALSE-------------------------------------------------------------- # library(sva) # # count_m_log = log(count_m + 1) # # design = # model.matrix( # object = ~ condition + time, # data = annotation # ) # # count_m_log.sva = # ComBat( # batch = design[,2], # mod = design, # ... # ) # # count_m_log.sva = ceiling(exp(count_m_log.sva) -1) # count_m_log.sva$cell_type = counts[ # match(counts$sample, rownames(count_m_log.sva)), # "Cell type" # ] # ## ----cibersort, cache=TRUE, eval=FALSE---------------------------------------- # tt.cibersort = # tt %>% # deconvolve_cellularity(action="get", cores=1) # ## ---- eval=FALSE-------------------------------------------------------------- # # source(‘CIBERSORT.R’) # count_m %>% write.table("mixture_file.txt") # results <- CIBERSORT( # "sig_matrix_file.txt", # "mixture_file.txt", # perm=100, QN=TRUE # ) # results$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(results)), # "Cell type" # ] # ## ----cluster, cache=TRUE------------------------------------------------------ tt.norm.cluster = tt.norm.MDS %>% cluster_elements(method="kmeans", centers = 2, action="get" ) ## ---- eval=FALSE-------------------------------------------------------------- # count_m_log = log(count_m + 1) # # k = kmeans(count_m_log, iter.max = 1000, ...) # cluster = k$cluster # # cluster$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(cluster)), # c("Cell type", "Dim1", "Dim2") # ] # ## ----SNN, eval=FALSE, message=FALSE, warning=FALSE, results='hide'------------ # tt.norm.SNN = # tt.norm.tSNE %>% # cluster_elements(method = "SNN") ## ---- eval=FALSE-------------------------------------------------------------- # library(Seurat) # # snn = CreateSeuratObject(count_m) # snn = ScaleData( # snn, display.progress = TRUE, # num.cores=4, do.par = TRUE # ) # snn = FindVariableFeatures(snn, selection.method = "vst") # snn = FindVariableFeatures(snn, selection.method = "vst") # snn = RunPCA(snn, npcs = 30) # snn = FindNeighbors(snn) # snn = FindClusters(snn, method = "igraph", ...) # snn = snn[["seurat_clusters"]] # # snn$cell_type = tibble_counts[ # match(tibble_counts$sample, rownames(snn)), # c("Cell type", "Dim1", "Dim2") # ] # ## ----drop, cache=TRUE--------------------------------------------------------- tt.norm.non_redundant = tt.norm.MDS %>% remove_redundancy( method = "correlation" ) ## ---- eval=FALSE-------------------------------------------------------------- # library(widyr) # # .data.correlated = # pairwise_cor( # counts, # sample, # transcript, # rc, # sort = TRUE, # diag = FALSE, # upper = FALSE # ) %>% # filter(correlation > correlation_threshold) %>% # distinct(item1) %>% # rename(!!.element := item1) # # # Return non redundant data frame # counts %>% anti_join(.data.correlated) %>% # spread(sample, rc, - transcript) %>% # left_join(annotation) # # # ## ----heat, eval=FALSE--------------------------------------------------------- # tt.norm.MDS %>% # # # filter lowly abundant # keep_abundant() %>% # # # extract 500 most variable genes # keep_variable( .abundance = count_scaled, top = 500) %>% # # # create heatmap # heatmap(sample, transcript, count_scaled, transform = log1p) %>% # add_tile(`Cell type`) ## ---- eval=FALSE-------------------------------------------------------------- # # Example taken from airway dataset from BioC2020 workshop. # dgList <- SE2DGEList(airway) # group <- factor(dgList$samples$`Cell type`) # keep.exprs <- filterByExpr(dgList, group=group) # dgList <- dgList[keep.exprs,, keep.lib.sizes=FALSE] # dgList <- calcNormFactors(dgList) # logcounts <- cpm(dgList, log=TRUE) # var_genes <- apply(logcounts, 1, var) # select_var <- names(sort(var_genes, decreasing=TRUE))[1:500] # highly_variable_lcpm <- logcounts[select_var,] # colours <- c("#440154FF", "#21908CFF", "#fefada" ) # col.group <- c("red","grey")[group] # gplots::heatmap.2(highly_variable_lcpm, col=colours, trace="none", ColSideColors=col.group, scale="row") ## ----density, eval=FALSE------------------------------------------------------ # # Example taken from airway dataset from BioC2020 workshop. # airway %>% # tidybulk() %>% # identify_abundant() %>% # scale_abundance() %>% # pivot_longer(cols = starts_with("counts"), names_to = "source", values_to = "abundance") %>% # filter(!lowly_abundant) %>% # ggplot(aes(x=abundance + 1, color=sample)) + # geom_density() + # facet_wrap(~source) + # scale_x_log10() ## ---- eval=FALSE-------------------------------------------------------------- # # Example taken from airway dataset from BioC2020 workshop. # dgList <- SE2DGEList(airway) # group <- factor(dgList$samples$dex) # keep.exprs <- filterByExpr(dgList, group=group) # dgList <- dgList[keep.exprs,, keep.lib.sizes=FALSE] # dgList <- calcNormFactors(dgList) # logcounts <- cpm(dgList, log=TRUE) # var_genes <- apply(logcounts, 1, var) # select_var <- names(sort(var_genes, decreasing=TRUE))[1:500] # highly_variable_lcpm <- logcounts[select_var,] # colours <- c("#440154FF", "#21908CFF", "#fefada" ) # col.group <- c("red","grey")[group] # gplots::heatmap.2(highly_variable_lcpm, col=colours, trace="none", ColSideColors=col.group, scale="row") ## ----------------------------------------------------------------------------- sessionInfo()