## ----libraries, echo=FALSE, message=FALSE, warning=FALSE---------------------- suppressPackageStartupMessages({ library(RcisTarget) library(RcisTarget.hg19.motifDBs.cisbpOnly.500bp) library(DT) library(data.table) #require(visNetwork) }) ## ----citation, echo=FALSE----------------------------------------------------- print(citation("RcisTarget")[1], style="textVersion") ## ----overview, eval=FALSE----------------------------------------------------- # library(RcisTarget) # # # Load gene sets to analyze. e.g.: # geneList1 <- read.table(file.path(system.file('examples', package='RcisTarget'), "hypoxiaGeneSet.txt"), stringsAsFactors=FALSE)[,1] # geneLists <- list(geneListName=geneList1) # # geneLists <- GSEABase::GeneSet(genes, setName="geneListName") # alternative # # # Select motif database to use (i.e. organism and distance around TSS) # data(motifAnnotations_hgnc) # motifRankings <- importRankings("~/databases/hg19-tss-centered-10kb-7species.mc9nr.feather") # # # Motif enrichment analysis: # motifEnrichmentTable_wGenes <- cisTarget(geneLists, motifRankings, # motifAnnot=motifAnnotations_hgnc) ## ----overviewAdvanced, eval=FALSE--------------------------------------------- # # 1. Calculate AUC # motifs_AUC <- calcAUC(geneLists, motifRankings) # # # 2. Select significant motifs, add TF annotation & format as table # motifEnrichmentTable <- addMotifAnnotation(motifs_AUC, # motifAnnot=motifAnnotations_hgnc) # # # 3. Identify significant genes for each motif # # (i.e. genes from the gene set in the top of the ranking) # # Note: Method 'iCisTarget' instead of 'aprox' is more accurate, but slower # motifEnrichmentTable_wGenes <- addSignificantGenes(motifEnrichmentTable, # geneSets=geneLists, # rankings=motifRankings, # nCores=1, # method="aprox") ## ----installDep, eval=FALSE--------------------------------------------------- # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # # To support paralell execution: # BiocManager::install(c("doMC", "doRNG")) # # For the examples in the follow-up section of the tutorial: # BiocManager::install(c("DT", "visNetwork")) ## ----vignette, eval=FALSE----------------------------------------------------- # # Explore tutorials in the web browser: # browseVignettes(package="RcisTarget") # # # Commnad line-based: # vignette(package="RcisTarget") # list # vignette("RcisTarget") # open ## ----editRmd, eval=FALSE------------------------------------------------------ # vignetteFile <- paste(file.path(system.file('doc', package='RcisTarget')), # "RcisTarget.Rmd", sep="/") # # Copy to edit as markdown # file.copy(vignetteFile, ".") # # Alternative: extract R code # Stangle(vignetteFile) ## ----geneSets----------------------------------------------------------------- library(RcisTarget) geneSet1 <- c("gene1", "gene2", "gene3") geneLists <- list(geneSetName=geneSet1) # or: # geneLists <- GSEABase::GeneSet(geneSet1, setName="geneSetName") ## ----geneSetsFormat, eval=FALSE----------------------------------------------- # class(geneSet1) # class(geneLists) # geneSet2 <- c("gene2", "gene4", "gene5", "gene6") # geneLists[["anotherGeneSet"]] <- geneSet2 # names(geneLists) # geneLists$anotherGeneSet # lengths(geneLists) ## ----sampleGeneSet------------------------------------------------------------ txtFile <- paste(file.path(system.file('examples', package='RcisTarget')), "hypoxiaGeneSet.txt", sep="/") geneLists <- list(hypoxia=read.table(txtFile, stringsAsFactors=FALSE)[,1]) head(geneLists$hypoxia) ## ----downloadDatabases, eval=FALSE-------------------------------------------- # featherURL <- "https://resources.aertslab.org/cistarget/databases/homo_sapiens/hg19/refseq_r45/mc9nr/gene_based/hg19-tss-centered-10kb-7species.mc9nr.feather" # download.file(featherURL, destfile=basename(featherURL)) # saved in current dir ## ----loadDatabases, eval=FALSE------------------------------------------------ # # Search space: 10k bp around TSS - HUMAN # motifRankings <- importRankings("hg19-tss-centered-10kb-7species.mc9nr.feather") # # Load the annotation to human transcription factors # data(motifAnnotations_hgnc) ## ----loadAnnot, eval=TRUE----------------------------------------------------- # mouse: # data(motifAnnotations_mgi) # human: data(motifAnnotations_hgnc) motifAnnotations_hgnc[199:202,] ## ----motifAnnotQuery---------------------------------------------------------- motifAnnotations_hgnc[(directAnnotation==TRUE) & (TF %in% c("HIF1A")), ] ## ----installDatabases, eval=FALSE--------------------------------------------- # # From Bioconductor # if (!requireNamespace("BiocManager", quietly=TRUE)) # install.packages("BiocManager") # BiocManager::install("RcisTarget.hg19.motifDBs.cisbpOnly.500bp") ## ----loadDatabasesCisbpOnly--------------------------------------------------- library(RcisTarget.hg19.motifDBs.cisbpOnly.500bp) # Rankings data(hg19_500bpUpstream_motifRanking_cispbOnly) motifRankings <- hg19_500bpUpstream_motifRanking_cispbOnly motifRankings # Annotations data(hg19_motifAnnotation_cisbpOnly) motifAnnotations_hgnc <- hg19_motifAnnotation_cisbpOnly ## ----eval=FALSE--------------------------------------------------------------- # motifEnrichmentTable_wGenes <- cisTarget(geneLists, # motifRankings, # motifAnnot=motifAnnotations_hgnc) ## ----calcAUC, cache=TRUE------------------------------------------------------ motifs_AUC <- calcAUC(geneLists, motifRankings, nCores=1) ## ----AUChistogram, cache=TRUE, fig.height=5, fig.width=5---------------------- auc <- getAUC(motifs_AUC)["hypoxia",] hist(auc, main="hypoxia", xlab="AUC histogram", breaks=100, col="#ff000050", border="darkred") nes3 <- (3*sd(auc)) + mean(auc) abline(v=nes3, col="red") ## ----addMotifAnnotation, cache=TRUE------------------------------------------- motifEnrichmentTable <- addMotifAnnotation(motifs_AUC, nesThreshold=3, motifAnnot=motifAnnotations_hgnc, highlightTFs=list(hypoxia="HIF1A")) ## ----------------------------------------------------------------------------- class(motifEnrichmentTable) dim(motifEnrichmentTable) head(motifEnrichmentTable[,-"TF_lowConf", with=FALSE]) ## ----addSignificantGenes, cache=TRUE------------------------------------------ motifEnrichmentTable_wGenes <- addSignificantGenes(motifEnrichmentTable, rankings=motifRankings, geneSets=geneLists) dim(motifEnrichmentTable_wGenes) ## ----getSignificantGenes, fig.height=7, fig.width=7--------------------------- geneSetName <- "hypoxia" selectedMotifs <- c("cisbp__M6275", sample(motifEnrichmentTable$motif, 2)) par(mfrow=c(2,2)) getSignificantGenes(geneLists[[geneSetName]], motifRankings, signifRankingNames=selectedMotifs, plotCurve=TRUE, maxRank=5000, genesFormat="none", method="aprox") ## ----output------------------------------------------------------------------- motifEnrichmentTable_wGenes_wLogo <- addLogo(motifEnrichmentTable_wGenes) resultsSubset <- motifEnrichmentTable_wGenes_wLogo[1:10,] library(DT) datatable(resultsSubset[,-c("enrichedGenes", "TF_lowConf"), with=FALSE], escape = FALSE, # To show the logo filter="top", options=list(pageLength=5)) ## ----anotatedTfs, cache=TRUE-------------------------------------------------- anotatedTfs <- lapply(split(motifEnrichmentTable_wGenes$TF_highConf, motifEnrichmentTable$geneSet), function(x) { genes <- gsub(" \\(.*\\). ", "; ", x, fixed=FALSE) genesSplit <- unique(unlist(strsplit(genes, "; "))) return(genesSplit) }) anotatedTfs$hypoxia ## ----network, cache=FALSE, eval=FALSE----------------------------------------- # signifMotifNames <- motifEnrichmentTable$motif[1:3] # # incidenceMatrix <- getSignificantGenes(geneLists$hypoxia, # motifRankings, # signifRankingNames=signifMotifNames, # plotCurve=TRUE, maxRank=5000, # genesFormat="incidMatrix", # method="aprox")$incidMatrix # # library(reshape2) # edges <- melt(incidenceMatrix) # edges <- edges[which(edges[,3]==1),1:2] # colnames(edges) <- c("from","to") ## ----visNetwork, eval=FALSE--------------------------------------------------- # library(visNetwork) # motifs <- unique(as.character(edges[,1])) # genes <- unique(as.character(edges[,2])) # nodes <- data.frame(id=c(motifs, genes), # label=c(motifs, genes), # title=c(motifs, genes), # tooltip # shape=c(rep("diamond", length(motifs)), rep("elypse", length(genes))), # color=c(rep("purple", length(motifs)), rep("skyblue", length(genes)))) # visNetwork(nodes, edges) %>% visOptions(highlightNearest = TRUE, # nodesIdSelection = TRUE) ## ----------------------------------------------------------------------------- date() sessionInfo()