## ----global_options, include=FALSE--------------------------------------- knitr::opts_chunk$set(warning = FALSE, message = FALSE, eval = TRUE, fig.width = 7, fig.height = 4.2) ## ----load_libraries, results='hide'-------------------------------------- library(RCAS) ## ----sample_data--------------------------------------------------------- library(RCAS) data(queryRegions) #sample queryRegions in BED format() data(gff) #sample GFF file ## ----RCAS_import_data, eval = FALSE-------------------------------------- # queryRegions <- importBed(filePath = , sampleN = 10000) # gff <- importGtf(filePath = ) ## ----queryGFF------------------------------------------------------------ overlaps <- as.data.table(queryGff(queryRegions = queryRegions, gffData = gff)) ## ----query_gene_types---------------------------------------------------- biotype_col <- grep('gene_biotype', colnames(overlaps), value = T) df <- overlaps[,length(unique(overlappingQuery)), by = biotype_col] colnames(df) <- c("feature", "count") df$percent <- round(df$count / length(queryRegions) * 100, 1) df <- df[order(count, decreasing = TRUE)] ggplot2::ggplot(df, aes(x = reorder(feature, -percent), y = percent)) + geom_bar(stat = 'identity', aes(fill = feature)) + geom_label(aes(y = percent + 0.5), label = df$count) + labs(x = 'transcript feature', y = paste0('percent overlap (n = ', length(queryRegions), ')')) + theme_bw(base_size = 14) + theme(axis.text.x = element_text(angle = 90)) ## ----getTxdbFeatures----------------------------------------------------- txdbFeatures <- getTxdbFeaturesFromGRanges(gff) ## ----summarizeQueryRegions----------------------------------------------- summary <- summarizeQueryRegions(queryRegions = queryRegions, txdbFeatures = txdbFeatures) df <- data.frame(summary) df$percent <- round((df$count / length(queryRegions)), 3) * 100 df$feature <- rownames(df) ggplot2::ggplot(df, aes(x = reorder(feature, -percent), y = percent)) + geom_bar(stat = 'identity', aes(fill = feature)) + geom_label(aes(y = percent + 3), label = df$count) + labs(x = 'transcript feature', y = paste0('percent overlap (n = ', length(queryRegions), ')')) + theme_bw(base_size = 14) + theme(axis.text.x = element_text(angle = 90)) ## ----getTargetedGenesTable----------------------------------------------- dt <- getTargetedGenesTable(queryRegions = queryRegions, txdbFeatures = txdbFeatures) dt <- dt[order(transcripts, decreasing = TRUE)] knitr::kable(dt[1:10,]) ## ----transcriptBoundaryCoverage------------------------------------------ cvgF <- getFeatureBoundaryCoverage(queryRegions = queryRegions, featureCoords = txdbFeatures$transcripts, flankSize = 1000, boundaryType = 'fiveprime', sampleN = 10000) cvgT <- getFeatureBoundaryCoverage(queryRegions = queryRegions, featureCoords = txdbFeatures$transcripts, flankSize = 1000, boundaryType = 'threeprime', sampleN = 10000) cvgF$boundary <- 'fiveprime' cvgT$boundary <- 'threeprime' df <- rbind(cvgF, cvgT) ggplot2::ggplot(df, aes(x = bases, y = meanCoverage)) + geom_ribbon(fill = 'lightgreen', aes(ymin = meanCoverage - standardError * 1.96, ymax = meanCoverage + standardError * 1.96)) + geom_line(color = 'black') + facet_grid( ~ boundary) + theme_bw(base_size = 14) ## ----coverageprofilelist, fig.height=6----------------------------------- cvgList <- calculateCoverageProfileList(queryRegions = queryRegions, targetRegionsList = txdbFeatures, sampleN = 10000) ggplot2::ggplot(cvgList, aes(x = bins, y = meanCoverage)) + geom_ribbon(fill = 'lightgreen', aes(ymin = meanCoverage - standardError * 1.96, ymax = meanCoverage + standardError * 1.96)) + geom_line(color = 'black') + theme_bw(base_size = 14) + facet_wrap( ~ feature, ncol = 3) ## ----motif_analysis------------------------------------------------------ motifResults <- runMotifRG(queryRegions = queryRegions, resizeN = 15, sampleN = 10000, genomeVersion = 'hg19', motifN = 2, nCores = 2) par(mfrow = c(1,2), mar = c(2,2,2,2)) for (i in 1:length(motifResults$motifs)) { motifPattern <- motifResults$motifs[[i]]@pattern motifRG::plotMotif(match = motifResults$motifs[[i]]@match$pattern, main = paste0('Motif-',i,': ',motifPattern), entropy = TRUE) } ## ----motif_analysis_table------------------------------------------------ summary <- getMotifSummaryTable(motifResults) knitr::kable(summary) ## ----GO analysis--------------------------------------------------------- #get all genes from the GTF data backgroundGenes <- unique(gff$gene_id) #get genes that overlap query regions targetedGenes <- unique(overlaps$gene_id) #run TopGO goBP <- runTopGO(ontology = 'BP', species = 'human', backgroundGenes = backgroundGenes, targetedGenes = targetedGenes) goBP <- goBP[order(goBP$foldEnrichment, decreasing = TRUE),] rownames(goBP) <- goBP$GO.ID goBP <- subset(goBP, select = -c(Annotated,classicFisher, bh, GO.ID)) knitr::kable(goBP[1:10,]) ## ----msigdb_analysis----------------------------------------------------- #geneSets <- parseMsigdb(< path to msigdbFile>) data(geneSets) resultsGSEA <- runGSEA(geneSetList = geneSets, backgroundGenes = backgroundGenes, targetedGenes = targetedGenes) knitr::kable(x = resultsGSEA[1:10,])