## ----global_options, include=FALSE--------------------------------------- knitr::opts_chunk$set(warning = FALSE, message = FALSE, eval = FALSE) ## ----load_libraries, results='hide'-------------------------------------- # library(RCAS) ## ----sample_data--------------------------------------------------------- # library(RCAS) # data(queryRegions) #sample queryRegions in BED format() # data(gff) #sample GFF file ## ----RCAS_import_data---------------------------------------------------- # #library(RCAS) # #queryRegions <- importBed(filePath = , sampleN = 10000) # #gff <- importGtf(filePath = ) ## ----queryGFF------------------------------------------------------------ # overlaps <- queryGff(queryRegions = queryRegions, gffData = gff) # #data.table is used to do quick summary operations # overlaps.dt <- data.table(as.data.frame(overlaps)) ## ----query_gene_types---------------------------------------------------- # biotype_col <- grep('gene_biotype', colnames(overlaps.dt), value = T) # df <- overlaps.dt[,length(unique(overlappingQuery)), by = biotype_col] # colnames(df) <- c("feature", "count") # df$percent <- round(df$count / length(queryRegions) * 100, 1) # df <- df[order(count, decreasing = TRUE)] # p <- plot_ly(data = df, # type = "bar", # x = df$feature, # y = df$percent, # text = paste("count:", df$count), color=df$feature) # layout(p = p, # margin = list(l=100, r=100, b=150), # xaxis = list(showticklabels = TRUE, tickangle = 90), # yaxis = list(title = paste("percentage of query regions,", # "n =", length(queryRegions)))) # ## ----getTxdbFeatures----------------------------------------------------- # txdbFeatures <- getTxdbFeaturesFromGRanges(gff) ## ----summarizeQueryRegions----------------------------------------------- # summary <- summarizeQueryRegions(queryRegions = queryRegions, # txdbFeatures = txdbFeatures) # # df <- data.frame(summary) # df$percent <- round((df$count / length(queryRegions)), 3) * 100 # p <- plot_ly( data = df, # x = rownames(df), # y = df$percent, # type = 'bar', # text = paste("count:", df$count), # color = rownames(df) # ) # layout(p = p, # xaxis = list(title = 'features'), # yaxis = list(title = paste("percentage of query regions,", # "n =", length(queryRegions) # ) # ), # margin = list(b = 150, r = 50) # ) ## ----getTargetedGenesTable----------------------------------------------- # dt <- getTargetedGenesTable(queryRegions = queryRegions, # txdbFeatures = txdbFeatures) # dt <- dt[order(transcripts, decreasing = TRUE)] # # DT::datatable(dt[1:100], # extensions = c('Buttons', 'FixedColumns'), # options = list(fixedColumns = TRUE, # scrollX = TRUE, # dom = 'Bfrtip', # buttons = c('copy', 'print', 'csv','excel', 'pdf')), # filter = 'bottom' # ) ## ----transcriptBoundaryCoverage------------------------------------------ # cvgF <- getFeatureBoundaryCoverage(queryRegions = queryRegions, featureCoords = txdbFeatures$transcripts, flankSize = 1000, boundaryType = 'fiveprime', sampleN = 10000) # cvgT <- getFeatureBoundaryCoverage(queryRegions = queryRegions, featureCoords = txdbFeatures$transcripts, flankSize = 1000, boundaryType = 'threeprime', sampleN = 10000) # # plotFeatureBoundaryCoverage(cvgF = cvgF, cvgT = cvgT, featureName = 'transcripts') # ## ----coverageprofilelist------------------------------------------------- # cvgList <- calculateCoverageProfileList(queryRegions = queryRegions, # targetRegionsList = txdbFeatures, # sampleN = 10000) # # p <- plotly::plot_ly(data = cvgList, type = 'scatter', mode = 'lines') # for (f in unique(cvgList$feature)){ # data <- cvgList[cvgList$feature == f,] # p <- add_trace(p = p, data = data, x = ~bins, y = ~meanCoverage, # legendgroup = f, showlegend = FALSE, opacity = 1, color = f) # p <- add_ribbons(p = p, data = data, x = ~bins, # ymin = data$meanCoverage - data$standardError*1.96, # ymax = data$meanCoverage + data$standardError*1.96, # legendgroup = f, # name = f, color = f # ) # } # layout(p, font = list(size = 14)) ## ----motif_analysis------------------------------------------------------ # motifResults <- runMotifRG(queryRegions = queryRegions, # genomeVersion = 'hg19', # motifN = 2, nCores = 2) # # par(mfrow = c(1,2), mar = c(2,2,2,2)) # for (i in 1:length(motifResults$motifs)) { # motifPattern <- motifResults$motifs[[i]]@pattern # motifRG::plotMotif(match = motifResults$motifs[[i]]@match$pattern, # main = paste0('Motif-',i,': ',motifPattern), # entropy = TRUE) # } ## ----motif_analysis_table------------------------------------------------ # summary <- getMotifSummaryTable(motifResults) # DT::datatable(summary, # extensions = c('Buttons', 'FixedColumns'), # options = list(fixedColumns = TRUE, # scrollX = TRUE, # dom = 'Bfrtip', # buttons = c('copy', 'print', 'csv','excel', 'pdf')), # filter = 'bottom' # ) ## ----GO analysis--------------------------------------------------------- # # #get all genes from the GTF data # backgroundGenes <- unique(gff$gene_id) # #get genes that overlap query regions # targetedGenes <- unique(overlaps$gene_id) # # #run TopGO # goBP <- runTopGO(ontology = 'BP', # species = 'human', # backgroundGenes = backgroundGenes, # targetedGenes = targetedGenes) # # goBP <- goBP[order(goBP$foldEnrichment, decreasing = TRUE),] # rownames(goBP) <- goBP$GO.ID # goBP <- subset(goBP, select = -c(Annotated,classicFisher, bh, GO.ID)) # DT::datatable(goBP[1:10,], # extensions = c('Buttons', 'FixedColumns'), # options = list(fixedColumns = TRUE, # scrollX = TRUE, # dom = 'Bfrtip', # buttons = c('copy', 'print', 'csv', 'excel', 'pdf')), # filter = 'bottom' # ) ## ----msigdb_analysis----------------------------------------------------- # #geneSets <- parseMsigdb(< path to msigdbFile>) # data(geneSets) # resultsGSEA <- runGSEA(geneSetList = geneSets, # backgroundGenes = backgroundGenes, # targetedGenes = targetedGenes) # # datatable(resultsGSEA[1:10,], # extensions = 'FixedColumns', # options = list( # dom = 't', # scrollX = TRUE, # scrollCollapse = TRUE # )) #