## ----include=FALSE, cache=FALSE------------------------------------------ require(png) require(grid) require(gridExtra) suppressMessages(require(LINC)) data(BRAIN_EXPR) ## ----eval = FALSE-------------------------------------------------------- # str(GTEX_CRBL) # # num [1:56318, 1:117] 0.0475 10.7799 0.105 0 0 ... # - attr(*, "dimnames")=List of 2 # ..$ : chr [1:56318] "ENSG00000223972" "ENSG00000227232" "ENSG00000243485" "ENSG00000237613" ... # ..$ : chr [1:117] "GTEX-117XS-3126-SM-5GIDP" "GTEX-1192X-3226-SM-5987D" "GTEX-11DXW-1026-SM-5H11K" # "GTEX-11DXY-3126-SM-5N9BT" ... # ## ----eval = FALSE-------------------------------------------------------- # # STEP 1: select the high-variance genes # var_index <- order(apply(GTEX_CRBL, 1, var), decreasing = T) # GTEX_CRBL_HVAR <- GTEX_CRBL[var_index[1:5000], ] # # # STEP 2: get the gene biotype # require(biomaRt) # ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl") # biotype <- getBM(attributes=c('gene_biotype', # 'ensembl_gene_id'), # filters = 'ensembl_gene_id', # values = rownames(GTEX_CRBL_HVAR), # mart = ensembl) # # STEP 3: # index <- match(rownames(GTEX_CRBL_HVAR), biotype$ensembl_gene_id) # GTEX_CRBL_BIOTYPE <- biotype$gene_biotype[index] ## ----eval = FALSE-------------------------------------------------------- # table(GTEX_CRBL_BIOTYPE) # # 3prime_overlapping_ncRNA antisense lincRNA # 8 279 74 # miRNA misc_RNA Mt_rRNA # 3 4 2 # Mt_tRNA processed_pseudogene processed_transcript # 4 119 37 # protein_coding rRNA sense_intronic # 4256 1 11 # sense_overlapping snoRNA snRNA # 13 8 1 # TR_C_gene transcribed_processed_pseudogene transcribed_unprocessed_pseudogene # 1 8 25 # unitary_pseudogene unprocessed_pseudogene # 2 18 # ## ----warning = FALSE, message = FALSE, fig.width = 17, fig.height = 11, eval = TRUE---- # the preprocessed expression matrix with 1000 genes str(cerebellum) # a TRUE/FALSE vector with TRUE for protein-coding genes str(pcgenes_crbl) # STEP 1: Separate the protine-coding genes from the queries (lincRNAs) crbl_matrix <- linc(cerebellum, codingGenes = pcgenes_crbl) # STEP 2: Compute the co-expression network with a fixed threshold crbl_cluster <- clusterlinc(crbl_matrix, pvalCutOff = 0.005) # STEP 3: Interrogate enriched biological terms for co-expressed genes crbl_bp <- getbio(crbl_cluster) # Show the results as a plot! plotlinc(crbl_bp) ## ---- message = FALSE---------------------------------------------------- getcoexpr(crbl_cluster, query = "55384")[1:5] # The co-expressed genes can also be returned as gene symbols. getcoexpr(crbl_cluster, query = "55384", keyType = 'SYMBOL')[1:5] ## ----fig.width = 23, fig.height = 10, warning = FALSE, message = FALSE, eval = TRUE---- meg3 <- singlelinc(crbl_matrix, query = "55384", onlycor = TRUE, underth = FALSE, threshold = 0.5, ont = 'MF') plotlinc(meg3) ## ----fig.width = 17, fig.height = 9.5, warning = FALSE, message = FALSE, eval = TRUE---- data(BRAIN_EXPR) # (A) call 'linc' with no further arguments # 'cerebellum' is a matrix of expression values; rows correspond to genes # 'pcgenes_crbl' is a TRUE/FALSE vector; TRUE indicates a protein-coding gene crbl_matrix <- linc(cerebellum, codingGenes = pcgenes_crbl) # (B) remove first seven principle components crbl_matrix_pca <- linc(cerebellum, codingGenes = pcgenes_crbl, rmPC = c(1:7)) # (C) negative correlation by using 'userFun' crbl_matrix_ncor <- linc(cerebellum, codingGenes = pcgenes_crbl, userFun = function(x,y){ -cor(x,y) }) # (D) remove outliers using the ESD method crbl_matrix_esd <- linc(cerebellum, codingGenes = pcgenes_crbl, outlier = "esd") # (E) plot this object plotlinc(crbl_matrix_esd) ## ----eval = FALSE-------------------------------------------------------- # plotlinc(crbl_matrix) ## ----fig.width = 23, fig.height = 10, warning = FALSE, message = FALSE, eval = TRUE---- meg3_pca <- singlelinc(crbl_matrix_pca, query = "55384", onlycor = TRUE, underth = FALSE, threshold = 0.5, ont = 'MF') plotlinc(meg3_pca) ## ------------------------------------------------------------------------ intersect(getcoexpr(meg3), getcoexpr(meg3_pca)) ## ----warning = FALSE, eval = TRUE---------------------------------------- # results() - results (different for subclasses) # correlation() - correlation matrices # assignment() - vector of protein-coding genes # history() - stored parameters # express() - original expression matrix cerebellum <- express(crbl_cluster) str(cerebellum) ## ----warning = FALSE, eval = TRUE---------------------------------------- # getlinc() is used to accesss information getlinc(crbl_cluster, subject = "queries") ## ----warning = FALSE, eval = TRUE---------------------------------------- # feature() can be used to convert objects crbl_matrix <- crbl_cluster + feature(setLevel = "LINCmatrix", showLevels = FALSE) ## ----warning = FALSE, eval = TRUE---------------------------------------- # change the used gene annotation, here from "human" to "mouse" murine_matrix <- changeOrgDb(crbl_matrix, OrgDb = 'org.Mm.eg.db') ## ----fig.width = 11, fig.height = 7, warning = FALSE, eval = TRUE-------- # scatterplots, correlations and expression level of query plotlinc(crbl_cluster, showCor = c("647979", "6726", "3337", "3304" ,"3320")) ## ----eval = FALSE-------------------------------------------------------- # linctable(file_name = "crbl_co_expr", input = crbl_cluster) ## ----fig.width = 14, fig.height = 12, warning = FALSE, message = FALSE, eval = FALSE---- # justlinc(GTEX_LIVER_CRUDE) # 'justlinc' will search for the 10 best candidates # # my_lincRNAs <- c("ENSG00000197813") # This could also be a vector of ids # res <- justlinc(GTEX_LIVER_CRUDE, targetGenes = my_lincRNAs) # 'justlinc' with one query ## ----include=FALSE, cache=FALSE------------------------------------------ data(BRAIN_EXPR) ## ----fig.width = 13, fig.height = 7.5, warning = FALSE, message = FALSE, eval = TRUE---- # apply the distance method "correlation" instead of "dicedist", the default crbl_cluster_cor <- clusterlinc(crbl_matrix, distMethod = "correlation") plotlinc(crbl_cluster_cor) ## ----fig.width = 13.5, fig.height = 6.5, warning = FALSE, fig.keep = 'last', eval = TRUE---- # add custom names and colors gbm_cluster <- gbm_cluster + feature(customID = "CANCER_GBM", customCol = "red") ctx_cluster <- ctx_cluster + feature(customID = "HEALTHY_CTX", customCol = "blue") hpc_cluster <- hpc_cluster + feature(customID = "HEALTHY_HPC", customCol = "blue") crbl_cluster <- crbl_cluster + feature(customID = "HEALTHY_CRBL", customCol = "blue") # plot the dendrogram norad <- querycluster('647979', queryTitle = 'NORAD', gbm_cluster, # Glioblastoma ctx_cluster, # Cortex hpc_cluster, # Hippocampus crbl_cluster) # Cerebellum neat1 <- querycluster('283131', queryTitle = 'NEAT1', gbm_cluster, ctx_cluster, hpc_cluster, crbl_cluster) grid.arrange(norad, neat1, ncol = 2) ## ----eval = FALSE-------------------------------------------------------- # # STEP 1: get the co-expressed genes # norad <- lapply(list(gbm_cluster, # ctx_cluster, # hpc_cluster, # crbl_cluster), # function(x){ # getcoexpr(x, '647979')}) # # # STEP 2: name the list # names(norad) <- c("CANCER_GBM", # "HEALTHY_CTX", # "HEALTHY_HPC", # "HEALTHY_CRBL") # # # STEP 3: enrichment analysis in 'clusterProfiler' # require(clusterProfiler) # norad_cluster <- compareCluster(norad, # fun = 'enrichGO', # OrgDb = 'org.Hs.eg.db', # ont = 'BP') # plot(norad_cluster) ## ----eval = FALSE-------------------------------------------------------- # # WRAPPER # justlinc() # gene selection, co-expression # # # MAIN FUNCTIONS # linc() # cor. matrix and statistics # clusterlinc() # cluster and cor. test # singlelinc() # single query co-expression # # # PLOTTING FUNCTIONS # plotlinc() # main plotting function # querycluster() # one query in multiple data sets # # # HELPING FUNCTIONS # getcoexpr() # get the co-expressed genes # getbio() # enriched terms for a cluster # object + feature() # level and data labeling # getlinc() # subsetting of 'LINC' objects # changeOrgDb() # change organism # linctable() # write to table ## ----echo=FALSE, fig.width = 8, fig.height = 3.5, eval = TRUE------------ overview_img <- readPNG(system.file("extdata", "overview_img.png", package ="LINC")) overview_plot <- rasterGrob(overview_img, interpolate = TRUE) grid.arrange(overview_plot)