## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ---- eval = FALSE------------------------------------------------------------ # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("BgeeCall") ## ---- message = FALSE, warning = FALSE---------------------------------------- library(BgeeCall) ## ---- eval=FALSE-------------------------------------------------------------- # library("ShortRead") # # keep 48.000 reads # sampler <- FastqSampler(file.path("absolute_path","/SRX099901/SRR350955.fastq.gz"), 48000) # set.seed(1); SRR350955 <- yield(sampler) # writeFastq(object = SRR350955, # file =file.path( "absolute_path","SRX099901_subset", "SRR350955_subset.fastq.gz"), # mode = "w", full = FALSE, compress = TRUE) ## ---- message = FALSE, warning = FALSE---------------------------------------- ah <- AnnotationHub::AnnotationHub() ah_resources <- AnnotationHub::query(ah, c("Ensembl", "Caenorhabditis elegans", "84")) annotation_object <- ah_resources[["AH50789"]] # filter on MtDNA because annotation of C. elegans from AnnotationHub contain info from 2 genomes. # Chromosomes are associated to C. elegans and MtDNA are associated to NA. This cause a bug when running # makeTxDbFromGRanges function from the GenomicFeatures package. annotation_object <- dropSeqlevels(annotation_object, "MtDNA", "coarse") transcriptome_object <- rtracklayer::import.2bit(ah_resources[["AH50453"]]) ## ---- message = FALSE, warning = FALSE---------------------------------------- # create an object of class UserMetadata and specify the species ID user_BgeeCall <- new("UserMetadata", species_id = "6239") # import annotation and transcriptome in the user_BgeeCall object # it is possible to import them using an S4 object (GRanges, DNAStringSet) or a file (gtf, fasta) user_BgeeCall <- setAnnotationFromObject(user_BgeeCall, annotation_object, "WBcel235_84") user_BgeeCall <- setTranscriptomeFromObject(user_BgeeCall, transcriptome_object, "WBcel235") # provide path to the directory of your RNA-Seq library user_BgeeCall <- setRNASeqLibPath(user_BgeeCall, system.file("extdata", "SRX099901_subset", package = "BgeeCall")) ## ---- eval = FALSE------------------------------------------------------------ # calls_output <- generate_calls_workflow(userMetadata = user_BgeeCall) ## ---- echo=FALSE-------------------------------------------------------------- user_BgeeCall <- setWorkingPath(user_BgeeCall, system.file("extdata", package = "BgeeCall")) user_BgeeCall<- setSimpleArborescence(user_BgeeCall, TRUE) calls_output <- generate_presence_absence(myUserMetadata = user_BgeeCall) ## ---- message = FALSE, warning = FALSE---------------------------------------- head(read.table(calls_output$calls_tsv_path, header = TRUE), n = 5) ## ---- message = FALSE, warning = FALSE---------------------------------------- read.table(calls_output$cutoff_info_file_path) ## ---- message = FALSE, warning = FALSE---------------------------------------- head(read.table(calls_output$abundance_tsv, header = TRUE), n = 5) ## ---- eval = FALSE------------------------------------------------------------ # openPDF(calls_output$TPM_distribution_path) ## ---- message = FALSE, warning = FALSE---------------------------------------- read.table(calls_output$S4_slots_summary, header = TRUE, sep = "\t") ## ---- eval=FALSE-------------------------------------------------------------- # calls_output <- generate_calls_workflow(userFile = "path_to_your_file.tsv") ## ---- eval = FALSE------------------------------------------------------------ # # generate kallisto indexes # generate_slurm_indexes(userFile = "path_to_your_file.tsv") # # generate expression calls # generate_slurm_calls(userFile = "path_to_your_file.tsv") ## ----------------------------------------------------------------------------- list_intergenic_release() ## ----------------------------------------------------------------------------- # create BgeeMetadata object and define one reference intergenic release bgee <- new("BgeeMetadata", intergenic_release = "0.1") # change the reference intergenic release of your BgeeMetadata object bgee <- setIntergenicRelease(bgee, "0.2") ## ----------------------------------------------------------------------------- list_bgee_ref_intergenic_species(myBgeeMetadata = bgee) ## ----------------------------------------------------------------------------- list_community_ref_intergenic_species() ## ---- eval=FALSE-------------------------------------------------------------- # # create a BgeeMetadata object using the community release # bgee <- new("BgeeMetadata", intergenic_release = "community") # calls_output <- generate_calls_workflow(bgeeMetadata = bgee, userMetadata = user_BgeeCall) ## ---- eval=FALSE-------------------------------------------------------------- # bgee <- new("BgeeMetadata", intergenic_release = "custom", custom_intergenic_path = "path/to/custom/ref_intergenic.fa.gz") # calls_output <- generate_calls_workflow(bgeeMetadata = bgee, userMetadata = user_BgeeCall) ## ---- eval=FALSE-------------------------------------------------------------- # kallisto <- new("KallistoMetadata", txOut = TRUE) # calls_output <- generate_calls_workflow(myAbundanceMetadata = kallisto, userMetadata = user_BgeeCall) ## ---- eval=FALSE-------------------------------------------------------------- # kallisto <- new("KallistoMetadata", download_kallisto = TRUE) # calls_output <- generate_calls_workflow(myAbundanceMetadata = kallisto, userMetadata = user_BgeeCall) ## ---- eval=FALSE-------------------------------------------------------------- # kallisto <- new("KallistoMetadata", single_end_parameters = "-t 3 --single -l 150 -s 30", pair_end_parameters = "-t 2 -b --seed 36") # calls_output <- generate_calls_workflow(myAbundanceMetadata = kallisto, userMetadata = user_BgeeCall) ## ---- eval=FALSE-------------------------------------------------------------- # # libraries with reads smaller than 70bp will use the index with kmer size = 15 # kallisto <- new("KallistoMetadata", read_size_kmer_threshold = 70) # calls_output <- generate_calls_workflow(myAbundanceMetadata = kallisto, userMetadata = user_BgeeCall) ## ---- eval=FALSE-------------------------------------------------------------- # # RNA-Seq run SRR350955_subsetof from the RNA-Seq library will be used to generate the calls # user_BgeeCall <- setRunIds(user_BgeeCall, c("SRR350955_subset")) # calls_output <- run_from_object(myUserMetadata = user_BgeeCall) ## ----------------------------------------------------------------------------- kallisto <- new("KallistoMetadata", cutoff = 0.1) ## ----------------------------------------------------------------------------- user_BgeeCall@verbose <- FALSE ## ---- eval=FALSE-------------------------------------------------------------- # user_BgeeCall <- setSimpleArborescence(userObject = user_BgeeCall, simpleArborescence = FALSE) # calls_output <- run_from_object(myUserMetadata = user_BgeeCall) ## ----------------------------------------------------------------------------- user_BgeeCall <- setOutputDir(user_BgeeCall, "path/to/calls/for/this/library/") ## ---- eval=FALSE-------------------------------------------------------------- # # run 50 jobs in parallel # generate_slurm_indexes(userFile = "path/to/file.tsv", nodes = 50) ## ---- eval=FALSE-------------------------------------------------------------- # # create temporary files but do not submit the jobs # generate_slurm_indexes(userFile = "path/to/file.tsv", submit = FALSE) ## ---- eval=FALSE-------------------------------------------------------------- # # add slurm options to the sbatch script # slurm_options_index <- list(account = "account", time = "2:00:00", partition = "partition", mem = "30G") # generate_slurm_indexes(userFile = "path/to/file.tsv", slurm_options = slurm_options_index) ## ---- eval=FALSE-------------------------------------------------------------- # # load R 3.6.1 and kallisto in a cluster environment where software has to loaded manually # modules <- c("module add R/3.6.1;", "module add kallisto;") # generate_slurm_indexes(userFile = "path/to/file.tsv", modules = modules) ## ---- eval=FALSE-------------------------------------------------------------- # # create BgeeCall objects and use them to generate indexes # kallistoMetadata <- new("KallistoMetadata", download_kallisto=TRUE) # userMetadata <- new("UserMetadata", working_path = "/path/to/working/dir") # bgeeMetadata <- new("BgeeMetadata", intergenic_release = "0.1") # generate_slurm_indexes(userFile = "path/to/file.tsv", kallistoMetadata = kallistoMetadata, bgeeMetadata = bgeeMetadata, userMetadata = userMetadata) ## ----sessioninfo-------------------------------------------------------------- sessionInfo() ## ----cleanup_after, echo=FALSE, message=FALSE, warning=FALSE------------------ unlink(BgeeCall:::get_kallisto_dir_path(kallisto, user_BgeeCall), recursive = TRUE) unlink(file.path(getWorkingPath(user_BgeeCall), paste0(getIntergenicPrefix(bgee), "*")), recursive = TRUE)