## -----------------------------------------------------------------------------
# Read the GEO series matrix bundled in the airway package's extdata directory
# and derive a per-sample annotation table from its phenotype data.
suppressPackageStartupMessages(library("GEOquery"))
suppressPackageStartupMessages(library("airway"))

dir <- system.file("extdata", package = "airway")
geofile <- file.path(dir, "GSE52778_series_matrix.txt")
gse <- getGEO(filename = geofile)

# Keep only the phenotype columns whose names contain "ch1".
pdata <- pData(gse)[, grepl("ch1", names(pData(gse)))]
# NOTE(review): this relabelling assumes the filter above returns exactly these
# five columns, in this order -- confirm against the installed GEOquery.
names(pdata) <- c("treatment", "tissue", "ercc_mix", "cell", "celltype")

# Strip the "treatment: " and "cell line: " prefixes from the raw GEO strings.
pdataclean <- data.frame(
  treatment = sub("treatment: (.*)", "\\1", pdata$treatment),
  cell = sub("cell line: (.*)", "\\1", pdata$cell),
  row.names = rownames(pdata)
)

# Flag a sample as "trt" when its treatment string mentions the compound.
pdataclean$dex <- ifelse(grepl("Dex", pdataclean$treatment), "trt", "untrt")
pdataclean$albut <- ifelse(grepl("Albut", pdataclean$treatment), "trt", "untrt")
pdataclean$SampleName <- rownames(pdataclean)
# The raw treatment string is no longer needed once the flags exist.
pdataclean$treatment <- NULL

## -----------------------------------------------------------------------------
# Load the SRA run table and keep only the columns used downstream.
srafile <- file.path(dir, "SraRunInfo_SRP033351.csv")
srp <- read.csv(srafile)
srpsmall <- srp[, c(
  "Run", "avgLength", "Experiment", "Sample", "BioSample", "SampleName"
)]

## -----------------------------------------------------------------------------
# Join GEO annotation with SRA run info on SampleName, index rows by run
# accession, and keep only the samples flagged "untrt" for albut.
coldata <- merge(pdataclean, srpsmall, by = "SampleName")
rownames(coldata) <- coldata$Run
coldata <- coldata[coldata$albut == "untrt", ]
coldata$albut <- NULL
coldata

## ----eval=FALSE---------------------------------------------------------------
# write.csv(coldata, file="sample_table.csv")

## ----eval=FALSE---------------------------------------------------------------
# NOTE(review): newer GenomicFeatures releases appear to name this function
# makeTxDbFromBiomart() -- confirm before re-running this chunk.
# library( "GenomicFeatures" )
# txdb <- makeTranscriptDbFromBiomart( biomart="ensembl", dataset="hsapiens_gene_ensembl")
# exonsByGene <- exonsBy( txdb, by="gene" )

## ----eval=FALSE---------------------------------------------------------------
# file.path() joins each of its arguments with "/", so the ".bam" extension is
# pasted onto the run name first; passing it as a separate argument would give
# "aligned/<Run>/.bam". Presumably the BAM files are stored as
# "aligned/<Run>.bam" -- confirm against the alignment output layout.
# sampleTable <- read.csv( "sample_table.csv", row.names=1 )
# fls <- file.path("aligned", paste0(rownames(sampleTable), ".bam"))
# library( "Rsamtools" )
# bamLst <- BamFileList( fls, yieldSize=2000000 )
## ----eval=FALSE---------------------------------------------------------------
# Not run: register 8 multicore workers, then summarize the reads in bamLst
# over the exonsByGene features with summarizeOverlaps().
# library( "BiocParallel" )
# register( MulticoreParam( workers=8 ) )
# library( "GenomicAlignments" )
# airway <- summarizeOverlaps( features=exonsByGene, reads=bamLst,
#                              mode="Union", singleEnd=FALSE,
#                              ignore.strand=TRUE, fragments=TRUE )

## ----eval=FALSE---------------------------------------------------------------
# Not run: attach the sample table as the column data of the result.
# colData(airway) <- DataFrame( sampleTable )

## ----eval=FALSE---------------------------------------------------------------
# Not run: build the experiment metadata from PubMed ID 24926665, store it on
# the object, and save the object to airway.RData.
# library( "annotate" )
# miame <- list(pmid2MIAME("24926665"))
# miame[[1]]@url <- "http://www.ncbi.nlm.nih.gov/pubmed/24926665"
# # because R's CHECK doesn't like non-ASCII characters in data objects
# # or in vignettes. the actual char was used in the first argument
# miame[[1]]@abstract <- gsub("micro","micro",abstract(miame[[1]]))
# miame[[1]]@abstract <- gsub("beta","beta",abstract(miame[[1]]))
# metadata(airway) <- miame
# save(airway, file="airway.RData")

## -----------------------------------------------------------------------------
# Load the packaged airway object and print it, its sample table, a summary of
# the per-sample assay column sums (divided by 1e6), and the metadata attached
# to its row ranges.
library("airway")
data(airway)
airway
as.data.frame(colData(airway))
summary(colSums(assay(airway)) / 1e6)
metadata(rowRanges(airway))

## -----------------------------------------------------------------------------
# Record the R session and package versions.
sessionInfo()