## ---- results='hide', message=FALSE-------------------------------------- #source("http://bioconductor.org/biocLite.R") library(GenomicRanges) library(AnnotationHub) library(pd.genomewidesnp.6) library(rtracklayer) library(biovizBase) #needed for stain information library(CINdex) library(IRanges) ## ----seg----------------------------------------------------------------- #Load example segment data into the workspace. data("grl.data") #Examining the class of the object - GRangesList class(grl.data) #Print first few rows head(grl.data) #The names of the list items in the GRangesList names(grl.data) # NOTE - The names of the list items in 'grl.data' must match the # sample names in the clinical data input 'clin.crc' #Extracting segment information for the sample named "s4" shown as a GRanges object grl.data[["s4"]] #You can see that each row in the GRanges object is a segment. The "value" columns shows the #copy number value for that segment. ## ----probeAnnotHg19, cache=TRUE------------------------------------------ #connect to the underlying SQLite database that is part of the pd.genomewidesnp.6 package con <- db(pd.genomewidesnp.6) # get the copy number probes cnv <- dbGetQuery(con, "select man_fsetid, chrom, chrom_start, chrom_stop, strand from featureSetCNV;") head(cnv, n =3) #print first few rows #get the SNP probes snp <- dbGetQuery(con, "select man_fsetid, dbsnp_rs_id, chrom, physical_pos, strand from featureSet;") head(snp, n=3) ## ---- cache=TRUE--------------------------------------------------------- #function to convert Copy number data into GRanges object convert.to.gr.cnv <- function(cnv2) { cnv2$chrom <- paste0("chr", cnv2$chrom) # subset out SNPs with missing location cnv2 <- cnv2[!(is.na(cnv2$chrom) | is.na(cnv2$chrom_start)),] # convert strand info to +,-,* cnv2$strand[is.na(cnv2$strand)] <- 2 cnv2$strand <- cnv2$strand + 1 cnv2$strand <- c("+", "-", "*")[cnv2$strand] #convert into GRanges object cnv2.gr <- GRanges(cnv2$chrom, IRanges(cnv2$chrom_start,cnv2$chrom_stop), cnv2$strand, ID = cnv2$man_fsetid) return(cnv2.gr) } #function to convert SNP data into GRanges object convert.to.gr.snp <- function(snp2) { # make chromosomes the same as a chain file snp2$chrom <- paste0("chr", snp2$chrom) # subset out SNPs with missing location snp2 <- snp2[!(is.na(snp2$chrom) | is.na(snp2$physical_pos)),] # convert strand info to +,-,* snp2$strand[is.na(snp2$strand)] <- 2 snp2$strand <- snp2$strand + 1 snp2$strand <- c("+", "-", "*")[snp2$strand] snp2.gr <- GRanges(snp2$chrom, IRanges(snp2$physical_pos,snp2$physical_pos), snp2$strand, ID = snp2$man_fsetid, dbsnp = snp2$dbsnp_rs_id) return(snp2.gr) } # convert this copy number data from into a GRanges object cnv.gr <- convert.to.gr.cnv(cnv2 = cnv) head(cnv.gr, n=3) # convert this SNP data from into a GRanges object snp.gr <- convert.to.gr.snp(snp2 = snp) head(snp.gr, n=3) ## ---- cache=TRUE--------------------------------------------------------- #subset only those probes that are in autosomes snpgr.19.auto <- subset(snp.gr, seqnames(snp.gr) %in% c("chr1", "chr2", "chr3","chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18","chr19", "chr20", "chr21", "chr22")) #subset only those probes that are in autosomes cnvgr.19.auto <- subset(cnv.gr, seqnames(cnv.gr) %in% c("chr1", "chr2", "chr3","chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18","chr19", "chr20", "chr21", "chr22")) #This gives a total of 1756096 probes (copy number and SNP) on this Affymetrix chip ## ----probeAnnotHg18, cache=TRUE------------------------------------------ con <- db(pd.genomewidesnp.6) cnv2 <- dbGetQuery(con, "select man_fsetid, chrom, chrom_start, chrom_stop, strand from featureSetCNV;") snp2 <- dbGetQuery(con, "select man_fsetid, dbsnp_rs_id, chrom, physical_pos, strand from featureSet;") ## ---- cache=TRUE--------------------------------------------------------- # convert this copy number data into a GRanges object cnv2gr <- convert.to.gr.cnv(cnv2 = cnv2) head(cnv2gr, n=3) # convert this SNP data into a GRanges object snp2gr <- convert.to.gr.snp(snp2 = snp2) head(snp2gr, n=3) ## ---- eval=FALSE--------------------------------------------------------- # download.file("http://hgdownload.cse.ucsc.edu/gbdb/hg19/liftOver/hg19ToHg18.over.chain.gz", # "hg19ToHg18.over.chain.gz") # system("gzip -d hg19ToHg18.over.chain.gz") ## have to decompress ## ---- cache=TRUE--------------------------------------------------------- ## now use liftOver from rtracklayer, using the hg19ToHg18.over.chain from UCSC chain <- import.chain("hg19ToHg18.over.chain") snpgr.18 <- unlist(liftOver(snp2gr, chain)) head(snpgr.18, n=3) cnvgr.18 <- unlist(liftOver(cnv2gr, chain)) head(cnvgr.18, n=3) #subset only those probes that are in autosomes snpgr.18.auto <- subset(snpgr.18, seqnames(snpgr.18) %in% c("chr1", "chr2", "chr3","chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18","chr19", "chr20", "chr21", "chr22")) #subset only those probes that are in autosomes cnvgr.18.auto <- subset(cnvgr.18, seqnames(cnvgr.18) %in% c("chr1", "chr2", "chr3","chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18","chr19", "chr20", "chr21", "chr22")) #This gives us a total of 1756029 probes (about 1.7 million) #Save these objects for future #save(cnvgr.18.auto, file = "cnvgr.18.auto.RData") #save(snpgr.18.auto, file = "snpgr.18.auto.RData") ## ----refGen, message=FALSE, warning=FALSE-------------------------------- hg18.ucsctrack <- getIdeogram("hg18", cytoband = TRUE) head(hg18.ucsctrack, n=3) #The user must ensure that the input object is a GRanges object #Save this object for future use #save(hg18.ucsctrack, file = "hg18.ucsctrack.RData") #NOTE - To get the reference file for hg19, use the code below #hg19IdeogramCyto <- getIdeogram("hg19", cytoband = TRUE) ## ----Clin---------------------------------------------------------------- data("clin.crc") # checking the class of the object class(clin.crc) # checking the structure str(clin.crc) #Let us examine the first five rows of this object head(clin.crc,5) # Look at sample names clin.crc[,1] ## ----results='hide', message=FALSE--------------------------------------- ##biocLite("TxDb.Hsapiens.UCSC.hg18.knownGene") ##biocLite("Homo.sapiens") library(TxDb.Hsapiens.UCSC.hg18.knownGene) library(Homo.sapiens) ## ----GeneAnn------------------------------------------------------------- # We will continue to use hg18 gene annotations in this tutorial. TxDb(Homo.sapiens) <- TxDb.Hsapiens.UCSC.hg18.knownGene z <- select(Homo.sapiens, keys(Homo.sapiens, "ENTREZID"), c("CDSID","CDSCHROM","CDSSTRAND","CDSSTART","CDSEND","SYMBOL"), "ENTREZID") z1 <- na.omit(object = z) #remove NA values # extracting only the columns we want as a matrix geneAnno <- cbind(z1$CDSCHROM, z1$CDSSTRAND, z1$CDSSTART, z1$CDSEND, z1$SYMBOL) colnames(geneAnno) <- c("chrom","strand", "cdsStart", "cdsEnd", "GeneName") #So this gene annotation file looks like this head(geneAnno, n=3) # Examining the class and structure of this oject class(geneAnno) str(geneAnno) #Save this object for future use #save(geneAnno, file = "geneAnno.RData")