InterMineR constitutes an R package that interacts with InterMine, a data warehouse framework, which provides the ability to access, retrieve and analyze rapidly a variety of biological data (Smith et al. 2012; Kalderimis et al. 2014).
In this tutorial we will use the functionality of the InterMineR package to retrieve various information about the genomic regions that contain the Drosophila melanogaster genes zen, eve and r.
Then, we will visualize this information using the Gviz package (Hahne and Ivanek 2016).
All query results and calculations are correct according to FlyMine release 46.1 (November 2018). Genomic coordinates and p-values are likely to change between database releases but the methods will remain the same.
# load packages
library(InterMineR)
library(Gviz)
# load FlyMine and HumanMine
im.fly = initInterMine(listMines()["FlyMine"])
# load templates
templates.fly = getTemplates(im.fly)
# load data models
# model.fly = getModel(im.fly) # temporarily removed
First, we will create a new query to retrieve gene identifiers and genomic information for zen, eve and r.
There are two ways to build a query in InterMineR. We can either build a query as a list object with newQuery
function, and assign all input values (selection of retrieved data type, constraints, etc.) as items of that list:
# Build new query to retrieve information for zen, eve, and r Drosophila genes
# Get Gene types from FlyMine model
# head(subset(model.fly, type == "Gene"), 3) # temporarily removed
gene_info.list = as.list(1:3)
for(i in 1:3){
gene = c("zen", "eve", "r")[i]
# define new query
queryGeneIds = newQuery()
queryGeneIds
# set name
queryGeneIds$name = "Gene identifiers"
# set columns
queryGeneIds$select = c(
"Gene.primaryIdentifier",
"Gene.secondaryIdentifier",
"Gene.symbol",
"Gene.id",
"Gene.chromosome.primaryIdentifier",
"Gene.chromosomeLocation.start",
"Gene.chromosomeLocation.end",
"Gene.chromosomeLocation.strand"
)
# set sort order
queryGeneIds$orderBy = list(
c(Gene.secondaryIdentifier = "ASC")
)
# set constraints
newConstraint1 = list(
path = "Gene",
op = "LOOKUP",
value = gene,
code = "A"
)
queryGeneIds$where = list(newConstraint1)
# run query and store results
gene_info.list[[i]] = runQuery(im = im.fly, qry = queryGeneIds)
}
# concatenate to data.frame
gene_info.list = do.call(rbind, gene_info.list)
# print dimensions
print(dim(gene_info.list))
## [1] 3 8
Or we can build the query as an InterMineR-class
object with the functions setConstraint
, which allows us to generate a new or modify an existing list of constraints, and setQuery
, which generates the query as a InterMineR-class
object:
# set constraints
constraints = setConstraints(
paths = "Gene" ,
operators = "LOOKUP",
values = list(c("zen", "eve", "r"))
)
# define new query
queryGeneIds = setQuery(
select = c(
"Gene.primaryIdentifier",
"Gene.secondaryIdentifier",
"Gene.symbol",
"Gene.id",
"Gene.chromosome.primaryIdentifier",
"Gene.chromosomeLocation.start",
"Gene.chromosomeLocation.end",
"Gene.chromosomeLocation.strand"
),
where = constraints
)
# run query and store results
gene_info = runQuery(im = im.fly,
qry = queryGeneIds)
# print dimensions
print(dim(gene_info))
## [1] 3 8
As it is demonstrated below, setConstraints
and setQuery
functions are designed to facilitate the generation of queries for InterMine instances and avoid using multiple iterative loops, especially when it is required to include multiple constraints or constraint values (e.g. genes, organisms) in your query.
# compare the results from both type of queries
all(gene_info == gene_info.list)
## [1] TRUE
gene_info
## Gene.primaryIdentifier Gene.secondaryIdentifier Gene.symbol Gene.id
## 1 FBgn0004053 CG1046 zen 1007747
## 2 FBgn0000606 CG2328 eve 1007235
## 3 FBgn0003189 CG18572 r 1048944
## Gene.chromosome.primaryIdentifier Gene.chromosomeLocation.start
## 1 3R 6752864
## 2 2R 9979319
## 3 X 16655359
## Gene.chromosomeLocation.end Gene.chromosomeLocation.strand
## 1 6754194 -1
## 2 9980795 1
## 3 16668712 1
Now that we have the identifiers and the genomic information for zen, eve and r (Genes of Interest, GOIs), we can use Template Queries to retrieve information about:
# Retrieve Template Queries:
# Use Gene_ExonLocation2 template query to get the exons for the genes of interest
queryExons = getTemplateQuery(
im.fly,
name = "Gene_ExonLocation2")
# Use Gene_AdjacentGenes template query to get the adjacent genes
queryGeneAdjacentGenes = getTemplateQuery(
im = im.fly,
name = "Gene_AdjacentGenesLocations"
)
# Use Gene_OverlapppingGenes template query to get the overlapping genes
queryGeneOverlapppingGenes = getTemplateQuery(
im.fly,
name = "Gene_OverlapppingGenes"
)
Having retrieved the necessary template queries, we iterate through our GOIs and retrieve the appropriate gene identifiers for each query.
These identifiers will be used to modify the constraints of each query before we run it.
for(i in 1:3){
gene = c("zen", "eve", "r")[i]
# 1. Gene_ExonLocation2
# set Gene.secondaryIdentifier value
queryExons$where[[1]]$value = gene_info[which(gene_info$Gene.symbol == gene),2]
# or alternatively use setConstraints function
queryExons$where = setConstraints(
modifyQueryConstraints = queryExons,
m.index = 1,
values = list(gene_info[which(gene_info$Gene.symbol == gene),2])
)
# run query and save results
assign(
x = paste0(gene,".Exons"),
value = runQuery(im.fly, queryExons)
)
# 2. Gene_AdjacentGenes
# change the value of the third constraint of the Gene_AdjacentGenes query with the
# Gene.secondaryIdentifier of the genes of interest
queryGeneAdjacentGenes$where[[3]]$value = gene_info$Gene.secondaryIdentifier[i]
# or alternatively use setConstraints function
queryGeneAdjacentGenes$where = setConstraints(
modifyQueryConstraints = queryGeneAdjacentGenes,
m.index = 3,
values = list(gene_info$Gene.secondaryIdentifier[i])
)
# assign the adjacent gene information to each gene of interest
assign(x = paste0(gene_info$Gene.symbol[i], "_AdjacentGenes"),
value = runQuery(im.fly, queryGeneAdjacentGenes))
if(is.null(get(paste0(gene_info$Gene.symbol[i], "_AdjacentGenes")))){
print(paste0(gene_info$Gene.symbol[i], " query returns no adjacent genes"))
}
# 3. Gene_OverlapppingGenes
queryGeneOverlapppingGenes$where[[2]]$value = gene_info$Gene.secondaryIdentifier[i]
# or alternatively use setConstraints function
queryGeneOverlapppingGenes$where = setConstraints(
modifyQueryConstraints = queryGeneOverlapppingGenes,
m.index = 2,
values = list(gene_info$Gene.secondaryIdentifier[i])
)
assign(x = paste0(gene_info$Gene.symbol[i], "_OverlappingGenes"),
value = runQuery(im.fly, queryGeneOverlapppingGenes))
if(is.null(get(paste0(gene_info$Gene.symbol[i], "_OverlappingGenes")))){
print(paste0(gene_info$Gene.symbol[i], " query returns no overlapping genes"))
}
}
## [1] "zen query returns no overlapping genes"
## [1] "eve query returns no adjacent genes"
# show adjacent genes
head(zen_AdjacentGenes, 3)
## Gene.secondaryIdentifier Gene.symbol Gene.chromosome.primaryIdentifier
## 1 CG1046 zen 3R
## Gene.chromosomeLocation.start Gene.chromosomeLocation.end
## 1 6752864 6754194
## Gene.chromosomeLocation.strand
## 1 -1
## Gene.downstreamIntergenicRegion.adjacentGenes.primaryIdentifier
## 1 FBgn0085326
## Gene.downstreamIntergenicRegion.adjacentGenes.symbol
## 1 CG34297
## Gene.downstreamIntergenicRegion.adjacentGenes.chromosomeLocation.start
## 1 6747068
## Gene.downstreamIntergenicRegion.adjacentGenes.chromosomeLocation.end
## 1 6748007
## Gene.downstreamIntergenicRegion.adjacentGenes.chromosomeLocation.strand
## 1 -1
## Gene.upstreamIntergenicRegion.adjacentGenes.primaryIdentifier
## 1 FBgn0000166
## Gene.upstreamIntergenicRegion.adjacentGenes.symbol
## 1 bcd
## Gene.upstreamIntergenicRegion.adjacentGenes.chromosomeLocation.start
## 1 6755842
## Gene.upstreamIntergenicRegion.adjacentGenes.chromosomeLocation.end
## 1 6759466
## Gene.upstreamIntergenicRegion.adjacentGenes.chromosomeLocation.strand
## 1 -1
head(r_AdjacentGenes, 3)
## Gene.secondaryIdentifier Gene.symbol Gene.chromosome.primaryIdentifier
## 1 CG18572 r X
## Gene.chromosomeLocation.start Gene.chromosomeLocation.end
## 1 16655359 16668712
## Gene.chromosomeLocation.strand
## 1 1
## Gene.downstreamIntergenicRegion.adjacentGenes.primaryIdentifier
## 1 FBgn0030769
## Gene.downstreamIntergenicRegion.adjacentGenes.symbol
## 1 CG13012
## Gene.downstreamIntergenicRegion.adjacentGenes.chromosomeLocation.start
## 1 16668818
## Gene.downstreamIntergenicRegion.adjacentGenes.chromosomeLocation.end
## 1 16676753
## Gene.downstreamIntergenicRegion.adjacentGenes.chromosomeLocation.strand
## 1 -1
## Gene.upstreamIntergenicRegion.adjacentGenes.primaryIdentifier
## 1 FBgn0030768
## Gene.upstreamIntergenicRegion.adjacentGenes.symbol
## 1 CG9723
## Gene.upstreamIntergenicRegion.adjacentGenes.chromosomeLocation.start
## 1 16653220
## Gene.upstreamIntergenicRegion.adjacentGenes.chromosomeLocation.end
## 1 16655015
## Gene.upstreamIntergenicRegion.adjacentGenes.chromosomeLocation.strand
## 1 -1
# show overlapping genes
head(eve_OverlappingGenes, 3)
## Gene.secondaryIdentifier Gene.symbol Gene.chromosome.primaryIdentifier
## 1 CG2328 eve 2R
## Gene.chromosomeLocation.start Gene.chromosomeLocation.end
## 1 9979319 9980795
## Gene.chromosomeLocation.strand Gene.overlappingFeatures.primaryIdentifier
## 1 1 FBgn0264599
## Gene.overlappingFeatures.symbol
## 1 asRNA:CR43948
## Gene.overlappingFeatures.chromosomeLocation.start
## 1 9976996
## Gene.overlappingFeatures.chromosomeLocation.end
## 1 9979584
## Gene.overlappingFeatures.chromosomeLocation.strand
## 1 -1
head(r_OverlappingGenes, 3)
## Gene.secondaryIdentifier Gene.symbol Gene.chromosome.primaryIdentifier
## 1 CG18572 r X
## Gene.chromosomeLocation.start Gene.chromosomeLocation.end
## 1 16655359 16668712
## Gene.chromosomeLocation.strand Gene.overlappingFeatures.primaryIdentifier
## 1 1 FBgn0015336
## Gene.overlappingFeatures.symbol
## 1 CG15865
## Gene.overlappingFeatures.chromosomeLocation.start
## 1 16660898
## Gene.overlappingFeatures.chromosomeLocation.end
## 1 16664455
## Gene.overlappingFeatures.chromosomeLocation.strand
## 1 -1
Only the queries about the r gene return both adjacent and overlapping genes.
The queries for genes zen and eve return only the adjacent and the overlapping genes respectively.
It is time to define the Genomic Regions of Interest (ROIs) for which we will retrieve even more features.
To do so we will add 1000 bases in both sides of GOIs, taking also into consideration the start and the end of the adjacent and/or the overlapping genes.
# get genomic region of zen, eve, r and their respective
# adjacent and/or overlapping genes.
# Add 1000 bp on both sides of this region!
# chromosome region of interest (ROI) containing zen and adjacent genes
zen.ROI.start = min(
as.numeric(zen_AdjacentGenes[,grep("start", colnames(zen_AdjacentGenes))])
) - 1000
zen.ROI.end = max(
as.numeric(zen_AdjacentGenes[,grep("end", colnames(zen_AdjacentGenes))])
) + 1000
# chromosome region of interest (ROI) containing eve and overlapping genes
eve.ROI.start = min(
as.numeric(eve_OverlappingGenes[,grep("start", colnames(eve_OverlappingGenes))])
) - 1000
eve.ROI.end = max(
as.numeric(eve_OverlappingGenes[,grep("end", colnames(eve_OverlappingGenes))])
) + 1000
# chromosome region of interest (ROI) containing r, adjacent and overlapping genes
r.ROI.start = min(
as.numeric(
c(r_OverlappingGenes[,grep("start", colnames(r_OverlappingGenes))],
r_AdjacentGenes[,grep("start", colnames(r_AdjacentGenes))])
)
) - 1000
r.ROI.end = max(
as.numeric(
c(r_OverlappingGenes[,grep("end", colnames(r_OverlappingGenes))],
r_AdjacentGenes[,grep("end", colnames(r_AdjacentGenes))])
)
) + 1000
With our ROIs well-defined, it is time to retrieve specifically:
Template queries ChromLocation_TFBindingSiteLocationGeneFactor and ChromLocation_RegulatoryRegion will be used for this purpose.
# find all transcription factor (TF) binding sites within the ROIs
# by using the ChromLocation_TFBindingSiteLocationGeneFactor template query
queryTFBindingSites = getTemplateQuery(
im.fly,
"ChromLocation_TFBindingSiteLocationGeneFactor"
)
# find all Regulatory Regions (RRs) within the ROIs
# by using the ChromLocation_RegulatoryRegion template query
queryRRLocations = getTemplateQuery(
im.fly, "ChromLocation_RegulatoryRegion"
)
As before, the appropriate gene identifiers will be used to modify the constraints of our queries before we run them.
for(i in 1:3){
gene = c("zen", "eve", "r")[i]
# 1. ChromLocation_TFBindingSiteLocationGeneFactor
# set chromosome value
queryTFBindingSites$where[[3]]$value = gene_info[which(gene_info$Gene.symbol == gene),5]
# set location start
queryTFBindingSites$where[[4]]$value = as.character(get(paste0(gene,".ROI.start")))
# set location end
queryTFBindingSites$where[[5]]$value = as.character(get(paste0(gene,".ROI.end")))
# or alternatively use setConstraints function
queryTFBindingSites$where = setConstraints(
modifyQueryConstraints = queryTFBindingSites,
m.index = 3:5,
values = list(
# set chromosome value
gene_info[which(gene_info$Gene.symbol == gene),5],
# set location start
as.character(get(paste0(gene,".ROI.start"))),
# set location end
as.character(get(paste0(gene,".ROI.end")))
)
)
# run query and save results
assign(
x = paste0(gene, ".ROI.TFBindingSites"),
value = runQuery(im.fly, queryTFBindingSites)
)
if(is.null(get(paste0(gene, ".ROI.TFBindingSites")))){
print(paste0(gene, " ROI query returns no TF binding sites from REDfly database"))
}
# 2. ChromLocation_RegulatoryRegion
# set chromosome value
queryRRLocations$where[[1]]$value = gene_info[which(gene_info$Gene.symbol == gene),5]
# set location start
queryRRLocations$where[[2]]$value = as.character(get(paste0(gene,".ROI.start")))
# set location end
queryRRLocations$where[[3]]$value = as.character(get(paste0(gene,".ROI.end")))
# or alternatively use setConstraints function
queryRRLocations$where = setConstraints(
modifyQueryConstraints = queryRRLocations,
m.index = 1:3,
values = list(
# set chromosome value
gene_info[which(gene_info$Gene.symbol == gene),5],
# set location start
as.character(get(paste0(gene,".ROI.start"))),
# set location end
as.character(get(paste0(gene,".ROI.end")))
)
)
# run query and save results
assign(
x = paste0(gene, ".ROI.RRLocations"),
value = runQuery(im.fly, queryRRLocations)
)
if(is.null(get(paste0(gene, ".ROI.RRLocations")))){
print(paste0(gene, " ROI query returns no RRs"))
}
}
## [1] "r ROI query returns no TF binding sites from REDfly database"
## [1] "r ROI query returns no RRs"
head(zen.ROI.TFBindingSites, 3)
## TFBindingSite.chromosome.primaryIdentifier
## 1 3R
## 2 3R
## 3 3R
## TFBindingSite.chromosomeLocation.start
## 1 6754236
## 2 6754261
## 3 6754323
## TFBindingSite.chromosomeLocation.end TFBindingSite.primaryIdentifier
## 1 6754248 TF000822
## 2 6754273 TF000823
## 3 6754350 TF000824
## TFBindingSite.gene.primaryIdentifier TFBindingSite.gene.name
## 1 FBgn0004053 zerknullt
## 2 FBgn0004053 zerknullt
## 3 FBgn0004053 zerknullt
## TFBindingSite.factor.primaryIdentifier
## 1 FBgn0031436
## 2 FBgn0031436
## 3 FBgn0031436
## TFBindingSite.factor.name
## 1 NADH dehydrogenase (ubiquinone) B17.2 subunit
## 2 NADH dehydrogenase (ubiquinone) B17.2 subunit
## 3 NADH dehydrogenase (ubiquinone) B17.2 subunit
## TFBindingSite.gene.regulatoryRegions.dataSets.dataSource.name
## 1 REDfly
## 2 REDfly
## 3 REDfly
head(eve.ROI.TFBindingSites, 3)
## TFBindingSite.chromosome.primaryIdentifier
## 1 2R
## 2 2R
## 3 2R
## TFBindingSite.chromosomeLocation.start
## 1 9975998
## 2 9977715
## 3 9977725
## TFBindingSite.chromosomeLocation.end TFBindingSite.primaryIdentifier
## 1 9976007 TF000353
## 2 9977724 TF000354
## 3 9978295 TF002028
## TFBindingSite.gene.primaryIdentifier TFBindingSite.gene.name
## 1 FBgn0000606 even skipped
## 2 FBgn0000606 even skipped
## 3 FBgn0000606 even skipped
## TFBindingSite.factor.primaryIdentifier TFBindingSite.factor.name
## 1 FBgn0001180 hunchback
## 2 FBgn0001325 Kruppel
## 3 FBgn0086134 Proteasome alpha2 subunit
## TFBindingSite.gene.regulatoryRegions.dataSets.dataSource.name
## 1 REDfly
## 2 REDfly
## 3 REDfly
head(r.ROI.TFBindingSites, 3)
## NULL
head(zen.ROI.RRLocations, 3)
## RegulatoryRegion.chromosome.primaryIdentifier
## 1 3R
## 2 3R
## 3 3R
## RegulatoryRegion.chromosomeLocation.start
## 1 6749892
## 2 6754137
## 3 6754137
## RegulatoryRegion.chromosomeLocation.end RegulatoryRegion.primaryIdentifier
## 1 6752933 Unspecified_VT37508
## 2 6754240 FBsf0000435771
## 3 6754240 zen_0.04
## RegulatoryRegion.dataSets.name
## 1 REDfly Drosophila transcriptional cis-regulatory modules
## 2 FlyBase data set for Drosophila melanogaster
## 3 REDfly Drosophila transcriptional cis-regulatory modules
head(eve.ROI.RRLocations, 3)
## RegulatoryRegion.chromosome.primaryIdentifier
## 1 2R
## 2 2R
## 3 2R
## RegulatoryRegion.chromosomeLocation.start
## 1 9975998
## 2 9976181
## 3 9976184
## RegulatoryRegion.chromosomeLocation.end RegulatoryRegion.primaryIdentifier
## 1 9976007 TF000353
## 2 9979300 eve_GMR9E02
## 3 9977089 eve_3130
## RegulatoryRegion.dataSets.name
## 1 REDfly Drosophila transcription factor binding sites
## 2 REDfly Drosophila transcriptional cis-regulatory modules
## 3 REDfly Drosophila transcriptional cis-regulatory modules
head(r.ROI.RRLocations, 3)
## NULL
Finally, the Gviz::UcscTrack function allows us to retrieve extra information about the CpG islands, the Conservation score, and GC content of our ROIs.
for(i in 1:3){
gene = c("zen", "eve", "r")[i]
# set chromosome value
chrom = paste0("chr",gene_info[which(gene_info$Gene.symbol == gene),5])
# set the beginning and the end of the ROI
gene.start = get(paste0(gene, ".ROI.start"))
gene.end = get(paste0(gene, ".ROI.end"))
# get CpG islands
assign(
x = paste0(gene, ".ROI.cpgIslands"),
value = UcscTrack(genome = "dm6", chromosome = chrom,
track = "cpgIslandExt",
from = gene.start,
to = gene.end,
trackType = "AnnotationTrack",
start = "chromStart",
end = "chromEnd",
id = "name",
shape = "box",
fill = "lightgreen",
name = "CpGs")
)
# get Conservation
assign(
x = paste0(gene, ".ROI.conservation"),
value = UcscTrack(genome = "dm6", chromosome = chrom,
track = "Conservation",
table = "phyloP27way",
from = gene.start,
to = gene.end,
trackType = "DataTrack",
start = "start",
end = "end",
data = "score",
type = "hist",
window = "auto",
col.histogram = "darkblue",
fill.histogram = "darkblue",
name = "Cons")
)
# get GC Percent
assign(
x = paste0(gene, ".ROI.gcContent"),
value = UcscTrack(genome = "dm6", chromosome = chrom,
track = "GC Percent",
table = "gc5BaseBw",
from = gene.start,
to = gene.end,
trackType = "DataTrack", start = "start",
end = "end", data = "score", type = "hist",
window = "auto",
windowSize = 1500, fill.histogram = "#800080",
col.histogram = "#800080",
name = "GC%")
)
}
At this point, we are ready visualize all the information that we retrieved for the ROIs using Gviz package.
# Plot all features for the genes of interest
axTrack <- GenomeAxisTrack()
# zen gene
idxTrack <- IdeogramTrack(genome = "dm6", chromosome = "chr3R")
# get Exons for zen adjacent genes
queryExons$where[[1]]$value = zen_AdjacentGenes$Gene.downstreamIntergenicRegion.adjacentGenes.symbol
zen.down.Exons = runQuery(im.fly, queryExons)
queryExons$where[[1]]$value = zen_AdjacentGenes$Gene.upstreamIntergenicRegion.adjacentGenes.symbol
zen.up.Exons = runQuery(im.fly, queryExons)
# get strand for zen ROI genes
zen.strand = gsub(
pattern = "-1",
replacement = "-",
x = c(
zen.Exons$Gene.exons.chromosomeLocation.strand
)
)
zen.adjacent.strand = gsub(
pattern = "-1",
replacement = "-",
x = c(
zen.down.Exons$Gene.exons.chromosomeLocation.strand,
zen.up.Exons$Gene.exons.chromosomeLocation.strand
)
)
# index for zen gene_info
ind.gi = which(gene_info$Gene.symbol == "zen")
# zen data.frame for GeneRegionTrack
zenTrack = data.frame(
chromosome = "chr3R",
start = as.numeric(c(
zen.Exons$Gene.exons.chromosomeLocation.start
)),
end = as.numeric(c(
zen.Exons$Gene.exons.chromosomeLocation.end
)),
strand = zen.strand,
feature = "protein-coding",
gene = gene_info[ind.gi,1],
exon = c(
zen.Exons$Gene.exons.primaryIdentifier
),
transcript = "zen"
)
zenTrack <- GeneRegionTrack(zenTrack,
genome = "dm6",
chromosome = "chr3R",
name = "zen",
background.title = "brown",
transcriptAnnotation = "transcript"
)
# zen Adjacent genes data.frame for GeneRegionTrack
zenAdjacentTrack = data.frame(
chromosome = "chr3R",
start = as.numeric(c(
zen.down.Exons$Gene.exons.chromosomeLocation.start,
zen.up.Exons$Gene.exons.chromosomeLocation.start
)),
end = as.numeric(c(
zen.down.Exons$Gene.exons.chromosomeLocation.end,
zen.up.Exons$Gene.exons.chromosomeLocation.end
)),
strand = zen.adjacent.strand,
exon = c(
zen.down.Exons$Gene.exons.primaryIdentifier,
zen.up.Exons$Gene.exons.primaryIdentifier
),
transcript = c(
rep(zen_AdjacentGenes$Gene.downstreamIntergenicRegion.adjacentGenes.symbol,nrow(zen.down.Exons)),
rep(zen_AdjacentGenes$Gene.upstreamIntergenicRegion.adjacentGenes.symbol, nrow(zen.up.Exons))
)
)
zenAdjacentTrack <- GeneRegionTrack(zenAdjacentTrack,
genome = "dm6",
chromosome = "chr3R",
name = "zen Adjacent Genes",
transcriptAnnotation = "transcript",
background.title = "brown"
)
# zen ROI TFbinding sites for GeneRegionTrack
zen.ROI.TFBindingSites.track = data.frame(
chromosome = paste0("chr",zen.ROI.TFBindingSites$TFBindingSite.chromosome.primaryIdentifier),
start = as.numeric(zen.ROI.TFBindingSites$TFBindingSite.chromosomeLocation.start),
end = as.numeric(zen.ROI.TFBindingSites$TFBindingSite.chromosomeLocation.end),
symbol = zen.ROI.TFBindingSites$TFBindingSite.factor.name
)
zen.ROI.TFBindingSites.track = GeneRegionTrack(
zen.ROI.TFBindingSites.track,
genome = "dm6",
chromosome = "chr3R",
name = "TFs",
background.title = "darkgreen",
fill = "salmon"
)
# zen ROI Regulatory Regions for GeneRegionTrack
zen.ROI.RRLocations.track = data.frame(
chromosome = paste0("chr",zen.ROI.RRLocations$RegulatoryRegion.chromosome.primaryIdentifier),
start = as.numeric(zen.ROI.RRLocations$RegulatoryRegion.chromosomeLocation.start),
end = as.numeric(zen.ROI.RRLocations$RegulatoryRegion.chromosomeLocation.end),
symbol = zen.ROI.RRLocations$RegulatoryRegion.primaryIdentifier
)
zen.ROI.RRLocations.track = GeneRegionTrack(
zen.ROI.RRLocations.track,
genome = "dm6",
chromosome = "chr3R",
name = "Regulatory Regions",
background.title = "darkgreen",
fill = "lightblue"
)
plotTracks(list(idxTrack,
axTrack,
zenTrack,
zenAdjacentTrack,
zen.ROI.TFBindingSites.track,
zen.ROI.RRLocations.track,
zen.ROI.cpgIslands,
zen.ROI.conservation,
zen.ROI.gcContent),
showTitle = T,
shape = "arrow")
# eve gene
idxTrack <- IdeogramTrack(genome = "dm6", chromosome = "chr2R")
# get Exons for eve overlapping genes
queryExons$where[[1]]$value = eve_OverlappingGenes$Gene.overlappingFeatures.symbol
eve.over.Exons = runQuery(im.fly, queryExons)
# get strand for eve ROI genes
eve.strand = gsub(
pattern = "1",
replacement = "+",
x = c(
eve.Exons$Gene.exons.chromosomeLocation.strand
)
)
eve.over.strand = gsub(
pattern = "-1",
replacement = "-",
x = c(
eve.over.Exons$Gene.exons.chromosomeLocation.strand
)
)
# index for eve gene_info
ind.gi = which(gene_info$Gene.symbol == "eve")
# eve data.frame for GeneRegionTrack
eveTrack = data.frame(
chromosome = "chr2R",
start = as.numeric(c(
eve.Exons$Gene.exons.chromosomeLocation.start
)),
end = as.numeric(c(
eve.Exons$Gene.exons.chromosomeLocation.end
)),
strand = eve.strand,
feature = "protein-coding",
gene = gene_info[ind.gi,1],
exon = c(
eve.Exons$Gene.exons.primaryIdentifier
),
transcript = "eve"
)
eveTrack <- GeneRegionTrack(eveTrack,
genome = "dm6",
chromosome = "chr2R",
name = "eve",
background.title = "brown",
transcriptAnnotation = "transcript"
)
# eve Adjacent genes data.frame for GeneRegionTrack
eveOverlapTrack = data.frame(
chromosome = "chr2R",
start = as.numeric(c(
eve.over.Exons$Gene.exons.chromosomeLocation.start
)),
end = as.numeric(c(
eve.over.Exons$Gene.exons.chromosomeLocation.end
)),
strand = eve.over.strand,
exon = c(
eve.over.Exons$Gene.exons.primaryIdentifier
),
transcript = c(
eve_OverlappingGenes$Gene.overlappingFeatures.symbol
)
)
eveOverlapTrack <- GeneRegionTrack(eveOverlapTrack,
genome = "dm6",
chromosome = "chr2R",
name = "eve Overlapping Genes",
transcriptAnnotation = "transcript",
background.title = "brown"
)
# eve ROI TFbinding sites for GeneRegionTrack
eve.ROI.TFBindingSites.track = data.frame(
chromosome = paste0("chr",eve.ROI.TFBindingSites$TFBindingSite.chromosome.primaryIdentifier),
start = as.numeric(eve.ROI.TFBindingSites$TFBindingSite.chromosomeLocation.start),
end = as.numeric(eve.ROI.TFBindingSites$TFBindingSite.chromosomeLocation.end),
symbol = eve.ROI.TFBindingSites$TFBindingSite.factor.name
)
eve.ROI.TFBindingSites.track = GeneRegionTrack(
eve.ROI.TFBindingSites.track,
genome = "dm6",
chromosome = "chr2R",
name = "TFs",
background.title = "darkgreen",
fill = "salmon"
)
# eve ROI Regulatory Regions for GeneRegionTrack
eve.ROI.RRLocations.track = data.frame(
chromosome = paste0("chr",eve.ROI.RRLocations$RegulatoryRegion.chromosome.primaryIdentifier),
start = as.numeric(eve.ROI.RRLocations$RegulatoryRegion.chromosomeLocation.start),
end = as.numeric(eve.ROI.RRLocations$RegulatoryRegion.chromosomeLocation.end),
symbol = eve.ROI.RRLocations$RegulatoryRegion.primaryIdentifier
)
eve.ROI.RRLocations.track = GeneRegionTrack(
eve.ROI.RRLocations.track,
genome = "dm6",
chromosome = "chr2R",
name = "Regulatory Regions",
background.title = "darkgreen",
fill = "lightblue"
)
plotTracks(list(idxTrack,
axTrack,
eveTrack,
eveOverlapTrack,
eve.ROI.TFBindingSites.track,
eve.ROI.RRLocations.track,
eve.ROI.cpgIslands,
eve.ROI.conservation,
eve.ROI.gcContent),
showTitle = T,
shape = "arrow")
# r gene
idxTrack <- IdeogramTrack(genome = "dm6", chromosome = "chrX")
# get Exons for r adjacent genes
queryExons$where[[1]]$value = r_AdjacentGenes$Gene.downstreamIntergenicRegion.adjacentGenes.symbol
r.down.Exons = runQuery(im.fly, queryExons)
queryExons$where[[1]]$value = r_AdjacentGenes$Gene.upstreamIntergenicRegion.adjacentGenes.symbol
r.up.Exons = runQuery(im.fly, queryExons)
# get Exons for r adjacent genes
queryExons$where[[1]]$value = r_OverlappingGenes$Gene.overlappingFeatures.symbol
r.over.Exons = runQuery(im.fly, queryExons)
# get strand for r ROI genes
r.strand = gsub(
pattern = "1",
replacement = "+",
x = c(
r.Exons$Gene.exons.chromosomeLocation.strand
)
)
r.adjacent.strand = gsub(
pattern = "-1",
replacement = "-",
x = c(
r.down.Exons$Gene.exons.chromosomeLocation.strand,
r.up.Exons$Gene.exons.chromosomeLocation.strand
)
)
r.over.strand = gsub(
pattern = "-1",
replacement = "-",
x = c(
r.over.Exons$Gene.exons.chromosomeLocation.strand
)
)
# index for r gene_info
ind.gi = which(gene_info$Gene.symbol == "r")
# r data.frame for GeneRegionTrack
rTrack = data.frame(
chromosome = "chrX",
start = as.numeric(c(
r.Exons$Gene.exons.chromosomeLocation.start
)),
end = as.numeric(c(
r.Exons$Gene.exons.chromosomeLocation.end
)),
strand = r.strand,
feature = "protein-coding",
gene = gene_info[ind.gi,1],
exon = c(
r.Exons$Gene.exons.primaryIdentifier
),
transcript = "r"
)
rTrack <- GeneRegionTrack(rTrack,
genome = "dm6",
chromosome = "chrX",
name = "r",
background.title = "brown",
transcriptAnnotation = "transcript"
)
# r Adjacent genes data.frame for GeneRegionTrack
rAdjacentTrack = data.frame(
chromosome = "chrX",
start = as.numeric(c(
r.down.Exons$Gene.exons.chromosomeLocation.start,
r.up.Exons$Gene.exons.chromosomeLocation.start
)),
end = as.numeric(c(
r.down.Exons$Gene.exons.chromosomeLocation.end,
r.up.Exons$Gene.exons.chromosomeLocation.end
)),
strand = r.adjacent.strand,
exon = c(
r.down.Exons$Gene.exons.primaryIdentifier,
r.up.Exons$Gene.exons.primaryIdentifier
),
transcript = c(
rep(r_AdjacentGenes$Gene.downstreamIntergenicRegion.adjacentGenes.symbol,nrow(r.down.Exons)),
rep(r_AdjacentGenes$Gene.upstreamIntergenicRegion.adjacentGenes.symbol, nrow(r.up.Exons))
)
)
rAdjacentTrack <- GeneRegionTrack(rAdjacentTrack,
genome = "dm6",
chromosome = "chrX",
name = "r Adjacent Genes",
transcriptAnnotation = "transcript",
background.title = "brown"
)
# r Overlapping genes data.frame for GeneRegionTrack
rOverTrack = data.frame(
chromosome = "chrX",
start = as.numeric(c(
r.over.Exons$Gene.exons.chromosomeLocation.start
)),
end = as.numeric(c(
r.over.Exons$Gene.exons.chromosomeLocation.end
)),
strand = r.over.strand,
exon = c(
r.over.Exons$Gene.exons.primaryIdentifier
),
transcript = c(
r_OverlappingGenes$Gene.overlappingFeatures.symbol
)
)
rOverTrack <- GeneRegionTrack(rOverTrack,
genome = "dm6",
chromosome = "chrX",
name = "r Overlapping Genes",
transcriptAnnotation = "transcript",
background.title = "brown"
)
# r ROI TFbinding sites for GeneRegionTrack
is.null(r.ROI.TFBindingSites)
# r ROI Regulatory Regions for GeneRegionTrack
is.null(r.ROI.RRLocations)
plotTracks(list(idxTrack,
axTrack,
rTrack,
rAdjacentTrack,
rOverTrack,
r.ROI.cpgIslands,
r.ROI.conservation,
r.ROI.gcContent),
showTitle = T,
shape = "arrow")
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.2 LTS
##
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.9-bioc/R/lib/libRblas.so
## LAPACK: /home/biocbuild/bbs-3.9-bioc/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=C LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] Gviz_1.28.0 GenomicRanges_1.36.0 GenomeInfoDb_1.20.0
## [4] org.Hs.eg.db_3.8.2 GO.db_3.8.2 GeneAnswers_2.26.0
## [7] RColorBrewer_1.1-2 Heatplus_2.30.0 MASS_7.3-51.4
## [10] RSQLite_2.1.1 annotate_1.62.0 XML_3.98-1.20
## [13] AnnotationDbi_1.46.0 IRanges_2.18.1 S4Vectors_0.22.0
## [16] Biobase_2.44.0 BiocGenerics_0.30.0 RCurl_1.95-4.12
## [19] bitops_1.0-6 igraph_1.2.4.1 InterMineR_1.6.1
## [22] BiocStyle_2.12.0
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 biovizBase_1.32.0
## [3] htmlTable_1.13.1 XVector_0.24.0
## [5] base64enc_0.1-3 dichromat_2.0-0
## [7] rstudioapi_0.10 bit64_0.9-7
## [9] sqldf_0.4-11 xml2_1.2.0
## [11] splines_3.6.1 knitr_1.23
## [13] Formula_1.2-3 jsonlite_1.6
## [15] Rsamtools_2.0.0 cluster_2.1.0
## [17] graph_1.62.0 BiocManager_1.30.4
## [19] compiler_3.6.1 httr_1.4.0
## [21] backports_1.1.4 assertthat_0.2.1
## [23] Matrix_1.2-17 lazyeval_0.2.2
## [25] acepack_1.4.1 htmltools_0.3.6
## [27] prettyunits_1.0.2 tools_3.6.1
## [29] gtable_0.3.0 glue_1.3.1
## [31] GenomeInfoDbData_1.2.1 dplyr_0.8.3
## [33] Rcpp_1.0.1 Biostrings_2.52.0
## [35] RJSONIO_1.3-1.2 rtracklayer_1.44.0
## [37] xfun_0.8 stringr_1.4.0
## [39] proto_1.0.0 ensembldb_2.8.0
## [41] zlibbioc_1.30.0 scales_1.0.0
## [43] BSgenome_1.52.0 VariantAnnotation_1.30.1
## [45] ProtGenerics_1.16.0 hms_0.4.2
## [47] SummarizedExperiment_1.14.0 RBGL_1.60.0
## [49] AnnotationFilter_1.8.0 yaml_2.2.0
## [51] curl_3.3 memoise_1.1.0
## [53] gridExtra_2.3 ggplot2_3.2.0
## [55] downloader_0.4 biomaRt_2.40.1
## [57] rpart_4.1-15 latticeExtra_0.6-28
## [59] stringi_1.4.3 checkmate_1.9.4
## [61] GenomicFeatures_1.36.3 BiocParallel_1.18.0
## [63] chron_2.3-53 rlang_0.4.0
## [65] pkgconfig_2.0.2 matrixStats_0.54.0
## [67] evaluate_0.14 lattice_0.20-38
## [69] purrr_0.3.2 GenomicAlignments_1.20.1
## [71] htmlwidgets_1.3 bit_1.1-14
## [73] tidyselect_0.2.5 magrittr_1.5
## [75] bookdown_0.11 R6_2.4.0
## [77] Hmisc_4.2-0 DelayedArray_0.10.0
## [79] DBI_1.0.0 gsubfn_0.7
## [81] pillar_1.4.2 foreign_0.8-71
## [83] survival_2.44-1.1 nnet_7.3-12
## [85] tibble_2.1.3 crayon_1.3.4
## [87] rmarkdown_1.13 progress_1.2.2
## [89] data.table_1.12.2 blob_1.1.1
## [91] digest_0.6.20 xtable_1.8-4
## [93] munsell_0.5.0
Hahne, Florian, and Robert Ivanek. 2016. “Visualizing Genomic Data Using Gviz and Bioconductor.” In, 335–51. Humana Press, New York, NY. https://doi.org/10.1007/978-1-4939-3578-9_16.
Kalderimis, Alex, Rachel Lyne, Daniela Butano, Sergio Contrino, Mike Lyne, Joshua Heimbach, Fengyuan Hu, et al. 2014. “InterMine: extensive web services for modern biology.” Nucleic Acids Research 42 (Web Server issue). Oxford University Press:W468–72. https://doi.org/10.1093/nar/gku301.
Smith, R. N., J. Aleksic, D. Butano, A. Carr, S. Contrino, F. Hu, M. Lyne, et al. 2012. “InterMine: a flexible data warehouse system for the integration and analysis of heterogeneous biological data.” Bioinformatics 28 (23):3163–5. https://doi.org/10.1093/bioinformatics/bts577.