Contents

1 Introduction

Sequence-based TF affinity scoring can be conducted with the FIMO suite, see @Sonawane2017. We have serialized an object with references to FIMO outputs for 16 TFs.

suppressPackageStartupMessages({
library(TFutils)
library(GenomicRanges)
})
fimo16
## Loading required package: GenomicFiles
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## Loading required package: BiocParallel
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum
## Loading required package: Rsamtools
## Loading required package: Biostrings
## Loading required package: XVector
## 
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
## 
##     strsplit
## Loading required package: rtracklayer
## GenomicFiles object with 0 ranges and 16 files: 
## files: M0635_1.02sort.bed.gz, M3433_1.02sort.bed.gz, ..., M6159_1.02sort.bed.gz, M6497_1.02sort.bed.gz 
## detail: use files(), rowRanges(), colData(), ...

While the token bed is used in the filenames, the files are not actually bed format!

2 Importing with scanTabix

We can use reduceByRange to import selected scans.

si = TFutils::seqinfo_hg19_chr17
myg = GRanges("chr17", IRanges(38.07e6,38.09e6), seqinfo=si)
colnames(fimo16) = fimo16$HGNC 
lk2 = reduceByRange(fimo16[, c("POU2F1", "VDR")],
  MAP=function(r,f) scanTabix(f, param=r))
str(lk2)
##  list()

This result can be massaged into a GRanges or other desirable structure. fimo_granges takes care of this.

#fimo_ranges = function(gf, query) { # prototypical code
# rowRanges(gf) = query
# ans = reduceByRange(gf, MAP=function(r,f) scanTabix(f, param=r))
# ans = unlist(ans, recursive=FALSE)  # drop top list structure
# tabs = lapply(ans, lapply, function(x) {
#     con = textConnection(x)
#     on.exit(close(con))
#     dtf = read.delim(con, h=FALSE, stringsAsFactors=FALSE, sep="\t")
#     colnames(dtf) = c("chr", "start", "end", "rname", "score", "dir", "pval")
#     ans = with(dtf, GRanges(seqnames=chr, IRanges(start, end),
#            rname=rname, score=score, dir=dir, pval=pval))
#     ans
#     })
# GRangesList(unlist(tabs, recursive=FALSE))
#}
rr = fimo_granges(fimo16[, c("POU2F1", "VDR")], myg)
rr
## $POU2F1
## $POU2F1$`chr17:38070000-38090000`
## GRanges object with 76 ranges and 4 metadata columns:
##        seqnames            ranges strand |                   rname     score
##           <Rle>         <IRanges>  <Rle> |             <character> <numeric>
##    [1]    chr17 38070239-38070257      * | chr17:38070239-38070257  -1.54082
##    [2]    chr17 38070579-38070597      * | chr17:38070579-38070597 -0.969388
##    [3]    chr17 38070851-38070869      * | chr17:38070851-38070869  0.122449
##    [4]    chr17 38071025-38071043      * | chr17:38071025-38071043 0.0918367
##    [5]    chr17 38071253-38071271      * | chr17:38071253-38071271   3.67347
##    ...      ...               ...    ... .                     ...       ...
##   [72]    chr17 38088602-38088620      * | chr17:38088602-38088620   4.06122
##   [73]    chr17 38088637-38088655      * | chr17:38088637-38088655   11.6939
##   [74]    chr17 38089141-38089159      * | chr17:38089141-38089159   13.1837
##   [75]    chr17 38089439-38089457      * | chr17:38089439-38089457  -1.35714
##   [76]    chr17 38089822-38089840      * | chr17:38089822-38089840   3.67347
##                dir      pval
##        <character> <numeric>
##    [1]           +  0.000989
##    [2]           -  0.000849
##    [3]           -  0.000631
##    [4]           -  0.000636
##    [5]           +  0.000222
##    ...         ...       ...
##   [72]           +  0.000198
##   [73]           -  1.32e-05
##   [74]           -  7.09e-06
##   [75]           -  0.000942
##   [76]           -  0.000222
##   -------
##   seqinfo: 1 sequence from hg19 genome
## 
## 
## $VDR
## $VDR$`chr17:38070000-38090000`
## GRanges object with 40 ranges and 4 metadata columns:
##        seqnames            ranges strand |                   rname     score
##           <Rle>         <IRanges>  <Rle> |             <character> <numeric>
##    [1]    chr17 38070016-38070031      * | chr17:38070016-38070031 0.0505051
##    [2]    chr17 38070387-38070402      * | chr17:38070387-38070402  -3.38384
##    [3]    chr17 38070925-38070940      * | chr17:38070925-38070940   5.71717
##    [4]    chr17 38071183-38071198      * | chr17:38071183-38071198 -0.676768
##    [5]    chr17 38072289-38072304      * | chr17:38072289-38072304 -0.333333
##    ...      ...               ...    ... .                     ...       ...
##   [36]    chr17 38086915-38086930      * | chr17:38086915-38086930 -0.353535
##   [37]    chr17 38087304-38087319      * | chr17:38087304-38087319   0.10101
##   [38]    chr17 38087866-38087881      * | chr17:38087866-38087881   5.34343
##   [39]    chr17 38088893-38088908      * | chr17:38088893-38088908 -0.111111
##   [40]    chr17 38089214-38089229      * | chr17:38089214-38089229   1.10101
##                dir      pval
##        <character> <numeric>
##    [1]           +  0.000377
##    [2]           +  0.000966
##    [3]           -  6.44e-05
##    [4]           +  0.000463
##    [5]           -  0.000421
##    ...         ...       ...
##   [36]           -  0.000423
##   [37]           -  0.000371
##   [38]           +   7.3e-05
##   [39]           -  0.000395
##   [40]           -  0.000277
##   -------
##   seqinfo: 1 sequence from hg19 genome