Skip to content

Commit f8e7bd8

Browse files
authored
Implement829 (#830)
* Add plotCoverage --BED * Add coverage threshold stuff * tweak * OK, this seems to do the trick * bump version * take a stab at a galaxy wrapper * update galaxy wrapper, sort -ct
1 parent 57a6dbb commit f8e7bd8

File tree

8 files changed

+122
-9
lines changed

8 files changed

+122
-9
lines changed

CHANGES.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
3.3.0
2+
3+
* `plotCoverage` now has a `--BED` option, to restrict plots and output to apply to a specific set of regions given by a BED or GTF file or files (issue #829).
4+
* `plotCoverage` now has a `--DepthSummary` option, which produces a summary similar to GATK's DepthOfCoverage (issue #828).
5+
* `plotCoverage` is now able to compute coverage metrics for arbitrary coverage thresholds using multiples of the `-ct` option (e.g., `-ct 0 -ct 10 -ct 20 -ct 30`).
6+
17
3.2.1
28

39
* Changed a bug in `estimateReadFiltering` where the estimated number of filtered reads was typically too low.

deeptools/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
# This file is originally generated from Git information by running 'setup.py
33
# version'. Distribution tarballs contain a pre-generated copy of this file.
44

5-
__version__ = '3.2.1'
5+
__version__ = '3.3.0'

deeptools/countReadsPerBin.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,9 @@ class CountReadsPerBin(object):
130130
mappedList : list
131131
For each BAM file in bamFilesList, the number of mapped reads in the file.
132132
133+
bed_and_bin : boolean
134+
If true AND a bedFile is given, compute coverage of each bin of the given size in each region of bedFile
135+
133136
Returns
134137
-------
135138
numpy array
@@ -171,6 +174,7 @@ def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfPro
171174
minFragmentLength=0,
172175
maxFragmentLength=0,
173176
out_file_for_raw_data=None,
177+
bed_and_bin=False,
174178
statsList=[],
175179
mappedList=[]):
176180

@@ -181,6 +185,7 @@ def __init__(self, bamFilesList, binLength=50, numberOfSamples=None, numberOfPro
181185
self.statsList = statsList
182186
self.mappedList = mappedList
183187
self.skipZeroOverZero = skipZeroOverZero
188+
self.bed_and_bin = bed_and_bin
184189

185190
if extendReads and len(bamFilesList):
186191
from deeptools.getFragmentAndReadSize import get_read_and_fragment_length
@@ -463,7 +468,10 @@ def count_reads_in_region(self, chrom, start, end, bed_regions_list=None):
463468
# A list of lists of tuples
464469
transcriptsToConsider = []
465470
if bed_regions_list is not None:
466-
transcriptsToConsider = [x[1] for x in bed_regions_list]
471+
if self.bed_and_bin:
472+
transcriptsToConsider.append([(x[1][0][0], x[1][0][1], self.binLength) for x in bed_regions_list])
473+
else:
474+
transcriptsToConsider = [x[1] for x in bed_regions_list]
467475
else:
468476
if self.stepSize == self.binLength:
469477
transcriptsToConsider.append([(start, end, self.binLength)])
@@ -484,7 +492,7 @@ def count_reads_in_region(self, chrom, start, end, bed_regions_list=None):
484492
for bam in bam_handles:
485493
for trans in transcriptsToConsider:
486494
tcov = self.get_coverage_of_region(bam, chrom, trans)
487-
if bed_regions_list is not None:
495+
if bed_regions_list is not None and not self.bed_and_bin:
488496
subnum_reads_per_bin.append(np.sum(tcov))
489497
else:
490498
subnum_reads_per_bin.extend(tcov)

deeptools/plotCoverage.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,34 @@ def required_args():
118118
type=int,
119119
default=1000000)
120120

121+
optional.add_argument('--BED',
122+
help='Limits the coverage analysis to '
123+
'the regions specified in these files. This overrides --numberOfSamples. '
124+
'Due to memory requirements, it is inadvised to combine this with '
125+
'--outRawCounts or many tens of thousands of regions, as per-base '
126+
'coverage is used!',
127+
metavar='FILE1.bed FILE2.bed',
128+
nargs='+')
129+
121130
optional.add_argument('--outRawCounts',
122131
help='Save raw counts (coverages) to file.',
123132
type=parserCommon.writableFile,
124133
metavar='FILE')
125134

135+
optional.add_argument('--outCoverageMetrics',
136+
help='Save percentage of bins/regions above the specified thresholds to '
137+
'the specified file. The coverage thresholds are specified by '
138+
'--coverageThresholds. If no coverage thresholds are specified, the file '
139+
'will be empty.',
140+
type=parserCommon.writableFile,
141+
metavar='FILE')
142+
143+
optional.add_argument('--coverageThresholds', '-ct',
144+
type=int,
145+
action="append",
146+
help='The percentage of reported bins/regions with signal at least as '
147+
'high as the given threshold. This can be specified multiple times.')
148+
126149
optional.add_argument('--plotHeight',
127150
help='Plot height in cm.',
128151
type=float,
@@ -148,11 +171,17 @@ def required_args():
148171
def main(args=None):
149172
args = process_args(args)
150173

151-
if args.outRawCounts is None and args.plotFile is None:
152-
sys.exit("At least one of --plotFile and --outRawCounts are required.\n")
174+
if not args.outRawCounts and not args.plotFile and not args.outCoverageMetrics:
175+
sys.exit("At least one of --plotFile, --outRawCounts and --outCoverageMetrics are required.\n")
176+
177+
if 'BED' in args:
178+
bed_regions = args.BED
179+
else:
180+
bed_regions = None
153181

154182
cr = countR.CountReadsPerBin(args.bamfiles,
155183
binLength=1,
184+
bedFile=bed_regions,
156185
numberOfSamples=args.numberOfSamples,
157186
numberOfProcessors=args.numberOfProcessors,
158187
verbose=args.verbose,
@@ -166,10 +195,22 @@ def main(args=None):
166195
samFlag_exclude=args.samFlagExclude,
167196
minFragmentLength=args.minFragmentLength,
168197
maxFragmentLength=args.maxFragmentLength,
198+
bed_and_bin=True,
169199
out_file_for_raw_data=args.outRawCounts)
170200

171201
num_reads_per_bin = cr.run()
172202

203+
if args.outCoverageMetrics and args.coverageThresholds:
204+
args.coverageThresholds.sort() # Galaxy in particular tends to give things in a weird order
205+
of = open(args.outCoverageMetrics, "w")
206+
of.write("Sample\tThreshold\tPercent\n")
207+
nbins = float(num_reads_per_bin.shape[0])
208+
for thresh in args.coverageThresholds:
209+
vals = np.sum(num_reads_per_bin >= thresh, axis=0)
210+
for lab, val in zip(args.labels, vals):
211+
of.write("{}\t{}\t{:6.3f}\n".format(lab, thresh, 100. * val / nbins))
212+
of.close()
213+
173214
if args.outRawCounts:
174215
# append to the generated file the
175216
# labels

galaxy/wrapper/deepTools_macros.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
<macros>
22

33
<token name="@THREADS@">--numberOfProcessors "\${GALAXY_SLOTS:-4}"</token>
4-
<token name="@WRAPPER_VERSION@">3.2.1.0</token>
4+
<token name="@WRAPPER_VERSION@">3.3.0.0</token>
55
<xml name="requirements">
66
<requirements>
7-
<requirement type="package" version="3.2.1">deeptools</requirement>
7+
<requirement type="package" version="3.3.0">deeptools</requirement>
88
<requirement type="package" version="1.9">samtools</requirement>
99
</requirements>
1010
<expand macro="stdio" />

galaxy/wrapper/plotCoverage.xml

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,23 @@
2525
--outRawCounts '$outFileRawCounts'
2626
#end if
2727
28+
#if ' '.join(map(str, $BED)) != 'None':
29+
#set bedFileLList=[]
30+
#for $f in $BED:
31+
#silent $bedFileList.append("'%s'" % $f)
32+
#end for
33+
#if $bedFileList != ["'None'"]:
34+
--BED #echo ' '.join($bedFileList)#
35+
#end if
36+
#end if
37+
38+
#if $coverageOpt.showCoverageOpt == "yes":
39+
--outCoverageMetrics '$outFileCoverageMetrics'
40+
#for $t in $coverageOpt.thresholds:
41+
-ct $t.coverageThreshold
42+
#end for
43+
#end if
44+
2845
#if $advancedOpt.showAdvancedOpt == "yes":
2946
--numberOfSamples '$advancedOpt.numberOfSamples'
3047
$advancedOpt.skipZeros
@@ -48,11 +65,28 @@
4865

4966
<expand macro="multiple_input_bams" MIN="1"/>
5067
<expand macro="custom_sample_labels" />
68+
<param argument="--BED" type="data" format="bed,gtf" multiple="true" optional="true" min="0"
69+
label="Regions of interest"
70+
help="Limits the coverage analysis to the regions specified in these files. This overrides --numberOfSamples. It is inadvisable to combine this with saving the raw counts." />
71+
72+
<conditional name="coverageOpt">
73+
<param name="showCoverageOpt" type="select" label="Show coverage metrics options">
74+
<option value="no" selected="true">No</option>
75+
<option value="yes">Yes</option>
76+
</param>
77+
<when value="no" />
78+
<when value="yes">
79+
<param argument="--outCoverageMetrics" type="boolean" label="Save per-threshold coverage metrics?"/>
80+
<repeat name="thresholds" title="Coverage Thresholds">
81+
<param argument="--coverageThreshold" type="integer" min="0" label="Coverage Threshold" value="0"/>
82+
</repeat>
83+
</when>
84+
</conditional>
5185

5286
<conditional name="advancedOpt">
5387
<param name="showAdvancedOpt" type="select" label="Show advanced options" >
54-
<option value="no" selected="true">no</option>
55-
<option value="yes">yes</option>
88+
<option value="no" selected="true">No</option>
89+
<option value="yes">Yes</option>
5690
</param>
5791
<when value="no" />
5892
<when value="yes">
@@ -78,6 +112,9 @@
78112
<data format="tabular" name="outFileRawCounts" label="${tool.name} on ${on_string}: bin counts">
79113
<filter>outRawCounts is True</filter>
80114
</data>
115+
<data format="tabular" name="outFileCoverageMetrics" label="${tool.name} on ${on_string}: Threshold Metrics">
116+
<filter>coverageOpt.outCoverageMetrics is True</filter>
117+
</data>
81118
</outputs>
82119
<tests>
83120
<test>
@@ -89,6 +126,18 @@
89126
<output name="outFileRawCounts" file="plotCoverage_result1.tabular" ftype="tabular" />
90127
<output name="outFileName" file="plotCoverage_result1.png" ftype="png" compare="sim_size" delta="2400" />
91128
</test>
129+
<test>
130+
<param name="bamfiles" value="bowtie2 test1.bam,bowtie2 test1.bam" ftype="bam" />
131+
<param name="showAdvancedOpt" value="yes" />
132+
<param name="plotTitle" value="Test Title from Galaxy" />
133+
<param name="showCoverageOpt" value="yes" />
134+
<param name="coverageThreshold" value="0" />
135+
<param name="coverageThreshold" value="5" />
136+
<param name="coverageThreshold" value="10" />
137+
<param name="coverageThreshold" value="20" />
138+
<output name="outFileName" file="plotCoverage_result1.png" ftype="png" compare="sim_size" delta="2400" />
139+
<output name="outFileCoverageMetrics" file="plotCoverage.metrics" ftype="tabular" />
140+
</test>
92141
</tests>
93142
<help>
94143
<![CDATA[
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Sample Threshold Percent
2+
bowtie2 test1.bam 0 100.000
3+
bowtie2 test1.bam 0 100.000
4+
bowtie2 test1.bam 5 1.509
5+
bowtie2 test1.bam 5 1.509
6+
bowtie2 test1.bam 10 1.461
7+
bowtie2 test1.bam 10 1.461
8+
bowtie2 test1.bam 20 1.406
9+
bowtie2 test1.bam 20 1.406
-535 Bytes
Loading

0 commit comments

Comments
 (0)