Skip to content

Commit f95e233

Browse files
Andrewss authored and Andrewss committed
Deal correctly with zero length sequences in per tile quality.
Relates to s-andrews#57
1 parent 3e35053 commit f95e233

File tree

3 files changed

+30
-16
lines changed

3 files changed

+30
-16
lines changed

uk/ac/babraham/FastQC/Modules/PerTileQualityScores.java

Lines changed: 28 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -146,21 +146,27 @@ private synchronized void getPercentages () {
146146
char minChar = 0;
147147
char maxChar = 0;
148148

149-
// Use the data from the first tile
150-
QualityCount [] qualityCounts = perTileQualityCounts.get(perTileQualityCounts.keySet().toArray()[0]);
151-
152-
for (int q=0;q<qualityCounts.length;q++) {
153-
if (q == 0) {
154-
minChar = qualityCounts[q].getMinChar();
155-
maxChar = qualityCounts[q].getMaxChar();
156-
}
157-
else {
158-
if (qualityCounts[q].getMinChar() < minChar) {
149+
// Iterate through the tiles to check them all in case
150+
// we're dealing with unrepresentative data in the first one.
151+
Iterator<QualityCount[]> qit = perTileQualityCounts.values().iterator();
152+
153+
while (qit.hasNext()) {
154+
155+
QualityCount [] qualityCounts = qit.next();
156+
157+
for (int q=0;q<qualityCounts.length;q++) {
158+
if (minChar == 0) {
159159
minChar = qualityCounts[q].getMinChar();
160-
}
161-
if (qualityCounts[q].getMaxChar() > maxChar) {
162160
maxChar = qualityCounts[q].getMaxChar();
163161
}
162+
else {
163+
if (qualityCounts[q].getMinChar() < minChar) {
164+
minChar = qualityCounts[q].getMinChar();
165+
}
166+
if (qualityCounts[q].getMaxChar() > maxChar) {
167+
maxChar = qualityCounts[q].getMaxChar();
168+
}
169+
}
164170
}
165171
}
166172

@@ -179,6 +185,12 @@ public void processSequence(Sequence sequence) {
179185

180186
// Don't waste time calculating this if we're not going to use it anyway
181187
if (ignoreInReport) return;
188+
189+
// Don't bother with sequences with zero length as they don't have any
190+
// quality information anyway.
191+
if (sequence.getQualityString().length() == 0) {
192+
return;
193+
}
182194

183195
calculated = false;
184196

@@ -192,10 +204,12 @@ public void processSequence(Sequence sequence) {
192204

193205
// This module does quite a lot of work and ends up being the limiting
194206
// step when calculating. We'll therefore take only a sample of the
195-
// sequences to try to get a representative selection.
207+
// sequences to try to get a representative selection. We'll use the
208+
// first 10k sequences in case we're dealing with a very small file
209+
// and then take 10% of the rest.
196210

197211
++totalCount;
198-
if (totalCount % 10 != 0) return;
212+
if (totalCount > 10000 && totalCount % 10 != 0) return;
199213

200214
// First try to split the id by :
201215
int tile = 0;

uk/ac/babraham/FastQC/Sequence/QualityEncoding/PhredEncoding.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ else if (lowestChar == ILLUMINA_1_3_ENCODING_OFFSET+1) {
4545
else if (lowestChar <= 126) {
4646
return new PhredEncoding("Illumina 1.5", ILLUMINA_1_3_ENCODING_OFFSET);
4747
}
48-
throw new IllegalArgumentException("No known encodings with chars > 126 (Yours was "+lowestChar+")");
48+
throw new IllegalArgumentException("No known encodings with chars > 126 (Yours was "+lowestChar+" with value "+(int)lowestChar+")");
4949
}
5050

5151
public static double convertSangerPhredToProbability (int phred) {

uk/ac/babraham/FastQC/Utilities/QualityCount.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ public class QualityCount {
2525
/*
2626
* So I'm on my third go at writing this. I've now tried an all
2727
* primitive version of this class so that we don't have to do
28-
* hash lookps which require a conversion from chr to Character.
28+
* hash lookups which require a conversion from chr to Character.
2929
* We should also be safe with 150 slots which will give us up to
3030
* Phred 86 with a 64 offset, which should be plenty.
3131
*/

0 commit comments

Comments
 (0)