@@ -146,21 +146,27 @@ private synchronized void getPercentages () {
146
146
char minChar = 0 ;
147
147
char maxChar = 0 ;
148
148
149
- // Use the data from the first tile
150
- QualityCount [] qualityCounts = perTileQualityCounts . get ( perTileQualityCounts . keySet (). toArray ()[ 0 ]);
151
-
152
- for ( int q = 0 ; q < qualityCounts . length ; q ++) {
153
- if ( q == 0 ) {
154
- minChar = qualityCounts [ q ]. getMinChar ();
155
- maxChar = qualityCounts [ q ]. getMaxChar ();
156
- }
157
- else {
158
- if (qualityCounts [ q ]. getMinChar () < minChar ) {
149
+ // Iterate through the tiles to check them all in case
150
+ // we're dealing with unrepresentative data in the first one.
151
+ Iterator < QualityCount []> qit = perTileQualityCounts . values (). iterator ();
152
+
153
+ while ( qit . hasNext ()) {
154
+
155
+ QualityCount [] qualityCounts = qit . next ();
156
+
157
+ for ( int q = 0 ; q < qualityCounts . length ; q ++) {
158
+ if (minChar == 0 ) {
159
159
minChar = qualityCounts [q ].getMinChar ();
160
- }
161
- if (qualityCounts [q ].getMaxChar () > maxChar ) {
162
160
maxChar = qualityCounts [q ].getMaxChar ();
163
161
}
162
+ else {
163
+ if (qualityCounts [q ].getMinChar () < minChar ) {
164
+ minChar = qualityCounts [q ].getMinChar ();
165
+ }
166
+ if (qualityCounts [q ].getMaxChar () > maxChar ) {
167
+ maxChar = qualityCounts [q ].getMaxChar ();
168
+ }
169
+ }
164
170
}
165
171
}
166
172
@@ -179,6 +185,12 @@ public void processSequence(Sequence sequence) {
179
185
180
186
// Don't waste time calculating this if we're not going to use it anyway
181
187
if (ignoreInReport ) return ;
188
+
189
+ // Don't bother with sequences with zero length as they don't have any
190
+ // quality information anyway.
191
+ if (sequence .getQualityString ().length () == 0 ) {
192
+ return ;
193
+ }
182
194
183
195
calculated = false ;
184
196
@@ -192,10 +204,12 @@ public void processSequence(Sequence sequence) {
192
204
193
205
// This module does quite a lot of work and ends up being the limiting
194
206
// step when calculating. We'll therefore take only a sample of the
195
- // sequences to try to get a representative selection.
207
+ // sequences to try to get a representative selection. We'll use the
208
+ // first 10k sequences in case we're dealing with a very small file
209
+ // and then take 10% of the rest.
196
210
197
211
++totalCount ;
198
- if (totalCount % 10 != 0 ) return ;
212
+ if (totalCount > 10000 && totalCount % 10 != 0 ) return ;
199
213
200
214
// First try to split the id by :
201
215
int tile = 0 ;
0 commit comments