@@ -51,58 +51,56 @@ Clustering::Clustering(const std::string &seqDB, const std::string &seqDBIndex,
51
51
std::ifstream mappingStream (seqDB + " .lookup" );
52
52
std::string line;
53
53
unsigned int setkey = 0 ;
54
+ unsigned int maxsetkey = 0 ;
54
55
while (std::getline (mappingStream, line)) {
55
56
std::vector<std::string> split = Util::split (line, " \t " );
56
57
unsigned int key = strtoul (split[0 ].c_str (), NULL , 10 );
57
58
setkey = strtoul (split[2 ].c_str (), NULL , 10 );
58
59
keyToSet[key] = setkey;
60
+ if (maxsetkey < setkey) {
61
+ maxsetkey = setkey;
62
+ }
59
63
}
60
64
for (size_t id = 0 ; id < originalseqDbr->getSize (); id++) {
61
65
setToLength[keyToSet[seqIndex[id].id ]] += seqIndex[id].length ;
62
66
keysInSeq[seqIndex[id].id ] = 1 ;
63
67
}
64
- unsigned int sourceLen = setkey + 1 ;
68
+ unsigned int sourceLen = maxsetkey + 1 ;
65
69
seqnum = setToLength.size ();
66
70
sourceList = new (std::nothrow) unsigned int [lastKey];
67
- sourceOffsets = new (std::nothrow) size_t [sourceLen + 1 ];
71
+ sourceOffsets = new (std::nothrow) size_t [sourceLen + 1 ]() ;
68
72
sourceLookupTable = new (std::nothrow) unsigned int *[sourceLen];
73
+ size_t * sourceOffsetsDecrease = new (std::nothrow) size_t [sourceLen + 1 ]();
69
74
70
75
mappingStream.close ();
71
76
mappingStream.open (seqDB + " .lookup" );
77
+
78
+ line = " " ;
79
+ while (std::getline (mappingStream, line)) {
80
+ std::vector<std::string> split = Util::split (line, " \t " );
81
+ setkey = strtoul (split[2 ].c_str (), NULL , 10 );
82
+ sourceOffsets[setkey]++;
83
+ sourceOffsetsDecrease[setkey]++;
84
+ }
85
+ AlignmentSymmetry::computeOffsetFromCounts (sourceOffsets, sourceLen);
86
+ AlignmentSymmetry::setupPointers<unsigned int >(sourceList, sourceLookupTable, sourceOffsets, sourceLen, lastKey);
87
+
88
+ mappingStream.close ();
89
+ mappingStream.open (seqDB + " .lookup" );
90
+
72
91
line = " " ;
73
- unsigned int prevsetkey = UINT_MAX;
74
- size_t n = 0 ;
75
- size_t lookupOrder = 0 ;
76
- setkey = UINT_MAX;
77
92
while (std::getline (mappingStream, line)) {
78
93
std::vector<std::string> split = Util::split (line, " \t " );
79
94
unsigned int key = strtoul (split[0 ].c_str (), NULL , 10 );
80
95
setkey = strtoul (split[2 ].c_str (), NULL , 10 );
81
- if (setkey != prevsetkey) {
82
- if (prevsetkey != UINT_MAX){
83
- sourceOffsets[prevsetkey] = n;
84
- for (size_t k = prevsetkey+1 ; k<setkey; k++) {
85
- sourceOffsets[k] = 0 ;
86
- }
87
- }
88
- prevsetkey = setkey;
89
- if (keysInSeq[key] == 1 ) {
90
- sourceKeyVec.emplace_back (setkey);
91
- }
92
- n = 0 ;
93
- }
96
+ size_t order = sourceOffsets[setkey + 1 ] - sourceOffsetsDecrease[setkey];
94
97
if (keysInSeq[key] == 1 ) {
95
- sourceList[lookupOrder ] = key;
98
+ sourceList[order ] = key;
96
99
} else {
97
- sourceList[lookupOrder ] = UINT_MAX;
100
+ sourceList[order ] = UINT_MAX;
98
101
}
99
- n++;
100
- lookupOrder++;
102
+ sourceOffsetsDecrease[setkey]--;
101
103
}
102
- sourceOffsets[prevsetkey] = n;
103
- AlignmentSymmetry::computeOffsetFromCounts (sourceOffsets, sourceLen);
104
- AlignmentSymmetry::setupPointers<unsigned int >(sourceList, sourceLookupTable, sourceOffsets, sourceLen, lastKey);
105
-
106
104
char * data = (char *)malloc (
107
105
sizeof (size_t ) +
108
106
sizeof (size_t ) +
@@ -114,7 +112,7 @@ Clustering::Clustering(const std::string &seqDB, const std::string &seqDBIndex,
114
112
115
113
std::vector<DBReader<unsigned int >::Index*> indexStorage (seqnum);
116
114
117
- n = 0 ;
115
+ size_t n = 0 ;
118
116
for (const auto & pairs : setToLength) {
119
117
indexStorage[n] = new DBReader<unsigned int >::Index;
120
118
indexStorage[n]->id = pairs.first ;
0 commit comments