-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbed.cpp
104 lines (87 loc) · 2.9 KB
/
bed.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
// ******************************************************
// vcfCTools (c) 2011 Alistair Ward
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ------------------------------------------------------
// Last modified: 18 February 2011
// ------------------------------------------------------
// bedClass describes the bed class and all operations.
// ******************************************************
#include "bed.h"
using namespace std;
using namespace vcfCTools;
// Constructor.
bed::bed(void) {
numberTargets = 0;
targetLength = 0;
targetVariance = 0;
}
// Destructor.
bed::~bed(void)
{}
// Open a bed file.
bool bed::openBed(string& bedFilename) {
if (bedFilename != "-") {
file.open(bedFilename.c_str(), ifstream::in);
input = &file;
if (!file.is_open()) {
cerr << "Failed to open file: " << bedFilename << endl;
exit(1);
}
}
else {cerr << "bed file must be provided. Cannot read from stdin." << endl;}
}
// Close the bed file.
void bed::closeBed() {
if (bedFilename != "-") {
file.close();
if (file.is_open()) {
cerr << "Failed to close file: " << bedFilename << endl;
}
}
}
// Parse the bed header.
void bed::parseHeader(string& bedFile) {
unsigned int numberHeaderLines = 0;
string headerLine;
success = getline(*input, headerLine);
while (success && headerLine.substr(0, 1) == "#") {
success = getline(*input, headerLine);
numberHeaderLines++;
}
// Close the bed file, reopen and parse through the header lines.
closeBed();
openBed(bedFile);
for (unsigned int i = 0; i < numberHeaderLines; i++) {success = getline(*input, headerLine);}
}
// Get the next record from the vcf file.
bool bed::getRecord() {
success = getline(*input, record);
// Return false if no more records remain.
if (!success) {return false;}
vector<string> recordFields = split(record, '\t');
// Populate the variant values.
bRecord.referenceSequence = recordFields[0];
bRecord.start = atoi(recordFields[1].c_str()) + 1;
bRecord.end = atoi(recordFields[2].c_str());
if (recordFields.size() > 3) {bRecord.info = recordFields[3];}
// Check that the start and end coordinates define a valid interval.
if ( (bRecord.end - bRecord.start) < 0 ) {
cerr << "Invalid target interval:" << endl;
cerr << "\t" << record << endl;
exit(1);
}
// Update statistics on the targets.
numberTargets++;
targetLength += (bRecord.end - bRecord.start);
targetVariance = 0;
// Add the reference sequence to the map. If it didn't previously
// exist append the reference sequence to the end of the list as well.
// This ensures that the order in which the reference sequences appeared
// in the header can be preserved.
if (referenceSequences.count(bRecord.referenceSequence) == 0) {
referenceSequences[bRecord.referenceSequence] = true;
referenceSequenceVector.push_back(bRecord.referenceSequence);
}
return true;
}