-
Notifications
You must be signed in to change notification settings - Fork 7
/
splitList.cpp
117 lines (99 loc) · 3.55 KB
/
splitList.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// splitList.cpp
/*===========================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
*
* This software/database is a "United States Government Work" under the
* terms of the United States Copyright Act. It was written as part of
* the author's official duties as a United States Government employee and
* thus cannot be copyrighted. This software/database is freely available
* to the public for use. The National Library of Medicine and the U.S.
* Government have not placed any restriction on its use or reproduction.
*
* Although all reasonable efforts have been taken to ensure the accuracy
* and reliability of the software and data, the NLM and the U.S.
* Government do not and cannot warrant the performance or results that
* may be obtained by using this software or data. The NLM and the U.S.
* Government disclaim all warranties, express or implied, including
* warranties of performance, merchantability or fitness for any particular
* purpose.
*
* Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author: Vyacheslav Brover
*
* File Description:
* Split a list into subsets
*
*/
#undef NDEBUG
#include "common.hpp"
using namespace Common_sp;
#include "version.inc"
#include "common.inc"
namespace
{


// Command-line application that splits a text file into consecutive parts
// of a fixed number of lines, each part written to its own output file.
struct ThisApplication : Application
{
  // Declares the command-line interface: three positional arguments
  // (in, size, out_dir), the keys "start", "prefix" and "extension",
  // and the flag "zero".
  ThisApplication ()
    : Application ("Partition <in> into parts of size <size> lines sequentially.\n"
                   "Parts are named <out_dir>/<prefix><i>")
    {
      version = VERSION;
      addPositional ("in", "Text file");
      addPositional ("size", "# lines in one part");
      addPositional ("out_dir", "Output directory");
      addKey ("start", "Start number of a part", "1");
      addFlag ("zero", "Parts start with 0, otherwise 1");
      addKey ("prefix", "File name prefix");
      addKey ("extension", "File name extension");
    }


  // Reads <in> line by line and writes every <size> consecutive lines to a
  // separate file opened via OFStream::open (out_dir, prefix + <number>, extension).
  //
  // Parts are counted from 1 internally. Parts whose 1-based ordinal is less
  // than "start" are read but not written. The number used in the file name
  // is the 1-based ordinal, or the ordinal minus 1 if -zero is given; "start"
  // is always compared with the 1-based ordinal.
  //
  // Requires: size > 0, start >= 1 (checked by ASSERT).
  void body () const final
  {
    const string in        = getArg ("in");
    const streamsize size  = str2<streamsize> (getArg ("size"));
    const string out_dir   = getArg ("out_dir");
    const uint start_part  = str2<uint> (getArg ("start"));
    const bool zero        = getFlag ("zero");
    const string prefix    = getArg ("prefix");
    const string extension = getArg ("extension");

    ASSERT (size > 0);
    ASSERT (start_part >= 1);

    LineInput inF (in);
    OFStream outF;
    uint part = 0;        // 1-based ordinal of the current part; 0 = no part started yet
    streamsize n = size;  // # lines in the current part; starting at "size" makes the first line open part 1
    bool writing = false; // becomes true once part >= start_part and stays true
    Progress prog;
    while (inF. nextLine ())
    {
      if (n == size)
      {
        // The current part is full (or this is the first line): start the next part
        prog ();
        if (outF. is_open ())
        {
          outF. close ();
          // close() flushes the buffered tail of the part; a failure is reported in the stream state
          ASSERT (outF. good ());
        }
        part++;
        ASSERT (part);  // guards against wrap-around of the unsigned counter
        if (part >= start_part)
          writing = true;
        if (writing)
          outF. open (out_dir, prefix + toString (part - zero), extension);
        n = 0;
      }
      if (writing)
      {
        // '\n' instead of endl: no flush after every line
        outF << inF. line << '\n';
        ASSERT (outF. good ());
      }
      n++;
    }

    // Close the last part explicitly so that a failed final flush is detected
    if (outF. is_open ())
    {
      outF. close ();
      ASSERT (outF. good ());
    }
  }
};


}  // namespace
int main (int argc,
const char* argv[])
{
ThisApplication app;
return app. run (argc, argv);
}