Skip to content

Commit c7d1c7b

Browse files
committed
Added str_vec, dbl_set, and str_set to read/write
1 parent d9d82b3 commit c7d1c7b

File tree

7 files changed

+225
-9
lines changed

7 files changed

+225
-9
lines changed

data/AAPL_10dBucketWithMaps.csv

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
INDEX:4:<DateTimeAME>,Open:4:<double>,High:4:<double>,Low:4:<double>,Close:4:<double>,Mean:4:<double>,Median:4:<double>,25% Quantile:4:<double>,Std:4:<double>,MAD:4:<double>,Map 1:4:<str_dbl_map>,Unordered Map:4:<str_dbl_unomap>,Volume:4:<long>
2-
01/14/2000 00:00:00.000,0.999442,0.999442,0.77846,0.896763,0.8819754,0.8805805,0.8560265,0.0600931968588,0.0436942,3{label one 1:123.0|label one 2:-782.5|label one 3:444.44},3{Key one 1:123.0|Key one 2:-782.5|Key one 3:444.44},6400945600
3-
01/31/2000 00:00:00.000,0.928013,1.013393,0.907366,0.926339,0.9637277,0.966797,0.938337,0.0362519289143,0.0313617,3{label two 1:123.0|label two 2:-782.5|label two 3:444.44},3{Key two 1:123.0|Key two 2:-782.5|Key two 3:444.44},6154232000
4-
02/14/2000 00:00:00.000,0.895089,1.03404,0.882254,1.03404,0.9732142,0.988281,0.9433595,0.0559733198384,0.0462054,3{label three 1:123.0|label three 2:-782.5|label three 3:444.44},3{Key three 1:123.0|Key three 2:-782.5|Key three 3:444.44},3714592000
5-
02/29/2000 00:00:00.000,1.0625,1.0625,0.985491,1.023438,1.0203265,1.0212055,1.013672,0.0217113745778,0.0153041,3{label four 1:123.0|label four 2:-782.5|label four 3:444.44},3{Key four 1:123.0|Key four 2:-782.5|Key four 3:444.44},3605190400
6-
1+
INDEX:4:<DateTimeAME>,Open:4:<double>,High:4:<double>,Low:4:<double>,Close:4:<double>,Mean:4:<double>,Median:4:<double>,25% Quantile:4:<double>,Std:4:<double>,MAD:4:<double>,Map 1:4:<str_dbl_map>,Unordered Map:4:<str_dbl_unomap>,Str Vec:4:<str_vec>,Double Set:4:<dbl_set>,Str Set:4:<str_set>,Volume:4:<long>
2+
01/14/2000 00:00:00.000,0.999442,0.999442,0.77846,0.896763,0.8819754,0.8805805,0.8560265,0.060093197,0.0436942,3{label one 1:123.0|label one 2:-782.5|label one 3:444.44},3{Key one 1:123.0|Key one 2:-782.5|Key one 3:444.44},4[bbb|aaa|zzz|ddd],3[123.0|-782.5|444.44],3[123.0|-782.5|444.44],6400945600
3+
01/31/2000 00:00:00.000,0.928013,1.013393,0.907366,0.926339,0.9637277,0.966797,0.938337,0.036251929,0.0313617,3{label two 1:123.0|label two 2:-782.5|label two 3:444.44},3{Key two 1:123.0|Key two 2:-782.5|Key two 3:444.44},4[aaa|bbb|ccc|www],3[1:123.0|-782.5|:444.44],3[1:123.0|-782.5|:444.44],6154232000
4+
02/14/2000 00:00:00.000,0.895089,1.03404,0.882254,1.03404,0.9732142,0.988281,0.9433595,0.05597332,0.0462054,3{label three 1:123.0|label three 2:-782.5|label three 3:444.44},3{Key three 1:123.0|Key three 2:-782.5|Key three 3:444.44},4[123|abc|345|list],3[123.0|-782.5|444.44],3[123.0|-782.5|444.44],3714592000
5+
02/29/2000 00:00:00.000,1.0625,1.0625,0.985491,1.023438,1.0203265,1.0212055,1.013672,0.021711375,0.0153041,3{label four 1:123.0|label four 2:-782.5|label four 3:444.44},3{Key four 1:123.0|Key four 2:-782.5|Key four 3:444.44},3[bbb|aaa|zzz],4[123.0|-782.5|444.44|100.5],4[123.0|-782.5|444.44|100.5],3605190400

docs/HTML/read.html

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,14 @@
103103
DateTimeAME -- DateTime string printed in American style (MM/DD/YYYY HH:MM:SS.mmm)
104104
DateTimeEUR -- DateTime string printed in European style (YYYY/MM/DD HH:MM:SS.mmm)
105105
DateTimeISO -- DateTime string printed in ISO style (YYYY-MM-DD HH:MM:SS.mmm)
106-
dbl_vector -- A vector of double precision values, The vector is printed as "s[d1|d2|...]"
106+
dbl_vec -- A vector of double precision values. The vector is printed as "s[d1|d2|...]"
107107
where s is the size of the vector and d's are the double values.
108+
str_vec -- A vector of std::string values. The vector is printed as "s[str1|str2|...]"
109+
where s is the size of the vector and str's are the strings.
110+
dbl_set -- A set of double precision values. The set is printed as "s[d1|d2|...]"
111+
where s is the size of the set and d's are the double values.
112+
str_set -- A set of std::string values. The set is printed as "s[str1|str2|...]"
113+
where s is the size of the set and str's are the strings.
108114
str_dbl_map -- A map of string keys to double precision values. The map is printed as "s{k1:v1|k2:v2|...}"
109115
where s is the size of the map and k's and v's are keys and values.
110116
str_dbl_unomap -- An unordered map of string keys to double precision values. The map is printed as "s{k1:v1|k2:v2|...}"

docs/HTML/write.html

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,14 @@
104104
DateTimeAME -- DateTime string printed in American style (MM/DD/YYYY HH:MM:SS.mmm)
105105
DateTimeEUR -- DateTime string printed in European style (YYYY/MM/DD HH:MM:SS.mmm)
106106
DateTimeISO -- DateTime string printed in ISO style (YYYY-MM-DD HH:MM:SS.mmm)
107-
dbl_vector -- A vector of double precision values, The vector is printed as "s[d1|d2|...]"
107+
dbl_vec -- A vector of double precision values. The vector is printed as "s[d1|d2|...]"
108108
where s is the size of the vector and d's are the double values.
109+
str_vec -- A vector of std::string values. The vector is printed as "s[str1|str2|...]"
110+
where s is the size of the vector and str's are the strings.
111+
dbl_set -- A set of double precision values. The set is printed as "s[d1|d2|...]"
112+
where s is the size of the set and d's are the double values.
113+
str_set -- A set of std::string values. The set is printed as "s[str1|str2|...]"
114+
where s is the size of the set and str's are the strings.
109115
str_dbl_map -- A map of string keys to double precision values. The map is printed as "s{k1:v1|k2:v2|...}"
110116
where s is the size of the map and k's and v's are keys and values.
111117
str_dbl_unomap -- An unordered map of string keys to double precision values. The map is printed as "s{k1:v1|k2:v2|...}"

examples/hello_world.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ using StrDataFrame = StdDataFrame<std::string>;
5151
//
5252
using DTDataFrame = StdDataFrame<DateTime>;
5353

54-
// This is just some arbitrary type to show how any type could be in DataFrame
54+
// This is just some arbitrary type to show how any type, including the DataFrame itself, could be in DataFrame
5555
//
5656
struct MyData {
5757
int i { 10 };

include/DataFrame/Internals/DataFrame_read.tcc

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,7 @@ read_csv2_(std::istream &stream,
514514
stream.unget();
515515

516516
// First get the header which is column names, sizes and types
517+
//
517518
if (! header_read) [[unlikely]] {
518519
col_name.clear();
519520
type_str.clear();
@@ -532,6 +533,7 @@ read_csv2_(std::istream &stream,
532533
size_type row_cnt = 0;
533534

534535
// Jump to the starting row
536+
//
535537
while (row_cnt < starting_row && stream.get(c))
536538
if (c == '\r' || c == '\n')
537539
row_cnt += 1;
@@ -592,6 +594,7 @@ read_csv2_(std::istream &stream,
592594
col_name.c_str(),
593595
nrows);
594596
// This includes DateTime, DateTimeAME, DateTimeEUR, DateTimeISO
597+
//
595598
else if (! ::strncmp(type_str.c_str(), "DateTime", 8))
596599
spec_vec.emplace_back(StlVecType<DateTime>(),
597600
type_str.c_str(),
@@ -607,6 +610,21 @@ read_csv2_(std::istream &stream,
607610
type_str.c_str(),
608611
col_name.c_str(),
609612
nrows);
613+
else if (type_str == "str_vec")
614+
spec_vec.emplace_back(StlVecType<StlVecType<std::string>>{ },
615+
type_str.c_str(),
616+
col_name.c_str(),
617+
nrows);
618+
else if (type_str == "dbl_set")
619+
spec_vec.emplace_back(StlVecType<std::set<double>>{ },
620+
type_str.c_str(),
621+
col_name.c_str(),
622+
nrows);
623+
else if (type_str == "str_set")
624+
spec_vec.emplace_back(StlVecType<std::set<std::string>>{ },
625+
type_str.c_str(),
626+
col_name.c_str(),
627+
nrows);
610628
else if (type_str == "str_dbl_map")
611629
spec_vec.emplace_back(
612630
StlVecType<std::map<std::string, double>>{ },
@@ -743,6 +761,39 @@ read_csv2_(std::istream &stream,
743761
value.c_str())));
744762
}
745763
}
764+
else if (col_spec.type_spec == "str_vec") {
765+
if (! value.empty()) {
766+
StlVecType<StlVecType<std::string>> &vec =
767+
std::any_cast<StlVecType<StlVecType<std::string>> &>
768+
(col_spec.col_vec);
769+
770+
vec.push_back(
771+
std::move(_get_str_vec_from_value_<DataFrame<I, H>>(
772+
value.c_str())));
773+
}
774+
}
775+
else if (col_spec.type_spec == "dbl_set") {
776+
using set_t = std::set<double>;
777+
778+
if (! value.empty()) {
779+
StlVecType<set_t> &vec =
780+
std::any_cast<StlVecType<set_t> &>(col_spec.col_vec);
781+
782+
vec.push_back(std::move(_get_dbl_set_from_value_(
783+
value.c_str())));
784+
}
785+
}
786+
else if (col_spec.type_spec == "str_set") {
787+
using set_t = std::set<std::string>;
788+
789+
if (! value.empty()) {
790+
StlVecType<set_t> &vec =
791+
std::any_cast<StlVecType<set_t> &>(col_spec.col_vec);
792+
793+
vec.push_back(std::move(_get_str_set_from_value_(
794+
value.c_str())));
795+
}
796+
}
746797
else if (col_spec.type_spec == "str_dbl_map") {
747798
using map_t = std::map<std::string, double>;
748799

@@ -864,6 +915,31 @@ read_csv2_(std::istream &stream,
864915
std::move(std::any_cast<StlVecType<StlVecType<double>> &>
865916
(col_spec.col_vec)),
866917
nan_policy::dont_pad_with_nans);
918+
else if (col_spec.type_spec == "str_vec")
919+
load_column<StlVecType<std::string>>(
920+
col_spec.col_name.c_str(),
921+
std::move(
922+
std::any_cast<StlVecType<StlVecType<std::string>> &>
923+
(col_spec.col_vec)),
924+
nan_policy::dont_pad_with_nans);
925+
else if (col_spec.type_spec == "dbl_set") {
926+
using set_t = std::set<double>;
927+
928+
load_column<set_t>(
929+
col_spec.col_name.c_str(),
930+
std::move(std::any_cast<StlVecType<set_t> &>
931+
(col_spec.col_vec)),
932+
nan_policy::dont_pad_with_nans);
933+
}
934+
else if (col_spec.type_spec == "str_set") {
935+
using set_t = std::set<std::string>;
936+
937+
load_column<set_t>(
938+
col_spec.col_name.c_str(),
939+
std::move(std::any_cast<StlVecType<set_t> &>
940+
(col_spec.col_vec)),
941+
nan_policy::dont_pad_with_nans);
942+
}
867943
else if (col_spec.type_spec == "str_dbl_map") {
868944
using map_t = std::map<std::string, double>;
869945

include/DataFrame/Internals/DataFrame_standalone.tcc

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,20 @@ static S &operator << (S &stream, const std::vector<T> &data) {
106106

107107
// ----------------------------------------------------------------------------
108108

109+
// Streams a std::set as "s[e1|e2|...]", where s is the number of elements and
// the e's are the elements in the set's iteration order.
// An empty set writes nothing at all. The stream is returned in every case.
//
template<typename S, typename T>
static S &operator << (S &stream, const std::set<T> &data) {

    if (data.empty())
        return (stream);

    auto        walker = data.cbegin();
    const auto  finish = data.cend();

    // Size, opening bracket and the first element go out in one chain
    //
    stream << data.size() << '[' << *walker;

    // Every remaining element is preceded by the separator
    //
    while (++walker != finish)
        stream << '|' << *walker;

    stream << ']';
    return (stream);
}
120+
121+
// ----------------------------------------------------------------------------
122+
109123
template<typename S, typename T, std::size_t N>
110124
static S &operator << (S &stream, const std::array<T, N> &data) {
111125

@@ -596,6 +610,101 @@ _get_dbl_vec_from_value_(const char *value) {
596610

597611
// ----------------------------------------------------------------------------
598612

613+
// Parses a string-vector cell of the form "s[str1|str2|...]", where s is the
// size of the vector, into a vector of strings.
//
// value: NUL-terminated cell text. A null pointer, or text that contains no
//        '[', yields an empty vector.
// Returns the tokens found between '[' and the first ']' in the order they
// appear. An empty token between two separators is kept as an empty string.
//
// The leading size is only used as a reserve() hint. It is ignored when it is
// not positive and it is capped by the number of characters that follow '[',
// so a corrupt size cannot ask for an absurd allocation. Tokens are collected
// in a std::string, so neither an over-long token nor a missing ']' can write
// past a fixed-size buffer or read past the end of the input.
//
template<typename DF>
inline static typename DF::template StlVecType<std::string>
_get_str_vec_from_value_(const char *value) {

    using vec_t = typename DF::template StlVecType<std::string>;

    vec_t   data;

    if (! value)
        return (data);

    std::size_t vcnt { 0 };

    // Everything before '[' is the size prefix
    //
    while (value[vcnt] && value[vcnt] != '[')
        vcnt += 1;
    if (! value[vcnt])  // No opening bracket, nothing to parse
        return (data);

    // A vector cannot hold more elements than there are characters left
    //
    std::size_t remaining { 0 };

    while (value[vcnt + 1 + remaining])
        remaining += 1;

    // strtol stops at the first non-digit, which is '[' at the latest
    //
    const long  size_hint = std::strtol(value, nullptr, 10);

    if (size_hint > 0) {
        const std::size_t   hint = static_cast<std::size_t>(size_hint);

        data.reserve(hint < remaining ? hint : remaining);
    }

    std::string token;

    vcnt += 1;  // skip [
    while (value[vcnt] && value[vcnt] != ']') {
        token.clear();
        while (value[vcnt] && value[vcnt] != '|' && value[vcnt] != ']')
            token.push_back(value[vcnt++]);
        data.push_back(token);

        // ']' closes the list and '\0' means the cell was truncated.
        // Either way there is nothing more to read.
        //
        if (value[vcnt] != '|')
            break;
        vcnt += 1;  // skip separator
    }
    return (data);
}
643+
644+
// ----------------------------------------------------------------------------
645+
646+
// Parses a double-set cell of the form "s[d1|d2|...]" into a std::set<double>.
//
// value: NUL-terminated cell text. A null pointer, or text that contains no
//        '[', yields an empty set.
// Returns the set of values found between '[' and the first ']'. Each token
// is converted with std::strtod, so a token that does not start with a number
// contributes 0.0. Duplicates collapse, as for any std::set.
//
// The size prefix is skipped because a set does not need it. Tokens are
// collected in a std::string, so neither an over-long token nor a missing ']'
// can write past a fixed-size buffer or read past the end of the input.
//
inline static std::set<double>
_get_dbl_set_from_value_(const char *value) {

    using set_t = std::set<double>;

    set_t   data;

    if (! value)
        return (data);

    std::size_t vcnt { 0 };

    // Everything before '[' is the size, which is useless for sets
    //
    while (value[vcnt] && value[vcnt] != '[')
        vcnt += 1;
    if (! value[vcnt])  // No opening bracket, nothing to parse
        return (data);

    std::string token;

    vcnt += 1;  // skip [
    while (value[vcnt] && value[vcnt] != ']') {
        token.clear();
        while (value[vcnt] && value[vcnt] != '|' && value[vcnt] != ']')
            token.push_back(value[vcnt++]);
        data.insert(std::strtod(token.c_str(), nullptr));

        // ']' closes the list and '\0' means the cell was truncated.
        // Either way there is nothing more to read.
        //
        if (value[vcnt] != '|')
            break;
        vcnt += 1;  // skip separator
    }
    return (data);
}
674+
675+
// ----------------------------------------------------------------------------
676+
677+
// Parses a string-set cell of the form "s[str1|str2|...]" into a
// std::set<std::string>.
//
// value: NUL-terminated cell text. A null pointer, or text that contains no
//        '[', yields an empty set.
// Returns the set of tokens found between '[' and the first ']'. Duplicates
// collapse, as for any std::set.
//
// The size prefix is skipped because a set does not need it. Tokens are
// collected in a std::string, so neither an over-long token nor a missing ']'
// can write past a fixed-size buffer or read past the end of the input.
//
inline static std::set<std::string>
_get_str_set_from_value_(const char *value) {

    using set_t = std::set<std::string>;

    set_t   data;

    if (! value)
        return (data);

    std::size_t vcnt { 0 };

    // Everything before '[' is the size, which is useless for sets
    //
    while (value[vcnt] && value[vcnt] != '[')
        vcnt += 1;
    if (! value[vcnt])  // No opening bracket, nothing to parse
        return (data);

    std::string token;

    vcnt += 1;  // skip [
    while (value[vcnt] && value[vcnt] != ']') {
        token.clear();
        while (value[vcnt] && value[vcnt] != '|' && value[vcnt] != ']')
            token.push_back(value[vcnt++]);
        data.insert(token);

        // ']' closes the list and '\0' means the cell was truncated.
        // Either way there is nothing more to read.
        //
        if (value[vcnt] != '|')
            break;
        vcnt += 1;  // skip separator
    }
    return (data);
}
705+
706+
// ----------------------------------------------------------------------------
707+
599708
template<typename MAP>
600709
inline static MAP
601710
_get_str_dbl_map_from_value_(const char *value) {

test/dataframe_tester_3.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2199,6 +2199,9 @@ static void test_read_csv_with_maps() {
21992199
using DT_DataFrame = StdDataFrame<DateTime>;
22002200
using map_t = std::map<std::string, double>;
22012201
using unomap_t = std::unordered_map<std::string, double>;
2202+
using str_vec_t = std::vector<std::string>;
2203+
using str_set_t = std::set<std::string>;
2204+
using dbl_set_t = std::set<double>;
22022205

22032206
DT_DataFrame df;
22042207

@@ -2221,6 +2224,23 @@ static void test_read_csv_with_maps() {
22212224
assert((std::fabs(
22222225
df.get_column<unomap_t>
22232226
("Unordered Map")[0]["Key one 2"] - -782.5) < 0.001));
2227+
2228+
assert((df.get_column<str_vec_t>("Str Vec").size() == 4));
2229+
assert((df.get_column<str_vec_t>("Str Vec")[1].size() == 4));
2230+
assert((df.get_column<str_vec_t>("Str Vec")[3].size() == 3));
2231+
assert((df.get_column<str_vec_t>("Str Vec")[2][2] == "345"));
2232+
2233+
assert((df.get_column<dbl_set_t>("Double Set").size() == 4));
2234+
assert((df.get_column<dbl_set_t>("Double Set")[1].size() == 3));
2235+
assert((df.get_column<dbl_set_t>("Double Set")[3].size() == 4));
2236+
assert((*(df.get_column<dbl_set_t>("Double Set")[2].find(444.44)) ==
2237+
444.44));
2238+
2239+
assert((df.get_column<str_set_t>("Str Set").size() == 4));
2240+
assert((df.get_column<str_set_t>("Str Set")[1].size() == 3));
2241+
assert((df.get_column<str_set_t>("Str Set")[3].size() == 4));
2242+
assert((*(df.get_column<str_set_t>("Str Set")[0].find("123.0")) ==
2243+
"123.0"));
22242244
}
22252245
catch (const DataFrameError &ex) {
22262246
std::cout << ex.what() << std::endl;

0 commit comments

Comments
 (0)