@@ -22,6 +22,7 @@ limitations under the License.
2222#include < vector>
2323
2424#include " absl/status/status.h"
25+ #include " absl/strings/match.h"
2526#include " absl/strings/str_format.h"
2627#include " absl/strings/string_view.h"
2728#include " cpp/array_record_reader.h"
@@ -34,6 +35,8 @@ limitations under the License.
3435#include " riegeli/base/maker.h"
3536#include " riegeli/bytes/fd_reader.h"
3637#include " riegeli/bytes/fd_writer.h"
38+ #include " riegeli/gcs/gcs_object.h"
39+ #include " riegeli/gcs/gcs_reader.h"
3740
3841namespace py = pybind11;
3942
@@ -50,10 +53,13 @@ PYBIND11_MODULE(array_record_module, m) {
5053 throw py::value_error (
5154 std::string (status_or_option.status ().message ()));
5255 }
56+ riegeli::FdWriterBase::Options file_writer_options;
57+ file_writer_options.set_buffer_size (size_t {16 } << 20 );
5358 // Release the GIL because IO is time consuming.
5459 py::gil_scoped_release scoped_release;
5560 return new array_record::ArrayRecordWriter (
56- riegeli::Maker<riegeli::FdWriter>(path),
61+ riegeli::Maker<riegeli::FdWriter>(
62+ path, std::move (file_writer_options)),
5763 status_or_option.value ());
5864 }),
5965 py::arg (" path" ), py::arg (" options" ) = " " )
@@ -84,18 +90,29 @@ PYBIND11_MODULE(array_record_module, m) {
8490 std::string (status_or_option.status ().message ()));
8591 }
8692 riegeli::FdReaderBase::Options file_reader_options;
93+ riegeli::GcsReader::Options gcs_reader_options;
8794 if (kwargs.contains (" file_reader_buffer_size" )) {
8895 auto file_reader_buffer_size =
8996 kwargs[" file_reader_buffer_size" ].cast <int64_t >();
9097 file_reader_options.set_buffer_size (file_reader_buffer_size);
98+ gcs_reader_options.set_buffer_size (file_reader_buffer_size);
9199 }
92100 // Release the GIL because IO is time consuming.
93101 py::gil_scoped_release scoped_release;
94- return new array_record::ArrayRecordReader (
95- riegeli::Maker<riegeli::FdReader>(
96- path, std::move (file_reader_options)),
97- status_or_option.value (),
98- array_record::ArrayRecordGlobalPool ());
102+ if (absl::StartsWith (path, " gs://" )) {
103+ return new array_record::ArrayRecordReader (
104+ riegeli::Maker<riegeli::GcsReader>(
105+ google::cloud::storage::Client (),
106+ riegeli::GcsObject (path), std::move (gcs_reader_options)),
107+ status_or_option.value (),
108+ array_record::ArrayRecordGlobalPool ());
109+ } else {
110+ return new array_record::ArrayRecordReader (
111+ riegeli::Maker<riegeli::FdReader>(
112+ path, std::move (file_reader_options)),
113+ status_or_option.value (),
114+ array_record::ArrayRecordGlobalPool ());
115+ }
99116 }),
100117 py::arg (" path" ), py::arg (" options" ) = " " , R"(
101118 ArrayRecordReader for fast sequential or random access.
0 commit comments