Skip to content

Commit 6662d1d

Browse files
ArrayRecord Team authored and copybara-github committed
Fix dependency issues and add reading/writing with gs:// URIs
PiperOrigin-RevId: 831144085
1 parent d73af39 commit 6662d1d

File tree

4 files changed

+30
-11
lines changed

4 files changed

+30
-11
lines changed

MODULE.bazel

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,15 +13,15 @@
1313
# limitations under the License.
1414

1515
# TODO(fchern): automate version string alignment with setup.py
16-
VERSION = "0.8.2"
16+
VERSION = "0.8.3"
1717

1818
module(
1919
name = "array_record",
2020
version = VERSION,
2121
repo_name = "com_google_array_record",
2222
)
2323

24-
bazel_dep(name = "rules_proto", version = "7.0.2")
24+
bazel_dep(name = "rules_proto", version = "7.1.0")
2525
bazel_dep(name = "rules_python", version = "1.6.0")
2626
bazel_dep(name = "platforms", version = "0.0.11")
2727

@@ -34,10 +34,10 @@ single_version_override(
3434
)
3535

3636
bazel_dep(name = "googletest", version = "1.15.2")
37-
bazel_dep(name = "abseil-cpp", version = "20240722.0")
37+
bazel_dep(name = "abseil-cpp", version = "20250127.1")
3838
bazel_dep(name = "abseil-py", version = "2.1.0")
3939
bazel_dep(name = "eigen", version = "3.4.0.bcr.3")
40-
bazel_dep(name = "riegeli", version = "0.0.0-20241218-3385e3c")
40+
bazel_dep(name = "riegeli", version = "0.0.0-20250717-5b2e77e")
4141
bazel_dep(name = "pybind11_bazel", version = "2.12.0")
4242

4343
SUPPORTED_PYTHON_VERSIONS = [

python/BUILD

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,8 @@ pybind_extension(
2020
"@riegeli//riegeli/base:initializer",
2121
"@riegeli//riegeli/bytes:fd_reader",
2222
"@riegeli//riegeli/bytes:fd_writer",
23+
"@riegeli//riegeli/gcs:gcs_object",
24+
"@riegeli//riegeli/gcs:gcs_reader",
2325
],
2426
)
2527

python/array_record_module.cc

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ limitations under the License.
2222
#include <vector>
2323

2424
#include "absl/status/status.h"
25+
#include "absl/strings/match.h"
2526
#include "absl/strings/str_format.h"
2627
#include "absl/strings/string_view.h"
2728
#include "cpp/array_record_reader.h"
@@ -34,6 +35,8 @@ limitations under the License.
3435
#include "riegeli/base/maker.h"
3536
#include "riegeli/bytes/fd_reader.h"
3637
#include "riegeli/bytes/fd_writer.h"
38+
#include "riegeli/gcs/gcs_object.h"
39+
#include "riegeli/gcs/gcs_reader.h"
3740

3841
namespace py = pybind11;
3942

@@ -50,10 +53,13 @@ PYBIND11_MODULE(array_record_module, m) {
5053
throw py::value_error(
5154
std::string(status_or_option.status().message()));
5255
}
56+
riegeli::FdWriterBase::Options file_writer_options;
57+
file_writer_options.set_buffer_size(size_t{16} << 20);
5358
// Release the GIL because IO is time consuming.
5459
py::gil_scoped_release scoped_release;
5560
return new array_record::ArrayRecordWriter(
56-
riegeli::Maker<riegeli::FdWriter>(path),
61+
riegeli::Maker<riegeli::FdWriter>(
62+
path, std::move(file_writer_options)),
5763
status_or_option.value());
5864
}),
5965
py::arg("path"), py::arg("options") = "")
@@ -84,18 +90,29 @@ PYBIND11_MODULE(array_record_module, m) {
8490
std::string(status_or_option.status().message()));
8591
}
8692
riegeli::FdReaderBase::Options file_reader_options;
93+
riegeli::GcsReader::Options gcs_reader_options;
8794
if (kwargs.contains("file_reader_buffer_size")) {
8895
auto file_reader_buffer_size =
8996
kwargs["file_reader_buffer_size"].cast<int64_t>();
9097
file_reader_options.set_buffer_size(file_reader_buffer_size);
98+
gcs_reader_options.set_buffer_size(file_reader_buffer_size);
9199
}
92100
// Release the GIL because IO is time consuming.
93101
py::gil_scoped_release scoped_release;
94-
return new array_record::ArrayRecordReader(
95-
riegeli::Maker<riegeli::FdReader>(
96-
path, std::move(file_reader_options)),
97-
status_or_option.value(),
98-
array_record::ArrayRecordGlobalPool());
102+
if (absl::StartsWith(path, "gs://")) {
103+
return new array_record::ArrayRecordReader(
104+
riegeli::Maker<riegeli::GcsReader>(
105+
google::cloud::storage::Client(),
106+
riegeli::GcsObject(path), std::move(gcs_reader_options)),
107+
status_or_option.value(),
108+
array_record::ArrayRecordGlobalPool());
109+
} else {
110+
return new array_record::ArrayRecordReader(
111+
riegeli::Maker<riegeli::FdReader>(
112+
path, std::move(file_reader_options)),
113+
status_or_option.value(),
114+
array_record::ArrayRecordGlobalPool());
115+
}
99116
}),
100117
py::arg("path"), py::arg("options") = "", R"(
101118
ArrayRecordReader for fast sequential or random access.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def has_ext_modules(self):
3131

3232
setup(
3333
name='array_record',
34-
version='0.8.2',
34+
version='0.8.3',
3535
description='A file format that achieves a new frontier of IO efficiency',
3636
author='ArrayRecord team',
3737
author_email='[email protected]',

0 commit comments

Comments
 (0)