
Commit 8901ff3

upload-gzstd.js refinements
1 parent 602216c commit 8901ff3

4 files changed: +120 -23 lines changed

.gitignore

Lines changed: 3 additions & 1 deletion

@@ -4,4 +4,6 @@ current_commit_noagpl
 *.term
 *.sage-chat
 node_modules/
-dist/
+dist/
+src/cloud-filesystem/scripts/package-lock.json
+src/cloud-filesystem/scripts/package.json

Makefile

Lines changed: 4 additions & 1 deletion

@@ -141,8 +141,11 @@ JFS_VERSION=1.2.0-rc1
 GCSFUSE_VERSION=2.2.0
 # See https://github.com/Snapchat/KeyDB/releases
 KEYDB_VERSION=6.3.4
+
+# NOTE: we delete 'scripts/node_modules' since we don't want that artifact in our
+# container, as it is platform dependent -- it gets installed at runtime
 cloud-filesystem:
-	cd src/cloud-filesystem && docker build --build-arg ARCH=$(ARCH) --build-arg ARCH1=$(ARCH1) --build-arg JFS_VERSION=$(JFS_VERSION) --build-arg KEYDB_VERSION=$(KEYDB_VERSION) --build-arg GCSFUSE_VERSION=$(GCSFUSE_VERSION) --build-arg BASE_TAG=$(BASE_TAG) -t $(DOCKER_USER)/cloud-filesystem$(ARCH):$(CLOUD_FILESYSTEM_TAG) .
+	cd src/cloud-filesystem && rm -rf scripts/node_modules && docker build --build-arg ARCH=$(ARCH) --build-arg ARCH1=$(ARCH1) --build-arg JFS_VERSION=$(JFS_VERSION) --build-arg KEYDB_VERSION=$(KEYDB_VERSION) --build-arg GCSFUSE_VERSION=$(GCSFUSE_VERSION) --build-arg BASE_TAG=$(BASE_TAG) -t $(DOCKER_USER)/cloud-filesystem$(ARCH):$(CLOUD_FILESYSTEM_TAG) .
 run-cloud-filesystem:
 	docker run --name run-cloud-filesystem -it --rm $(DOCKER_USER)/cloud-filesystem$(ARCH):$(CLOUD_FILESYSTEM_TAG) bash
 push-cloud-filesystem:
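
The "installed at runtime" in the NOTE refers to the new upload script installing its own npm dependency on first use inside the container, so the platform-specific node_modules never has to be baked into the image. A minimal sketch of that lazy-install pattern, assuming npm is available at runtime (the real helper is ensureGoogleCloudInstalled in upload-gzstd.js below; ensureInstalled is a simplified, hypothetical name):

const util = require("util");
const exec = util.promisify(require("child_process").exec);

// Require a package if it is already present; otherwise npm-install it
// once at runtime, so the next require(pkg) by the caller succeeds.
async function ensureInstalled(pkg) {
  try {
    require(pkg); // already in node_modules for this platform
  } catch (_) {
    await exec(`npm install ${pkg}`); // platform-appropriate install at runtime
  }
}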

src/cloud-filesystem/scripts/cloud_filesystem.py

Lines changed: 0 additions & 21 deletions

@@ -305,27 +305,6 @@ def start_keydb(filesystem, network):
     run(f"keydb-server {keydb_config_file}")


-    # wait_until_keydb_replication_is_stable(filesystem['port'])
-
-# def get_keydb_replication_info(port):
-#     s = subprocess.run(['keydb-cli', '-p',
-#                         str(port), "INFO", "replication"],
-#                        capture_output=True)
-#     if s.returncode:
-#         raise RuntimeError(s.stderr)
-#     v = [x for x in str(s.stdout.decode()).splitlines() if ':' in x]
-#     return dict([x.split(':') for x in v])
-
-# def wait_until_keydb_replication_is_stable(port):
-#     while True:
-#         info = get_keydb_replication_info(port)
-#         if info.get('role', '') == 'master':
-#             # no other peers, so nothing to wait for
-#             return
-#         if info.get("master_link_status",'') == 'up' and info.get("master_sync_in_progress","") == '0'
-#         time.sleep(1)
-
-
 def juicefs_paths(filesystem):
     id = filesystem['id']
     return {
src/cloud-filesystem/scripts/upload-gzstd.js

Lines changed: 113 additions & 0 deletions

@@ -0,0 +1,113 @@
+#!/usr/bin/env node
+/*
+This nodejs script takes as command line parameters:
+- a source file
+- a target of the form 'gs://bucketName/path/to/target.gz'
+
+It reads the file into memory, compresses it (in memory), then uploads the compressed
+file to the given Google cloud storage bucket with the STANDARD storage class
+using the @google-cloud/storage npm package. Critically, it uses the standard storage
+class regardless of what the default storage class is for the bucket.
+
+This is very important for writing redis dump files periodically to buckets that
+might have a very non-STANDARD default storage class. The compression saves a lot
+of space, and since we write the dump file once per minute we save a huge amount
+of money on early deletion fees.
+*/
+
+const fs = require("fs/promises");
+const util = require("util");
+const path = require("path");
+const exec = util.promisify(require("child_process").exec);
+const zlib = require("node:zlib");
+const gzip = util.promisify(zlib.gzip);
+
+async function ensureGoogleCloudInstalled(pkg) {
+  try {
+    require(pkg);
+  } catch (_) {
+    console.log(`Installing ${pkg}...`);
+    await exec(`npm install ${pkg}`);
+    console.log(`${pkg} installed successfully!`);
+  }
+}
+
+async function uploadFileToGCS(filename, destFilename, bucketName) {
+  await ensureGoogleCloudInstalled("@google-cloud/storage");
+  const { Storage } = require("@google-cloud/storage");
+  const storage = new Storage();
+
+  try {
+    const buffer = await fs.readFile(filename);
+    // level 1 since any compression at all is great, but beyond 1 it hardly makes a difference,
+    // due to redis dumps already being locally compressed. Plus this is much faster, hence less wasted cpu.
+    const bufferGz = await gzip(buffer, { level: 1 });
+
+    const bucket = storage.bucket(bucketName);
+    const file = bucket.file(destFilename);
+
+    // Create a writable stream for the Google Cloud Storage file,
+    // which creates the new object with STANDARD storage class,
+    // which is key!
+    const stream = file.createWriteStream({
+      metadata: {
+        contentType: "application/gzip",
+        storageClass: "STANDARD",
+      },
+      resumable: false,
+    });
+
+    // Stream event handlers
+    await new Promise((resolve, reject) => {
+      stream.on("error", reject);
+      stream.on("finish", resolve);
+      stream.end(bufferGz);
+    });
+
+    console.log(
+      `Uploaded ${destFilename} to gs://${bucketName}/${destFilename}`,
+    );
+  } catch (err) {
+    console.error("Failed to upload file:", err);
+  }
+}
+
+function main() {
+  if (process.argv.length !== 4) {
+    console.error(
+      `Gzips and uploads a file to Google Cloud Storage with the STANDARD
+storage class set, regardless of bucket defaults.
+Assumes GOOGLE_APPLICATION_CREDENTIALS points to a service account file.`,
+    );
+    console.error(
+      "Usage: node upload-gzstd.js <path/to/source> <gs://bucketName/path/to/target.gz>",
+    );
+    process.exit(1);
+  }
+  const filename = process.argv[2];
+  let dest = process.argv[3];
+  if (!dest.startsWith("gs://")) {
+    console.error("target must start with gs://");
+    process.exit(1);
+  }
+  dest = dest.slice("gs://".length);
+  const i = dest.indexOf("/");
+  let destFilename, bucketName;
+  if (i == -1) {
+    bucketName = dest;
+    destFilename = "";
+  } else {
+    bucketName = dest.slice(0, i);
+    destFilename = dest.slice(i + 1).trim();
+  }
+  if (!destFilename) {
+    destFilename = path.basename(filename);
+  }
+  if (!destFilename.endsWith(".gz")) {
+    destFilename += ".gz";
+  }
+  console.log({ filename, destFilename, bucketName });
+  uploadFileToGCS(filename, destFilename, bucketName);
+}
+
+main();
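
Typical invocation, per the script's own usage message (the paths and bucket here are illustrative):

node upload-gzstd.js /data/dump.rdb gs://my-bucket/backups/dump.rdb

A missing .gz suffix is appended automatically, and GOOGLE_APPLICATION_CREDENTIALS must point at a service account key file. To confirm that an object really landed with the STANDARD storage class on a bucket whose default is something else, one can read back its metadata with the same npm package; a minimal sketch, again with illustrative names:

const { Storage } = require("@google-cloud/storage");

// Read back an uploaded object's metadata and print its storage class.
async function checkStorageClass(bucketName, objectName) {
  const storage = new Storage();
  const [metadata] = await storage
    .bucket(bucketName)
    .file(objectName)
    .getMetadata();
  console.log(objectName, "->", metadata.storageClass); // expect "STANDARD"
}

checkStorageClass("my-bucket", "backups/dump.rdb.gz").catch(console.error);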
