From d66e65bff1b4be88a78edd424376a01a393c71e6 Mon Sep 17 00:00:00 2001 From: Daniel Lipovetsky <3445370+dlipovetsky@users.noreply.github.com> Date: Thu, 27 Jul 2023 13:04:29 -0700 Subject: [PATCH] feat: Compress data backup using zstd (#264) The backup turns out to be highly compressible! I chose zstd. Although gzip is standard, zstd is faster, and compresses better. Example: 4.6M sloop-cluster-0.bak.zst 9.5M sloop-cluster-0.bak.gz 155M sloop-cluster-0.bak --- go.mod | 1 + go.sum | 2 ++ pkg/sloop/webserver/webserver.go | 21 ++++++++++++++++++--- pkg/sloop/webserver/webserver_test.go | 18 ++++++++++++++++++ 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 16d7c40d..ad0e4e2c 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/golang/glog v1.0.0 github.com/golang/protobuf v1.5.2 github.com/jteeuwen/go-bindata v3.0.7+incompatible + github.com/klauspost/compress v1.16.5 github.com/nsf/jsondiff v0.0.0-20190712045011-8443391ee9b6 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.14.0 diff --git a/go.sum b/go.sum index 9405a870..93fa96e7 100644 --- a/go.sum +++ b/go.sum @@ -205,6 +205,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.16.5 h1:IFV2oUNUzZaz+XyusxpLzpzS8Pt5rh0Z16For/djlyI= +github.com/klauspost/compress v1.16.5/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= diff --git a/pkg/sloop/webserver/webserver.go b/pkg/sloop/webserver/webserver.go index 8f123579..c39d8e6f 100644 --- a/pkg/sloop/webserver/webserver.go +++ b/pkg/sloop/webserver/webserver.go @@ -23,6 +23,7 @@ import ( "syscall" "time" + "github.com/klauspost/compress/zstd" "github.com/salesforce/sloop/pkg/sloop/common" "github.com/spf13/afero" @@ -116,11 +117,25 @@ func backupHandler(db badgerwrap.DB, currentContext string) http.HandlerFunc { return } - w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=sloop-%s-%d.bak", currentContext, since)) - w.Header().Set("Content-Type", "application/octet-stream") + w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=sloop-%s-%d.bak.zst", currentContext, since)) + // The 'Content-Length' header is not set, because we do not know the size of the backup before we write it to the body. + w.Header().Set("Content-Encoding", "zstd") + w.Header().Set("Content-Type", "application/zstd") w.Header().Set("Transfer-Encoding", "chunked") - _, err = db.Backup(w, since) + zw, err := zstd.NewWriter(w) + if err != nil { + logWebError(err, "Error configuring compression", r, w) + return + } + + _, err = db.Backup(zw, since) + if err != nil { + logWebError(err, "Error writing backup", r, w) + return + } + + err = zw.Close() if err != nil { logWebError(err, "Error writing backup", r, w) return diff --git a/pkg/sloop/webserver/webserver_test.go b/pkg/sloop/webserver/webserver_test.go index f14c5875..fa9305ae 100644 --- a/pkg/sloop/webserver/webserver_test.go +++ b/pkg/sloop/webserver/webserver_test.go @@ -5,6 +5,8 @@ import ( "net/http/httptest" "testing" + badger "github.com/dgraph-io/badger/v2" + "github.com/salesforce/sloop/pkg/sloop/store/untyped/badgerwrap" "github.com/stretchr/testify/assert" ) @@ -55,3 +57,19 @@ func TestWebFileHandler(t *testing.T) { assert.Equal(t, http.StatusOK, rr.Code) assert.NotNil(t, rr.Body.String()) } + +func TestBackupHandler(t *testing.T) { + req, err := http.NewRequest("GET", "/clusterContext/data/backup", nil) + assert.Nil(t, err) + + db, err := (&badgerwrap.MockFactory{}).Open(badger.DefaultOptions("")) + assert.Nil(t, err) + + // Create a ResponseRecorder (which satisfies http.ResponseWriter) to record the response. + rr := httptest.NewRecorder() + handler := http.HandlerFunc(backupHandler(db, "clusterContext")) + handler.ServeHTTP(rr, req) + + assert.Equal(t, http.StatusOK, rr.Code) + assert.NotNil(t, rr.Body.String()) +}