diff --git a/admin/commands/summary.go b/admin/commands/summary.go index 68cc2850a9a..03232015aef 100644 --- a/admin/commands/summary.go +++ b/admin/commands/summary.go @@ -152,9 +152,9 @@ func addClientData(ctx context.Context, zipW *zip.Writer) { } // addServerData adds logs.zip from PMM Server to zip file. -func addServerData(ctx context.Context, zipW *zip.Writer) { +func addServerData(ctx context.Context, zipW *zip.Writer, usePprof bool) { var buf bytes.Buffer - _, err := client.Default.Server.Logs(&server.LogsParams{Context: ctx}, &buf) + _, err := client.Default.Server.Logs(&server.LogsParams{Context: ctx, Pprof: &usePprof}, &buf) if err != nil { logrus.Errorf("%s", err) return @@ -271,7 +271,6 @@ func addPprofData(ctx context.Context, zipW *zip.Writer, skipServer bool) { isRunOnPmmServer, _ := helpers.IsOnPmmServer() if !skipServer && isRunOnPmmServer { - sources["server/pprof/pmm-managed"] = fmt.Sprintf("http://%s:7773/debug/pprof", agentlocal.Localhost) sources["server/pprof/qan-api2"] = fmt.Sprintf("http://%s:9933/debug/pprof", agentlocal.Localhost) } @@ -339,7 +338,7 @@ func (cmd *summaryCommand) makeArchive(ctx context.Context) (err error) { } if !cmd.SkipServer { - addServerData(ctx, zipW) + addServerData(ctx, zipW, cmd.Pprof) } return //nolint:nakedret diff --git a/api/serverpb/json/client/server/logs_parameters.go b/api/serverpb/json/client/server/logs_parameters.go index 69dd8a9d4d5..96d552d3d63 100644 --- a/api/serverpb/json/client/server/logs_parameters.go +++ b/api/serverpb/json/client/server/logs_parameters.go @@ -14,6 +14,7 @@ import ( "github.com/go-openapi/runtime" cr "github.com/go-openapi/runtime/client" "github.com/go-openapi/strfmt" + "github.com/go-openapi/swag" ) // NewLogsParams creates a new LogsParams object, @@ -58,6 +59,12 @@ func NewLogsParamsWithHTTPClient(client *http.Client) *LogsParams { Typically these are written to a http.Request. */ type LogsParams struct { + /* Pprof. + + Include performance profiling data, + */ + Pprof *bool + timeout time.Duration Context context.Context HTTPClient *http.Client @@ -111,6 +118,17 @@ func (o *LogsParams) SetHTTPClient(client *http.Client) { o.HTTPClient = client } +// WithPprof adds the pprof to the logs params +func (o *LogsParams) WithPprof(pprof *bool) *LogsParams { + o.SetPprof(pprof) + return o +} + +// SetPprof adds the pprof to the logs params +func (o *LogsParams) SetPprof(pprof *bool) { + o.Pprof = pprof +} + // WriteToRequest writes these params to a swagger request func (o *LogsParams) WriteToRequest(r runtime.ClientRequest, reg strfmt.Registry) error { if err := r.SetTimeout(o.timeout); err != nil { @@ -118,6 +136,22 @@ func (o *LogsParams) WriteToRequest(r runtime.ClientRequest, reg strfmt.Registry } var res []error + if o.Pprof != nil { + + // query param pprof + var qrPprof bool + + if o.Pprof != nil { + qrPprof = *o.Pprof + } + qPprof := swag.FormatBool(qrPprof) + if qPprof != "" { + if err := r.SetQueryParam("pprof", qPprof); err != nil { + return err + } + } + } + if len(res) > 0 { return errors.CompositeValidationError(res...) } diff --git a/api/serverpb/json/header.json b/api/serverpb/json/header.json index ad56799b643..3dbc7c393af 100644 --- a/api/serverpb/json/header.json +++ b/api/serverpb/json/header.json @@ -17,6 +17,14 @@ "description": "Returns the PMM Server logs.", "summary": "Logs", "operationId": "Logs", + "parameters": [ + { + "type": "boolean", + "description": "Include performance profiling data,", + "name": "pprof", + "in": "query" + } + ], "produces": ["application/zip"], "responses": { "200": { diff --git a/api/serverpb/json/serverpb.json b/api/serverpb/json/serverpb.json index 9812e91477a..28c3b92a85f 100644 --- a/api/serverpb/json/serverpb.json +++ b/api/serverpb/json/serverpb.json @@ -26,6 +26,14 @@ ], "summary": "Logs", "operationId": "Logs", + "parameters": [ + { + "type": "boolean", + "description": "Include performance profiling data,", + "name": "pprof", + "in": "query" + } + ], "responses": { "200": { "description": "A successful response.", diff --git a/api/swagger/swagger-dev.json b/api/swagger/swagger-dev.json index aedf6c15057..40afc9b3b91 100644 --- a/api/swagger/swagger-dev.json +++ b/api/swagger/swagger-dev.json @@ -27,6 +27,14 @@ ], "summary": "Logs", "operationId": "Logs", + "parameters": [ + { + "type": "boolean", + "description": "Include performance profiling data,", + "name": "pprof", + "in": "query" + } + ], "responses": { "200": { "description": "A successful response.", diff --git a/api/swagger/swagger.json b/api/swagger/swagger.json index 087b1d426d3..afbe933f255 100644 --- a/api/swagger/swagger.json +++ b/api/swagger/swagger.json @@ -26,6 +26,14 @@ ], "summary": "Logs", "operationId": "Logs", + "parameters": [ + { + "type": "boolean", + "description": "Include performance profiling data,", + "name": "pprof", + "in": "query" + } + ], "responses": { "200": { "description": "A successful response.", diff --git a/managed/cmd/pmm-managed/main.go b/managed/cmd/pmm-managed/main.go index 256657dd6ed..0fa73f76a0d 100644 --- a/managed/cmd/pmm-managed/main.go +++ b/managed/cmd/pmm-managed/main.go @@ -108,14 +108,32 @@ const ( cleanInterval = 10 * time.Minute cleanOlderThan = 30 * time.Minute + + defaultContextTimeout = 10 * time.Second + pProfProfileDuration = 30 * time.Second + pProfTraceDuration = 10 * time.Second ) func addLogsHandler(mux *http.ServeMux, logs *supervisord.Logs) { l := logrus.WithField("component", "logs.zip") mux.HandleFunc("/logs.zip", func(rw http.ResponseWriter, req *http.Request) { + contextTimeout := defaultContextTimeout + // increase context timeout if pprof query parameter exist in request + pprofQueryParameter, err := strconv.ParseBool(req.FormValue("pprof")) + if err != nil { + l.Debug("Unable to read 'pprof' query param. Using default: pprof=false") + } + var pprofConfig *supervisord.PprofConfig + if pprofQueryParameter { + contextTimeout += pProfProfileDuration + pProfTraceDuration + pprofConfig = &supervisord.PprofConfig{ + ProfileDuration: pProfProfileDuration, + TraceDuration: pProfTraceDuration, + } + } // fail-safe - ctx, cancel := context.WithTimeout(req.Context(), 10*time.Second) + ctx, cancel := context.WithTimeout(req.Context(), contextTimeout) defer cancel() filename := fmt.Sprintf("pmm-server_%s.zip", time.Now().UTC().Format("2006-01-02_15-04")) @@ -124,7 +142,7 @@ func addLogsHandler(mux *http.ServeMux, logs *supervisord.Logs) { rw.Header().Set(`Content-Disposition`, `attachment; filename="`+filename+`"`) ctx = logger.Set(ctx, "logs") - if err := logs.Zip(ctx, rw); err != nil { + if err := logs.Zip(ctx, rw, pprofConfig); err != nil { l.Errorf("%+v", err) } }) diff --git a/managed/services/supervisord/logs.go b/managed/services/supervisord/logs.go index c3b4488d11a..ef55cac9561 100644 --- a/managed/services/supervisord/logs.go +++ b/managed/services/supervisord/logs.go @@ -30,12 +30,14 @@ import ( "os/exec" "path/filepath" "sort" + "sync" "time" "github.com/pkg/errors" "golang.org/x/sys/unix" "github.com/percona/pmm/managed/utils/logger" + pprofUtils "github.com/percona/pmm/managed/utils/pprof" "github.com/percona/pmm/utils/pdeathsig" ) @@ -68,7 +70,7 @@ func NewLogs(pmmVersion string, pmmUpdateChecker *PMMUpdateChecker) *Logs { } // Zip creates .zip archive with all logs. -func (l *Logs) Zip(ctx context.Context, w io.Writer) error { +func (l *Logs) Zip(ctx context.Context, w io.Writer, pprofConfig *PprofConfig) error { start := time.Now() log := logger.Get(ctx).WithField("component", "logs") log.WithField("d", time.Since(start).Seconds()).Info("Starting...") @@ -79,7 +81,7 @@ func (l *Logs) Zip(ctx context.Context, w io.Writer) error { zw := zip.NewWriter(w) now := time.Now().UTC() - files := l.files(ctx) + files := l.files(ctx, pprofConfig) log.WithField("d", time.Since(start).Seconds()).Infof("Collected %d files.", len(files)) for _, file := range files { @@ -126,8 +128,8 @@ func (l *Logs) Zip(ctx context.Context, w io.Writer) error { return nil } -// files reads log/config files and returns content. -func (l *Logs) files(ctx context.Context) []fileContent { +// files reads log/config/pprof files and returns content. +func (l *Logs) files(ctx context.Context, pprofConfig *PprofConfig) []fileContent { files := make([]fileContent, 0, 20) // add logs @@ -213,6 +215,45 @@ func (l *Logs) files(ctx context.Context) []fileContent { Err: err, }) + // add pprof + if pprofConfig != nil { + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + traceBytes, err := pprofUtils.Trace(ctx, pprofConfig.TraceDuration) + files = append(files, fileContent{ + Name: "pprof/trace.out", + Data: traceBytes, + Err: err, + }) + }() + + wg.Add(1) + go func() { + defer wg.Done() + profileBytes, err := pprofUtils.Profile(ctx, pprofConfig.ProfileDuration) + files = append(files, fileContent{ + Name: "pprof/profile.pb.gz", + Data: profileBytes, + Err: err, + }) + }() + + wg.Add(1) + go func() { + defer wg.Done() + heapBytes, err := pprofUtils.Heap(true) + files = append(files, fileContent{ + Name: "pprof/heap.pb.gz", + Data: heapBytes, + Err: err, + }) + }() + + wg.Wait() + } + sort.Slice(files, func(i, j int) bool { return files[i].Name < files[j].Name }) return files } diff --git a/managed/services/supervisord/logs_test.go b/managed/services/supervisord/logs_test.go index 2d0d8659491..0fa00c000cb 100644 --- a/managed/services/supervisord/logs_test.go +++ b/managed/services/supervisord/logs_test.go @@ -123,7 +123,7 @@ func TestFiles(t *testing.T) { l := NewLogs("2.4.5", checker) ctx := logger.Set(context.Background(), t.Name()) - files := l.files(ctx) + files := l.files(ctx, nil) actual := make([]string, 0, len(files)) for _, f := range files { // present only after update @@ -156,7 +156,7 @@ func TestZip(t *testing.T) { ctx := logger.Set(context.Background(), t.Name()) var buf bytes.Buffer - require.NoError(t, l.Zip(ctx, &buf)) + require.NoError(t, l.Zip(ctx, &buf, nil)) reader := bytes.NewReader(buf.Bytes()) r, err := zip.NewReader(reader, reader.Size()) require.NoError(t, err) diff --git a/managed/services/supervisord/pprof_config.go b/managed/services/supervisord/pprof_config.go new file mode 100644 index 00000000000..ce41cfc64f8 --- /dev/null +++ b/managed/services/supervisord/pprof_config.go @@ -0,0 +1,25 @@ +// pmm-managed +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package supervisord + +import "time" + +// PprofConfig pprof settings. +type PprofConfig struct { + ProfileDuration time.Duration + TraceDuration time.Duration +} diff --git a/managed/utils/pprof/pprof.go b/managed/utils/pprof/pprof.go new file mode 100644 index 00000000000..c4b9f58775b --- /dev/null +++ b/managed/utils/pprof/pprof.go @@ -0,0 +1,88 @@ +// pmm-managed +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package pprof + +import ( + "bytes" + "context" + "runtime" + "runtime/pprof" + "runtime/trace" + "time" + + "github.com/pkg/errors" +) + +// Profile responds with the pprof-formatted cpu profile. +// Profiling lasts for duration specified in seconds. +func Profile(ctx context.Context, duration time.Duration) ([]byte, error) { + var profileBuf bytes.Buffer + if err := pprof.StartCPUProfile(&profileBuf); err != nil { + return nil, err + } + + select { + case <-time.After(duration): + pprof.StopCPUProfile() + return profileBuf.Bytes(), nil + case <-ctx.Done(): + pprof.StopCPUProfile() + return nil, errors.New("pprof.Profile was canceled") + } +} + +// Trace responds with the execution trace in binary form. +// Tracing lasts for duration specified in seconds. +func Trace(ctx context.Context, duration time.Duration) ([]byte, error) { + var traceBuf bytes.Buffer + if err := trace.Start(&traceBuf); err != nil { + return nil, err + } + + select { + case <-time.After(duration): + trace.Stop() + return traceBuf.Bytes(), nil + case <-ctx.Done(): + trace.Stop() + return nil, errors.New("pprof.Trace was canceled") + } +} + +// Heap responds with the pprof-formatted profile named "heap". Listing the available profiles. +// You can specify the gc parameter to run gc before taking the heap sample. +func Heap(gc bool) ([]byte, error) { + var heapBuf bytes.Buffer + debug := 0 + profile := "heap" + + p := pprof.Lookup(profile) + if p == nil { + return nil, errors.Errorf("profile cannot be found: %s", profile) + } + + if gc { + runtime.GC() + } + + err := p.WriteTo(&heapBuf, debug) + if err != nil { + return nil, err + } + + return heapBuf.Bytes(), nil +} diff --git a/managed/utils/pprof/pprof_test.go b/managed/utils/pprof/pprof_test.go new file mode 100644 index 00000000000..627780d8fc5 --- /dev/null +++ b/managed/utils/pprof/pprof_test.go @@ -0,0 +1,107 @@ +// pmm-managed +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package pprof + +import ( + "bytes" + "compress/gzip" + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestHeap(t *testing.T) { + t.Parallel() + t.Run("Heap test", func(t *testing.T) { + heapBytes, err := Heap(true) + assert.NoError(t, err) + + // read gzip + reader, err := gzip.NewReader(bytes.NewBuffer(heapBytes)) + assert.NoError(t, err) + + var resB bytes.Buffer + _, err = resB.ReadFrom(reader) + assert.NoError(t, err) + assert.NotEmpty(t, resB.Bytes()) + }) +} + +func TestProfile(t *testing.T) { + t.Parallel() + t.Run("Profile test", func(t *testing.T) { + // Create a new context + ctx := context.Background() + profileBytes, err := Profile(ctx, 1*time.Second) + + assert.NoError(t, err) + assert.NotEmpty(t, profileBytes) + + // read gzip + reader, err := gzip.NewReader(bytes.NewBuffer(profileBytes)) + assert.NoError(t, err) + + var resB bytes.Buffer + _, err = resB.ReadFrom(reader) + assert.NoError(t, err) + + assert.NotEmpty(t, resB.Bytes()) + }) + + t.Run("Profile break test", func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*30) + go func() { + profileBytes, err := Profile(ctx, 30*time.Second) + assert.Empty(t, profileBytes) + assert.Error(t, err) + }() + + go func() { + time.Sleep(1 * time.Second) + cancel() + }() + }) +} + +func TestTrace(t *testing.T) { + t.Parallel() + t.Run("Trace test", func(t *testing.T) { + // Create a new context + ctx := context.Background() + traceBytes, err := Trace(ctx, 1*time.Second) + + assert.NoError(t, err) + assert.NotEmpty(t, traceBytes) + }) + + t.Run("Trace break test", func(t *testing.T) { + // Create a new context + ctx, cancel := context.WithTimeout(context.Background(), time.Second*30) + go func() { + traceBytes, err := Trace(ctx, 30*time.Second) + assert.Empty(t, traceBytes) + assert.Error(t, err) + }() + + go func() { + time.Sleep(1 * time.Second) + cancel() + }() + }) +}