From 35e8580bb7cc014fd9c6d8995e810884d0c6aaf9 Mon Sep 17 00:00:00 2001 From: Ida Hou Date: Mon, 20 Jan 2025 20:01:21 +0000 Subject: [PATCH] Added conflicting agent installation detection, and more unit tests --- cmd/ops_agent_uap_plugin/plugin.go | 16 +- cmd/ops_agent_uap_plugin/service.go | 113 ++++++++-- cmd/ops_agent_uap_plugin/service_test.go | 266 ++++++++++++++++++++++- 3 files changed, 370 insertions(+), 25 deletions(-) diff --git a/cmd/ops_agent_uap_plugin/plugin.go b/cmd/ops_agent_uap_plugin/plugin.go index 653391a619..9d63f1aa2c 100644 --- a/cmd/ops_agent_uap_plugin/plugin.go +++ b/cmd/ops_agent_uap_plugin/plugin.go @@ -1,3 +1,17 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package main import ( @@ -49,7 +63,7 @@ func main() { server := grpc.NewServer() defer server.GracefulStop() - ps := &OpsAgentPluginServer{server: server} + ps := &OpsAgentPluginServer{server: server, runCommand: runCommand} // Successfully registering the server and starting to listen on the address // offered mean Guest Agent was successful in installing/launching the plugin // & will manage the lifecycle (start, stop, or revision change) here onwards. diff --git a/cmd/ops_agent_uap_plugin/service.go b/cmd/ops_agent_uap_plugin/service.go index 4eae6d2fd7..e09faa4a09 100644 --- a/cmd/ops_agent_uap_plugin/service.go +++ b/cmd/ops_agent_uap_plugin/service.go @@ -1,3 +1,19 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !windows + package main import ( @@ -5,6 +21,7 @@ import ( "fmt" "log" "os/exec" + "strings" "syscall" "google.golang.org/grpc" @@ -23,12 +40,18 @@ const ( DefaultPluginStateDirectory = "/var/lib/google-guest-agent/plugins/ops-agent-plugin" ) +// RunCommandFunc defines a function type that takes an exec.Cmd and returns +// its output and error. This abstraction is introduced +// primarily to facilitate testing by allowing the injection of mock +// implementations. +type RunCommandFunc func(cmd *exec.Cmd) (string, error) + // PluginServer implements the plugin RPC server interface. type OpsAgentPluginServer struct { pb.UnimplementedGuestAgentPluginServer - server *grpc.Server - // cancel is the cancel function to be called when core plugin is stopped. - cancel context.CancelFunc + server *grpc.Server + cancel context.CancelFunc + runCommand RunCommandFunc } // Apply applies the config sent or performs the work defined in the message. @@ -56,14 +79,47 @@ func (ps *OpsAgentPluginServer) Start(ctx context.Context, msg *pb.StartRequest) if pluginStateDir == "" { pluginStateDir = DefaultPluginStateDirectory } + + // Find pre-existent ops agent installation, and conflicting legacy agent installation. + foundOpsAgent, err := findPreExistentAgent(pContext, ps.runCommand, "google-cloud-ops-agent.service") + if err != nil || foundOpsAgent { + ps.cancel() + ps.cancel = nil + errMsg := fmt.Sprintf("found pre-existent Ops Agent: %v, err: %v", foundOpsAgent, err) + log.Printf("Start() failed: %s ", errMsg) + return nil, status.Error(1, errMsg) + } + + foundLegacyMonitoringAgent, err := findPreExistentAgent(pContext, ps.runCommand, "stackdriver-agent.service") + if err != nil || foundLegacyMonitoringAgent { + ps.cancel() + ps.cancel = nil + errMsg := fmt.Sprintf("found pre-existent Legacy Monitoring Agent: %v, err: %v", foundLegacyMonitoringAgent, err) + log.Printf("Start() failed: %s ", errMsg) + return nil, status.Error(1, errMsg) + } + + foundLegacyLoggingAgent, err := findPreExistentAgent(pContext, ps.runCommand, "google-fluentd.service") + if err != nil || foundLegacyLoggingAgent { + ps.cancel() + ps.cancel = nil + errMsg := fmt.Sprintf("found pre-existent Legacy Logging Agent: %v, err: %v", foundLegacyLoggingAgent, err) + log.Printf("Start() failed: %s", errMsg) + return nil, status.Error(1, errMsg) + } + // Ops Agent config validation - if err := validateOpsAgentConfig(pContext, pluginStateDir); err != nil { - log.Printf("failed to validate Ops Agent config: %s", err) + if err := validateOpsAgentConfig(pContext, ps.runCommand, pluginStateDir); err != nil { + log.Printf("Start() failed: %s", err) + ps.cancel() + ps.cancel = nil return nil, status.Errorf(1, "failed to validate Ops Agent config: %s", err) } // Subagent config generation - if err := generateSubagentConfigs(pContext, pluginStateDir); err != nil { - log.Printf("failed to generate subagent configs: %s", err) + if err := generateSubagentConfigs(pContext, ps.runCommand, pluginStateDir); err != nil { + log.Printf("Start() failed: %s", err) + ps.cancel() + ps.cancel = nil return nil, status.Errorf(1, "failed to generate subagent configs: %s", err) } @@ -100,32 +156,33 @@ func (ps *OpsAgentPluginServer) GetStatus(ctx context.Context, msg *pb.GetStatus return &pb.Status{Code: 0, Results: []string{"The Ops Agent Plugin is running ok."}}, nil } -func runCommand(cmd *exec.Cmd) error { +func runCommand(cmd *exec.Cmd) (string, error) { if cmd == nil { - return nil + return "", nil } cmd.SysProcAttr = &syscall.SysProcAttr{ Pdeathsig: syscall.SIGKILL, } - log.Printf("Running command: %s, with arguments: %s", cmd.Path, cmd.Args) + log.Printf("Running command: %s", cmd.Args) if out, err := cmd.CombinedOutput(); err != nil { - return fmt.Errorf("failed to execute cmd: %s with arguments %s, \ncommand output: %s\ncommand error: %s", cmd.Path, cmd.Args, out, err) + log.Printf("Command %s failed, \ncommand output: %s\ncommand error: %s", cmd.Args, string(out), err) + return string(out), err } - return nil + return "", nil } -func validateOpsAgentConfig(ctx context.Context, pluginBaseLocation string) error { +func validateOpsAgentConfig(ctx context.Context, runCommand RunCommandFunc, pluginBaseLocation string) error { configValidationCmd := exec.CommandContext(ctx, pluginBaseLocation+"/"+ConfGeneratorBinary, "-in", OpsAgentConfigLocationLinux, ) - if err := runCommand(configValidationCmd); err != nil { - return fmt.Errorf("failed to validate the Ops Agent config: %s", err) + if output, err := runCommand(configValidationCmd); err != nil { + return fmt.Errorf("failed to validate the Ops Agent config:\ncommand output: %s\ncommand error: %s", output, err) } return nil } -func generateSubagentConfigs(ctx context.Context, pluginBaseLocation string) error { +func generateSubagentConfigs(ctx context.Context, runCommand RunCommandFunc, pluginBaseLocation string) error { otelConfigGenerationCmd := exec.CommandContext(ctx, pluginBaseLocation+"/"+ConfGeneratorBinary, "-service", "otel", @@ -133,7 +190,7 @@ func generateSubagentConfigs(ctx context.Context, pluginBaseLocation string) err "-out", pluginBaseLocation+"/"+OtelRuntimeDirectory, "-logs", pluginBaseLocation+"/"+LogsDirectory) - if err := runCommand(otelConfigGenerationCmd); err != nil { + if _, err := runCommand(otelConfigGenerationCmd); err != nil { return fmt.Errorf("failed to generate Otel config: %s", err) } @@ -144,8 +201,26 @@ func generateSubagentConfigs(ctx context.Context, pluginBaseLocation string) err "-out", pluginBaseLocation+"/"+FluentBitRuntimeDirectory, "-logs", pluginBaseLocation+"/"+LogsDirectory, "-state", pluginBaseLocation+"/"+FluentBitStateDiectory) - if err := runCommand(fluentBitConfigGenerationCmd); err != nil { - return fmt.Errorf("failed to generate Fluntbit config: %s", err) + if output, err := runCommand(fluentBitConfigGenerationCmd); err != nil { + return fmt.Errorf("failed to generate Fluntbit config:\ncommand output: %s\ncommand error: %s", output, err) } return nil } + +func findPreExistentAgent(ctx context.Context, runCommand RunCommandFunc, serviceName string) (bool, error) { + findOpsAgentCmd := exec.CommandContext(ctx, + "systemctl", "status", serviceName, + ) + output, err := runCommand(findOpsAgentCmd) + if strings.Contains(output, fmt.Sprintf("Unit %s could not be found.", serviceName)) || strings.Contains(output, "Loaded: not-found") { + return false, nil + } + if strings.Contains(output, "Loaded:") { + return true, nil + } + + if err != nil { + return false, fmt.Errorf("unable to verify the existing installation of %s. Error: %s", serviceName, err) + } + return false, nil +} diff --git a/cmd/ops_agent_uap_plugin/service_test.go b/cmd/ops_agent_uap_plugin/service_test.go index 2876813d31..c09439ec26 100644 --- a/cmd/ops_agent_uap_plugin/service_test.go +++ b/cmd/ops_agent_uap_plugin/service_test.go @@ -1,4 +1,4 @@ -// Copyright 2024 Google LLC +// Copyright 2025 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,27 +15,283 @@ package main import ( + "context" + "fmt" "os" "os/exec" "testing" + + pb "github.com/GoogleCloudPlatform/ops-agent/cmd/ops_agent_uap_plugin/google_guest_agent/plugin" ) -func TestRunCommand(t *testing.T) { +func Test_runCommand(t *testing.T) { cmd := exec.Command(os.Args[0], "-test.run=TestHelperProcess") cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1"} - err := runCommand(cmd) + _, err := runCommand(cmd) if err != nil { t.Errorf("runCommand got unexpected error: %v", err) } } -func TestRunCommandFailure(t *testing.T) { +func Test_runCommandFailure(t *testing.T) { cmd := exec.Command(os.Args[0], "-test.run=TestHelperProcess") cmd.Env = []string{"GO_WANT_HELPER_PROCESS=1", "GO_HELPER_FAILURE=1"} - if err := runCommand(cmd); err == nil { + if _, err := runCommand(cmd); err == nil { t.Error("runCommand got nil error, want exec failure") } } +func Test_findPreExistentAgent(t *testing.T) { + cases := []struct { + name string + mockRunCommandFunc RunCommandFunc + wantExist bool + serviceName string + }{ + { + name: "Found conflicting Ops Agent installation", + mockRunCommandFunc: func(cmd *exec.Cmd) (string, error) { + return "Loaded: loaded (/lib/systemd/system/google-cloud-ops-agent.service; enabled; preset: enabled)", nil + }, + wantExist: true, + serviceName: "google-cloud-ops-agent.service", + }, + { + name: "Pre-existent Ops Agent not found", + mockRunCommandFunc: func(cmd *exec.Cmd) (string, error) { + return "", fmt.Errorf("google-cloud-ops-agent.service could not be found.") + }, + wantExist: false, + serviceName: "google-cloud-ops-agent.service", + }, + { + name: "Pre-existent Legacy Monitoring Agent not found", + mockRunCommandFunc: func(cmd *exec.Cmd) (string, error) { + return "", fmt.Errorf("stackdriver-agent.service could not be found.") + }, + wantExist: false, + serviceName: "stackdriver-agent.service", + }, + { + name: "Pre-existent Legacy Logging Agent not found", + mockRunCommandFunc: func(cmd *exec.Cmd) (string, error) { + return "", fmt.Errorf("google-fluentd.service could not be found.") + }, + wantExist: false, + serviceName: "google-fluentd.service", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + gotExist, _ := findPreExistentAgent(context.Background(), tc.mockRunCommandFunc, tc.serviceName) + + if gotExist != tc.wantExist { + t.Errorf("%v: findPreExistentAgent(%v) failed to verify if the service exists: gotExist: %v, wantExist %v", tc.name, tc.serviceName, gotExist, tc.wantExist) + } + }) + } +} + +func Test_validateOpsAgentConfig(t *testing.T) { + cases := []struct { + name string + mockCmdOutput string + mockCmdErr error + wantSuccess bool + }{ + { + name: "config validation successful", + mockCmdOutput: "", + mockCmdErr: nil, + wantSuccess: true, + }, + { + name: "config validation failed", + mockCmdOutput: "", + mockCmdErr: fmt.Errorf("error"), + wantSuccess: false, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + // Create a mock RunCommand function + mockRunCommand := func(cmd *exec.Cmd) (string, error) { + return tc.mockCmdOutput, tc.mockCmdErr + } + + ctx := context.Background() + err := validateOpsAgentConfig(ctx, mockRunCommand, "") + gotSuccess := (err == nil) + if gotSuccess != tc.wantSuccess { + t.Errorf("%s: validateOpsAgentConfig() failed to valide Ops Agent config: %v, want successful config validation: %v, error:%v", tc.name, gotSuccess, tc.wantSuccess, err) + } + }) + } +} + +func Test_generateSubagentConfigs(t *testing.T) { + cases := []struct { + name string + mockCmdOutput string + mockCmdErr error + wantSuccess bool + }{ + { + name: "configs generation successful", + mockCmdOutput: "", + mockCmdErr: nil, + wantSuccess: true, + }, + { + name: "configs generation failed", + mockCmdOutput: "", + mockCmdErr: fmt.Errorf("error"), + wantSuccess: false, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + // Create a mock RunCommand function + mockRunCommand := func(cmd *exec.Cmd) (string, error) { + return tc.mockCmdOutput, tc.mockCmdErr + } + + ctx := context.Background() + err := generateSubagentConfigs(ctx, mockRunCommand, "") + gotSuccess := (err == nil) + if gotSuccess != tc.wantSuccess { + t.Errorf("%s: generateSubagentConfigs() failed to generate subagents configs: %v, want successful config validation: %v, error:%v", tc.name, gotSuccess, tc.wantSuccess, err) + } + }) + } +} + +func TestStart(t *testing.T) { + t.Parallel() + cases := []struct { + name string + cancel context.CancelFunc + mockRunCommandFunc RunCommandFunc + wantError bool + }{ + { + name: "Happy path: plugin not already started, Start() exits successfully", + cancel: nil, + mockRunCommandFunc: func(cmd *exec.Cmd) (string, error) { + return "", nil + }, + wantError: false, + }, + { + name: "Plugin already started", + cancel: func() {}, // Non-nil function + mockRunCommandFunc: func(cmd *exec.Cmd) (string, error) { + return "", nil + }, + wantError: false, + }, + { + name: "Start() returns errors, cancel() function should be reset to nil", + cancel: nil, + mockRunCommandFunc: func(cmd *exec.Cmd) (string, error) { + return "", fmt.Errorf("error") + }, + wantError: true, + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + ps := &OpsAgentPluginServer{cancel: tc.cancel, runCommand: tc.mockRunCommandFunc} + _, err := ps.Start(context.Background(), &pb.StartRequest{}) + gotError := (err != nil) + if gotError != tc.wantError { + t.Errorf("%v: Start() got error: %v, err msg: %v, want error:%v", tc.name, gotError, err, tc.wantError) + } + if tc.wantError && ps.cancel != nil { + t.Errorf("%v: Start() did not reset the cancel function to nil", tc.name) + } + if !tc.wantError && ps.cancel == nil { + t.Errorf("%v: Start() reset cancel function to nil but shouldn't", tc.name) + } + }) + } +} +func TestStop(t *testing.T) { + t.Parallel() + cases := []struct { + name string + cancel context.CancelFunc + }{ + { + name: "PluginAlreadyStopped", + cancel: nil, + }, + { + name: "PluginRunning", + cancel: func() {}, // Non-nil function + + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + ps := &OpsAgentPluginServer{cancel: tc.cancel} + _, err := ps.Stop(context.Background(), &pb.StopRequest{}) + if err != nil { + t.Errorf("got error from Stop(): %v, wanted nil", err) + } + + if ps.cancel != nil { + t.Error("got non-nil cancel function after calling Stop(), want nil") + } + }) + } +} + +func TestGetStatus(t *testing.T) { + t.Parallel() + cases := []struct { + name string + cancel context.CancelFunc + wantStatusCode int32 + }{ + { + name: "PluginNotRunning", + cancel: nil, + wantStatusCode: 1, + }, + { + name: "PluginRunning", + cancel: func() {}, // Non-nil function + wantStatusCode: 0, + }, + } + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + ps := &OpsAgentPluginServer{cancel: tc.cancel} + status, err := ps.GetStatus(context.Background(), &pb.GetStatusRequest{}) + if err != nil { + t.Errorf("got error from GetStatus: %v, wanted nil", err) + } + gotStatusCode := status.Code + if gotStatusCode != tc.wantStatusCode { + t.Errorf("Got status code %d from GetStatus(), wanted %d", gotStatusCode, tc.wantStatusCode) + } + + }) + } +} + // TestHelperProcess isn't a real test. It's used as a helper process to mock // command executions. func TestHelperProcess(t *testing.T) {