Skip to content

Commit b2c2788

Browse files
committed
no-jira: Hardening the GCP destroy code to attempt to delete dependent resources
The destroy process sporadically fails with a ResourceInUseByAnotherResource error because the dependent resource is sometimes not discovered by the cluster uninstaller. When this error is encountered, the uninstaller now parses the dependent resource out of the error message and attempts to delete it.
1 parent d9d84ea commit b2c2788

File tree

11 files changed

+234
-18
lines changed

11 files changed

+234
-18
lines changed

pkg/destroy/gcp/address.go

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,21 +15,43 @@ const (
1515
regionalAddressResource = "regionaddress"
1616
)
1717

18+
func (o *ClusterUninstaller) deleteAddressByName(ctx context.Context, resourceName, location string) error {
19+
typeName := globalAddressResource
20+
if location != string(global) {
21+
typeName = regionalAddressResource
22+
}
23+
items, err := o.listAddressesWithFilter(ctx, typeName, "items(name,labels),nextPageToken", func(item *compute.Address) bool {
24+
// Address should have labels, but none observed
25+
return item.Name == resourceName
26+
})
27+
if err != nil {
28+
return fmt.Errorf("failed to list address by name : %w", err)
29+
}
30+
for _, item := range items {
31+
if err := o.deleteAddress(ctx, item); err != nil {
32+
return fmt.Errorf("failed to delete address by name: %w", err)
33+
}
34+
}
35+
return nil
36+
}
37+
1838
// listAddresses lists the addresses of the given typeName that belong to the cluster.
// The removed (-) line filtered on the address name alone via isClusterResource.
// The added (+) lines also request the labels field and keep an address only when
// isClusterResource accepts its name and isSharedResource rejects its labels.
func (o *ClusterUninstaller) listAddresses(ctx context.Context, typeName string) ([]cloudResource, error) {
19-
return o.listAddressesWithFilter(ctx, typeName, "items(name,region,addressType),nextPageToken", o.isClusterResource)
39+
return o.listAddressesWithFilter(ctx, typeName, "items(name,region,addressType,labels),nextPageToken", func(item *compute.Address) bool {
40+
return o.isClusterResource(item.Name) && !o.isSharedResource(item.Labels)
41+
})
2042
}
2143

2244
// listAddressesWithFilter lists addresses in the project that satisfy the filter criteria.
2345
// The fields parameter specifies which fields should be returned in the result, the filter string contains
2446
// a filter string passed to the API to filter results.
25-
func (o *ClusterUninstaller) listAddressesWithFilter(ctx context.Context, typeName, fields string, filterFunc resourceFilterFunc) ([]cloudResource, error) {
47+
func (o *ClusterUninstaller) listAddressesWithFilter(ctx context.Context, typeName, fields string, filterFunc func(item *compute.Address) bool) ([]cloudResource, error) {
2648
o.Logger.Debugf("Listing addresses")
2749
result := []cloudResource{}
2850

2951
pagesFunc := func(list *compute.AddressList) error {
3052
for _, item := range list.Items {
3153
o.Logger.Debugf("Found address (%s): %s", typeName, item.Name)
32-
if filterFunc(item.Name) {
54+
if filterFunc(item) {
3355
var quota []gcp.QuotaUsage
3456
if item.AddressType == "INTERNAL" {
3557
quota = []gcp.QuotaUsage{{

pkg/destroy/gcp/backendservice.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,25 @@ const (
1515
regionBackendServiceResource = "regionbackendservice"
1616
)
1717

18+
func (o *ClusterUninstaller) deleteBackendServiceByName(ctx context.Context, resourceName, location string) error {
19+
typeName := globalBackendServiceResource
20+
if location != string(global) {
21+
typeName = regionBackendServiceResource
22+
}
23+
items, err := o.listBackendServicesWithFilter(ctx, typeName, "items(name),nextPageToken", func(item *compute.BackendService) bool {
24+
return item.Name == resourceName
25+
})
26+
if err != nil {
27+
return fmt.Errorf("failed to list backend services by name : %w", err)
28+
}
29+
for _, item := range items {
30+
if err := o.deleteBackendService(ctx, item); err != nil {
31+
return fmt.Errorf("failed to delete backend service by name: %w", err)
32+
}
33+
}
34+
return nil
35+
}
36+
1837
func (o *ClusterUninstaller) listBackendServices(ctx context.Context, typeName string) ([]cloudResource, error) {
1938
return o.listBackendServicesWithFilter(ctx, typeName, "items(name),nextPageToken",
2039
func(item *compute.BackendService) bool {

pkg/destroy/gcp/cloudcontroller.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ func (o *ClusterUninstaller) listCloudControllerBackendServices(ctx context.Cont
5050

5151
fwList, err := o.computeSvc.Firewalls.List(o.ProjectID).Fields(googleapi.Field("items(name,targetTags),nextPageToken")).Context(ctx).Do()
5252
if err != nil {
53-
o.Logger.Debugf("failed to list firewall rules associated with backend service %s: %v", item.Name, err)
5453
return false
5554
}
5655
for _, fw := range fwList.Items {
@@ -103,9 +102,7 @@ func (o *ClusterUninstaller) listCloudControllerTargetPools(ctx context.Context,
103102
break
104103
}
105104
}
106-
107105
if !foundClusterResource {
108-
o.Logger.Debugf("Skipping target pool instance %s because it is not a cluster resource", item.Name)
109106
return false
110107
}
111108
}
@@ -121,7 +118,10 @@ func (o *ClusterUninstaller) discoverCloudControllerLoadBalancerResources(ctx co
121118
loadBalancerFilterFunc := o.createLoadBalancerFilterFunc(loadBalancerName)
122119

123120
// Discover associated addresses: loadBalancerName
124-
found, err := o.listAddressesWithFilter(ctx, regionalAddressResource, "items(name),nextPageToken", loadBalancerFilterFunc)
121+
found, err := o.listAddressesWithFilter(ctx, regionalAddressResource, "items(name),nextPageToken", func(item *compute.Address) bool {
122+
// address should have labels but none observed
123+
return strings.HasPrefix(item.Name, loadBalancerName)
124+
})
125125
if err != nil {
126126
return err
127127
}
@@ -160,7 +160,12 @@ func (o *ClusterUninstaller) discoverCloudControllerLoadBalancerResources(ctx co
160160
o.insertPendingItems(firewallResourceName, found)
161161

162162
// Discover associated forwarding rules: loadBalancerName
163-
found, err = o.listForwardingRulesWithFilter(ctx, regionForwardingRuleResource, "items(name),nextPageToken", loadBalancerFilterFunc)
163+
found, err = o.listForwardingRulesWithFilter(ctx, regionForwardingRuleResource, "items(name,labels),nextPageToken", func(item *compute.ForwardingRule) bool {
164+
// The forwarding rule should be checked for labels to ensure that
165+
// it is owned and not shared. However, the forwarding rules associated with
166+
// load balancers do not have these labels.
167+
return strings.HasPrefix(item.Name, loadBalancerName)
168+
})
164169
if err != nil {
165170
return err
166171
}
@@ -203,7 +208,6 @@ func (o *ClusterUninstaller) discoverCloudControllerLoadBalancerResources(ctx co
203208
// For each of those backend services, resources like forwarding rules, firewalls, health checks and
204209
// backend services are added to pendingItems
205210
func (o *ClusterUninstaller) discoverCloudControllerResources(ctx context.Context) error {
206-
o.Logger.Debugf("Discovering cloud controller resources")
207211
errs := []error{}
208212

209213
// Instance group related items
@@ -222,7 +226,6 @@ func (o *ClusterUninstaller) discoverCloudControllerResources(ctx context.Contex
222226
return err
223227
}
224228
for _, backend := range backends {
225-
o.Logger.Debugf("Discovering cloud controller resources for %s", backend.name)
226229
err := o.discoverCloudControllerLoadBalancerResources(ctx, backend.name)
227230
if err != nil {
228231
errs = append(errs, err)
@@ -244,7 +247,6 @@ func (o *ClusterUninstaller) discoverCloudControllerResources(ctx context.Contex
244247
return err
245248
}
246249
for _, pool := range pools {
247-
o.Logger.Debugf("Discovering cloud controller resources for %s", pool.name)
248250
err := o.discoverCloudControllerLoadBalancerResources(ctx, pool.name)
249251
if err != nil {
250252
errs = append(errs, err)

pkg/destroy/gcp/firewall.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package gcp
22

33
import (
44
"context"
5+
"fmt"
56
"strings"
67

78
"github.com/pkg/errors"
@@ -15,6 +16,19 @@ const (
1516
firewallResourceName = "firewall"
1617
)
1718

19+
func (o *ClusterUninstaller) deleteFirewallByName(ctx context.Context, resourceName string) error {
20+
items, err := o.listFirewallsWithFilter(ctx, "items(name),nextPageToken", func(item string) bool { return strings.Contains(item, resourceName) })
21+
if err != nil {
22+
return fmt.Errorf("failed to list firewall by name: %w", err)
23+
}
24+
for _, item := range items {
25+
if err := o.deleteFirewall(ctx, item); err != nil {
26+
return fmt.Errorf("failed to delete firewall by name: %w", err)
27+
}
28+
}
29+
return nil
30+
}
31+
1832
func (o *ClusterUninstaller) listFirewalls(ctx context.Context) ([]cloudResource, error) {
1933
// The firewall rules that the destroyer is searching for here include a
2034
// pattern before and after the cluster ID. Use a regular expression that allows

pkg/destroy/gcp/forwardingrule.go

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,36 @@ const (
1616
regionForwardingRuleResource = "regionalforwardingrule"
1717
)
1818

19+
func (o *ClusterUninstaller) deleteForwardingRuleByName(ctx context.Context, resourceName, location string) error {
20+
typeName := regionForwardingRuleResource
21+
if location == string(global) {
22+
typeName = globalForwardingRuleResource
23+
}
24+
items, err := o.listForwardingRulesWithFilter(ctx, typeName, "items(name,labels),nextPageToken", func(item *compute.ForwardingRule) bool {
25+
return item.Name == resourceName && o.isOwnedResource(item.Labels)
26+
})
27+
if err != nil {
28+
return fmt.Errorf("failed to list forwarding rules by name: %w", err)
29+
}
30+
for _, item := range items {
31+
if err := o.deleteForwardingRule(ctx, item); err != nil {
32+
return fmt.Errorf("failed to delete forwarding rule by name: %w", err)
33+
}
34+
}
35+
return nil
36+
}
37+
1938
// listForwardingRules lists the forwarding rules of the given typeName that belong to the cluster.
// The removed (-) line filtered on the rule name alone via isClusterResource.
// The added (+) lines also request the labels field and keep a rule only when
// isClusterResource accepts its name and isOwnedResource accepts its labels.
func (o *ClusterUninstaller) listForwardingRules(ctx context.Context, typeName string) ([]cloudResource, error) {
20-
return o.listForwardingRulesWithFilter(ctx, typeName, "items(name,region,loadBalancingScheme),nextPageToken", o.isClusterResource)
39+
return o.listForwardingRulesWithFilter(ctx, typeName, "items(name,region,loadBalancingScheme,labels),nextPageToken", func(item *compute.ForwardingRule) bool {
40+
return o.isClusterResource(item.Name) && o.isOwnedResource(item.Labels)
41+
})
2142
}
2243

2344
// listForwardingRulesWithFilter lists forwarding rules in the project that satisfy the filter criteria.
2445
// The fields parameter specifies which fields should be returned in the result, the filter string contains
2546
// a filter string passed to the API to filter results. The filterFunc is a client-side filtering function
2647
// that determines whether a particular result should be returned or not.
27-
func (o *ClusterUninstaller) listForwardingRulesWithFilter(ctx context.Context, typeName, fields string, filterFunc resourceFilterFunc) ([]cloudResource, error) {
48+
func (o *ClusterUninstaller) listForwardingRulesWithFilter(ctx context.Context, typeName, fields string, filterFunc func(item *compute.ForwardingRule) bool) ([]cloudResource, error) {
2849
o.Logger.Debugf("Listing forwarding rules")
2950
ctx, cancel := context.WithTimeout(ctx, defaultTimeout)
3051
defer cancel()
@@ -33,7 +54,7 @@ func (o *ClusterUninstaller) listForwardingRulesWithFilter(ctx context.Context,
3354

3455
pagesFunc := func(list *compute.ForwardingRuleList) error {
3556
for _, item := range list.Items {
36-
if filterFunc(item.Name) {
57+
if filterFunc(item) {
3758
logrus.Debugf("Found forwarding rule: %s", item.Name)
3859
var quota []gcp.QuotaUsage
3960
if item.LoadBalancingScheme == "EXTERNAL" {

pkg/destroy/gcp/gcp.go

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ const (
4545

4646
// DONE represents the done status for a compute service operation.
4747
DONE = "DONE"
48+
49+
alreadyInUseStr = "is already being used by "
4850
)
4951

5052
// ClusterUninstaller holds the various options for the cluster we want to delete
@@ -441,6 +443,72 @@ func operationErrorMessage(op *compute.Operation) string {
441443
return strings.Join(errs, ", ")
442444
}
443445

446+
func (o *ClusterUninstaller) handleDependentResourceError(ctx context.Context, op *compute.Operation, err error) error {
447+
errorCode := 0
448+
errMsg := ""
449+
switch {
450+
case op != nil:
451+
errorCode = int(op.HttpErrorStatusCode)
452+
errMsg = operationErrorMessage(op)
453+
case err != nil:
454+
var apiErr *googleapi.Error
455+
if errors.As(err, &apiErr) {
456+
errorCode = apiErr.Code
457+
errMsg = apiErr.Message
458+
} else {
459+
errMsg = err.Error()
460+
}
461+
default:
462+
return fmt.Errorf("failed to extract information from operation or error")
463+
}
464+
465+
if errorCode == 400 && strings.Contains(errMsg, alreadyInUseStr) {
466+
splitDetails := strings.Split(errMsg, alreadyInUseStr)
467+
resource := strings.ReplaceAll(
468+
strings.ReplaceAll(
469+
strings.ReplaceAll(splitDetails[len(splitDetails)-1], "\"", ""),
470+
"'", "",
471+
), ", resourceInUseByAnotherResource", "",
472+
)
473+
splitResource := strings.Split(resource, "/")
474+
475+
if len(splitResource) > 6 || len(splitResource) < 5 {
476+
return fmt.Errorf("dependent resource information unable to be parsed: %s", resource)
477+
}
478+
// global -> ex: projects/xxxxxxxxxxxxxxxx/global/resource-type/resource-name
479+
// regional -> ex: projects/xxxxxxxxxxxxxx/region/region-name/resource-type/resource-name
480+
location, resourceType, resourceName := splitResource[len(splitResource)-3], splitResource[len(splitResource)-2], splitResource[len(splitResource)-1]
481+
o.Logger.Debugf("found dependent resource information: %s, %s, %s", location, resourceType, resourceName)
482+
483+
var deleteErr error
484+
switch resourceType {
485+
case "backendServices":
486+
deleteErr = o.deleteBackendServiceByName(ctx, resourceName, location)
487+
case "firewalls":
488+
deleteErr = o.deleteFirewallByName(ctx, resourceName)
489+
case "forwardingRules":
490+
deleteErr = o.deleteForwardingRuleByName(ctx, resourceName, location)
491+
case "subnetworks":
492+
deleteErr = o.deleteSubnetworkByName(ctx, resourceName)
493+
case "routers":
494+
deleteErr = o.deleteRouterByName(ctx, resourceName)
495+
case "addresses":
496+
deleteErr = o.deleteAddressByName(ctx, resourceName, location)
497+
case "targetPools":
498+
deleteErr = o.deleteTargetPoolByName(ctx, resourceName)
499+
case "targetTcpProxies":
500+
deleteErr = o.deleteTargetTCPProxyByName(ctx, resourceName)
501+
default:
502+
deleteErr = fmt.Errorf("failed to find resource type: %s for %s", resourceType, resourceName)
503+
}
504+
505+
if deleteErr != nil {
506+
return fmt.Errorf("failed to delete dependent resource: %w", deleteErr)
507+
}
508+
}
509+
return nil
510+
}
511+
444512
func (o *ClusterUninstaller) handleOperation(ctx context.Context, op *compute.Operation, err error, item cloudResource, resourceType string) error {
445513
identifier := []string{item.typeName, item.name}
446514
if item.zone != "" {
@@ -452,14 +520,22 @@ func (o *ClusterUninstaller) handleOperation(ctx context.Context, op *compute.Op
452520
o.Logger.Debugf("No operation found for %s %s", resourceType, item.name)
453521
return nil
454522
}
523+
524+
err = o.handleDependentResourceError(ctx, op, err)
525+
if err != nil {
526+
o.Logger.Debugf("failed to handle dependent resource error: %v", err)
527+
}
455528
o.resetRequestID(identifier...)
456529
return fmt.Errorf("failed to delete %s %s: %w", resourceType, item.name, err)
457530
}
458531

459532
// wait for operation to complete before checking any further
460533
op, err = o.waitFor(ctx, op, item)
461-
462534
if op != nil && op.Status == DONE && isErrorStatus(op.HttpErrorStatusCode) {
535+
err = o.handleDependentResourceError(ctx, op, err)
536+
if err != nil {
537+
o.Logger.Debugf("failed to handle dependent resource error: %v", err)
538+
}
463539
o.resetRequestID(identifier...)
464540
return fmt.Errorf("failed to delete %s %s with error: %s", resourceType, item.name, operationErrorMessage(op))
465541
}

pkg/destroy/gcp/image.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,24 @@ const (
1515
)
1616

1717
// listImages lists the images that belong to the cluster.
// The removed (-) line filtered on the image name alone via isClusterResource.
// The added (+) lines also request the labels field and keep an image only when
// isClusterResource accepts its name and isSharedResource rejects its labels.
func (o *ClusterUninstaller) listImages(ctx context.Context) ([]cloudResource, error) {
18-
return o.listImagesWithFilter(ctx, "items(name),nextPageToken", o.isClusterResource)
18+
return o.listImagesWithFilter(ctx, "items(name,labels),nextPageToken", func(item *compute.Image) bool {
19+
return o.isClusterResource(item.Name) && !o.isSharedResource(item.Labels)
20+
})
1921
}
2022

2123
// listImagesWithFilter lists images in the project that satisfy the filter criteria.
2224
// The fields parameter specifies which fields should be returned in the result, the filter string contains
2325
// a filter string passed to the API to filter results. The filterFunc is a client-side filtering function
2426
// that determines whether a particular result should be returned or not.
25-
func (o *ClusterUninstaller) listImagesWithFilter(ctx context.Context, fields string, filterFunc resourceFilterFunc) ([]cloudResource, error) {
27+
func (o *ClusterUninstaller) listImagesWithFilter(ctx context.Context, fields string, filterFunc func(item *compute.Image) bool) ([]cloudResource, error) {
2628
o.Logger.Debugf("Listing images")
2729
ctx, cancel := context.WithTimeout(ctx, defaultTimeout)
2830
defer cancel()
2931
result := []cloudResource{}
3032
req := o.computeSvc.Images.List(o.ProjectID).Fields(googleapi.Field(fields))
3133
err := req.Pages(ctx, func(list *compute.ImageList) error {
3234
for _, item := range list.Items {
33-
if filterFunc(item.Name) {
35+
if filterFunc(item) {
3436
o.Logger.Debugf("Found image: %s\n", item.Name)
3537
result = append(result, cloudResource{
3638
key: item.Name,

pkg/destroy/gcp/router.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package gcp
22

33
import (
44
"context"
5+
"fmt"
56

67
"github.com/pkg/errors"
78
"google.golang.org/api/compute/v1"
@@ -14,6 +15,19 @@ const (
1415
routerResourceName = "router"
1516
)
1617

18+
func (o *ClusterUninstaller) deleteRouterByName(ctx context.Context, resourceName string) error {
19+
items, err := o.listRoutersWithFilter(ctx, "items(name),nextPageToken", func(item string) bool { return item == resourceName })
20+
if err != nil {
21+
return fmt.Errorf("failed to list router by name: %w", err)
22+
}
23+
for _, item := range items {
24+
if err := o.deleteRouter(ctx, item); err != nil {
25+
return fmt.Errorf("failed to delete router by name: %w", err)
26+
}
27+
}
28+
return nil
29+
}
30+
1731
// listRouters lists the routers accepted by isClusterResource, requesting only
// the name field (plus the paging token) from the API.
func (o *ClusterUninstaller) listRouters(ctx context.Context) ([]cloudResource, error) {
	const fields = "items(name),nextPageToken"
	return o.listRoutersWithFilter(ctx, fields, o.isClusterResource)
}

0 commit comments

Comments
 (0)