Skip to content

Commit

Permalink
Service discovery race on serviceBindings delete. Bug on IP reuse (#1808
Browse files Browse the repository at this point in the history
)

* Correct SetMatrix documentation

The SetMatrix is a generic data structure, so the description
should not be tight to any specific use

Signed-off-by: Flavio Crisciani <[email protected]>

* Service Discovery reuse name and serviceBindings deletion

- Added logic to handle name reuse from different services
- Moved the deletion from the serviceBindings map at the end
  of the rmServiceBindings body to avoid race with new services

Signed-off-by: Flavio Crisciani <[email protected]>

* Avoid race on network cleanup

Use the locker to avoid the race between the network
deletion and new endpoints being created

Signed-off-by: Flavio Crisciani <[email protected]>

* CleanupServiceBindings to clean the SD records

Allow the cleanupServicebindings to take care of the service discovery
cleanup. Also avoid to trigger the cleanup for each endpoint from an SD
point of view
LB and SD will be separated in the future

Signed-off-by: Flavio Crisciani <[email protected]>

* Addressed comments

Signed-off-by: Flavio Crisciani <[email protected]>

* NetworkDB deleteEntry has to happen

If there is an error locally guarantee that the delete entry
on network DB is still honored

Signed-off-by: Flavio Crisciani <[email protected]>
  • Loading branch information
Flavio Crisciani authored and mavenugo committed Jun 18, 2017
1 parent bfc5fe3 commit 6426d1e
Show file tree
Hide file tree
Showing 7 changed files with 313 additions and 142 deletions.
19 changes: 10 additions & 9 deletions agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -648,13 +648,13 @@ func (ep *endpoint) addServiceInfoToCluster(sb *sandbox) error {
TaskAliases: ep.myAliases,
EndpointIP: ep.Iface().Address().IP.String(),
})

if err != nil {
return err
}

if agent != nil {
if err := agent.networkDB.CreateEntry(libnetworkEPTable, n.ID(), ep.ID(), buf); err != nil {
logrus.Warnf("addServiceInfoToCluster NetworkDB CreateEntry failed for %s %s err:%s", ep.id, n.id, err)
return err
}
}
Expand Down Expand Up @@ -686,14 +686,21 @@ func (ep *endpoint) deleteServiceInfoFromCluster(sb *sandbox, method string) err
name = ep.MyAliases()[0]
}

if agent != nil {
// First delete from networkDB then locally
if err := agent.networkDB.DeleteEntry(libnetworkEPTable, n.ID(), ep.ID()); err != nil {
logrus.Warnf("deleteServiceInfoFromCluster NetworkDB DeleteEntry failed for %s %s err:%s", ep.id, n.id, err)
}
}

if ep.Iface().Address() != nil {
if ep.svcID != "" {
// This is a task part of a service
var ingressPorts []*PortConfig
if n.ingress {
ingressPorts = ep.ingressPorts
}
if err := c.rmServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), name, ep.virtualIP, ingressPorts, ep.svcAliases, ep.myAliases, ep.Iface().Address().IP, "deleteServiceInfoFromCluster"); err != nil {
if err := c.rmServiceBinding(ep.svcName, ep.svcID, n.ID(), ep.ID(), name, ep.virtualIP, ingressPorts, ep.svcAliases, ep.myAliases, ep.Iface().Address().IP, "deleteServiceInfoFromCluster", true); err != nil {
return err
}
} else {
Expand All @@ -704,12 +711,6 @@ func (ep *endpoint) deleteServiceInfoFromCluster(sb *sandbox, method string) err
}
}

if agent != nil {
if err := agent.networkDB.DeleteEntry(libnetworkEPTable, n.ID(), ep.ID()); err != nil {
return err
}
}

logrus.Debugf("deleteServiceInfoFromCluster from %s END for %s %s", method, ep.svcName, ep.ID())

return nil
Expand Down Expand Up @@ -900,7 +901,7 @@ func (c *controller) handleEpTableEvent(ev events.Event) {
logrus.Debugf("handleEpTableEvent DEL %s R:%v", eid, epRec)
if svcID != "" {
// This is a remote task part of a service
if err := c.rmServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent"); err != nil {
if err := c.rmServiceBinding(svcName, svcID, nid, eid, containerName, vip, ingressPorts, serviceAliases, taskAliases, ip, "handleEpTableEvent", true); err != nil {
logrus.Errorf("failed removing service binding for %s epRec:%v err:%s", eid, epRec, err)
return
}
Expand Down
32 changes: 22 additions & 10 deletions common/setmatrix.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,26 @@ import (
type SetMatrix interface {
// Get returns the members of the set for a specific key as a slice.
Get(key string) ([]interface{}, bool)
// Contains is used to verify is an element is in a set for a specific key
// Contains is used to verify if an element is in a set for a specific key
// returns true if the element is in the set
// returns true if there is a set for the key
Contains(key string, value interface{}) (bool, bool)
// Insert inserts the mapping between the IP and the endpoint identifier
// returns true if the mapping was not present, false otherwise
// returns also the number of endpoints associated to the IP
// Insert inserts the value in the set of a key
// returns true if the value is inserted (was not already in the set), false otherwise
// returns also the length of the set for the key
Insert(key string, value interface{}) (bool, int)
// Remove removes the mapping between the IP and the endpoint identifier
// returns true if the mapping was deleted, false otherwise
// returns also the number of endpoints associated to the IP
// Remove removes the value in the set for a specific key
// returns true if the value is deleted, false otherwise
// returns also the length of the set for the key
Remove(key string, value interface{}) (bool, int)
// Cardinality returns the number of elements in the set of a specific key
// returns false if the key is not in the map
// Cardinality returns the number of elements in the set for a key
// returns false if the set is not present
Cardinality(key string) (int, bool)
// String returns the string version of the set, empty otherwise
// returns false if the key is not in the map
// returns false if the set is not present
String(key string) (string, bool)
// Returns all the keys in the map
Keys() []string
}

type setMatrix struct {
Expand Down Expand Up @@ -121,3 +123,13 @@ func (s *setMatrix) String(key string) (string, bool) {
}
return set.String(), ok
}

func (s *setMatrix) Keys() []string {
s.Lock()
defer s.Unlock()
keys := make([]string, 0, len(s.matrix))
for k := range s.matrix {
keys = append(keys, k)
}
return keys
}
118 changes: 116 additions & 2 deletions libnetwork_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/docker/libnetwork/driverapi"
"github.com/docker/libnetwork/ipamapi"
"github.com/docker/libnetwork/netlabel"
"github.com/docker/libnetwork/netutils"
"github.com/docker/libnetwork/testutils"
"github.com/docker/libnetwork/types"
)
Expand Down Expand Up @@ -382,8 +383,8 @@ func TestSRVServiceQuery(t *testing.T) {
}

sr := svcInfo{
svcMap: make(map[string][]net.IP),
svcIPv6Map: make(map[string][]net.IP),
svcMap: common.NewSetMatrix(),
svcIPv6Map: common.NewSetMatrix(),
ipMap: common.NewSetMatrix(),
service: make(map[string][]servicePorts),
}
Expand Down Expand Up @@ -440,6 +441,119 @@ func TestSRVServiceQuery(t *testing.T) {
}
}

func TestServiceVIPReuse(t *testing.T) {
c, err := New()
if err != nil {
t.Fatal(err)
}
defer c.Stop()

n, err := c.NewNetwork("bridge", "net1", "", nil)
if err != nil {
t.Fatal(err)
}
defer func() {
if err := n.Delete(); err != nil {
t.Fatal(err)
}
}()

ep, err := n.CreateEndpoint("testep")
if err != nil {
t.Fatal(err)
}

sb, err := c.NewSandbox("c1")
if err != nil {
t.Fatal(err)
}
defer func() {
if err := sb.Delete(); err != nil {
t.Fatal(err)
}
}()

err = ep.Join(sb)
if err != nil {
t.Fatal(err)
}

// Add 2 services with same name but different service ID to share the same VIP
n.(*network).addSvcRecords("ep1", "service_test", "serviceID1", net.ParseIP("192.168.0.1"), net.IP{}, true, "test")
n.(*network).addSvcRecords("ep2", "service_test", "serviceID2", net.ParseIP("192.168.0.1"), net.IP{}, true, "test")

ipToResolve := netutils.ReverseIP("192.168.0.1")

ipList, _ := n.(*network).ResolveName("service_test", types.IPv4)
if len(ipList) == 0 {
t.Fatal("There must be the VIP")
}
if len(ipList) != 1 {
t.Fatal("It must return only 1 VIP")
}
if ipList[0].String() != "192.168.0.1" {
t.Fatal("The service VIP is 192.168.0.1")
}
name := n.(*network).ResolveIP(ipToResolve)
if name == "" {
t.Fatal("It must return a name")
}
if name != "service_test.net1" {
t.Fatalf("It must return the service_test.net1 != %s", name)
}

// Delete service record for one of the services, the IP should remain because one service is still associated with it
n.(*network).deleteSvcRecords("ep1", "service_test", "serviceID1", net.ParseIP("192.168.0.1"), net.IP{}, true, "test")
ipList, _ = n.(*network).ResolveName("service_test", types.IPv4)
if len(ipList) == 0 {
t.Fatal("There must be the VIP")
}
if len(ipList) != 1 {
t.Fatal("It must return only 1 VIP")
}
if ipList[0].String() != "192.168.0.1" {
t.Fatal("The service VIP is 192.168.0.1")
}
name = n.(*network).ResolveIP(ipToResolve)
if name == "" {
t.Fatal("It must return a name")
}
if name != "service_test.net1" {
t.Fatalf("It must return the service_test.net1 != %s", name)
}

// Delete again the service using the previous service ID, nothing should happen
n.(*network).deleteSvcRecords("ep2", "service_test", "serviceID1", net.ParseIP("192.168.0.1"), net.IP{}, true, "test")
ipList, _ = n.(*network).ResolveName("service_test", types.IPv4)
if len(ipList) == 0 {
t.Fatal("There must be the VIP")
}
if len(ipList) != 1 {
t.Fatal("It must return only 1 VIP")
}
if ipList[0].String() != "192.168.0.1" {
t.Fatal("The service VIP is 192.168.0.1")
}
name = n.(*network).ResolveIP(ipToResolve)
if name == "" {
t.Fatal("It must return a name")
}
if name != "service_test.net1" {
t.Fatalf("It must return the service_test.net1 != %s", name)
}

// Delete now using the second service ID, now all the entries should be gone
n.(*network).deleteSvcRecords("ep2", "service_test", "serviceID2", net.ParseIP("192.168.0.1"), net.IP{}, true, "test")
ipList, _ = n.(*network).ResolveName("service_test", types.IPv4)
if len(ipList) != 0 {
t.Fatal("All the VIPs should be gone now")
}
name = n.(*network).ResolveIP(ipToResolve)
if name != "" {
t.Fatalf("It must return empty no more services associated, instead:%s", name)
}
}

func TestIpamReleaseOnNetDriverFailures(t *testing.T) {
if !testutils.IsRunningInContainer() {
defer testutils.SetupTestOSContext(t)()
Expand Down
Loading

0 comments on commit 6426d1e

Please sign in to comment.