Skip to content

Commit

Permalink
Merge pull request #578 from BjoernT/master
Browse files Browse the repository at this point in the history
Report conntrack count for all namespaces
  • Loading branch information
tonytan4ever authored Oct 25, 2018
2 parents a161335 + 1e4cd71 commit 81140d3
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 4 deletions.
60 changes: 58 additions & 2 deletions playbooks/files/rax-maas/plugins/conntrack_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
pass

import maas_common
import os
import subprocess
import tempfile


Expand Down Expand Up @@ -63,9 +65,29 @@ def get_metrics():
'nf_conntrack_max': {
'path': '/proc/sys/net/netfilter/nf_conntrack_max'}}

# Retrieve root namespace count
for data in metrics.viewvalues():
data['value'] = get_value(data['path'])

# Retrieve conntrack count per namespace
# and report the namespace with the highest count.
# This is necessary to limit the number of metrics to report to MAAS,
# as we can not report a metric per namespace, which by nature are
# also volatile.
try:
namespaces = os.listdir('/var/run/netns')
for ns in namespaces:
ps = subprocess.check_output(['ip', 'netns', 'exec',
ns, 'cat',
'/proc/sys/net/netfilter/'
'nf_conntrack_count'])
nscount = int(ps.strip(os.linesep))

if nscount > metrics['nf_conntrack_count']['value']:
metrics['nf_conntrack_count']['value'] = nscount
except (OSError):
pass

return metrics


Expand All @@ -89,19 +111,53 @@ def get_metrics_lxc_container(container_name=''):
# Check if container is even running
try:
with tempfile.TemporaryFile() as tmpfile:
# Retrieve root namespace count
if cont.attach_wait(lxc.attach_run_command,
['cat',
'/proc/sys/net/netfilter/nf_conntrack_count',
'/proc/sys/net/netfilter/nf_conntrack_max'],
stdout=tmpfile) > -1:
stdout=tmpfile,
stderr=tempfile.TemporaryFile()) > -1:

tmpfile.seek(0)
output = tmpfile.read()
metrics = {
'nf_conntrack_count': {'value': output.split('\n')[0]},
'nf_conntrack_max': {'value': output.split('\n')[1]}}

return metrics
# Retrieve conntrack count per namespace
# and report the namespace with the highest count.
# This is necessary to limit the number of metrics to report to MAAS,
# as we can not report a metric per namespace, which by nature are
# also volatile.
with tempfile.TemporaryFile() as nsfile:
if cont.attach_wait(lxc.attach_run_command,
['ls',
'-1',
'/var/run/netns'],
stdout=nsfile,
stderr=tempfile.TemporaryFile()) > -1:
nsfile.seek(0)

for line in nsfile.readlines():
ns = line.strip(os.linesep)
nscountfile = tempfile.TemporaryFile()

if cont.attach_wait(lxc.attach_run_command,
['ip', 'netns', 'exec',
ns, 'cat',
'/proc/sys/net/netfilter/'
'nf_conntrack_count'],
stdout=nscountfile,
stderr=tempfile.TemporaryFile()) > -1:

nscountfile.seek(0)
nscount = int(nscountfile.read().strip(os.linesep))

if nscount > metrics['nf_conntrack_count']['value']:
metrics['nf_conntrack_count']['value'] = nscount

return metrics

except maas_common.MaaSException as e:
maas_common.status_err(str(e), m_name='maas_conntrack')
Expand Down
4 changes: 2 additions & 2 deletions playbooks/templates/rax-maas/conntrack_count.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ alarms :
criteria : |
:set consecutiveCount={{ maas_alarm_local_consecutive_count }}
if (percentage(metric["nf_conntrack_count"] , metric["nf_conntrack_max"]) > {{ maas_nf_conntrack_critical_threshold }}) {
return new AlarmStatus(CRITICAL, "Connection count is > {{ maas_nf_conntrack_critical_threshold }}% of maximum allowed.");
return new AlarmStatus(CRITICAL, "Connection tracking count is > {{ maas_nf_conntrack_critical_threshold }}% of the critical threshold. Please check all namespaces listed at /var/run/netns including the host.");
}
if (percentage(metric["nf_conntrack_count"] , metric["nf_conntrack_max"]) > {{ maas_nf_conntrack_warning_threshold }}) {
return new AlarmStatus(WARNING, "Connection count is > {{ maas_nf_conntrack_warning_threshold }}% of maximum allowed.");
return new AlarmStatus(WARNING, "Connection tracking count is > {{ maas_nf_conntrack_warning_threshold }}% of the warning threshold. Please check all namespaces inside listed at /var/run/netns including the host.");
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
fixes:
- |
* The `conntrack_count.py` plugin now checks for network namespaces
listed at `/var/run/netns` and retrieves the iptables connection
tracking information for each namespace.
This ensures that embedded network namespaces are alerted in case
connection tracking hashes are about to exceed a configurable threshold.
Due to the limited availability of MAAS metrics per alarm, only the
namespace with the highest connection tracking count is reported.

0 comments on commit 81140d3

Please sign in to comment.