Skip to content

Commit

Permalink
Basis for scalable cloud/container running
Browse files Browse the repository at this point in the history
  • Loading branch information
gcglinton committed Oct 20, 2023
1 parent dedb8d2 commit 0de3b65
Show file tree
Hide file tree
Showing 5 changed files with 311 additions and 0 deletions.
15 changes: 15 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Paths excluded from the Docker build context (smaller context, faster builds).
cache
.github
.pytest_cache
.vscode
data
debian
# Re-include the changelog from the otherwise-excluded debian/ directory
# (presumably the build reads version information from it — verify).
!debian/changelog
docker
docs
obsolete
s3-publisher
tests
tools
travis
vagrant
25 changes: 25 additions & 0 deletions docker/cloud/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
FROM ubuntu:latest

LABEL org.opencontainers.image.authors="[email protected]"

# TZ + DEBIAN_FRONTEND keep apt from prompting interactively during the build.
# NOTE(review): BUILD_PACKAGES is declared but never referenced below —
# confirm whether it is still needed, or wire it into the apt-get step.
ENV TZ="Etc/UTC" \
    DEBIAN_FRONTEND="noninteractive" \
    BUILD_PACKAGES="build-essential"

# Install runtime dependencies from the distro repos. The apt lists are
# removed in the same layer so the package index does not bloat the image.
RUN apt-get update &&\
    apt-get install -y python3-amqp python3-appdirs python3-dateparser python3-humanfriendly \
        python3-humanize python3-jsonpickle python3-netifaces python3-paramiko python3-pip \
        python3-psutil python3-watchdog python3-magic &&\
    rm -rf /var/lib/apt/lists/*

# need version >= 1.5.1 to get MQTT v5 support, not in repos of 20.04 ... so get from pip.
# --no-cache-dir: do not keep the pip download cache in the image layer.
RUN pip3 install --no-cache-dir paho-mqtt redis python-redis-lock

WORKDIR /src

COPY . /src

RUN python3 setup.py install

WORKDIR /root

# Shell form is required here so $SR3_CONFIG and $SR3_INSTANCE (supplied by
# the compose service environment) are expanded at container start time.
CMD sr3 foreground --config $SR3_CONFIG --logStdout --no $SR3_INSTANCE
35 changes: 35 additions & 0 deletions docker/cloud/dd_all.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# this fetches everything on the server.

# a good first test if you need to validate parameters.
# if the server is working at all this will download a lot.
# recommend using with -n (discard) so that you only see the notices.
#
broker amqps://dd.weather.gc.ca/
topicPrefix v02.post

# instances is the number of downloaders to run at once. Defaults to 1, but likely need more.
# increase if you see high "lag" times in download logs.
instances 1

# expire, in operations should be longer than longest expected interruption.
expire 10m

# subscribe to every topic below topicPrefix ("#" is the multi-level wildcard).
subtopic #

directory /tmp/dd_all

# downloaded files are thrown away; only the notices/logs matter for this test.
discard true
queueName q_anonymous_subscribe.dd_all.40026876.58034545

# Redis-backed retry queue and duplicate suppression — presumably so the
# replicated containers (see docker-compose.yml) share this state; verify.
# NOTE(review): the password embedded in these URLs must match the one passed
# to redis-server in docker-compose.yml — consider injecting it rather than
# hard-coding it in two places.
retry_driver redis
redisqueue_serverurl redis://:SuperSecure@redis:6379/0
nodupe_driver redis
nodupe_redis_serverurl redis://:SuperSecure@redis:6379/0

nodupe_ttl 1000d

#It would help to get metrics out of the instances if we could set the metricsFilename
#Currently it's statically derived from pidFilename (which also isn't configurable)
#metricsFilename /tmp/metrics_subscribe.dd_all.${SR_INSTANCE}

#debug true
29 changes: 29 additions & 0 deletions docker/cloud/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
version: "3.8"

services:
  ddall:
    build: ../../
    deploy:
      mode: replicated
      # Start scaled to zero; scale up explicitly when needed, e.g.:
      #   docker compose up -d --scale ddall=10
      replicas: 0
    environment:
      # {{.Task.Slot}} is a Swarm service template expanded to the replica
      # number, giving each container a distinct sr3 instance number.
      - SR3_INSTANCE={{.Task.Slot}}
      - SR3_CONFIG=subscribe/dd_all.conf
    volumes:
      - ./dd_all.conf:/root/.config/sr3/subscribe/dd_all.conf
      #- ./metrics/subscribe_dd_all_{{.Task.Slot | printf "%02s"}}.metrics:/root/.cache/sr3/subscribe/dd_all/subscribe_dd_all_{{.Task.Slot | printf "%02s"}}.metrics

  redis:
    restart: unless-stopped
    image: redis
    deploy:
      mode: replicated
      replicas: 0
    # NOTE(review): password is hard-coded here and in dd_all.conf — keep the
    # two in sync, or move it to a secret/env file.
    command: redis-server --requirepass SuperSecure
    ports:
      - 16379:6379
    volumes:
      - redis:/data

volumes:
  redis:
207 changes: 207 additions & 0 deletions docker/cloud/telegraf.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
# Telegraf Configuration
#
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared inputs, and sent to the declared outputs.
#
# Plugins must be declared in here to be active.
# To deactivate a plugin, comment out the name and any variables.
#
# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
# file would generate.
#
# Environment variables can be used anywhere in this config file, simply prepend
# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"),
# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR)


# Global tags can be specified here in key="value" format.
# All entries are commented out: no extra tags are applied to emitted metrics.
[global_tags]
# dc = "us-east-1" # will tag all metrics with dc=us-east-1
# rack = "1a"
## Environment variables can be used as tags, and throughout the config file
# user = "$USER"
# platform = "linux"
# role = "server"


# Configuration for telegraf agent
[agent]
## Default data collection interval for all inputs
interval = "5s"
## Rounds collection interval to 'interval'
## ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true

## Telegraf will send metrics to outputs in batches of at most
## metric_batch_size metrics.
## This controls the size of writes that Telegraf sends to output plugins.
metric_batch_size = 1000

## For failed writes, telegraf will cache metric_buffer_limit metrics for each
## output, and will flush this buffer on a successful write. Oldest metrics
## are dropped first when this buffer fills.
## This buffer only fills when writes fail to output plugin(s).
metric_buffer_limit = 10000

## Collection jitter is used to jitter the collection by a random amount.
## Each plugin will sleep for a random time within jitter before collecting.
## This can be used to avoid many plugins querying things like sysfs at the
## same time, which can have a measurable effect on the system.
collection_jitter = "0s"

## Default flushing interval for all outputs. Maximum flush_interval will be
## flush_interval + flush_jitter
flush_interval = "5s"
## Jitter the flush interval by a random amount. This is primarily to avoid
## large write spikes for users running a large number of telegraf instances.
## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"

## By default or when set to "0s", precision will be set to the same
## timestamp order as the collection interval, with the maximum being 1s.
## ie, when interval = "10s", precision will be "1s"
## when interval = "250ms", precision will be "1ms"
## Precision will NOT be used for service inputs. It is up to each individual
## service input to set the timestamp at the appropriate precision.
## Valid time units are "ns", "us" (or "µs"), "ms", "s".
precision = ""

## Logging configuration:
## Run telegraf with debug log messages.
debug = false
## Run telegraf in quiet mode (error log messages only).
quiet = false
## Specify the log file name. The empty string means to log to stderr.
logfile = ""

## Override default hostname, if empty use os.Hostname()
## NOTE(review): forced to "sr3" with the host tag omitted below — presumably
## so per-container hostnames don't fragment the metric series; confirm.
hostname = "sr3"
## If set to true, do not set the "host" tag in the telegraf agent.
omit_hostname = true


###############################################################################
# OUTPUT PLUGINS #
###############################################################################

## Configuration for Graphite server to send metrics to
#[[outputs.graphite]]
# ## TCP endpoint for your graphite instance.
# ## If multiple endpoints are configured, output will be load balanced.
# ## Only one of the endpoints will be written to with each iteration.
# servers = ["swarm.int.thelintons.ca:2003"]
# ## Prefix metrics name
# prefix = "servers.lin."
# ## Graphite output template
# ## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
# template = "host.measurement.tags.field"
#
# ## Enable Graphite tags support
# graphite_tag_support = false
#
# ## timeout in seconds for the write connection to graphite
# timeout = 2
# tagexclude = [ "platform", "role" ]

#[[outputs.http]]
# ## URL is the address to send metrics to
# url = "http://swarm.int.thelintons.ca:2007"
# data_format = "json"


# The only active output: write metrics to stdout — presumably so they are
# visible via the container log stream; confirm before relying on it.
[[outputs.file]]
files = [ "stdout" ]
### data_format = "graphite"
### graphite_tag_support = true
### graphite_tag_sanitize_mode = "compatible"
### #tagexclude = [ "platform", "role" ]

###############################################################################
# INPUT PLUGINS #
###############################################################################

# Collect statistics about itself
[[inputs.internal]]
## If true, collect telegraf memory stats.
# collect_memstats = true
name_override = "telegraf"
tagexclude = [ "version", "go_version" ]

# Parse a complete file each interval
[[inputs.file]]
#alias = "sr3metrics"
#name_suffix = "_sr3metrics"
name_override = "sr3metrics"
## Files to parse each interval. Accept standard unix glob matching rules,
## as well as ** to match recursive files and directories.
## NOTE(review): /home/me/... looks like a developer-machine path; the
## commented compose volume mounts metrics under /root/.cache/sr3/... —
## confirm which path telegraf should actually watch.
files = ["/home/me/metrics/*.metrics"]

## Character encoding to use when interpreting the file contents. Invalid
## characters are replaced using the unicode replacement character. When set
## to the empty string the data is not decoded to text.
## ex: character_encoding = "utf-8"
## character_encoding = "utf-16le"
## character_encoding = "utf-16be"
## character_encoding = ""
# character_encoding = ""

## Name a tag containing the name of the file the data was parsed from. Leave empty
## to disable. Cautious when file name variation is high, this can increase the cardinality
## significantly. Read more about cardinality here:
## https://docs.influxdata.com/influxdb/cloud/reference/glossary/#series-cardinality
## The filename tag feeds the [[processors.regex]] stanzas below.
file_tag = "filename"

## Data format to consume.
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "json"
## Drop non-numeric / non-metric fields from the sr3 JSON.
## NOTE(review): newer Telegraf releases rename fieldpass/fielddrop to
## fieldinclude/fieldexclude — verify against the deployed version.
fielddrop = ["flow_current_sleep", "flow_last_housekeeping", "flow_next_housekeeping", "flow_stop_requested",
"flow_transferConnected", "flow_transferConnectStart",
"gather.message_connected", "gather.message_disconnectLast", "gather.message_disconnectTime"
]


#data_format = "json_v2"
# [[inputs.file.json_v2]]
# [[inputs.file.json_v2.object]]
# #measurement_name = "sr3_flow"
# excluded_keys = ["current_sleep", "next_housekeeping", "stop_requested", "last_housekeeping", "transferConnected"]
# path = "flow"
# #disable_prepend_keys = true
# [[inputs.file.json_v2.object]]
# #measurement_name = "sr3_gather"
# excluded_keys = ["connected", "disconnectLast", "disconnectTime"]
# path = "gather\\.message"
# #disable_prepend_keys = true



# Derive component/config/instance tags from the metrics filename
# (e.g. subscribe_dd_all_01.metrics -> component=subscribe, config=dd_all,
# instance=01). Each [[processors.regex.tags]] stanza writes a single
# result_key, so the same pattern is applied three times, once per
# captured group.
[[processors.regex]]
#namepass = ["file"]
[[processors.regex.tags]]
key = "filename"
# subscribe_dd_all_01.metrics
pattern = '^(?P<component>cpost|cpump|flow|poll|post|report|sarra|sender|shovel|subscribe|watch|winnow)_(?P<config>.*)_(?P<instance>[0-9]{2})\.metrics$'
#pattern = '^(cpost|cpump|flow|poll|post|report|sarra|sender|shovel|subscribe|watch|winnow)_(.*)_([0-9]{2})\.metrics$'
# group 1: the sr3 component name
replacement = "${1}"
result_key = "component"

[[processors.regex.tags]]
key = "filename"
# subscribe_dd_all_01.metrics
pattern = '^(?P<component>cpost|cpump|flow|poll|post|report|sarra|sender|shovel|subscribe|watch|winnow)_(?P<config>.*)_(?P<instance>[0-9]{2})\.metrics$'
#pattern = '^(cpost|cpump|flow|poll|post|report|sarra|sender|shovel|subscribe|watch|winnow)_(.*)_([0-9]{2})\.metrics$'
# group 2: the config name
replacement = "${2}"
result_key = "config"

[[processors.regex.tags]]
key = "filename"
# subscribe_dd_all_01.metrics
pattern = '^(?P<component>cpost|cpump|flow|poll|post|report|sarra|sender|shovel|subscribe|watch|winnow)_(?P<config>.*)_(?P<instance>[0-9]{2})\.metrics$'
#pattern = '^(cpost|cpump|flow|poll|post|report|sarra|sender|shovel|subscribe|watch|winnow)_(.*)_([0-9]{2})\.metrics$'
# group 3: the two-digit instance number
replacement = "${3}"
result_key = "instance"



0 comments on commit 0de3b65

Please sign in to comment.