DataDog log gathering (#1931)

* Added datadog service log gathering

* Updated PENDING.md

* monitoring script fixes so logs don't fill the disk space
This commit is contained in:
Greg Szabo 2018-08-12 03:38:53 -04:00 committed by Ethan Buchman
parent c5fcc9b65f
commit 68c9214721
15 changed files with 796 additions and 4 deletions

View File

@ -54,6 +54,7 @@ FEATURES
* [gov] Add slashing for validators who do not vote on a proposal
* [cli] added `gov query-proposals` command to CLI. Can filter by `depositer`, `voter`, and `status`
* [core] added BaseApp.Seal - ability to seal baseapp parameters once they've been set
* [scripts] added log output monitoring to DataDog using Ansible scripts
* [gov] added TallyResult type that gets stored in Proposal after tallying is finished
IMPROVEMENTS

View File

@ -0,0 +1,9 @@
init_config:
instances:
- collect_connection_state: true
excluded_interfaces:
- lo
- lo0
collect_rate_metrics: true
collect_count_metrics: true

View File

@ -0,0 +1,15 @@
init_config:
instances:
- name: ssh
search_string: ['ssh', 'sshd']
thresholds:
critical: [1, 5]
- name: gaiad
search_string: ['gaiad']
thresholds:
critical: [1, 1]
- name: gaiacli
search_string: ['gaiacli']
thresholds:
critical: [1, 1]

View File

@ -0,0 +1,7 @@
init_config:
instances:
- prometheus_url: http://127.0.0.1:26660
namespace: "gaiad"
metrics:
- "p2p*"

View File

@ -0,0 +1,78 @@
-----BEGIN CERTIFICATE-----
MIIESTCCAzGgAwIBAgITBn+UV4WH6Kx33rJTMlu8mYtWDTANBgkqhkiG9w0BAQsF
ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6
b24gUm9vdCBDQSAxMB4XDTE1MTAyMjAwMDAwMFoXDTI1MTAxOTAwMDAwMFowRjEL
MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEVMBMGA1UECxMMU2VydmVyIENB
IDFCMQ8wDQYDVQQDEwZBbWF6b24wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
AoIBAQDCThZn3c68asg3Wuw6MLAd5tES6BIoSMzoKcG5blPVo+sDORrMd4f2AbnZ
cMzPa43j4wNxhplty6aUKk4T1qe9BOwKFjwK6zmxxLVYo7bHViXsPlJ6qOMpFge5
blDP+18x+B26A0piiQOuPkfyDyeR4xQghfj66Yo19V+emU3nazfvpFA+ROz6WoVm
B5x+F2pV8xeKNR7u6azDdU5YVX1TawprmxRC1+WsAYmz6qP+z8ArDITC2FMVy2fw
0IjKOtEXc/VfmtTFch5+AfGYMGMqqvJ6LcXiAhqG5TI+Dr0RtM88k+8XUBCeQ8IG
KuANaL7TiItKZYxK1MMuTJtV9IblAgMBAAGjggE7MIIBNzASBgNVHRMBAf8ECDAG
AQH/AgEAMA4GA1UdDwEB/wQEAwIBhjAdBgNVHQ4EFgQUWaRmBlKge5WSPKOUByeW
dFv5PdAwHwYDVR0jBBgwFoAUhBjMhTTsvAyUlC4IWZzHshBOCggwewYIKwYBBQUH
AQEEbzBtMC8GCCsGAQUFBzABhiNodHRwOi8vb2NzcC5yb290Y2ExLmFtYXpvbnRy
dXN0LmNvbTA6BggrBgEFBQcwAoYuaHR0cDovL2NydC5yb290Y2ExLmFtYXpvbnRy
dXN0LmNvbS9yb290Y2ExLmNlcjA/BgNVHR8EODA2MDSgMqAwhi5odHRwOi8vY3Js
LnJvb3RjYTEuYW1hem9udHJ1c3QuY29tL3Jvb3RjYTEuY3JsMBMGA1UdIAQMMAow
CAYGZ4EMAQIBMA0GCSqGSIb3DQEBCwUAA4IBAQCFkr41u3nPo4FCHOTjY3NTOVI1
59Gt/a6ZiqyJEi+752+a1U5y6iAwYfmXss2lJwJFqMp2PphKg5625kXg8kP2CN5t
6G7bMQcT8C8xDZNtYTd7WPD8UZiRKAJPBXa30/AbwuZe0GaFEQ8ugcYQgSn+IGBI
8/LwhBNTZTUVEWuCUUBVV18YtbAiPq3yXqMB48Oz+ctBWuZSkbvkNodPLamkB2g1
upRyzQ7qDn1X8nn8N8V7YJ6y68AtkHcNSRAnpTitxBKjtKPISLMVCx7i4hncxHZS
yLyKQXhw2W2Xs0qLeC1etA+jTGDK4UfLeC0SF7FSi8o5LL21L8IzApar2pR/
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIEkjCCA3qgAwIBAgITBn+USionzfP6wq4rAfkI7rnExjANBgkqhkiG9w0BAQsF
ADCBmDELMAkGA1UEBhMCVVMxEDAOBgNVBAgTB0FyaXpvbmExEzARBgNVBAcTClNj
b3R0c2RhbGUxJTAjBgNVBAoTHFN0YXJmaWVsZCBUZWNobm9sb2dpZXMsIEluYy4x
OzA5BgNVBAMTMlN0YXJmaWVsZCBTZXJ2aWNlcyBSb290IENlcnRpZmljYXRlIEF1
dGhvcml0eSAtIEcyMB4XDTE1MDUyNTEyMDAwMFoXDTM3MTIzMTAxMDAwMFowOTEL
MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv
b3QgQ0EgMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALJ4gHHKeNXj
ca9HgFB0fW7Y14h29Jlo91ghYPl0hAEvrAIthtOgQ3pOsqTQNroBvo3bSMgHFzZM
9O6II8c+6zf1tRn4SWiw3te5djgdYZ6k/oI2peVKVuRF4fn9tBb6dNqcmzU5L/qw
IFAGbHrQgLKm+a/sRxmPUDgH3KKHOVj4utWp+UhnMJbulHheb4mjUcAwhmahRWa6
VOujw5H5SNz/0egwLX0tdHA114gk957EWW67c4cX8jJGKLhD+rcdqsq08p8kDi1L
93FcXmn/6pUCyziKrlA4b9v7LWIbxcceVOF34GfID5yHI9Y/QCB/IIDEgEw+OyQm
jgSubJrIqg0CAwEAAaOCATEwggEtMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/
BAQDAgGGMB0GA1UdDgQWBBSEGMyFNOy8DJSULghZnMeyEE4KCDAfBgNVHSMEGDAW
gBScXwDfqgHXMCs4iKK4bUqc8hGRgzB4BggrBgEFBQcBAQRsMGowLgYIKwYBBQUH
MAGGImh0dHA6Ly9vY3NwLnJvb3RnMi5hbWF6b250cnVzdC5jb20wOAYIKwYBBQUH
MAKGLGh0dHA6Ly9jcnQucm9vdGcyLmFtYXpvbnRydXN0LmNvbS9yb290ZzIuY2Vy
MD0GA1UdHwQ2MDQwMqAwoC6GLGh0dHA6Ly9jcmwucm9vdGcyLmFtYXpvbnRydXN0
LmNvbS9yb290ZzIuY3JsMBEGA1UdIAQKMAgwBgYEVR0gADANBgkqhkiG9w0BAQsF
AAOCAQEAYjdCXLwQtT6LLOkMm2xF4gcAevnFWAu5CIw+7bMlPLVvUOTNNWqnkzSW
MiGpSESrnO09tKpzbeR/FoCJbM8oAxiDR3mjEH4wW6w7sGDgd9QIpuEdfF7Au/ma
eyKdpwAJfqxGF4PcnCZXmTA5YpaP7dreqsXMGz7KQ2hsVxa81Q4gLv7/wmpdLqBK
bRRYh5TmOTFffHPLkIhqhBGWJ6bt2YFGpn6jcgAKUj6DiAdjd4lpFw85hdKrCEVN
0FE6/V1dN2RMfjCyVSRCnTawXZwXgWHxyvkQAiSr6w10kY17RSlQOYiypok1JR4U
akcjMS9cmvqtmg5iUaQqqcT5NJ0hGA==
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIEdTCCA12gAwIBAgIJAKcOSkw0grd/MA0GCSqGSIb3DQEBCwUAMGgxCzAJBgNV
BAYTAlVTMSUwIwYDVQQKExxTdGFyZmllbGQgVGVjaG5vbG9naWVzLCBJbmMuMTIw
MAYDVQQLEylTdGFyZmllbGQgQ2xhc3MgMiBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0
eTAeFw0wOTA5MDIwMDAwMDBaFw0zNDA2MjgxNzM5MTZaMIGYMQswCQYDVQQGEwJV
UzEQMA4GA1UECBMHQXJpem9uYTETMBEGA1UEBxMKU2NvdHRzZGFsZTElMCMGA1UE
ChMcU3RhcmZpZWxkIFRlY2hub2xvZ2llcywgSW5jLjE7MDkGA1UEAxMyU3RhcmZp
ZWxkIFNlcnZpY2VzIFJvb3QgQ2VydGlmaWNhdGUgQXV0aG9yaXR5IC0gRzIwggEi
MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDVDDrEKvlO4vW+GZdfjohTsR8/
y8+fIBNtKTrID30892t2OGPZNmCom15cAICyL1l/9of5JUOG52kbUpqQ4XHj2C0N
Tm/2yEnZtvMaVq4rtnQU68/7JuMauh2WLmo7WJSJR1b/JaCTcFOD2oR0FMNnngRo
Ot+OQFodSk7PQ5E751bWAHDLUu57fa4657wx+UX2wmDPE1kCK4DMNEffud6QZW0C
zyyRpqbn3oUYSXxmTqM6bam17jQuug0DuDPfR+uxa40l2ZvOgdFFRjKWcIfeAg5J
Q4W2bHO7ZOphQazJ1FTfhy/HIrImzJ9ZVGif/L4qL8RVHHVAYBeFAlU5i38FAgMB
AAGjgfAwge0wDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwHQYDVR0O
BBYEFJxfAN+qAdcwKziIorhtSpzyEZGDMB8GA1UdIwQYMBaAFL9ft9HO3R+G9FtV
rNzXEMIOqYjnME8GCCsGAQUFBwEBBEMwQTAcBggrBgEFBQcwAYYQaHR0cDovL28u
c3MyLnVzLzAhBggrBgEFBQcwAoYVaHR0cDovL3guc3MyLnVzL3guY2VyMCYGA1Ud
HwQfMB0wG6AZoBeGFWh0dHA6Ly9zLnNzMi51cy9yLmNybDARBgNVHSAECjAIMAYG
BFUdIAAwDQYJKoZIhvcNAQELBQADggEBACMd44pXyn3pF3lM8R5V/cxTbj5HD9/G
VfKyBDbtgB9TxF00KGu+x1X8Z+rLP3+QsjPNG1gQggL4+C/1E2DUBc7xgQjB3ad1
l08YuW3e95ORCLp+QCztweq7dp4zBncdDQh/U90bZKuCJ/Fp1U1ervShw3WnWEQt
8jxwmKy6abaVd38PMV4s/KCHOkdp8Hlf9BRUpJVeEXgSYCfOn8J3/yNTd126/+pZ
59vPr5KW7ySaNRB6nJHGDn2Z9j8Z3/VyVOEVqQdZe4O/Ui5GjLIAZHYcSNPYeehu
VsyuLAOQ1xk4meTKCRlb/weWsKh/NEnfVqn3sF/tM+2MR7cwA130A4w=
-----END CERTIFICATE-----

View File

@ -0,0 +1,35 @@
# see "man logrotate" for details
# rotate log files daily
daily
# keep 4 days worth of backlogs
rotate 4
# create new (empty) log files after rotating old ones
create
# use date as a suffix of the rotated file
dateext
# uncomment this if you want your log files compressed
compress
# RPM packages drop log rotation information into this directory
include /etc/logrotate.d
# no packages own wtmp and btmp -- we'll rotate them here
/var/log/wtmp {
monthly
create 0664 root utmp
minsize 1M
rotate 1
}
/var/log/btmp {
missingok
monthly
create 0600 root utmp
rotate 1
}
# system-specific logs may also be configured here.

View File

@ -0,0 +1,13 @@
/var/log/cron
/var/log/maillog
/var/log/messages
/var/log/secure
/var/log/spooler
{
missingok
sharedscripts
postrotate
/bin/kill -HUP `cat /var/run/syslogd.pid 2> /dev/null` 2> /dev/null || true
service datadog-agent restart 2> /dev/null || true
endscript
}

View File

@ -3,3 +3,8 @@
- name: restart datadog-agent
service: name=datadog-agent state=restarted
- name: restart rsyslog
service: name=rsyslog state=restarted
- name: restart journald
service: name=systemd-journald state=restarted

View File

@ -13,3 +13,46 @@
DD_API_KEY: "{{DD_API_KEY}}"
DD_HOST_TAGS: "testnet:{{TESTNET_NAME}},cluster:{{CLUSTER_NAME}}"
- name: Set datadog.yaml config
template: src=datadog.yaml.j2 dest=/etc/datadog-agent/datadog.yaml
notify: restart datadog-agent
- name: Set metrics config
copy: src=conf.d/ dest=/etc/datadog-agent/conf.d/
notify: restart datadog-agent
- name: Disable journald rate-limiting
lineinfile: "dest=/etc/systemd/journald.conf regexp={{item.regexp}} line='{{item.line}}'"
with_items:
- { regexp: "^#RateLimitInterval", line: "RateLimitInterval=0s" }
- { regexp: "^#RateLimitBurst", line: "RateLimitBurst=0" }
- { regexp: "^#SystemMaxFileSize", line: "SystemMaxFileSize=500M" }
notify: restart journald
- name: As long as Datadog does not support journald on RPM-based linux, we enable rsyslog
yum: "name={{item}} state=installed"
with_items:
- rsyslog
- rsyslog-gnutls
#- name: Get DataDog certificate for rsyslog
# get_url: url=https://docs.datadoghq.com/crt/intake.logs.datadoghq.com.crt dest=/etc/ssl/certs/intake.logs.datadoghq.com.crt
- name: Get DataDog certificate for rsyslog
copy: src=intake.logs.datadoghq.com.crt dest=/etc/ssl/certs/intake.logs.datadoghq.com.crt
- name: Add datadog config to rsyslog
template: src=datadog.conf.j2 dest=/etc/rsyslog.d/datadog.conf mode=0600
notify: restart rsyslog
- name: Set logrotate to rotate daily so syslog does not use up all space
copy: src=logrotate.conf dest=/etc/logrotate.conf
- name: Set syslog to restart datadog-agent after logrotate
copy: src=syslog dest=/etc/logrotate.d/syslog
#semanage port -a -t syslog_tls_port_t -p tcp 10516
- name: Enable rsyslog to report to port 10516 in SELinux
seport: ports=10516 proto=tcp reload=yes setype=syslog_tls_port_t state=present
notify: restart rsyslog

View File

@ -0,0 +1,14 @@
$template DatadogFormat,"{{DD_API_KEY}} <%pri%>%protocol-version% %timestamp:::date-rfc3339% %HOSTNAME% %app-name% - - - %msg%\n"
$imjournalRatelimitInterval 0
$imjournalRatelimitBurst 0
$DefaultNetstreamDriver gtls
$DefaultNetstreamDriverCAFile /etc/ssl/certs/intake.logs.datadoghq.com.crt
$ActionSendStreamDriver gtls
$ActionSendStreamDriverMode 1
$ActionSendStreamDriverAuthMode x509/name
$ActionSendStreamDriverPermittedPeer *.logs.datadoghq.com
*.* @@intake.logs.datadoghq.com:10516;DatadogFormat

View File

@ -0,0 +1,561 @@
# The host of the Datadog intake server to send Agent data to
dd_url: https://app.datadoghq.com
# The Datadog api key to associate your Agent's data with your organization.
# Can be found here:
# https://app.datadoghq.com/account/settings
api_key: "{{DD_API_KEY}}"
# If you need a proxy to connect to the Internet, provide it here (default:
# disabled). You can use the 'no_proxy' list to specify hosts that should
# bypass the proxy. These settings might impact your checks requests, please
# refer to the specific check documentation for more details. Environment
# variables HTTP_PROXY, HTTPS_PROXY and NO_PROXY (comma-separated string) will
# override the values set here. See https://docs.datadoghq.com/agent/proxy/.
#
# proxy:
# http: http(s)://user:password@proxy_for_http:port
# https: http(s)://user:password@proxy_for_https:port
# no_proxy:
# - host1
# - host2
# Setting this option to "yes" will tell the agent to skip validation of SSL/TLS certificates.
# This may be necessary if the agent is running behind a proxy. See this page for details:
# https://github.com/DataDog/dd-agent/wiki/Proxy-Configuration#using-haproxy-as-a-proxy
# skip_ssl_validation: no
# Setting this option to "yes" will force the agent to only use TLS 1.2 when
# pushing data to the url specified in "dd_url".
# force_tls_12: no
# Force the hostname to whatever you want. (default: auto-detected)
# hostname: mymachine.mydomain
# Make the agent use "hostname -f" on unix-based systems as a last resort
# way of determining the hostname instead of Golang "os.Hostname()"
# This will be enabled by default in version 6.4
# More information at https://dtdg.co/flag-hostname-fqdn
# hostname_fqdn: false
# Set the host's tags (optional)
tags: ['testnet:{{TESTNET_NAME}}','cluster:{{CLUSTER_NAME}}']
# - mytag
# - env:prod
# - role:database
# Histogram and Historate configuration
#
# Configure which aggregated value to compute. Possible values are: min, max,
# median, avg, sum and count.
#
# histogram_aggregates: ["max", "median", "avg", "count"]
#
# Configure which percentiles will be computed. Must be a list of float
# between 0 and 1.
# Warning: percentiles must be specified as yaml strings
#
# histogram_percentiles: ["0.95"]
#
# Copy histogram values to distributions for true global distributions (in beta)
# This will increase the number of custom metrics created
# histogram_copy_to_distribution: false
#
# A prefix to add to distribution metrics created when histogram_copy_to_distributions is true
# histogram_copy_to_distribution_prefix: ""
# Forwarder timeout in seconds
# forwarder_timeout: 20
# The forwarder retries failed requests. Use this setting to change the
# maximum length of the forwarder's retry queue (each request in the queue
# takes no more than 2MB in memory)
# forwarder_retry_queue_max_size: 30
# The number of workers used by the forwarder. Please note each worker will
# open an outbound HTTP connection towards Datadog's metrics intake at every
# flush.
# forwarder_num_workers: 1
# Collect AWS EC2 custom tags as agent tags
collect_ec2_tags: true
# The path containing check configuration files
# By default, uses the conf.d folder located in the agent configuration folder.
# confd_path:
# Additional path where to search for Python checks
# By default, uses the checks.d folder located in the agent configuration folder.
# additional_checksd:
# The port for the go_expvar server
# expvar_port: 5000
# The port on which the IPC api listens
# cmd_port: 5001
# The port for the browser GUI to be served
# Setting 'GUI_port: -1' turns off the GUI completely
# Default is '5002' on Windows and macOS ; turned off on Linux
# GUI_port: -1
# The Agent runs workers in parallel to execute checks. By default the number
# of workers is set to 1. If set to 0 the agent will automatically determine
# the best number of runners needed based on the number of checks running. This
# would optimize the check collection time but may produce CPU spikes.
# check_runners: 1
# Metadata collection should always be enabled, except if you are running several
# agents/dsd instances per host. In that case, only one agent should have it on.
# WARNING: disabling it on every agent will lead to display and billing issues
# enable_metadata_collection: true
# Enable the gohai collection of systems data
# enable_gohai: true
# IPC api server timeout in seconds
# server_timeout: 15
# Some environments may have the procfs file system mounted in a miscellaneous
# location. The procfs_path configuration parameter provides a mechanism to
# override the standard default location: '/proc' - this setting will trickle
# down to integrations and affect their behavior if they rely on the psutil
# python package.
# procfs_path: /proc
# BETA: Encrypted Secrets (Linux only)
#
# This feature is in beta and its options or behaviour might break between
# minor or bugfix releases of the Agent.
#
# The agent can call an external command to fetch secrets. The command will be
# executed maximum once per instance containing an encrypted password.
# Secrets are cached by the agent, this will avoid executing again the
# secret_backend_command to fetch an already known secret (useful when combine
# with Autodiscovery). This feature is still in beta.
#
# For more information see: https://github.com/DataDog/datadog-agent/blob/master/docs/agent/secrets.md
#
# Path to the script to execute. The script must belong to the same user used
# to run the agent. Executable right must be given to the agent and no rights
# for 'group' or 'other'.
# secret_backend_command: /path/to/command
#
# A list of arguments to give to the command at each run (optional)
# secret_backend_arguments:
# - argument1
# - argument2
#
# The size in bytes of the buffer used to store the command answer (apply to
# both stdout and stderr)
# secret_backend_output_max_size: 1024
#
# The timeout to execute the command in second
# secret_backend_timeout: 5
# Metadata providers, add or remove from the list to enable or disable collection.
# Intervals are expressed in seconds. You can also set a provider's interval to 0
# to disable it.
# metadata_providers:
# - name: k8s
# interval: 60
# DogStatsd
#
# If you don't want to enable the DogStatsd server, set this option to no
# use_dogstatsd: yes
#
# Make sure your client is sending to the same UDP port
# dogstatsd_port: 8125
#
# The host to bind to receive external metrics (used only by the dogstatsd
# server for now). For dogstatsd this is ignored if
# 'dogstatsd_non_local_traffic' is set to true
# bind_host: localhost
#
# Dogstatsd can also listen for metrics on a Unix Socket (*nix only).
# Set to a valid filesystem path to enable.
# dogstatsd_socket: /var/run/dogstatsd/dsd.sock
#
# When using Unix Socket, dogstatsd can tag metrics with container metadata.
# If running dogstatsd in a container, host PID mode (e.g. with --pid=host) is required.
# dogstatsd_origin_detection: false
#
# The buffer size use to receive statsd packet, in bytes
# dogstatsd_buffer_size: 1024
#
# Whether dogstatsd should listen to non local UDP traffic
# dogstatsd_non_local_traffic: no
#
# Publish dogstatsd's internal stats as Go expvars
# dogstatsd_stats_enable: no
#
# How many items in the dogstatsd's stats circular buffer
# dogstatsd_stats_buffer: 10
#
# The port for the go_expvar server
# dogstatsd_stats_port: 5000
#
# The number of bytes allocated to dogstatsd's socket receive buffer (POSIX
# system only). By default, this value is set by the system. If you need to
# increase the size of this buffer but keep the OS default value the same, you
# can set dogstatsd's receive buffer size here. The maximum accepted value
# might change depending on the OS.
# dogstatsd_so_rcvbuf:
#
# If you want to forward every packet received by the dogstatsd server
# to another statsd server, uncomment these lines.
# WARNING: Make sure that forwarded packets are regular statsd packets and not "dogstatsd" packets,
# as your other statsd server might not be able to handle them.
# statsd_forward_host: address_of_own_statsd_server
# statsd_forward_port: 8125
#
# If you want all statsd metrics coming from this host to be namespaced
# you can configure the namespace below. Each metric received will be prefixed
# with the namespace before it's sent to Datadog.
# statsd_metric_namespace:
# Logs agent
#
# Logs agent is disabled by default
logs_enabled: true
#
# Enable logs collection for all containers, disabled by default
# logs_config:
# container_collect_all: false
#
# JMX
#
# jmx_pipe_path:
# jmx_pipe_name: dd-auto_discovery
#
# If you only run Autodiscovery tests, jmxfetch might fail to pick up custom_jar_paths
# set in the check templates. If that is the case, you can force custom jars here.
# jmx_custom_jars:
# - /jmx-jars/jboss-cli-client.jar
#
# When running in a memory cgroup, openjdk 8u131 and higher can automatically adjust
# its heap memory usage in accordance to the cgroup/container's memory limit.
# Default is false: we'll set a Xmx of 200MB if none is configured.
# Note: older openjdk versions and other jvms might fail to start if this option is set
#
# jmx_use_cgroup_memory_limit: true
#
# Autoconfig
#
# Directory containing configuration templates
# autoconf_template_dir: /datadog/check_configs
#
# The providers the Agent should call to collect checks configurations.
# Please note the File Configuration Provider is enabled by default and cannot
# be configured.
# config_providers:
## The kubelet provider handles templates embedded in pod annotations, see
## https://docs.datadoghq.com/guides/autodiscovery/#template-source-kubernetes-pod-annotations
# - name: kubelet
# polling: true
## The docker provider handles templates embedded in container labels, see
## https://docs.datadoghq.com/guides/autodiscovery/#template-source-docker-label-annotations
# - name: docker
# polling: true
# - name: etcd
# polling: true
# template_dir: /datadog/check_configs
# template_url: http://127.0.0.1
# username:
# password:
# - name: consul
# polling: true
# template_dir: /datadog/check_configs
# template_url: http://127.0.0.1
# ca_file:
# ca_path:
# cert_file:
# key_file:
# username:
# password:
# token:
# - name: zookeeper
# polling: true
# template_dir: /datadog/check_configs
# template_url: 127.0.0.1
# username:
# password:
# Logging
#
# log_level: info
# log_file: /var/log/datadog/agent.log
# Set to 'yes' to output logs in JSON format
# log_format_json: no
# Set to 'no' to disable logging to stdout
# log_to_console: yes
# Set to 'yes' to disable logging to the log file
# disable_file_logging: no
# Set to 'yes' to enable logging to syslog.
#
# log_to_syslog: no
#
# If 'syslog_uri' is left undefined/empty, a local domain socket connection will be attempted
#
# syslog_uri:
#
# Set to 'yes' to output in an RFC 5424-compliant format
#
# syslog_rfc: no
#
# If TLS enabled, you must specify a path to a PEM certificate here
#
# syslog_pem: /path/to/certificate.pem
#
# If TLS enabled, you must specify a path to a private key here
#
# syslog_key: /path/to/key.pem
#
# If TLS enabled, you may enforce TLS verification here (defaults to true)
#
# syslog_tls_verify: yes
#
# Autodiscovery
#
# Change the root directory to look at to get cgroup statistics. Useful when running inside a
# container with host directories mounted on a different folder.
# Default if environment variable "DOCKER_DD_AGENT" is set to "yes"
# "/host/sys/fs/cgroup" and "/sys/fs/cgroup" if not.
#
# container_cgroup_root: /host/sys/fs/cgroup/
#
# Change the root directory to look at to get proc statistics. Useful when running inside a
# container with host directories mounted on a different folder.
# Default if environment variable "DOCKER_DD_AGENT" is set to "yes"
# "/host/proc" and "/proc" if not.
#
# container_proc_root: /host/proc
#
# Choose "auto" if you want to let the agent find any relevant listener on your host
# At the moment, the only auto listener supported is docker
# If you have already set docker anywhere in the listeners, the auto listener is ignored
# listeners:
# - name: auto
# - name: docker
#
# Exclude containers from metrics and AD based on their name or image:
# An excluded container will not get any individual container metric reported for it.
# Please note that the `docker.containers.running`, `.stopped`, `.running.total` and
# `.stopped.total` metrics are not affected by these settings and always count all
# containers. This does not affect your per-container billing.
#
# How it works: include first.
# If a container matches an exclude rule, it won't be included unless it first matches an include rule.
#
# Rules are regexp.
#
# Examples:
# exclude all, except containers based on the 'ubuntu' image or the 'debian' image.
# ac_exclude: ["image:.*"]
# ac_include: ["image:ubuntu", "image:debian"]
#
# include all, except containers based on the 'ubuntu' image.
# ac_exclude: ["image:ubuntu"]
# ac_include: []
#
# exclude all debian images except containers with a name starting with 'frontend'.
# ac_exclude: ["image:debian"]
# ac_include: ["name:frontend.*"]
#
# ac_exclude: []
# ac_include: []
#
#
# Exclude default pause containers from orchestrators.
#
# By default the agent will not monitor kubernetes/openshift pause
# container. They will still be counted in the container count (just like
# excluded containers) since ignoring them would give a wrong impression
# about the docker daemon load.
#
# exclude_pause_container: true
# Exclude default containers from DockerCloud:
# The following configuration will instruct the agent to ignore the containers from Docker Cloud.
# You can remove the ones you want to collect.
# ac_exclude: ["image:dockercloud/network-daemon","image:dockercloud/cleanup","image:dockercloud/logrotate","image:dockercloud/events","image:dockercloud/ntpd"]
# ac_include: []
#
# You can also use the regex to ignore them all:
# ac_exclude: ["image:dockercloud/*"]
# ac_include: []
#
# The default timeout value when connecting to the docker daemon
# is 5 seconds. It can be configured with this option.
# docker_query_timeout: 5
#
# Docker tag extraction
#
# We can extract container label or environment variables
# as metric tags. If you prefix your tag name with +, it
# will only be added to high cardinality metrics (docker check)
#
# docker_labels_as_tags:
# label_name: tag_name
# high_cardinality_label_name: +tag_name
# docker_env_as_tags:
# ENVVAR_NAME: tag_name
#
# Example:
# docker_labels_as_tags:
# com.docker.compose.service: service_name
# com.docker.compose.project: +project_name
#
# Kubernetes tag extraction
#
# We can extract pod labels and annotations as metric tags. If you prefix your
# tag name with +, it will only be added to high cardinality metrics
#
# kubernetes_pod_labels_as_tags:
# app: kube_app
# pod-template-hash: +kube_pod-template-hash
#
# kubernetes_pod_annotations_as_tags:
# app: kube_app
# pod-template-hash: +kube_pod-template-hash
#
# ECS integration
#
# URL where the ECS agent can be found. Standard cases will be autodetected.
# ecs_agent_url: http://localhost:51678
#
# Kubernetes kubelet connectivity
#
# The kubelet host and port should be autodetected when running inside a pod.
# If you run into connectivity issues, you can set these options according to
# your cluster setup:
# kubernetes_kubelet_host: autodetected
# kubernetes_http_kubelet_port: 10255
# kubernetes_https_kubelet_port: 10250
#
# When using HTTPS, we verify the kubelet's certificate, you can tune this:
# kubelet_tls_verify: true
# kubelet_client_ca: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
#
# If authentication is needed, the agent will use the pod's serviceaccount's
# credentials. If you want to use a different account, or are running the agent
# on the host, you can set the credentials to use here:
# kubelet_auth_token_path: /path/to/file
# kubelet_client_crt: /path/to/key
# kubelet_client_key: /path/to/key
#
# Kubernetes apiserver integration
#
# When running in a pod, the agent will automatically use the pod's serviceaccount
# to authenticate with the apiserver. If you wish to install the agent out of a pod
# or customise connection parameters, you can provide the path to a KubeConfig file
# see https://kubernetes.io/docs/tasks/access-application-cluster/configure-access-multiple-clusters/
#
# kubernetes_kubeconfig_path: /path/to/file
#
# In order to collect Kubernetes service names, the agent needs certain rights (see RBAC documentation in
# [docker readme](https://github.com/DataDog/datadog-agent/blob/master/Dockerfiles/agent/README.md#kubernetes)).
# You can disable this option or set how often (in seconds) the agent refreshes the internal mapping of services to
# ContainerIDs with the following options:
# kubernetes_collect_metadata_tags: true
# kubernetes_metadata_tag_update_freq: 60
# kubernetes_apiserver_client_timeout: 10
# kubernetes_apiserver_poll_freq: 30
#
# To collect Kubernetes events, leader election must be enabled and collect_kubernetes_events set to true.
# Only the leader will collect events. More details about events [here](https://github.com/DataDog/datadog-agent/blob/master/Dockerfiles/agent/README.md#event-collection).
# collect_kubernetes_events: false
#
#
# Leader Election settings, more details about leader election [here](https://github.com/DataDog/datadog-agent/blob/master/Dockerfiles/agent/README.md#leader-election)
# To enable the leader election on this node, set the leader_election variable to true.
# leader_election: false
# The leader election lease is an integer in seconds.
# leader_lease_duration: 60
#
# Node labels that should be collected and their name in host tags. Off by default.
# Some of these labels are redundant with metadata collected by
# cloud provider crawlers (AWS, GCE, Azure)
#
# kubernetes_node_labels_as_tags:
# kubernetes.io/hostname: nodename
# beta.kubernetes.io/os: os
# Process agent specific settings
#
process_config:
# A string indicating the enabled state of the Process Agent.
# If "false" (the default) it will only collect containers.
# If "true" it will collect containers and processes.
# If "disabled" it will be disabled altogether and won't start.
enabled: "true"
# The full path to the file where process-agent logs will be written.
# log_file:
# The interval, in seconds, at which we will run each check. If you want consistent
# behavior between real-time you may set the Container/ProcessRT intervals to 10.
# Defaults to 10s for normal checks and 2s for others.
# intervals:
# container:
# container_realtime:
# process:
# process_realtime:
# A list of regex patterns that will exclude a process if matched.
# blacklist_patterns:
# How many check results to buffer in memory when POST fails. The default is usually fine.
# queue_size:
# The maximum number of file descriptors to open when collecting net connections.
# Only change if you are running out of file descriptors from the Agent.
# max_proc_fds:
# The maximum number of processes or containers per message.
# Only change if the defaults are causing issues.
# max_per_message:
# Overrides the path to the Agent bin used for getting the hostname. The default is usually fine.
# dd_agent_bin:
# Overrides of the environment we pass to fetch the hostname. The default is usually fine.
# dd_agent_env:
# Trace Agent Specific Settings
#
# apm_config:
# Whether or not the APM Agent should run
# enabled: true
# The environment tag that Traces should be tagged with
# Will inherit from "env" tag if none is applied here
# env: none
# The port that the Receiver should listen on
# receiver_port: 8126
# Whether the Trace Agent should listen for non local traffic
# Only enable if Traces are being sent to this Agent from another host/container
# apm_non_local_traffic: false
# Extra global sample rate to apply on all the traces
# This sample rate is combined to the sample rate from the sampler logic, still promoting interesting traces
# From 1 (no extra rate) to 0 (don't sample at all)
# extra_sample_rate: 1.0
# Maximum number of traces per second to sample.
# The limit is applied over an average over a few minutes ; much bigger spikes are possible.
# Set to 0 to disable the limit.
# max_traces_per_second: 10
# A blacklist of regular expressions can be provided to disable certain traces based on their resource name
# all entries must be surrounded by double quotes and separated by commas
# Example: ["(GET|POST) /healthcheck", "GET /V1"]
# ignore_resources: []

View File

@ -3,4 +3,3 @@
- name: restart journald
service: name=systemd-journald state=restarted

View File

@ -15,6 +15,3 @@
command: "systemd-tmpfiles --create --prefix /var/log/journal"
notify: restart journald
#- name: Ability to get the core dump on SIGABRT
# shell: "ulimit -c unlimited"

View File

@ -0,0 +1,13 @@
---
# Set the core file size to unlimited to allow the system to generate core dumps
- hosts: all
any_errors_fatal: true
gather_facts: no
tasks:
- name: Set core file size to unlimited to be able to get the core dump on SIGABRT
shell: "ulimit -c unlimited"

View File

@ -1,5 +1,7 @@
---
#DD_API_KEY
- hosts: all
any_errors_fatal: true
gather_facts: no