Merge PR #6399: SDK Telemetry

This commit is contained in:
Alexander Bezobchuk 2020-06-16 11:11:02 -04:00 committed by GitHub
parent e2f336345d
commit 8ee7d1f403
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 492 additions and 89 deletions

View File

@ -99,7 +99,7 @@ func (ctx Context) queryABCI(req abci.RequestQuery) (abci.ResponseQuery, error)
}
// data from trusted node or subspace query doesn't need verification
if ctx.TrustNode || !isQueryStoreWithProof(req.Path) {
if !opts.Prove || !isQueryStoreWithProof(req.Path) {
return result.Response, nil
}

View File

@ -10,85 +10,145 @@ Proposed
## Context
There has been discussion around exposing more metrics to users and node operators about the application. Currently there is only a way to expose metrics from Tendermint and not the application itself. To bring more visibility into applications, I would like to propose reporting of metrics through [Prometheus](https://prometheus.io/).
Telemetry is paramount to debugging and understanding what the application is doing and how it is
performing. We aim to expose metrics from modules and other core parts of the Cosmos SDK.
Extending `AppModuleBasic` to support registering of metrics would enable developers to see more information about individual modules.
In addition, we should aim to support multiple configurable sinks that an operator may choose from.
By default, when telemetry is enabled, the application should track and expose metrics that are
stored in-memory. The operator may choose to enable additional sinks, where we support only
[Prometheus](https://prometheus.io/) for now, as it's battle-tested, simple to set up, open source,
and is rich with ecosystem tooling.
```go
type AppModuleBasic interface {
Name() string
RegisterCodec(*codec.Codec)
RegisterMetrics(namespace string, labelsAndValues... string) *Metrics
We must also aim to integrate metrics into the Cosmos SDK in the most seamless way possible such that
metrics may be added or removed at will and without much friction. To do this, we will use the
[go-metrics](https://github.com/armon/go-metrics) library.
// genesis
DefaultGenesis() json.RawMessage
ValidateGenesis(json.RawMessage) error
// client functionality
RegisterRESTRoutes(client.Context, *mux.Router)
GetTxCmd(*codec.Codec) *cobra.Command
GetQueryCmd(*codec.Codec) *cobra.Command
}
// .....
func (bm BasicManager) RegisterMetrics(appName string, labelsAndValues... string) MetricsProvider {
for _, b := range bm {
b.CreateMetrics(appName, labelsAndValues)
}
}
```
Each module can define its own `Metrics` type and`CreateMetrics` function in the x/\<module\>/observability/metrics.go file:
```go
type Metrics struct {
Size metrics.Guage
Transactions metrics.Counter
}
func CreateMetrics(namespace string, labelsAndValues... string) *Metrics {
labels := make([]string, len(labelsAndValues/2))
for i := 0; i < len(labelsAndValues); i += 2 {
labels[i/2] = labelsAndValues[i]
}
return &Metrics{
Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: "subsystem",
Name: "size",
Help: "Size of the custom metric",
}, labels).With(labelsAndValues...),
Transactions: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: "subsystem",
Name: "transactions",
Help: "Number of transactions processed",
}, labels).With(labelsAndValues...),
}
```
To get the correct namespace for the modules changing `BasicManager` to consist of the app name is needed.
```go
type BasicManager struct {
appName string
modules map[string]AppModuleBasic
}
```
Finally, operators may enable telemetry along with specific configuration options. If enabled, metrics
will be exposed via `/metrics?format={text|prometheus}` via the API server.
## Decision
- Use Prometheus for metric gathering.
- Add a method to register metrics to the `AppModuleBasic` interface
- Modules create a observability/metrics.go that defines the metrics and create the metrics object.
We will add an additional configuration block to `app.toml` that defines telemetry settings:
```toml
###############################################################################
### Telemetry Configuration ###
###############################################################################
[telemetry]
# Prefixed with keys to separate services
service-name = "{{ .Telemetry.ServiceName }}"
# Enabled enables the application telemetry functionality. When enabled,
# an in-memory sink is also enabled by default. Operators may also enable
# other sinks such as Prometheus.
enabled = {{ .Telemetry.Enabled }}
# Enable prefixing gauge values with hostname
enable-hostname = {{ .Telemetry.EnableHostname }}
# Enable adding hostname to labels
enable-hostname-label = {{ .Telemetry.EnableHostnameLabel }}
# Enable adding service to labels
enable-service-label = {{ .Telemetry.EnableServiceLabel }}
# PrometheusRetentionTime, when positive, enables a Prometheus metrics sink.
prometheus-retention-time = {{ .Telemetry.PrometheusRetentionTime }}
```
The given configuration allows for two sinks -- in-memory and Prometheus. We create a `Metrics`
type that performs all the bootstrapping for the operator, so capturing metrics becomes seamless.
```go
// Metrics defines a wrapper around application telemetry functionality. It allows
// metrics to be gathered at any point in time. When creating a Metrics object,
// internally, a global metrics is registered with a set of sinks as configured
// by the operator. In addition to the sinks, when a process gets a SIGUSR1, a
// dump of formatted recent metrics will be sent to STDERR.
type Metrics struct {
memSink *metrics.InmemSink
prometheusEnabled bool
}
// Gather collects all registered metrics and returns a GatherResponse where the
// metrics are encoded depending on the type. Metrics are either encoded via
// Prometheus or JSON if in-memory.
func (m *Metrics) Gather(format string) (GatherResponse, error) {
switch format {
case FormatPrometheus:
return m.gatherPrometheus()
case FormatText:
return m.gatherGeneric()
case FormatDefault:
return m.gatherGeneric()
default:
return GatherResponse{}, fmt.Errorf("unsupported metrics format: %s", format)
}
}
```
In addition, `Metrics` allows us to gather the current set of metrics at any given point in time. An
operator may also choose to send a signal, SIGUSR1, to dump and print formatted metrics to STDERR.
During an application's bootstrapping and construction phase, if `Telemetry.Enabled` is `true`, the
API server will create a reference to a `Metrics` object and will register a metrics
handler accordingly.
```go
func (s *Server) Start(cfg config.Config) error {
// ...
if cfg.Telemetry.Enabled {
m, err := telemetry.New(cfg.Telemetry)
if err != nil {
return err
}
s.metrics = m
s.registerMetrics()
}
// ...
}
func (s *Server) registerMetrics() {
metricsHandler := func(w http.ResponseWriter, r *http.Request) {
format := strings.TrimSpace(r.FormValue("format"))
gr, err := s.metrics.Gather(format)
if err != nil {
rest.WriteErrorResponse(w, http.StatusBadRequest, fmt.Sprintf("failed to gather metrics: %s", err))
return
}
w.Header().Set("Content-Type", gr.ContentType)
_, _ = w.Write(gr.Metrics)
}
s.Router.HandleFunc("/metrics", metricsHandler).Methods("GET")
}
```
Application developers may track counters, gauges, summaries, and key/value metrics. There is no
additional lifting required by modules to leverage profiling metrics. To do so, it's as simple as:
```go
func (k BaseKeeper) MintCoins(ctx sdk.Context, moduleName string, amt sdk.Coins) error {
defer metrics.MeasureSince([]string{"MintCoins"}, time.Now().UTC())
// ...
}
```
## Consequences
### Positive
- Add more visibility into SDK based application and modules
- Exposure into the performance and behavior of an application
### Negative

3
go.mod
View File

@ -2,6 +2,7 @@ module github.com/cosmos/cosmos-sdk
require (
github.com/99designs/keyring v1.1.5
github.com/armon/go-metrics v0.3.3
github.com/bgentry/speakeasy v0.1.0
github.com/btcsuite/btcd v0.20.1-beta
github.com/btcsuite/btcutil v1.0.2
@ -21,6 +22,8 @@ require (
github.com/otiai10/copy v1.2.0
github.com/pelletier/go-toml v1.8.0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.6.0
github.com/prometheus/common v0.10.0
github.com/rakyll/statik v0.1.7
github.com/regen-network/cosmos-proto v0.3.0
github.com/spf13/afero v1.2.2 // indirect

24
go.sum
View File

@ -20,6 +20,7 @@ github.com/ChainSafe/go-schnorrkel v0.0.0-20200102211924-4bcbc698314f h1:4O1om+U
github.com/ChainSafe/go-schnorrkel v0.0.0-20200102211924-4bcbc698314f/go.mod h1:URdX5+vg25ts3aCh8H5IFZybJYKWhJHYMTnf+ULtoC4=
github.com/ChainSafe/go-schnorrkel v0.0.0-20200405005733-88cbf1b4c40d h1:nalkkPQcITbvhmL4+C4cKA87NW0tfm3Kl9VXRoPywFg=
github.com/ChainSafe/go-schnorrkel v0.0.0-20200405005733-88cbf1b4c40d/go.mod h1:URdX5+vg25ts3aCh8H5IFZybJYKWhJHYMTnf+ULtoC4=
github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
@ -38,7 +39,10 @@ github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb
github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da h1:8GUt8eRujhVEGZFFEjBj46YV4rDjvGrNxb0KMWYkL2I=
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
github.com/armon/go-metrics v0.3.3 h1:a9F4rlj7EWWrbj7BYw8J8+x+ZZkJeqzNyRk8hdPF+ro=
github.com/armon/go-metrics v0.3.3/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/aryann/difflib v0.0.0-20170710044230-e206f873d14a/go.mod h1:DAHtR1m6lCRdSC2Tm3DSWRPvIPr6xNKyeHdqDQSQT+A=
github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQwij/eHl5CU=
@ -73,6 +77,8 @@ github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/clbanning/x2j v0.0.0-20191024224557-825249438eec/go.mod h1:jMjuTZXRI4dUb/I5gc9Hdhagfvm9+RyrPryS/auMzxE=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
@ -235,14 +241,18 @@ github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoP
github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8=
github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0=
github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=
github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU=
github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=
github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=
github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.1 h1:fv1ep09latC32wFoVwnqcnKJGnMSdBanPczbHAYm1BE=
github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90=
@ -360,7 +370,10 @@ github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT9
github.com/otiai10/mint v1.3.1 h1:BCmzIS3n71sGfHB5NMNDB3lHYPz8fWSkCAErHed//qc=
github.com/otiai10/mint v1.3.1/go.mod h1:/yxELlJQ0ufhjUwhshSj+wFjZ78CnZ48/1wtmBH1OTc=
github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIwwtUjcrb0b5/5kLM=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c h1:Lgl0gzECD8GnQ5QCWA8o6BtfL6mDH5rQgM4/fX3avOs=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pelletier/go-toml v1.8.0 h1:Keo9qb7iRJs2voHvunFtuuYFsbWeOBh8/P9v/kVMFtw=
@ -381,10 +394,13 @@ github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829/go.mod
github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og=
github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
github.com/prometheus/client_golang v1.5.0 h1:Ctq0iGpCmr3jeP77kbF2UxgvRwzWWz+4Bh9/vJTyg1A=
github.com/prometheus/client_golang v1.5.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
github.com/prometheus/client_golang v1.5.1 h1:bdHYieyGlH+6OLEk2YQha8THib30KP0/yD0YH9m6xcA=
github.com/prometheus/client_golang v1.5.1/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
github.com/prometheus/client_golang v1.6.0 h1:YVPodQOcK15POxhgARIvnDRVpLcuK8mglnMrWfyrw6A=
github.com/prometheus/client_golang v1.6.0/go.mod h1:ZLOG9ck3JLRdB5MgO8f+lLTe83AXG6ro35rLTxvnIl4=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
@ -399,12 +415,16 @@ github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8
github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA=
github.com/prometheus/common v0.9.1 h1:KOMtN28tlbam3/7ZKEYKHhKoJZYYj3gMH4uc62x7X7U=
github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=
github.com/prometheus/common v0.10.0 h1:RyRA7RzGXQZiW+tGMr7sxa85G1z0yOpM1qq5c8lNawc=
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.0-20190117184657-bf6a532e95b1/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.8 h1:+fpWZdT24pJBiqJdAwYBjPSk+5YmQzYNPYzQsdzLkt8=
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
github.com/prometheus/procfs v0.0.11 h1:DhHlBtkHWPYi8O2y31JkK0TF+DGM+51OopZjH/Ia5qI=
github.com/prometheus/procfs v0.0.11/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
github.com/rakyll/statik v0.1.7 h1:OF3QCZUuyPxuGEP7B4ypUa7sB/iHtqOTDYZXGM8KOdQ=
github.com/rakyll/statik v0.1.7/go.mod h1:AlZONWzMtEnMs7W4e/1LURLiI49pIMmp6V9Unghqrcc=
@ -497,6 +517,7 @@ github.com/tendermint/tm-db v0.5.1 h1:H9HDq8UEA7Eeg13kdYckkgwwkQLBnJGgX4PgLJRhie
github.com/tendermint/tm-db v0.5.1/go.mod h1:g92zWjHpCYlEvQXvy9M168Su8V1IBEeawpXVVBaK4f4=
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
@ -607,11 +628,14 @@ golang.org/x/sys v0.0.0-20190712062909-fae7ac547cb7/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82 h1:ywK/j/KkyTHcdyYSZNXGjMwgmDSfjglYZ3vStQ/gSCU=
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f h1:gWF768j/LaZugp8dyS4UwsslYCYz9XgFxvlgsn0n9H8=
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

View File

@ -1,9 +1,11 @@
package api
import (
"fmt"
"net"
"net/http"
"os"
"strings"
"time"
"github.com/gorilla/handlers"
@ -14,6 +16,8 @@ import (
"github.com/cosmos/cosmos-sdk/client"
"github.com/cosmos/cosmos-sdk/server/config"
"github.com/cosmos/cosmos-sdk/telemetry"
"github.com/cosmos/cosmos-sdk/types/rest"
// unnamed import of statik for swagger UI support
_ "github.com/cosmos/cosmos-sdk/client/docs/statik"
@ -25,6 +29,7 @@ type Server struct {
ClientCtx client.Context
logger log.Logger
metrics *telemetry.Metrics
listener net.Listener
}
@ -40,18 +45,28 @@ func New(clientCtx client.Context) *Server {
// JSON RPC server. Configuration options are provided via config.APIConfig
// and are delegated to the Tendermint JSON RPC server. The process is
// non-blocking, so an external signal handler must be used.
func (s *Server) Start(cfg config.APIConfig) error {
if cfg.Swagger {
func (s *Server) Start(cfg config.Config) error {
if cfg.API.Swagger {
s.registerSwaggerUI()
}
tmCfg := tmrpcserver.DefaultConfig()
tmCfg.MaxOpenConnections = int(cfg.MaxOpenConnections)
tmCfg.ReadTimeout = time.Duration(cfg.RPCReadTimeout) * time.Second
tmCfg.WriteTimeout = time.Duration(cfg.RPCWriteTimeout) * time.Second
tmCfg.MaxBodyBytes = int64(cfg.RPCMaxBodyBytes)
if cfg.Telemetry.Enabled {
m, err := telemetry.New(cfg.Telemetry)
if err != nil {
return err
}
listener, err := tmrpcserver.Listen(cfg.Address, tmCfg)
s.metrics = m
s.registerMetrics()
}
tmCfg := tmrpcserver.DefaultConfig()
tmCfg.MaxOpenConnections = int(cfg.API.MaxOpenConnections)
tmCfg.ReadTimeout = time.Duration(cfg.API.RPCReadTimeout) * time.Second
tmCfg.WriteTimeout = time.Duration(cfg.API.RPCWriteTimeout) * time.Second
tmCfg.MaxBodyBytes = int64(cfg.API.RPCMaxBodyBytes)
listener, err := tmrpcserver.Listen(cfg.API.Address, tmCfg)
if err != nil {
return err
}
@ -59,7 +74,7 @@ func (s *Server) Start(cfg config.APIConfig) error {
s.listener = listener
var h http.Handler = s.Router
if cfg.EnableUnsafeCORS {
if cfg.API.EnableUnsafeCORS {
return tmrpcserver.Serve(s.listener, handlers.CORS()(h), s.logger, tmCfg)
}
@ -75,3 +90,20 @@ func (s *Server) registerSwaggerUI() {
staticServer := http.FileServer(statikFS)
s.Router.PathPrefix("/").Handler(staticServer)
}
// registerMetrics mounts a GET-only "/metrics" route on the server's router.
// The handler reads the "format" form value (surrounding whitespace trimmed),
// asks the server's telemetry Metrics to gather in that format, and writes the
// encoded payload back using the content type reported by the gather result.
// A gather failure is reported to the client as 400 Bad Request.
func (s *Server) registerMetrics() {
	s.Router.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
		requested := strings.TrimSpace(r.FormValue("format"))

		resp, gatherErr := s.metrics.Gather(requested)
		if gatherErr != nil {
			msg := fmt.Sprintf("failed to gather metrics: %s", gatherErr)
			rest.WriteErrorResponse(w, http.StatusBadRequest, msg)
			return
		}

		w.Header().Set("Content-Type", resp.ContentType)
		// The result of the body write is intentionally discarded.
		_, _ = w.Write(resp.Metrics)
	}).Methods("GET")
}

View File

@ -4,7 +4,10 @@ import (
"fmt"
"strings"
"github.com/spf13/viper"
"github.com/cosmos/cosmos-sdk/store"
"github.com/cosmos/cosmos-sdk/telemetry"
sdk "github.com/cosmos/cosmos-sdk/types"
)
@ -75,7 +78,9 @@ type APIConfig struct {
type Config struct {
BaseConfig `mapstructure:",squash"`
API APIConfig `mapstructure:"api"`
// Telemetry defines the application telemetry configuration
Telemetry telemetry.Config `mapstructure:"telemetry"`
API APIConfig `mapstructure:"api"`
}
// SetMinGasPrices sets the validator's minimum gas prices.
@ -115,6 +120,7 @@ func DefaultConfig() *Config {
PruningKeepEvery: "0",
PruningSnapshotEvery: "0",
},
Telemetry: telemetry.Config{},
API: APIConfig{
Enable: false,
Swagger: false,
@ -125,3 +131,34 @@ func DefaultConfig() *Config {
},
}
}
// GetConfig returns a fully parsed Config object. Every field is read from the
// global viper instance using the same keys as the application configuration
// file: top-level keys for BaseConfig, "telemetry.*" for the telemetry section
// and "api.*" for the API section. Keys that are not set yield the zero value
// of the corresponding field.
func GetConfig() Config {
	return Config{
		BaseConfig: BaseConfig{
			MinGasPrices:         viper.GetString("minimum-gas-prices"),
			InterBlockCache:      viper.GetBool("inter-block-cache"),
			Pruning:              viper.GetString("pruning"),
			PruningKeepEvery:     viper.GetString("pruning-keep-every"),
			PruningSnapshotEvery: viper.GetString("pruning-snapshot-every"),
			HaltHeight:           viper.GetUint64("halt-height"),
			HaltTime:             viper.GetUint64("halt-time"),
		},
		Telemetry: telemetry.Config{
			ServiceName:             viper.GetString("telemetry.service-name"),
			Enabled:                 viper.GetBool("telemetry.enabled"),
			EnableHostname:          viper.GetBool("telemetry.enable-hostname"),
			EnableHostnameLabel:     viper.GetBool("telemetry.enable-hostname-label"),
			EnableServiceLabel:      viper.GetBool("telemetry.enable-service-label"),
			PrometheusRetentionTime: viper.GetInt64("telemetry.prometheus-retention-time"),
		},
		API: APIConfig{
			// Enable and Swagger must be populated as well: the API server
			// decides whether to register the Swagger UI from API.Swagger, so
			// leaving it at its zero value makes that feature unreachable.
			// NOTE(review): the two key names below are assumed to follow the
			// same "api.<tag>" scheme as the other fields — confirm against
			// the APIConfig mapstructure tags.
			Enable:             viper.GetBool("api.enable"),
			Swagger:            viper.GetBool("api.swagger"),
			Address:            viper.GetString("api.address"),
			MaxOpenConnections: viper.GetUint("api.max-open-connections"),
			RPCReadTimeout:     viper.GetUint("api.rpc-read-timeout"),
			RPCWriteTimeout:    viper.GetUint("api.rpc-write-timeout"),
			RPCMaxBodyBytes:    viper.GetUint("api.rpc-max-body-bytes"),
			EnableUnsafeCORS:   viper.GetBool("api.enabled-unsafe-cors"),
		},
	}
}

View File

@ -47,6 +47,32 @@ halt-time = {{ .BaseConfig.HaltTime }}
# InterBlockCache enables inter-block caching.
inter-block-cache = {{ .BaseConfig.InterBlockCache }}
###############################################################################
### Telemetry Configuration ###
###############################################################################
[telemetry]
# Prefixed with keys to separate services
service-name = "{{ .Telemetry.ServiceName }}"
# Enabled enables the application telemetry functionality. When enabled,
# an in-memory sink is also enabled by default. Operators may also enable
# other sinks such as Prometheus.
enabled = {{ .Telemetry.Enabled }}
# Enable prefixing gauge values with hostname
enable-hostname = {{ .Telemetry.EnableHostname }}
# Enable adding hostname to labels
enable-hostname-label = {{ .Telemetry.EnableHostnameLabel }}
# Enable adding service to labels
enable-service-label = {{ .Telemetry.EnableServiceLabel }}
# PrometheusRetentionTime, when positive, enables a Prometheus metrics sink.
prometheus-retention-time = {{ .Telemetry.PrometheusRetentionTime }}
###############################################################################
### API Configuration ###
###############################################################################

View File

@ -209,18 +209,9 @@ func startInProcess(ctx *Context, cdc codec.JSONMarshaler, appCreator AppCreator
WithTrustNode(true)
apiSrv := api.New(ctx)
apiCfg := config.APIConfig{
Address: viper.GetString("api.address"),
MaxOpenConnections: viper.GetUint("api.max-open-connections"),
RPCReadTimeout: viper.GetUint("api.rpc-read-timeout"),
RPCWriteTimeout: viper.GetUint("api.rpc-write-timeout"),
RPCMaxBodyBytes: viper.GetUint("api.rpc-max-body-bytes"),
EnableUnsafeCORS: viper.GetBool("api.enabled-unsafe-cors"),
}
app.RegisterAPIRoutes(apiSrv)
if err := apiSrv.Start(apiCfg); err != nil {
if err := apiSrv.Start(config.GetConfig()); err != nil {
return err
}
}

View File

@ -122,6 +122,9 @@ func InitTestnet(
simappConfig := srvconfig.DefaultConfig()
simappConfig.MinGasPrices = minGasPrices
simappConfig.API.Enable = true
simappConfig.Telemetry.Enabled = true
simappConfig.Telemetry.PrometheusRetentionTime = 60
simappConfig.Telemetry.EnableHostnameLabel = false
var (
genAccounts []authtypes.GenesisAccount

151
telemetry/metrics.go Normal file
View File

@ -0,0 +1,151 @@
package telemetry
import (
"bytes"
"encoding/json"
"fmt"
"time"
"github.com/armon/go-metrics"
metricsprom "github.com/armon/go-metrics/prometheus"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
)
// Metrics supported format types. These are the values Metrics.Gather accepts
// for its format argument; any other value is rejected as unsupported.
const (
// FormatDefault is the empty format string. Gather handles it exactly like
// FormatText.
FormatDefault = ""
// FormatPrometheus selects the Prometheus encoding of the gathered metrics.
FormatPrometheus = "prometheus"
// FormatText selects the generic encoding of the in-memory metrics.
FormatText = "text"
)
// Config defines the configuration options for application telemetry. The
// mapstructure tags name the keys of each option in the telemetry section of
// the application configuration.
type Config struct {
// Prefixed with keys to separate services
ServiceName string `mapstructure:"service-name"`
// Enabled enables the application telemetry functionality. When enabled,
// an in-memory sink is also enabled by default. Operators may also enable
// other sinks such as Prometheus.
Enabled bool `mapstructure:"enabled"`
// Enable prefixing gauge values with hostname
EnableHostname bool `mapstructure:"enable-hostname"`
// Enable adding hostname to labels
EnableHostnameLabel bool `mapstructure:"enable-hostname-label"`
// Enable adding service to labels
EnableServiceLabel bool `mapstructure:"enable-service-label"`
// PrometheusRetentionTime, when positive, enables a Prometheus metrics sink.
// It defines the retention duration in seconds.
PrometheusRetentionTime int64 `mapstructure:"prometheus-retention-time"`
}
// Metrics defines a wrapper around application telemetry functionality. It allows
// metrics to be gathered at any point in time. When creating a Metrics object,
// internally, a global metrics is registered with a set of sinks as configured
// by the operator. In addition to the sinks, when a process gets a SIGUSR1, a
// dump of formatted recent metrics will be sent to STDERR.
type Metrics struct {
// memSink is the in-memory sink; it is the source for the generic
// (non-Prometheus) gather output.
memSink *metrics.InmemSink
// prometheusEnabled records whether a Prometheus sink was configured.
// Gathering in the Prometheus format fails when it is false.
prometheusEnabled bool
}
// GatherResponse is the result of Metrics.Gather: the encoded metrics together
// with the content type that describes their encoding.
type GatherResponse struct {
// Metrics holds the encoded metrics payload.
Metrics []byte
// ContentType is the content type matching the encoding of Metrics.
ContentType string
}
// New bootstraps application telemetry from cfg and returns the Metrics
// wrapper used to gather what has been recorded.
//
// When cfg.Enabled is false nothing is set up and (nil, nil) is returned, so
// callers must be prepared for a nil *Metrics without an error.
//
// Otherwise an in-memory sink is always created (10 second intervals retained
// for one minute) and hooked up to the default go-metrics signal handler. If
// cfg.PrometheusRetentionTime is positive, a Prometheus sink whose expiration
// is that many seconds is added as well. All sinks are combined in a fan-out
// sink and installed as the process-global go-metrics instance.
//
// An error is returned if the Prometheus sink or the global metrics instance
// cannot be created.
func New(cfg Config) (*Metrics, error) {
	if !cfg.Enabled {
		return nil, nil
	}

	metricsConf := metrics.DefaultConfig(cfg.ServiceName)
	metricsConf.EnableHostname = cfg.EnableHostname
	metricsConf.EnableHostnameLabel = cfg.EnableHostnameLabel
	// Forward the service-label option too; it is part of Config and of the
	// operator-facing configuration, so it must not be silently dropped.
	metricsConf.EnableServiceLabel = cfg.EnableServiceLabel

	memSink := metrics.NewInmemSink(10*time.Second, time.Minute)
	metrics.DefaultInmemSignal(memSink)

	m := &Metrics{memSink: memSink}
	fanout := metrics.FanoutSink{memSink}

	if cfg.PrometheusRetentionTime > 0 {
		m.prometheusEnabled = true

		prometheusOpts := metricsprom.PrometheusOpts{
			// The configured retention is expressed in seconds.
			Expiration: time.Duration(cfg.PrometheusRetentionTime) * time.Second,
		}

		promSink, err := metricsprom.NewPrometheusSinkFrom(prometheusOpts)
		if err != nil {
			return nil, err
		}

		fanout = append(fanout, promSink)
	}

	if _, err := metrics.NewGlobal(metricsConf, fanout); err != nil {
		return nil, err
	}

	return m, nil
}
// Gather returns the currently registered metrics encoded according to format.
// FormatPrometheus yields the Prometheus encoding, while FormatText and
// FormatDefault both yield the generic encoding of the in-memory sink. Any
// other format results in an "unsupported metrics format" error and an empty
// GatherResponse.
func (m *Metrics) Gather(format string) (GatherResponse, error) {
	switch format {
	case FormatDefault, FormatText:
		return m.gatherGeneric()
	case FormatPrometheus:
		return m.gatherPrometheus()
	}

	return GatherResponse{}, fmt.Errorf("unsupported metrics format: %s", format)
}
// gatherPrometheus collects every metric family from the default Prometheus
// gatherer and encodes them in the Prometheus text exposition format.
//
// It fails if no Prometheus sink was enabled for this Metrics instance, if
// gathering fails, or if any metric family cannot be encoded. On failure the
// returned GatherResponse is empty.
func (m *Metrics) gatherPrometheus() (GatherResponse, error) {
	if !m.prometheusEnabled {
		return GatherResponse{}, fmt.Errorf("prometheus metrics are not enabled")
	}

	families, err := prometheus.DefaultGatherer.Gather()
	if err != nil {
		return GatherResponse{}, fmt.Errorf("failed to gather prometheus metrics: %w", err)
	}

	// buf is local to this call and never reused, so it needs no reset; the
	// slice handed back below refers to its storage.
	var buf bytes.Buffer

	enc := expfmt.NewEncoder(&buf, expfmt.FmtText)
	for _, mf := range families {
		if err := enc.Encode(mf); err != nil {
			return GatherResponse{}, fmt.Errorf("failed to encode prometheus metrics: %w", err)
		}
	}

	return GatherResponse{ContentType: string(expfmt.FmtText), Metrics: buf.Bytes()}, nil
}
// gatherGeneric asks the in-memory sink for its metrics summary and returns it
// JSON-encoded with the "application/json" content type. An error from the
// sink or from the JSON encoder is wrapped and returned together with an empty
// GatherResponse.
func (m *Metrics) gatherGeneric() (GatherResponse, error) {
	var empty GatherResponse

	snapshot, err := m.memSink.DisplayMetrics(nil, nil)
	if err != nil {
		return empty, fmt.Errorf("failed to gather in-memory metrics: %w", err)
	}

	encoded, err := json.Marshal(snapshot)
	if err != nil {
		return empty, fmt.Errorf("failed to encode in-memory metrics: %w", err)
	}

	return GatherResponse{Metrics: encoded, ContentType: "application/json"}, nil
}

76
telemetry/metrics_test.go Normal file
View File

@ -0,0 +1,76 @@
package telemetry
import (
"encoding/json"
"strings"
"testing"
"time"
"github.com/armon/go-metrics"
"github.com/prometheus/common/expfmt"
"github.com/stretchr/testify/require"
)
func TestMetrics_Disabled(t *testing.T) {
m, err := New(Config{Enabled: false})
require.Nil(t, m)
require.Nil(t, err)
}
// TestMetrics_InMem enables telemetry with only the in-memory sink, emits the
// dummy counter via emitMetrics and checks that gathering in the text format
// returns a JSON document whose first counter is "test.dummy_counter".
//
// NOTE(review): the expected count of 10 presumably relies on the sink's
// 10 second interval lining up with the one-per-second increments; this looks
// timing-sensitive — confirm on slow CI machines.
func TestMetrics_InMem(t *testing.T) {
	m, err := New(Config{
		Enabled:        true,
		EnableHostname: false,
		ServiceName:    "test",
	})
	require.NoError(t, err)
	require.NotNil(t, m)

	emitMetrics()

	gr, err := m.Gather(FormatText)
	require.NoError(t, err)
	require.Equal(t, "application/json", gr.ContentType)

	jsonMetrics := make(map[string]interface{})
	require.NoError(t, json.Unmarshal(gr.Metrics, &jsonMetrics))

	// Check shapes explicitly so an unexpected payload fails the test with a
	// clear message instead of panicking on a type assertion or index.
	counters, ok := jsonMetrics["Counters"].([]interface{})
	require.True(t, ok, "Counters must be a JSON array")
	require.NotEmpty(t, counters)

	first, ok := counters[0].(map[string]interface{})
	require.True(t, ok, "counter entry must be a JSON object")

	require.Equal(t, 10.0, first["Count"])
	require.Equal(t, "test.dummy_counter", first["Name"])
}
// TestMetrics_Prom enables telemetry with a Prometheus sink (60 second
// retention), emits the dummy counter via emitMetrics and checks that
// gathering in the Prometheus format returns the text exposition content type
// and a "test_dummy_counter 30" sample.
func TestMetrics_Prom(t *testing.T) {
	m, err := New(Config{
		Enabled:                 true,
		EnableHostname:          false,
		ServiceName:             "test",
		PrometheusRetentionTime: 60,
		EnableHostnameLabel:     false,
	})
	require.NoError(t, err)
	require.NotNil(t, m)
	require.True(t, m.prometheusEnabled)

	emitMetrics()

	gr, err := m.Gather(FormatPrometheus)
	require.NoError(t, err)
	require.Equal(t, string(expfmt.FmtText), gr.ContentType)

	// require.Contains prints the gathered payload on failure, which a bare
	// boolean assertion would not.
	require.Contains(t, string(gr.Metrics), "test_dummy_counter 30")
}
// emitMetrics increments the "dummy_counter" counter by one on every tick of a
// one-second ticker and returns once 30 seconds have elapsed. It blocks the
// caller for that whole period.
func emitMetrics() {
	ticker := time.NewTicker(time.Second)
	// Release the ticker's resources once emission is over.
	defer ticker.Stop()

	timeout := time.After(30 * time.Second)

	for {
		select {
		case <-ticker.C:
			metrics.IncrCounter([]string{"dummy_counter"}, 1.0)
		case <-timeout:
			return
		}
	}
}