diff --git a/Cargo.lock b/Cargo.lock index a18bfbefb..a3fab65c5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1869,6 +1869,28 @@ dependencies = [ "autocfg", ] +[[package]] +name = "metrics" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b9b8653cec6897f73b519a43fba5ee3d50f62fe9af80b428accdcc093b4a849" +dependencies = [ + "ahash", + "metrics-macros", + "portable-atomic", +] + +[[package]] +name = "metrics-macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "731f8ecebd9f3a4aa847dfe75455e4757a45da40a7793d2f0b1f9b6ed18b23f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -2201,6 +2223,12 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "portable-atomic" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15eb2c6e362923af47e13c23ca5afb859e83d54452c55b0b9ac763b8f7c1ac16" + [[package]] name = "postcard" version = "1.0.2" @@ -2677,6 +2705,7 @@ version = "0.1.0" dependencies = [ "aquamarine", "async-trait", + "metrics", "reth-db", "reth-interfaces", "reth-primitives", diff --git a/crates/stages/Cargo.toml b/crates/stages/Cargo.toml index b6c3e879f..f0bca4da0 100644 --- a/crates/stages/Cargo.toml +++ b/crates/stages/Cargo.toml @@ -17,6 +17,7 @@ tracing = "0.1.36" tracing-futures = "0.2.5" tokio = { version = "1.21.2", features = ["sync"] } aquamarine = "0.1.12" +metrics = "0.20.1" [dev-dependencies] tokio = { version = "*", features = ["rt", "sync", "macros"] } diff --git a/crates/stages/src/id.rs b/crates/stages/src/id.rs index ecc920c11..818962715 100644 --- a/crates/stages/src/id.rs +++ b/crates/stages/src/id.rs @@ -1,3 +1,4 @@ +use metrics::counter; use reth_interfaces::db::{tables::SyncStage, DbTx, DbTxMut, Error as DbError}; use reth_primitives::BlockNumber; use std::fmt::Display; @@ -26,6 +27,7 @@ impl StageId { tx: &impl DbTxMut<'db>, 
block: BlockNumber, ) -> Result<(), DbError> { + counter!("stage.progress", block, "stage" => self.0); tx.put::(self.0.as_bytes().to_vec(), block) } } diff --git a/crates/stages/src/lib.rs b/crates/stages/src/lib.rs index 7e4ce6525..5735240c4 100644 --- a/crates/stages/src/lib.rs +++ b/crates/stages/src/lib.rs @@ -7,6 +7,12 @@ //! Staged syncing primitives for reth. //! //! See [Stage] and [Pipeline]. +//! +//! # Metrics +//! +//! This library exposes metrics via the [`metrics`][metrics] crate: +//! +//! - `stage.progress{stage}`: The block number each stage has currently reached. mod error; mod id; @@ -18,3 +24,7 @@ pub use error::*; pub use id::*; pub use pipeline::*; pub use stage::*; + +// NOTE: Needed so the link in the module-level rustdoc works. +#[allow(unused_extern_crates)] +extern crate metrics; diff --git a/docs/README.md b/docs/README.md index 144a3e06d..427690dfd 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,5 +2,5 @@ This directory contains documentation for contributors. -- [Repository and project structure](./repo) -- [Design documents](./design) \ No newline at end of file +- [Repository and Project Structure](./repo) +- [Design](./design) \ No newline at end of file diff --git a/docs/design/.gitkeep b/docs/design/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/design/README.md b/docs/design/README.md new file mode 100644 index 000000000..346764423 --- /dev/null +++ b/docs/design/README.md @@ -0,0 +1,5 @@ +## Design + +### Observability + +- [Metrics](./metrics.md): Guidelines on metrics and traces. \ No newline at end of file diff --git a/docs/design/metrics.md b/docs/design/metrics.md new file mode 100644 index 000000000..1a7d3dcdb --- /dev/null +++ b/docs/design/metrics.md @@ -0,0 +1,61 @@ +## Metrics + +### Metrics or traces? + +A **metric** is a numeric representation of data measured over intervals of time.
Metrics are malleable to statistical transformations such as sampling, aggregation and correlation, which make them suited to report the overall health of a system. + +A **trace** is a representation of a series of causally related distributed events that encode information about the end-to-end request flow through a distributed system. Traces are used to identify the amount of work done at each layer in an application while preserving causality. + +The main difference between metrics and traces is therefore that metrics are system-centric and traces are request-centric: metrics give you insight into how a particular system is doing, while traces help teams identify the path of requests through various services. + +**For most things, you likely want a metric**, except for two scenarios: + +- For contributors, traces are a good profiling tool +- For end-users that run complicated infrastructure, traces in the RPC component make sense + +### How to add a metric + +To add metrics use the [`metrics`][metrics] crate. + +#### Metric anatomy + +There are three types of metrics: + +- **Counters**: Represent (ideally) monotonically increasing values, e.g. the number of errors that have occurred, the number of blocks processed, etc. +- **Gauges**: Represent metrics that can go up or down arbitrarily over time. Usually they are used to measure things like resource usage (memory, CPU, ...) and throughput. +- **Histograms**: Used to store an arbitrary number of observations of a specific measurement, and provide statistical analysis over the observed values. A typical use case is latency of some operation (writing to disk, responding to a request, ...). + +Each metric is identified by a [`Key`][metrics.Key], which itself is composed of a [`KeyName`][metrics.KeyName] and an arbitrary number of [`Label`][metrics.Label]s. + +The `KeyName` represents the actual metric name, and the labels are used to further drill down into the metric.
+ +For example, a metric that represents stage progress would have a key name of `stage.progress` and a `stage` label that can be used to get the progress of individual stages. + +There will only ever exist one description per metric `KeyName`; it is not possible to add a description for a label, or a `KeyName`/`Label` group. + +#### Creating metrics + +The `metrics` crate provides three macros per metric variant: `register_<metric>!`, `<metric>!`, and `describe_<metric>!`. Prefer to use these where possible, since they generate the code necessary to register and update metrics under various conditions. + +- The `register_<metric>!` macro simply creates the metric and returns a handle to it (e.g. a `Counter`). These metric structs are thread-safe and cheap to clone. +- The `<metric>!` macro registers the metric if it does not exist, and updates its value. +- The `describe_<metric>!` macro adds an end-user description for the metric. + +How the metrics are exposed to the end-user is determined by the CLI. + +### Metric best practices + +- Use `.` to namespace metrics + - The top-level namespace should **NOT** be `reth`[^1] +- Metric names should not contain spaces +- Add a unit to the metric where appropriate + - Use the Prometheus [base units][prom_base_units] + +[^1]: The top-level namespace is added by the CLI using [`metrics_util::layers::PrefixLayer`][metrics_util.PrefixLayer].
+ +[metrics]: https://docs.rs/metrics +[metrics.Key]: https://docs.rs/metrics/latest/metrics/struct.Key.html +[metrics.KeyName]: https://docs.rs/metrics/latest/metrics/struct.KeyName.html +[metrics.Label]: https://docs.rs/metrics/latest/metrics/struct.Label.html +[prom_base_units]: https://prometheus.io/docs/practices/naming/#base-units +[metrics_util.PrefixLayer]: https://docs.rs/metrics-util/latest/metrics_util/layers/struct.PrefixLayer.html \ No newline at end of file diff --git a/docs/repo/README.md b/docs/repo/README.md new file mode 100644 index 000000000..dca54ce93 --- /dev/null +++ b/docs/repo/README.md @@ -0,0 +1,11 @@ +## Repository and Project Structure + +### Planning + +Documents on planning and process in the repository: what the labels mean, how issues are triaged, how a new release is cut and so on. + +- [Labels](./labels.md): Describes the labels in the repository. + +### Structure + +- [Layout](./layout.md): Describes each of the crates in the repository and their function. \ No newline at end of file