From aaf73fd67960c5ab4a09a3fdb6f80f0b6810b1b1 Mon Sep 17 00:00:00 2001 From: Bjerg Date: Mon, 20 Mar 2023 21:50:32 +0100 Subject: [PATCH] feat: database metrics (#1870) --- Cargo.lock | 6 +- bin/reth/Cargo.toml | 28 ++- bin/reth/src/node/mod.rs | 12 +- bin/reth/src/prometheus_exporter.rs | 112 ++++++++++- bin/reth/src/stage/mod.rs | 10 +- etc/grafana/dashboards/overview.json | 291 +++++++++++++++++++++++++-- 6 files changed, 405 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34d31998a..d1b92e0b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2675,9 +2675,9 @@ dependencies = [ [[package]] name = "hyper" -version = "0.14.24" +version = "0.14.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e011372fa0b68db8350aa7a248930ecc7839bf46d8485577d69f117a75f164c" +checksum = "cc5e554ff619822309ffd57d8734d77cd5ce6238bc956f037ea06c58238c9899" dependencies = [ "bytes", "futures-channel", @@ -4543,7 +4543,9 @@ dependencies = [ "fdlimit", "futures", "human_bytes", + "hyper", "jsonrpsee", + "metrics", "metrics-exporter-prometheus", "metrics-util", "proptest", diff --git a/bin/reth/Cargo.toml b/bin/reth/Cargo.toml index dc548fd25..421b2477c 100644 --- a/bin/reth/Cargo.toml +++ b/bin/reth/Cargo.toml @@ -42,22 +42,30 @@ shellexpand = "3.0.0" dirs-next = "2.0.0" confy = "0.5" -# rpc/metrics -metrics-exporter-prometheus = { version = "0.11.0", features = ["http-listener"] } +# metrics +metrics = "0.20.1" +metrics-exporter-prometheus = "0.11.0" metrics-util = "0.14.0" # test vectors generation proptest = "1.0" -# misc -eyre = "0.6.8" -clap = { version = "4", features = ["derive", "cargo"] } -tokio = { version = "1.21", features = ["sync", "macros", "rt-multi-thread"] } -futures = "0.3.25" -tempfile = { version = "3.3.0" } -backon = "0.4" +# tui comfy-table = "6.1.4" crossterm = "0.25.0" tui = "0.19.0" -jsonrpsee = { version = "0.16", features = ["server"] } human_bytes = "0.4.1" + +# async +tokio = { version = "1.21", features = ["sync", "macros", "rt-multi-thread"] } +futures = "0.3.25" + +# http/rpc +hyper = "0.14.25" +jsonrpsee = { version = "0.16", features = ["server"] } + +# misc +eyre = "0.6.8" +clap = { version = "4", features = ["derive", "cargo"] } +tempfile = { version = "3.3.0" } +backon = "0.4" diff --git a/bin/reth/src/node/mod.rs b/bin/reth/src/node/mod.rs index bf5954549..0cc75b887 100644 --- a/bin/reth/src/node/mod.rs +++ b/bin/reth/src/node/mod.rs @@ -161,7 +161,6 @@ pub struct Command { impl Command { /// Execute `node` command - // TODO: RPC pub async fn execute(self, ctx: CliContext) -> eyre::Result<()> { info!(target: "reth::cli", "reth {} starting", crate_version!()); @@ -177,7 +176,7 @@ impl Command { let shareable_db = ShareableDatabase::new(Arc::clone(&db), Arc::clone(&self.chain)); info!(target: "reth::cli", "Database opened"); - self.start_metrics_endpoint()?; + self.start_metrics_endpoint(Arc::clone(&db)).await?; debug!(target: "reth::cli", chain=%self.chain.chain, genesis=?self.chain.genesis_hash(), "Initializing genesis"); @@ -338,13 +337,14 @@ impl Command { } } - fn start_metrics_endpoint(&self) -> eyre::Result<()> { + async fn start_metrics_endpoint(&self, db: Arc>) -> eyre::Result<()> { if let Some(listen_addr) = self.metrics { info!(target: "reth::cli", addr = %listen_addr, "Starting metrics endpoint"); - prometheus_exporter::initialize(listen_addr) - } else { - Ok(()) + + prometheus_exporter::initialize_with_db_metrics(listen_addr, db).await?; } + + Ok(()) } fn init_engine_api( diff --git a/bin/reth/src/prometheus_exporter.rs b/bin/reth/src/prometheus_exporter.rs index 596ce8ee3..aa0947ac6 100644 --- a/bin/reth/src/prometheus_exporter.rs +++ b/bin/reth/src/prometheus_exporter.rs @@ -1,16 +1,36 @@ //! Prometheus exporter - use eyre::WrapErr; -use metrics_exporter_prometheus::PrometheusBuilder; +use hyper::{ + service::{make_service_fn, service_fn}, + Body, Request, Response, Server, +}; +use metrics::Unit; +use metrics_exporter_prometheus::{PrometheusBuilder, PrometheusHandle}; use metrics_util::layers::{PrefixLayer, Stack}; -use std::net::SocketAddr; +use reth_db::{ + database::Database, + mdbx::{Env, WriteMap}, + tables, +}; +use std::{convert::Infallible, net::SocketAddr, sync::Arc}; -pub(crate) fn initialize(listen_addr: SocketAddr) -> eyre::Result<()> { - let (recorder, exporter) = PrometheusBuilder::new() - .with_http_listener(listen_addr) - .build() - .wrap_err("Could not build Prometheus endpoint.")?; - tokio::task::spawn(exporter); +/// Installs Prometheus as the metrics recorder and serves it over HTTP with a hook. +/// +/// The hook is called every time the metrics are requested at the given endpoint, and can be used +/// to record values for pull-style metrics, i.e. metrics that are not automatically updated. +pub(crate) async fn initialize_with_hook( + listen_addr: SocketAddr, + hook: F, +) -> eyre::Result<()> { + let recorder = PrometheusBuilder::new().build_recorder(); + let handle = recorder.handle(); + + // Start endpoint + start_endpoint(listen_addr, handle, Arc::new(hook)) + .await + .wrap_err("Could not start Prometheus endpoint")?; + + // Build metrics stack Stack::new(recorder) .push(PrefixLayer::new("reth")) .install() @@ -18,3 +38,77 @@ pub(crate) fn initialize(listen_addr: SocketAddr) -> eyre::Result<()> { Ok(()) } + +/// Starts an endpoint at the given address to serve Prometheus metrics. +async fn start_endpoint( + listen_addr: SocketAddr, + handle: PrometheusHandle, + hook: Arc, +) -> eyre::Result<()> { + let make_svc = make_service_fn(move |_| { + let handle = handle.clone(); + let hook = Arc::clone(&hook); + async move { + Ok::<_, Infallible>(service_fn(move |_: Request| { + (hook)(); + let metrics = handle.render(); + async move { Ok::<_, Infallible>(Response::new(Body::from(metrics))) } + })) + } + }); + let server = + Server::try_bind(&listen_addr).wrap_err("Could not bind to address")?.serve(make_svc); + + tokio::spawn(async move { server.await.expect("Metrics endpoint crashed") }); + + Ok(()) +} + +/// Installs Prometheus as the metrics recorder and serves it over HTTP with database metrics. +pub(crate) async fn initialize_with_db_metrics( + listen_addr: SocketAddr, + db: Arc>, +) -> eyre::Result<()> { + let db_stats = move || { + // TODO: A generic stats abstraction for other DB types to deduplicate this and `reth db + // stats` + let _ = db.view(|tx| { + for table in tables::TABLES.iter().map(|(_, name)| name) { + let table_db = + tx.inner.open_db(Some(table)).wrap_err("Could not open db.")?; + + let stats = tx + .inner + .db_stat(&table_db) + .wrap_err(format!("Could not find table: {table}"))?; + + let page_size = stats.page_size() as usize; + let leaf_pages = stats.leaf_pages(); + let branch_pages = stats.branch_pages(); + let overflow_pages = stats.overflow_pages(); + let num_pages = leaf_pages + branch_pages + overflow_pages; + let table_size = page_size * num_pages; + + metrics::absolute_counter!("db.table_size", table_size as u64, "table" => *table); + metrics::absolute_counter!("db.table_pages", leaf_pages as u64, "table" => *table, "type" => "leaf"); + metrics::absolute_counter!("db.table_pages", branch_pages as u64, "table" => *table, "type" => "branch"); + metrics::absolute_counter!("db.table_pages", overflow_pages as u64, "table" => *table, "type" => "overflow"); + } + + Ok::<(), eyre::Report>(()) + }); + }; + + initialize_with_hook(listen_addr, db_stats).await?; + + // We describe the metrics after the recorder is installed, otherwise this information is not + // registered + metrics::describe_counter!( + "db.table_size", + Unit::Bytes, + "The size of a database table (in bytes)" + ); + metrics::describe_counter!("db.table_pages", "The number of database pages for a table"); + + Ok(()) +} diff --git a/bin/reth/src/stage/mod.rs b/bin/reth/src/stage/mod.rs index 3d18769ba..fcee34f1c 100644 --- a/bin/reth/src/stage/mod.rs +++ b/bin/reth/src/stage/mod.rs @@ -93,11 +93,6 @@ impl Command { // Does not do anything on windows. fdlimit::raise_fd_limit(); - if let Some(listen_addr) = self.metrics { - info!(target: "reth::cli", "Starting metrics endpoint at {}", listen_addr); - prometheus_exporter::initialize(listen_addr)?; - } - let config: Config = confy::load_path(&self.config).unwrap_or_default(); info!(target: "reth::cli", "reth {} starting stage {:?}", clap::crate_version!(), self.stage); @@ -111,6 +106,11 @@ impl Command { let db = Arc::new(init_db(&self.db)?); let mut tx = Transaction::new(db.as_ref())?; + if let Some(listen_addr) = self.metrics { + info!(target: "reth::cli", "Starting metrics endpoint at {}", listen_addr); + prometheus_exporter::initialize_with_db_metrics(listen_addr, Arc::clone(&db)).await?; + } + let num_blocks = self.to - self.from + 1; match self.stage { diff --git a/etc/grafana/dashboards/overview.json b/etc/grafana/dashboards/overview.json index e0e29aa13..836c2451c 100644 --- a/etc/grafana/dashboards/overview.json +++ b/etc/grafana/dashboards/overview.json @@ -35,6 +35,12 @@ "name": "Heatmap", "version": "" }, + { + "type": "panel", + "id": "piechart", + "name": "Pie chart", + "version": "" + }, { "type": "datasource", "id": "prometheus", @@ -509,13 +515,248 @@ "title": "Commit time heatmap", "type": "heatmap" }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "The size of tables in the database", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 48, + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.3.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "builder", + "expr": "reth_db_table_size", + "interval": "", + "legendFormat": "{{table}}", + "range": true, + "refId": "A" + } + ], + "title": "Database tables", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "The size of the database over time", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 52, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (job) ( reth_db_table_size )", + "legendFormat": "Size ({{job}})", + "range": true, + "refId": "A" + } + ], + "title": "Database growth", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "The type of the pages in the database:\n\n- **Leaf** pages contain KV pairs.\n- **Branch** pages contain information about keys in the leaf pages\n- **Overflow** pages store large values and should generally be avoided if possible", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 50, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "value" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "builder", + "expr": "sum by (type) ( reth_db_table_pages )", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Database pages", + "type": "piechart" + }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 33 }, "id": 46, "panels": [], @@ -581,7 +822,7 @@ "h": 8, "w": 12, "x": 0, - "y": 18 + "y": 34 }, "id": 44, "options": { @@ -618,7 +859,7 @@ "h": 1, "w": 24, "x": 0, - "y": 26 + "y": 42 }, "id": 6, "panels": [], @@ -687,7 +928,7 @@ "h": 8, "w": 8, "x": 0, - "y": 27 + "y": 43 }, "id": 18, "options": { @@ -780,7 +1021,7 @@ "h": 8, "w": 8, "x": 8, - "y": 27 + "y": 43 }, "id": 16, "options": { @@ -898,7 +1139,7 @@ "h": 8, "w": 8, "x": 16, - "y": 27 + "y": 43 }, "id": 8, "options": { @@ -959,7 +1200,7 @@ "h": 1, "w": 24, "x": 0, - "y": 35 + "y": 51 }, "id": 24, "panels": [], @@ -1011,7 +1252,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1051,7 +1293,7 @@ "h": 8, "w": 12, "x": 0, - "y": 36 + "y": 52 }, "id": 26, "options": { @@ -1165,7 +1407,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1180,7 +1423,7 @@ "h": 8, "w": 12, "x": 12, - "y": 36 + "y": 52 }, "id": 33, "options": { @@ -1281,7 +1524,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1296,7 +1540,7 @@ "h": 8, "w": 12, "x": 0, - "y": 44 + "y": 60 }, "id": 36, "options": { @@ -1345,7 +1589,7 @@ "h": 1, "w": 24, "x": 0, - "y": 52 + "y": 68 }, "id": 32, "panels": [], @@ -1398,7 +1642,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1438,7 +1683,7 @@ "h": 8, "w": 12, "x": 0, - "y": 53 + "y": 69 }, "id": 30, "options": { @@ -1564,7 +1809,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null } ] } @@ -1575,7 +1821,7 @@ "h": 8, "w": 12, "x": 12, - "y": 53 + "y": 69 }, "id": 28, "options": { @@ -1655,7 +1901,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1670,7 +1917,7 @@ "h": 8, "w": 12, "x": 0, - "y": 61 + "y": 77 }, "id": 35, "options": { @@ -1722,13 +1969,13 @@ "list": [] }, "time": { - "from": "now-6h", + "from": "now-1h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "reth", "uid": "2k8BXz24k", - "version": 2, + "version": 5, "weekStart": "" }