feat: database metrics (#1870)

This commit is contained in:
Bjerg
2023-03-20 21:50:32 +01:00
committed by GitHub
parent c110ede505
commit aaf73fd679
6 changed files with 405 additions and 54 deletions

6
Cargo.lock generated
View File

@ -2675,9 +2675,9 @@ dependencies = [
[[package]]
name = "hyper"
version = "0.14.24"
version = "0.14.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e011372fa0b68db8350aa7a248930ecc7839bf46d8485577d69f117a75f164c"
checksum = "cc5e554ff619822309ffd57d8734d77cd5ce6238bc956f037ea06c58238c9899"
dependencies = [
"bytes",
"futures-channel",
@ -4543,7 +4543,9 @@ dependencies = [
"fdlimit",
"futures",
"human_bytes",
"hyper",
"jsonrpsee",
"metrics",
"metrics-exporter-prometheus",
"metrics-util",
"proptest",

View File

@ -42,22 +42,30 @@ shellexpand = "3.0.0"
dirs-next = "2.0.0"
confy = "0.5"
# rpc/metrics
metrics-exporter-prometheus = { version = "0.11.0", features = ["http-listener"] }
# metrics
metrics = "0.20.1"
metrics-exporter-prometheus = "0.11.0"
metrics-util = "0.14.0"
# test vectors generation
proptest = "1.0"
# misc
eyre = "0.6.8"
clap = { version = "4", features = ["derive", "cargo"] }
tokio = { version = "1.21", features = ["sync", "macros", "rt-multi-thread"] }
futures = "0.3.25"
tempfile = { version = "3.3.0" }
backon = "0.4"
# tui
comfy-table = "6.1.4"
crossterm = "0.25.0"
tui = "0.19.0"
jsonrpsee = { version = "0.16", features = ["server"] }
human_bytes = "0.4.1"
# async
tokio = { version = "1.21", features = ["sync", "macros", "rt-multi-thread"] }
futures = "0.3.25"
# http/rpc
hyper = "0.14.25"
jsonrpsee = { version = "0.16", features = ["server"] }
# misc
eyre = "0.6.8"
clap = { version = "4", features = ["derive", "cargo"] }
tempfile = { version = "3.3.0" }
backon = "0.4"

View File

@ -161,7 +161,6 @@ pub struct Command {
impl Command {
/// Execute `node` command
// TODO: RPC
pub async fn execute(self, ctx: CliContext) -> eyre::Result<()> {
info!(target: "reth::cli", "reth {} starting", crate_version!());
@ -177,7 +176,7 @@ impl Command {
let shareable_db = ShareableDatabase::new(Arc::clone(&db), Arc::clone(&self.chain));
info!(target: "reth::cli", "Database opened");
self.start_metrics_endpoint()?;
self.start_metrics_endpoint(Arc::clone(&db)).await?;
debug!(target: "reth::cli", chain=%self.chain.chain, genesis=?self.chain.genesis_hash(), "Initializing genesis");
@ -338,13 +337,14 @@ impl Command {
}
}
fn start_metrics_endpoint(&self) -> eyre::Result<()> {
async fn start_metrics_endpoint(&self, db: Arc<Env<WriteMap>>) -> eyre::Result<()> {
if let Some(listen_addr) = self.metrics {
info!(target: "reth::cli", addr = %listen_addr, "Starting metrics endpoint");
prometheus_exporter::initialize(listen_addr)
} else {
Ok(())
prometheus_exporter::initialize_with_db_metrics(listen_addr, db).await?;
}
Ok(())
}
fn init_engine_api(

View File

@ -1,16 +1,36 @@
//! Prometheus exporter
use eyre::WrapErr;
use metrics_exporter_prometheus::PrometheusBuilder;
use hyper::{
service::{make_service_fn, service_fn},
Body, Request, Response, Server,
};
use metrics::Unit;
use metrics_exporter_prometheus::{PrometheusBuilder, PrometheusHandle};
use metrics_util::layers::{PrefixLayer, Stack};
use std::net::SocketAddr;
use reth_db::{
database::Database,
mdbx::{Env, WriteMap},
tables,
};
use std::{convert::Infallible, net::SocketAddr, sync::Arc};
pub(crate) fn initialize(listen_addr: SocketAddr) -> eyre::Result<()> {
let (recorder, exporter) = PrometheusBuilder::new()
.with_http_listener(listen_addr)
.build()
.wrap_err("Could not build Prometheus endpoint.")?;
tokio::task::spawn(exporter);
/// Installs Prometheus as the metrics recorder and serves it over HTTP with a hook.
///
/// The hook is called every time the metrics are requested at the given endpoint, and can be used
/// to record values for pull-style metrics, i.e. metrics that are not automatically updated.
pub(crate) async fn initialize_with_hook<F: Fn() + Send + Sync + 'static>(
listen_addr: SocketAddr,
hook: F,
) -> eyre::Result<()> {
let recorder = PrometheusBuilder::new().build_recorder();
let handle = recorder.handle();
// Start endpoint
start_endpoint(listen_addr, handle, Arc::new(hook))
.await
.wrap_err("Could not start Prometheus endpoint")?;
// Build metrics stack
Stack::new(recorder)
.push(PrefixLayer::new("reth"))
.install()
@ -18,3 +38,77 @@ pub(crate) fn initialize(listen_addr: SocketAddr) -> eyre::Result<()> {
Ok(())
}
/// Serves the rendered Prometheus metrics over HTTP at `listen_addr`.
///
/// Every incoming request, regardless of path or method, first invokes `hook` and then answers
/// with the output of `handle.render()` as the response body.
///
/// The server is driven by a spawned Tokio task, so this function returns as soon as the
/// listener has been bound.
///
/// # Errors
///
/// Returns an error if the server cannot be bound to `listen_addr`.
///
/// # Panics
///
/// The spawned task panics with "Metrics endpoint crashed" if the server future resolves to an
/// error.
async fn start_endpoint<F: Fn() + Send + Sync + 'static>(
    listen_addr: SocketAddr,
    handle: PrometheusHandle,
    hook: Arc<F>,
) -> eyre::Result<()> {
    // Bind first so that address problems are reported to the caller rather than surfacing
    // inside the background task.
    let builder = Server::try_bind(&listen_addr).wrap_err("Could not bind to address")?;

    // One service per connection; each gets its own clones of the handle and the hook.
    let service_factory = make_service_fn(move |_conn| {
        let conn_handle = handle.clone();
        let conn_hook = Arc::clone(&hook);
        async move {
            let respond = move |_req: Request<Body>| {
                // Refresh pull-style metrics before taking the snapshot.
                conn_hook();
                let rendered = conn_handle.render();
                async move { Ok::<_, Infallible>(Response::new(Body::from(rendered))) }
            };
            Ok::<_, Infallible>(service_fn(respond))
        }
    });

    let server = builder.serve(service_factory);
    tokio::spawn(async move { server.await.expect("Metrics endpoint crashed") });

    Ok(())
}
/// Installs Prometheus as the metrics recorder and serves it over HTTP with database metrics.
///
/// A hook is registered via `initialize_with_hook`. When invoked, it opens a read view on `db`,
/// walks every table listed in `tables::TABLES` and records:
///
/// - `db.table_size` (label `table`): the page size multiplied by the sum of the table's leaf,
///   branch and overflow pages.
/// - `db.table_pages` (labels `table` and `type`): the leaf, branch and overflow page counts.
///
/// Both metrics are recorded as gauges: they are point-in-time measurements that may go down as
/// well as up, which a monotonic counter cannot represent. The metric names are unchanged.
///
/// # Errors
///
/// Returns an error if `initialize_with_hook` fails. Errors that occur while collecting the
/// statistics inside the hook are not propagated.
pub(crate) async fn initialize_with_db_metrics(
    listen_addr: SocketAddr,
    db: Arc<Env<WriteMap>>,
) -> eyre::Result<()> {
    let db_stats = move || {
        // TODO: A generic stats abstraction for other DB types to deduplicate this and `reth db
        // stats`
        //
        // Collection is best-effort: the hook has no way to report an error, so a failure is
        // discarded and any table that was not reached keeps its previously recorded values.
        let _ = db.view(|tx| {
            for table in tables::TABLES.iter().map(|(_, name)| name) {
                let table_db =
                    tx.inner.open_db(Some(table)).wrap_err("Could not open db.")?;

                // Build the error message lazily; this runs for every table on every call.
                let stats = tx
                    .inner
                    .db_stat(&table_db)
                    .wrap_err_with(|| format!("Could not find table: {table}"))?;

                // Do the arithmetic in `u64` so the byte size cannot wrap on targets where
                // `usize` is 32 bits wide.
                let page_size = stats.page_size() as u64;
                let leaf_pages = stats.leaf_pages() as u64;
                let branch_pages = stats.branch_pages() as u64;
                let overflow_pages = stats.overflow_pages() as u64;
                let num_pages = leaf_pages + branch_pages + overflow_pages;
                let table_size = page_size.saturating_mul(num_pages);

                metrics::gauge!("db.table_size", table_size as f64, "table" => *table);
                metrics::gauge!("db.table_pages", leaf_pages as f64, "table" => *table, "type" => "leaf");
                metrics::gauge!("db.table_pages", branch_pages as f64, "table" => *table, "type" => "branch");
                metrics::gauge!("db.table_pages", overflow_pages as f64, "table" => *table, "type" => "overflow");
            }

            Ok::<(), eyre::Report>(())
        });
    };

    initialize_with_hook(listen_addr, db_stats).await?;

    // We describe the metrics after the recorder is installed, otherwise this information is not
    // registered
    metrics::describe_gauge!(
        "db.table_size",
        Unit::Bytes,
        "The size of a database table (in bytes)"
    );
    metrics::describe_gauge!("db.table_pages", "The number of database pages for a table");

    Ok(())
}

View File

@ -93,11 +93,6 @@ impl Command {
// Does not do anything on windows.
fdlimit::raise_fd_limit();
if let Some(listen_addr) = self.metrics {
info!(target: "reth::cli", "Starting metrics endpoint at {}", listen_addr);
prometheus_exporter::initialize(listen_addr)?;
}
let config: Config = confy::load_path(&self.config).unwrap_or_default();
info!(target: "reth::cli", "reth {} starting stage {:?}", clap::crate_version!(), self.stage);
@ -111,6 +106,11 @@ impl Command {
let db = Arc::new(init_db(&self.db)?);
let mut tx = Transaction::new(db.as_ref())?;
if let Some(listen_addr) = self.metrics {
info!(target: "reth::cli", "Starting metrics endpoint at {}", listen_addr);
prometheus_exporter::initialize_with_db_metrics(listen_addr, Arc::clone(&db)).await?;
}
let num_blocks = self.to - self.from + 1;
match self.stage {

View File

@ -35,6 +35,12 @@
"name": "Heatmap",
"version": ""
},
{
"type": "panel",
"id": "piechart",
"name": "Pie chart",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
@ -509,13 +515,248 @@
"title": "Commit time heatmap",
"type": "heatmap"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "The size of tables in the database",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": [],
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 17
},
"id": 48,
"options": {
"displayLabels": [
"name"
],
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true,
"values": [
"value"
]
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "9.3.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "builder",
"expr": "reth_db_table_size",
"interval": "",
"legendFormat": "{{table}}",
"range": true,
"refId": "A"
}
],
"title": "Database tables",
"type": "piechart"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "The size of the database over time",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 17
},
"id": 52,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "code",
"expr": "sum by (job) ( reth_db_table_size )",
"legendFormat": "Size ({{job}})",
"range": true,
"refId": "A"
}
],
"title": "Database growth",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"description": "The type of the pages in the database:\n\n- **Leaf** pages contain KV pairs.\n- **Branch** pages contain information about keys in the leaf pages\n- **Overflow** pages store large values and should generally be avoided if possible",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
}
},
"mappings": [],
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 25
},
"id": 50,
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"showLegend": true,
"values": [
"value"
]
},
"pieType": "pie",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "builder",
"expr": "sum by (type) ( reth_db_table_pages )",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Database pages",
"type": "piechart"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 17
"y": 33
},
"id": 46,
"panels": [],
@ -581,7 +822,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 18
"y": 34
},
"id": 44,
"options": {
@ -618,7 +859,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 26
"y": 42
},
"id": 6,
"panels": [],
@ -687,7 +928,7 @@
"h": 8,
"w": 8,
"x": 0,
"y": 27
"y": 43
},
"id": 18,
"options": {
@ -780,7 +1021,7 @@
"h": 8,
"w": 8,
"x": 8,
"y": 27
"y": 43
},
"id": 16,
"options": {
@ -898,7 +1139,7 @@
"h": 8,
"w": 8,
"x": 16,
"y": 27
"y": 43
},
"id": 8,
"options": {
@ -959,7 +1200,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 35
"y": 51
},
"id": 24,
"panels": [],
@ -1011,7 +1252,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1051,7 +1293,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 36
"y": 52
},
"id": 26,
"options": {
@ -1165,7 +1407,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1180,7 +1423,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 36
"y": 52
},
"id": 33,
"options": {
@ -1281,7 +1524,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1296,7 +1540,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 44
"y": 60
},
"id": 36,
"options": {
@ -1345,7 +1589,7 @@
"h": 1,
"w": 24,
"x": 0,
"y": 52
"y": 68
},
"id": 32,
"panels": [],
@ -1398,7 +1642,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1438,7 +1683,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 53
"y": 69
},
"id": 30,
"options": {
@ -1564,7 +1809,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
}
]
}
@ -1575,7 +1821,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 53
"y": 69
},
"id": 28,
"options": {
@ -1655,7 +1901,8 @@
"mode": "absolute",
"steps": [
{
"color": "green"
"color": "green",
"value": null
},
{
"color": "red",
@ -1670,7 +1917,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 61
"y": 77
},
"id": 35,
"options": {
@ -1722,13 +1969,13 @@
"list": []
},
"time": {
"from": "now-6h",
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "reth",
"uid": "2k8BXz24k",
"version": 2,
"version": 5,
"weekStart": ""
}