consensus_core/network/
metrics.rs

1// Copyright (c) Mysten Labs, Inc.
2// Modifications Copyright (c) 2024 IOTA Stiftung
3// SPDX-License-Identifier: Apache-2.0
4
5use std::sync::Arc;
6
7use prometheus::{
8    HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, Registry,
9    register_histogram_vec_with_registry, register_int_counter_vec_with_registry,
10    register_int_gauge_vec_with_registry, register_int_gauge_with_registry,
11};
12
// Fields for network-agnostic metrics can be added here
/// Container for all network-related metrics of this module.
///
/// Every metric held here is registered against the `Registry` passed to
/// [`NetworkMetrics::new`].
pub(crate) struct NetworkMetrics {
    /// Gauge labelled by `type`, registered under the name `network_type`.
    pub(crate) network_type: IntGaugeVec,
    /// Per-route metrics whose names carry the `inbound_` prefix.
    pub(crate) inbound: Arc<NetworkRouteMetrics>,
    /// Per-route metrics whose names carry the `outbound_` prefix.
    pub(crate) outbound: Arc<NetworkRouteMetrics>,
    /// TCP socket buffer gauges.
    // NOTE(review): dead-code warnings are silenced under `cfg(msim)`,
    // presumably because simulator builds never read these gauges — confirm.
    #[cfg_attr(msim, allow(dead_code))]
    pub(crate) tcp_connection_metrics: Arc<TcpConnectionMetrics>,
}
21
22impl NetworkMetrics {
23    pub(crate) fn new(registry: &Registry) -> Self {
24        Self {
25            network_type: register_int_gauge_vec_with_registry!(
26                "network_type",
27                "Type of the network used: tonic",
28                &["type"],
29                registry
30            )
31            .unwrap(),
32            inbound: Arc::new(NetworkRouteMetrics::new("inbound", registry)),
33            outbound: Arc::new(NetworkRouteMetrics::new("outbound", registry)),
34            tcp_connection_metrics: Arc::new(TcpConnectionMetrics::new(registry)),
35        }
36    }
37}
38
/// Gauges describing the buffer sizes of TCP sockets.
///
/// All four gauges are registered under names prefixed with `tcp_socket_`
/// by [`TcpConnectionMetrics::new`].
// NOTE(review): dead-code warnings are silenced under `cfg(msim)`, presumably
// because simulator builds do not update these gauges — confirm.
#[cfg_attr(msim, allow(dead_code))]
pub(crate) struct TcpConnectionMetrics {
    /// Send buffer size of consensus TCP socket.
    pub(crate) socket_send_buffer_size: IntGauge,
    /// Receive buffer size of consensus TCP socket.
    pub(crate) socket_recv_buffer_size: IntGauge,
    /// Max send buffer size of TCP socket.
    pub(crate) socket_send_buffer_max_size: IntGauge,
    /// Max receive buffer size of TCP socket.
    pub(crate) socket_recv_buffer_max_size: IntGauge,
}
50
51impl TcpConnectionMetrics {
52    pub fn new(registry: &Registry) -> Self {
53        Self {
54            socket_send_buffer_size: register_int_gauge_with_registry!(
55                "tcp_socket_send_buffer_size",
56                "Send buffer size of consensus TCP socket.",
57                registry
58            )
59            .unwrap(),
60            socket_recv_buffer_size: register_int_gauge_with_registry!(
61                "tcp_socket_recv_buffer_size",
62                "Receive buffer size of consensus TCP socket.",
63                registry
64            )
65            .unwrap(),
66            socket_send_buffer_max_size: register_int_gauge_with_registry!(
67                "tcp_socket_send_buffer_max_size",
68                "Max send buffer size of TCP socket.",
69                registry
70            )
71            .unwrap(),
72            socket_recv_buffer_max_size: register_int_gauge_with_registry!(
73                "tcp_socket_recv_buffer_max_size",
74                "Max receive buffer size of TCP socket.",
75                registry
76            )
77            .unwrap(),
78        }
79    }
80}
81
/// Per-route metrics for one traffic direction.
///
/// [`NetworkRouteMetrics::new`] prefixes every metric name with the
/// `direction` string it is given and labels every metric by `route`;
/// `errors` additionally carries a `status` label.
#[derive(Clone)]
pub(crate) struct NetworkRouteMetrics {
    /// Counter of requests by route
    pub requests: IntCounterVec,
    /// Request latency by route
    // NOTE(review): bucket constant name suggests the unit is seconds — confirm at call sites.
    pub request_latency: HistogramVec,
    /// Request size by route
    // NOTE(review): bucket constant name suggests the unit is bytes — confirm at call sites.
    pub request_size: HistogramVec,
    /// Response size by route
    pub response_size: HistogramVec,
    /// Counter of requests exceeding the "excessive" size limit
    // NOTE(review): the limit itself is not defined in this file.
    pub excessive_size_requests: IntCounterVec,
    /// Counter of responses exceeding the "excessive" size limit
    pub excessive_size_responses: IntCounterVec,
    /// Gauge of the number of inflight requests at any given time by route
    pub inflight_requests: IntGaugeVec,
    /// Failed requests by route
    pub errors: IntCounterVec,
}
101
/// Upper bounds of the buckets used by the request latency histogram,
/// ranging from 0.001 up to 90.
const LATENCY_SEC_BUCKETS: &[f64] = &[
    // sub-second
    0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5,
    // one second and above
    1.0, 2.5, 5.0, 10.0, 20.0, 30.0, 60.0, 90.0,
];
105
// Arbitrarily chosen buckets for message size, with gradually-lowering exponent
// to give us better resolution at high sizes.
//
// Each trailing `// *k` comment names the factor by which every value in that
// group grows over the value preceding it. The `*1.3` values are rounded to
// whole numbers.
const SIZE_BYTE_BUCKETS: &[f64] = &[
    2048., 8192., // *4
    16384., 32768., 65536., 131072., 262144., 524288., 1048576., // *2
    // 1572864 * 1.5 = 2359296, and 2359296 * 1.5 = 3538944.
    1572864., 2359296., 3538944., // *1.5
    4600627., 5980815., 7775060., 10107578., 13139851., 17081807., 22206349., 28868253., 37528729.,
    48787348., 63423553., // *1.3
];
115
116impl NetworkRouteMetrics {
117    pub fn new(direction: &'static str, registry: &Registry) -> Self {
118        let requests = register_int_counter_vec_with_registry!(
119            format!("{direction}_requests"),
120            "The number of requests made on the network",
121            &["route"],
122            registry
123        )
124        .unwrap();
125
126        let request_latency = register_histogram_vec_with_registry!(
127            format!("{direction}_request_latency"),
128            "Latency of a request by route",
129            &["route"],
130            LATENCY_SEC_BUCKETS.to_vec(),
131            registry,
132        )
133        .unwrap();
134
135        let request_size = register_histogram_vec_with_registry!(
136            format!("{direction}_request_size"),
137            "Size of a request by route",
138            &["route"],
139            SIZE_BYTE_BUCKETS.to_vec(),
140            registry,
141        )
142        .unwrap();
143
144        let response_size = register_histogram_vec_with_registry!(
145            format!("{direction}_response_size"),
146            "Size of a response by route",
147            &["route"],
148            SIZE_BYTE_BUCKETS.to_vec(),
149            registry,
150        )
151        .unwrap();
152
153        let excessive_size_requests = register_int_counter_vec_with_registry!(
154            format!("{direction}_excessive_size_requests"),
155            "The number of excessively large request messages sent",
156            &["route"],
157            registry
158        )
159        .unwrap();
160
161        let excessive_size_responses = register_int_counter_vec_with_registry!(
162            format!("{direction}_excessive_size_responses"),
163            "The number of excessively large response messages seen",
164            &["route"],
165            registry
166        )
167        .unwrap();
168
169        let inflight_requests = register_int_gauge_vec_with_registry!(
170            format!("{direction}_inflight_requests"),
171            "The number of inflight network requests",
172            &["route"],
173            registry
174        )
175        .unwrap();
176
177        let errors = register_int_counter_vec_with_registry!(
178            format!("{direction}_request_errors"),
179            "Number of errors by route",
180            &["route", "status"],
181            registry,
182        )
183        .unwrap();
184
185        Self {
186            requests,
187            request_latency,
188            request_size,
189            response_size,
190            excessive_size_requests,
191            excessive_size_responses,
192            inflight_requests,
193            errors,
194        }
195    }
196}