iota_metrics/
thread_stall_monitor.rs1use std::sync::Once;
6
7use tracing::{error, info};
8
9use crate::{get_metrics, spawn_logged_monitored_task};
10
/// One-shot guard used by `start_thread_stall_monitor` so that only its first
/// invocation proceeds past the guard.
static THREAD_STALL_MONITOR: Once = Once::new();

/// Sleep period of the monitor loop. The same value is used as the threshold
/// above which the extra wake-up delay is reported as a stall.
const MONITOR_INTERVAL: std::time::Duration = std::time::Duration::from_millis(500);
14
15pub fn start_thread_stall_monitor() {
19 let mut called = true;
20 THREAD_STALL_MONITOR.call_once(|| {
21 called = false;
22 });
23 if called {
24 return;
25 }
26 if tokio::runtime::Handle::try_current().is_err() {
27 info!("Not running in a tokio runtime, not starting thread stall monitor.");
28 return;
29 }
30
31 spawn_logged_monitored_task!(
32 async move {
33 let Some(metrics) = get_metrics() else {
34 info!("Metrics uninitialized, not starting thread stall monitor.");
35 return;
36 };
37 let mut last_sleep_time = tokio::time::Instant::now();
38 loop {
39 tokio::time::sleep(MONITOR_INTERVAL).await;
40 let current_time = tokio::time::Instant::now();
41 let stalled_duration = current_time - last_sleep_time - MONITOR_INTERVAL;
42 last_sleep_time = current_time;
43 if stalled_duration > MONITOR_INTERVAL {
44 metrics
45 .thread_stall_duration_sec
46 .observe(stalled_duration.as_secs_f64());
47 error!(
49 "Thread stalled for {}s. Possible causes include CPU overload or too much blocking calls.",
50 stalled_duration.as_secs_f64()
51 );
52 }
53 }
54 },
55 "ThreadStallMonitor"
56 );
57}