iota_aws_orchestrator/
monitor.rs

1// Copyright (c) Mysten Labs, Inc.
2// Modifications Copyright (c) 2024 IOTA Stiftung
3// SPDX-License-Identifier: Apache-2.0
4
5use std::{fs, net::SocketAddr, path::PathBuf};
6
7use crate::{
8    client::Instance,
9    error::{MonitorError, MonitorResult},
10    protocol::ProtocolMetrics,
11    ssh::{CommandContext, SshConnectionManager},
12};
13
14pub struct Monitor {
15    instance: Instance,
16    clients: Vec<Instance>,
17    nodes: Vec<Instance>,
18    ssh_manager: SshConnectionManager,
19}
20
21impl Monitor {
22    /// Create a new monitor.
23    pub fn new(
24        instance: Instance,
25        clients: Vec<Instance>,
26        nodes: Vec<Instance>,
27        ssh_manager: SshConnectionManager,
28    ) -> Self {
29        Self {
30            instance,
31            clients,
32            nodes,
33            ssh_manager,
34        }
35    }
36
37    /// Dependencies to install.
38    pub fn dependencies() -> Vec<&'static str> {
39        let mut commands = Vec::new();
40        commands.extend(Prometheus::install_commands());
41        commands.extend(Grafana::install_commands());
42        commands
43    }
44
45    /// Start a prometheus instance on each remote machine.
46    pub async fn start_prometheus<P: ProtocolMetrics>(
47        &self,
48        protocol_commands: &P,
49    ) -> MonitorResult<()> {
50        let instance = std::iter::once(self.instance.clone());
51        let commands =
52            Prometheus::setup_commands(self.clients.clone(), self.nodes.clone(), protocol_commands);
53        self.ssh_manager
54            .execute(instance, commands, CommandContext::default())
55            .await?;
56        Ok(())
57    }
58
59    /// Start grafana on the local host.
60    pub async fn start_grafana(&self) -> MonitorResult<()> {
61        // Configure and reload grafana.
62        let instance = std::iter::once(self.instance.clone());
63        let commands = Grafana::setup_commands();
64        self.ssh_manager
65            .execute(instance, commands, CommandContext::default())
66            .await?;
67
68        Ok(())
69    }
70
71    /// The public address of the grafana instance.
72    pub fn grafana_address(&self) -> String {
73        format!("http://{}:{}", self.instance.main_ip, Grafana::DEFAULT_PORT)
74    }
75}
76
77/// Generate the commands to setup prometheus on the given instances.
78/// TODO: Modify the configuration to also get client metrics.
79pub struct Prometheus;
80
81impl Prometheus {
82    /// The default prometheus configuration path.
83    const DEFAULT_PROMETHEUS_CONFIG_PATH: &'static str = "/etc/prometheus/prometheus.yml";
84    /// The default prometheus port.
85    pub const DEFAULT_PORT: u16 = 9090;
86
87    /// The commands to install prometheus.
88    pub fn install_commands() -> Vec<&'static str> {
89        vec![
90            "sudo apt-get -y install prometheus",
91            "sudo chmod 777 -R /var/lib/prometheus/ /etc/prometheus/",
92        ]
93    }
94
95    /// Generate the commands to update the prometheus configuration and restart
96    /// prometheus.
97    pub fn setup_commands<I, P>(clients: I, nodes: I, protocol: &P) -> String
98    where
99        I: IntoIterator<Item = Instance>,
100        P: ProtocolMetrics,
101    {
102        // Generate the prometheus' global configuration.
103        let mut config = vec![Self::global_configuration()];
104
105        // Add configurations to scrape the clients.
106        let clients_metrics_path = protocol.clients_metrics_path(clients);
107        for (i, (_, clients_metrics_path)) in clients_metrics_path.into_iter().enumerate() {
108            let id = format!("client-{i}");
109            let scrape_config = Self::scrape_configuration(&id, &clients_metrics_path);
110            config.push(scrape_config);
111        }
112
113        // Add configurations to scrape the nodes.
114        let nodes_metrics_path = protocol.nodes_metrics_path(nodes);
115        for (i, (_, nodes_metrics_path)) in nodes_metrics_path.into_iter().enumerate() {
116            let id = format!("node-{i}");
117            let scrape_config = Self::scrape_configuration(&id, &nodes_metrics_path);
118            config.push(scrape_config);
119        }
120
121        // Make the command to configure and restart prometheus.
122        [
123            &format!(
124                "sudo echo \"{}\" > {}",
125                config.join("\n"),
126                Self::DEFAULT_PROMETHEUS_CONFIG_PATH
127            ),
128            "sudo service prometheus restart",
129        ]
130        .join(" && ")
131    }
132
133    /// Generate the global prometheus configuration.
134    /// NOTE: The configuration file is a yaml file so spaces are important.
135    fn global_configuration() -> String {
136        [
137            "global:",
138            "  scrape_interval: 5s",
139            "  evaluation_interval: 5s",
140            "scrape_configs:",
141        ]
142        .join("\n")
143    }
144
145    /// Generate the prometheus configuration from the given metrics path.
146    /// NOTE: The configuration file is a yaml file so spaces are important.
147    fn scrape_configuration(id: &str, nodes_metrics_path: &str) -> String {
148        let parts: Vec<_> = nodes_metrics_path.split('/').collect();
149        let address = parts[0].parse::<SocketAddr>().unwrap();
150        let ip = address.ip();
151        let port = address.port();
152        let path = parts[1];
153
154        [
155            &format!("  - job_name: {id}"),
156            &format!("    metrics_path: /{path}"),
157            "    static_configs:",
158            "      - targets:",
159            &format!("        - {ip}:{port}"),
160        ]
161        .join("\n")
162    }
163}
164
165pub struct Grafana;
166
167impl Grafana {
168    /// The path to the datasources directory.
169    const DATASOURCES_PATH: &'static str = "/etc/grafana/provisioning/datasources";
170    /// The default grafana port.
171    pub const DEFAULT_PORT: u16 = 3000;
172
173    /// The commands to install prometheus.
174    pub fn install_commands() -> Vec<&'static str> {
175        vec![
176            "sudo apt-get install -y apt-transport-https software-properties-common wget",
177            "sudo wget -q -O /usr/share/keyrings/grafana.key https://apt.grafana.com/gpg.key",
178            "(sudo rm /etc/apt/sources.list.d/grafana.list || true)",
179            "echo \"deb [signed-by=/usr/share/keyrings/grafana.key] https://apt.grafana.com stable main\" | sudo tee -a /etc/apt/sources.list.d/grafana.list",
180            "sudo apt-get update",
181            "sudo apt-get install -y grafana",
182            "sudo chmod 777 -R /etc/grafana/",
183        ]
184    }
185
186    /// Generate the commands to update the grafana datasource and restart
187    /// grafana.
188    pub fn setup_commands() -> String {
189        [
190            &format!("(rm -r {} || true)", Self::DATASOURCES_PATH),
191            &format!("mkdir -p {}", Self::DATASOURCES_PATH),
192            &format!(
193                "sudo echo \"{}\" > {}/testbed.yml",
194                Self::datasource(),
195                Self::DATASOURCES_PATH
196            ),
197            "sudo service grafana-server restart",
198        ]
199        .join(" && ")
200    }
201
202    /// Generate the content of the datasource file for the given instance.
203    /// NOTE: The datasource file is a yaml file so spaces are important.
204    fn datasource() -> String {
205        [
206            "apiVersion: 1",
207            "deleteDatasources:",
208            "  - name: testbed",
209            "    orgId: 1",
210            "datasources:",
211            "  - name: testbed",
212            "    type: prometheus",
213            "    access: proxy",
214            "    orgId: 1",
215            &format!("    url: http://localhost:{}", Prometheus::DEFAULT_PORT),
216            "    editable: true",
217            "    uid: Fixed-UID-testbed",
218        ]
219        .join("\n")
220    }
221}
222
223/// Bootstrap the grafana with datasource to connect to the given instances.
224/// NOTE: Only for macOS. Grafana must be installed through homebrew (and not
225/// from source). Deeper grafana configuration can be done through the
226/// grafana.ini file (/opt/homebrew/etc/grafana/grafana.ini) or the plist file
227/// (~/Library/LaunchAgents/homebrew.mxcl.grafana.plist).
228pub struct LocalGrafana;
229
230#[expect(dead_code)]
231impl LocalGrafana {
232    /// The default grafana home directory (macOS, homebrew install).
233    const DEFAULT_GRAFANA_HOME: &'static str = "/opt/homebrew/opt/grafana/share/grafana/";
234    /// The path to the datasources directory.
235    const DATASOURCES_PATH: &'static str = "conf/provisioning/datasources/";
236    /// The default grafana port.
237    pub const DEFAULT_PORT: u16 = 3000;
238
239    /// Configure grafana to connect to the given instances. Only for macOS.
240    pub fn run<I>(instances: I) -> MonitorResult<()>
241    where
242        I: IntoIterator<Item = Instance>,
243    {
244        let path: PathBuf = [Self::DEFAULT_GRAFANA_HOME, Self::DATASOURCES_PATH]
245            .iter()
246            .collect();
247
248        // Remove the old datasources.
249        fs::remove_dir_all(&path).unwrap();
250        fs::create_dir(&path).unwrap();
251
252        // Create the new datasources.
253        for (i, instance) in instances.into_iter().enumerate() {
254            let mut file = path.clone();
255            file.push(format!("instance-{}.yml", i));
256            fs::write(&file, Self::datasource(&instance, i)).map_err(|e| {
257                MonitorError::Grafana(format!("Failed to write grafana datasource ({e})"))
258            })?;
259        }
260
261        // Restart grafana.
262        std::process::Command::new("brew")
263            .arg("services")
264            .arg("restart")
265            .arg("grafana")
266            .arg("-q")
267            .spawn()
268            .map_err(|e| MonitorError::Grafana(e.to_string()))?;
269
270        Ok(())
271    }
272
273    /// Generate the content of the datasource file for the given instance. This
274    /// grafana instance takes one datasource per instance and assumes one
275    /// prometheus server runs per instance. NOTE: The datasource file is a
276    /// yaml file so spaces are important.
277    fn datasource(instance: &Instance, index: usize) -> String {
278        [
279            "apiVersion: 1",
280            "deleteDatasources:",
281            &format!("  - name: instance-{index}"),
282            "    orgId: 1",
283            "datasources:",
284            &format!("  - name: instance-{index}"),
285            "    type: prometheus",
286            "    access: proxy",
287            "    orgId: 1",
288            &format!(
289                "    url: http://{}:{}",
290                instance.main_ip,
291                Prometheus::DEFAULT_PORT
292            ),
293            "    editable: true",
294            &format!("    uid: UID-{index}"),
295        ]
296        .join("\n")
297    }
298}