starfish_config/
parameters.rs

1// Copyright (c) Mysten Labs, Inc.
2// Modifications Copyright (c) 2024 IOTA Stiftung
3// SPDX-License-Identifier: Apache-2.0
4
5use std::{path::PathBuf, time::Duration};
6
7use serde::{Deserialize, Serialize};
8
/// Operational configurations of a consensus authority.
///
/// All fields should tolerate inconsistencies among authorities, without
/// affecting safety of the protocol. Otherwise, they need to be part of IOTA
/// protocol config or epoch state on-chain.
///
/// NOTE: fields with default values are specified in the serde default
/// functions. Most operators should not need to specify any field, except
/// db_path.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Parameters {
    /// Path to consensus DB for this epoch. Required when initializing
    /// consensus. This is calculated based on user configuration for base
    /// directory.
    ///
    /// Excluded from (de)serialization via `#[serde(skip)]`.
    #[serde(skip)]
    pub db_path: PathBuf,

    /// Time to wait for parent round leader before sealing a block, from when
    /// parent round has a quorum.
    #[serde(default = "Parameters::default_leader_timeout")]
    pub leader_timeout: Duration,

    /// Minimum delay between own blocks. This avoids generating too many rounds
    /// when latency is low. This is especially necessary for tests running
    /// locally. If setting a non-default value, it should be set low enough
    /// to avoid reducing round rate and increasing latency in realistic and
    /// distributed configurations.
    #[serde(default = "Parameters::default_min_block_delay")]
    pub min_block_delay: Duration,

    /// Maximum forward time drift (how far in future) allowed for received
    /// blocks.
    #[serde(default = "Parameters::default_max_forward_time_drift")]
    pub max_forward_time_drift: Duration,

    /// Maximum number of block headers to fetch per commit sync request.
    #[serde(default = "Parameters::default_max_headers_per_commit_sync_fetch")]
    pub max_headers_per_commit_sync_fetch: usize,

    /// Maximum number of transactions to fetch per commit sync request.
    #[serde(default = "Parameters::default_max_transactions_per_commit_sync_fetch")]
    pub max_transactions_per_commit_sync_fetch: usize,

    /// Maximum number of block headers to fetch per periodic or live sync
    /// request.
    #[serde(default = "Parameters::default_max_headers_per_regular_sync_fetch")]
    pub max_headers_per_regular_sync_fetch: usize,

    /// Maximum number of transactions to fetch per regular sync request.
    #[serde(default = "Parameters::default_max_transactions_per_regular_sync_fetch")]
    pub max_transactions_per_regular_sync_fetch: usize,

    /// Time to wait during node start up until the node has synced the last
    /// proposed block via the network peers. When set to `0` the sync
    /// mechanism is disabled. This property is meant to be used for amnesia
    /// recovery.
    #[serde(default = "Parameters::default_sync_last_known_own_block_timeout")]
    pub sync_last_known_own_block_timeout: Duration,

    /// The number of rounds of blocks to be kept in the Dag state cache per
    /// authority. The larger the number, the more blocks are kept in memory,
    /// which minimises potential disk access.
    /// Value should be at minimum 50 rounds to ensure node performance, but
    /// being too large can be expensive in memory usage.
    #[serde(default = "Parameters::default_dag_state_cached_rounds")]
    pub dag_state_cached_rounds: u32,

    /// Number of authorities commit syncer fetches in parallel.
    /// Both commits in a range and blocks referenced by the commits are
    /// fetched per authority.
    #[serde(default = "Parameters::default_commit_sync_parallel_fetches")]
    pub commit_sync_parallel_fetches: usize,

    /// Number of commits to fetch in a batch, also the maximum number of
    /// commits returned per fetch. If this value is set too small, fetching
    /// becomes inefficient. If this value is set too large, it can result in
    /// load imbalance and stragglers.
    #[serde(default = "Parameters::default_commit_sync_batch_size")]
    pub commit_sync_batch_size: u32,

    /// This affects the maximum number of commit batches being fetched, and
    /// those fetched but not processed as consensus output, before throttling
    /// of outgoing commit fetches starts.
    #[serde(default = "Parameters::default_commit_sync_batches_ahead")]
    pub commit_sync_batches_ahead: usize,

    /// Maximum number of headers to be included in a bundle. Headers exceeding
    /// the max allowed limit will be truncated.
    #[serde(default = "Parameters::default_max_headers_per_bundle")]
    pub max_headers_per_bundle: usize,

    /// Maximum number of transaction shards to be included in a bundle. Shards
    /// exceeding the max allowed limit will be truncated.
    #[serde(default = "Parameters::default_max_shards_per_bundle")]
    pub max_shards_per_bundle: usize,

    /// Tonic network settings.
    #[serde(default = "TonicParameters::default")]
    pub tonic: TonicParameters,

    /// Number of commits to fetch in a batch for fast commit syncer, also the
    /// maximum number of commits returned per fetch. If this value is set too
    /// small, fetching becomes inefficient. If this value is set too large,
    /// it can result in load imbalance and stragglers.
    #[serde(default = "Parameters::default_fast_commit_sync_batch_size")]
    pub fast_commit_sync_batch_size: u32,

    /// Gap threshold for switching between commit syncers. When the gap
    /// between quorum and local commit index is larger than this threshold,
    /// FastCommitSyncer fetches. Otherwise, CommitSyncer fetches.
    #[serde(default = "Parameters::default_commit_sync_gap_threshold")]
    pub commit_sync_gap_threshold: u32,

    /// Enable FastCommitSyncer for faster recovery from large commit gaps.
    /// This is a local node configuration that works in conjunction with the
    /// protocol-level consensus_fast_commit_sync feature flag. Both must be
    /// enabled for FastCommitSyncer to run. The protocol flag controls
    /// whether gRPC endpoints are available, while this local flag controls
    /// whether this specific node creates and runs the FastCommitSyncer.
    /// Enabled by default; operators can disable it locally if bugs are
    /// discovered, without affecting protocol-level endpoint availability.
    #[serde(default = "Parameters::default_enable_fast_commit_syncer")]
    pub enable_fast_commit_syncer: bool,
}
129
130impl Parameters {
131    pub(crate) fn default_leader_timeout() -> Duration {
132        Duration::from_millis(250)
133    }
134
135    pub(crate) fn default_min_block_delay() -> Duration {
136        if cfg!(msim) || std::env::var("__TEST_ONLY_CONSENSUS_USE_LONG_MIN_BLOCK_DELAY").is_ok() {
137            // Checkpoint building and execution cannot keep up with high commit rate in
138            // simtests, leading to long reconfiguration delays. This is because
139            // simtest is single threaded, and spending too much time in
140            // consensus can lead to starvation elsewhere.
141            Duration::from_millis(400)
142        } else if cfg!(test) {
143            // Avoid excessive CPU, data and logs in tests.
144            Duration::from_millis(250)
145        } else {
146            // For production, use min delay between block being set to 50ms, reducing the
147            // block rate to 20 blocks/sec
148            Duration::from_millis(50)
149        }
150    }
151
152    pub(crate) fn default_max_forward_time_drift() -> Duration {
153        Duration::from_millis(500)
154    }
155
156    // Maximum number of block headers to fetch per commit sync request.
157    pub(crate) fn default_max_headers_per_commit_sync_fetch() -> usize {
158        if cfg!(msim) {
159            // Exercise hitting blocks per fetch limit.
160            10
161        } else {
162            1000
163        }
164    }
165
166    // Maximum number of transactions to fetch per commit sync request.
167    pub(crate) fn default_max_transactions_per_commit_sync_fetch() -> usize {
168        if cfg!(msim) {
169            // Exercise hitting transactions per fetch limit.
170            10
171        } else {
172            1000
173        }
174    }
175
176    // Maximum number of block headers to fetch per periodic or live sync request.
177    pub(crate) fn default_max_headers_per_regular_sync_fetch() -> usize {
178        if cfg!(msim) {
179            // Exercise hitting blocks per fetch limit.
180            10
181        } else {
182            // TODO: This might should match the value of block headers in the bundle.
183            100
184        }
185    }
186
187    // Maximum number of transactions to fetch per request.
188    pub(crate) fn default_max_transactions_per_regular_sync_fetch() -> usize {
189        if cfg!(msim) { 10 } else { 1000 }
190    }
191
192    pub(crate) fn default_sync_last_known_own_block_timeout() -> Duration {
193        if cfg!(msim) {
194            Duration::from_millis(500)
195        } else {
196            // Here we prioritise liveness over the complete de-risking of block
197            // equivocation. 5 seconds in the majority of cases should be good
198            // enough for this given a healthy network.
199            Duration::from_secs(5)
200        }
201    }
202
203    pub(crate) fn default_dag_state_cached_rounds() -> u32 {
204        if cfg!(msim) {
205            // Exercise reading blocks from store.
206            5
207        } else {
208            500
209        }
210    }
211
212    pub(crate) fn default_commit_sync_parallel_fetches() -> usize {
213        8
214    }
215
216    pub(crate) fn default_commit_sync_batch_size() -> u32 {
217        if cfg!(msim) {
218            // Exercise commit sync.
219            5
220        } else {
221            100
222        }
223    }
224
225    pub(crate) fn default_commit_sync_batches_ahead() -> usize {
226        // This is set to be a multiple of default commit_sync_parallel_fetches to allow
227        // fetching ahead, while keeping the total number of inflight fetches
228        // and unprocessed fetched commits limited.
229        32
230    }
231
232    pub(crate) fn default_max_headers_per_bundle() -> usize {
233        150
234    }
235
236    pub(crate) fn default_max_shards_per_bundle() -> usize {
237        150
238    }
239
240    pub(crate) fn default_fast_commit_sync_batch_size() -> u32 {
241        if cfg!(msim) {
242            // Exercise fast commit sync.
243            5
244        } else {
245            // With ~10KB per commit and 4MB max message size, 1000 commits (~10MB) requires
246            // chunking. The server will chunk commits across multiple response messages.
247            1000
248        }
249    }
250
251    pub(crate) fn default_commit_sync_gap_threshold() -> u32 {
252        if cfg!(msim) {
253            // Use smaller threshold for testing.
254            10
255        } else {
256            // When gap > 1000, FastCommitSyncer is more efficient.
257            // When gap <= 1000, CommitSyncer handles incremental sync.
258            1000
259        }
260    }
261
262    pub(crate) fn default_enable_fast_commit_syncer() -> bool {
263        // Enabled by default. Operators can disable it locally if bugs are discovered,
264        // without waiting for a protocol upgrade.
265        true
266    }
267}
268
269impl Default for Parameters {
270    fn default() -> Self {
271        Self {
272            db_path: PathBuf::default(),
273            leader_timeout: Parameters::default_leader_timeout(),
274            min_block_delay: Parameters::default_min_block_delay(),
275            max_forward_time_drift: Parameters::default_max_forward_time_drift(),
276            max_headers_per_commit_sync_fetch:
277                Parameters::default_max_headers_per_commit_sync_fetch(),
278            max_transactions_per_commit_sync_fetch:
279                Parameters::default_max_transactions_per_commit_sync_fetch(),
280            max_headers_per_regular_sync_fetch:
281                Parameters::default_max_headers_per_regular_sync_fetch(),
282            max_transactions_per_regular_sync_fetch:
283                Parameters::default_max_transactions_per_regular_sync_fetch(),
284            sync_last_known_own_block_timeout:
285                Parameters::default_sync_last_known_own_block_timeout(),
286            dag_state_cached_rounds: Parameters::default_dag_state_cached_rounds(),
287            commit_sync_parallel_fetches: Parameters::default_commit_sync_parallel_fetches(),
288            commit_sync_batch_size: Parameters::default_commit_sync_batch_size(),
289            commit_sync_batches_ahead: Parameters::default_commit_sync_batches_ahead(),
290            max_headers_per_bundle: Parameters::default_max_headers_per_bundle(),
291            max_shards_per_bundle: Parameters::default_max_shards_per_bundle(),
292            tonic: TonicParameters::default(),
293            fast_commit_sync_batch_size: Parameters::default_fast_commit_sync_batch_size(),
294            commit_sync_gap_threshold: Parameters::default_commit_sync_gap_threshold(),
295            enable_fast_commit_syncer: Parameters::default_enable_fast_commit_syncer(),
296        }
297    }
298}
299
/// Tonic network settings.
///
/// Every field has a serde default, so any subset may be omitted from the
/// serialized configuration.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct TonicParameters {
    /// Keepalive interval and timeouts for both client and server.
    ///
    /// If unspecified, this will default to 5s.
    #[serde(default = "TonicParameters::default_keepalive_interval")]
    pub keepalive_interval: Duration,

    /// Size of various per-connection buffers.
    ///
    /// If unspecified, this will default to 32MiB.
    #[serde(default = "TonicParameters::default_connection_buffer_size")]
    pub connection_buffer_size: usize,

    /// Messages over this size threshold will increment a counter.
    ///
    /// If unspecified, this will default to 16MiB.
    #[serde(default = "TonicParameters::default_excessive_message_size")]
    pub excessive_message_size: usize,

    /// Hard message size limit for both requests and responses.
    /// This value is higher than strictly necessary, to allow overheads.
    /// Message size targets and soft limits are computed based on this value.
    ///
    /// If unspecified, this will default to 64MiB.
    #[serde(default = "TonicParameters::default_message_size_limit")]
    pub message_size_limit: usize,
}
328
329impl TonicParameters {
330    fn default_keepalive_interval() -> Duration {
331        Duration::from_secs(5)
332    }
333
334    fn default_connection_buffer_size() -> usize {
335        32 << 20
336    }
337
338    fn default_excessive_message_size() -> usize {
339        16 << 20
340    }
341
342    fn default_message_size_limit() -> usize {
343        64 << 20
344    }
345}
346
347impl Default for TonicParameters {
348    fn default() -> Self {
349        Self {
350            keepalive_interval: TonicParameters::default_keepalive_interval(),
351            connection_buffer_size: TonicParameters::default_connection_buffer_size(),
352            excessive_message_size: TonicParameters::default_excessive_message_size(),
353            message_size_limit: TonicParameters::default_message_size_limit(),
354        }
355    }
356}