1use std::{
6 cell::RefCell,
7 sync::{
8 Arc,
9 atomic::{AtomicU64, Ordering},
10 },
11 time::Duration,
12};
13
14use once_cell::sync::OnceCell;
15use prometheus::{
16 HistogramVec, IntCounterVec, IntGaugeVec, Registry, register_histogram_vec_with_registry,
17 register_int_counter_vec_with_registry, register_int_gauge_vec_with_registry,
18};
19use rocksdb::{PerfContext, PerfMetric, PerfStatsLevel, perf::set_perf_stats};
20use tap::TapFallible;
21use tracing::warn;
22
23thread_local! {
24 static PER_THREAD_ROCKS_PERF_CONTEXT: std::cell::RefCell<rocksdb::PerfContext> = RefCell::new(PerfContext::default());
25}
26
/// Upper bounds, in seconds, of the buckets used by every latency histogram in
/// this file. They range from 10 microseconds up to 10 seconds.
const LATENCY_SEC_BUCKETS: &[f64] = &[
    // 10 µs .. 500 µs
    0.00001, 0.00005, 0.0001, 0.0002, 0.0003, 0.0004, 0.0005,
    // 1 ms .. 50 ms
    0.001, 0.002, 0.003, 0.004, 0.005, 0.01, 0.025, 0.05,
    // 100 ms .. 10 s
    0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
];
33
/// Decides which operations get sampled; see `SamplingInterval::sample`.
///
/// Two modes exist, selected by `once_every_duration`:
/// * zero duration: purely count based, driven by `after_num_ops`;
/// * non-zero duration: a background task spawned by `SamplingInterval::new`
///   periodically re-arms sampling by resetting `counter` to zero.
///
/// Cloning shares the same `counter` (it is an `Arc`), so all clones sample
/// from one common sequence of operations.
#[derive(Debug, Clone)]
pub struct SamplingInterval {
    /// Period of the background reset task. `Duration::ZERO` disables the task
    /// and switches `sample` to count-based sampling.
    pub once_every_duration: Duration,
    /// Count-based mode: `sample` divides the counter by `after_num_ops + 1`
    /// and samples when the remainder is zero.
    /// Time-based mode: the periodic reset only happens once the counter
    /// exceeds this value.
    pub after_num_ops: u64,
    /// Number of `sample` calls observed; starts at 1 and is shared by clones.
    pub counter: Arc<AtomicU64>,
}
45
46impl Default for SamplingInterval {
47 fn default() -> Self {
48 SamplingInterval::new(Duration::from_secs(60), 0)
50 }
51}
52
53impl SamplingInterval {
54 pub fn new(once_every_duration: Duration, after_num_ops: u64) -> Self {
55 let counter = Arc::new(AtomicU64::new(1));
56 if !once_every_duration.is_zero() {
57 let counter = counter.clone();
58 tokio::task::spawn(async move {
59 loop {
60 if counter.load(Ordering::SeqCst) > after_num_ops {
61 counter.store(0, Ordering::SeqCst);
62 }
63 tokio::time::sleep(once_every_duration).await;
64 }
65 });
66 }
67 SamplingInterval {
68 once_every_duration,
69 after_num_ops,
70 counter,
71 }
72 }
73 pub fn new_from_self(&self) -> SamplingInterval {
74 SamplingInterval::new(self.once_every_duration, self.after_num_ops)
75 }
76 pub fn sample(&self) -> bool {
77 if self.once_every_duration.is_zero() {
78 self.counter.fetch_add(1, Ordering::Relaxed) % (self.after_num_ops + 1) == 0
79 } else {
80 self.counter.fetch_add(1, Ordering::Relaxed) == 0
81 }
82 }
83}
84
/// Integer gauges describing the state of a RocksDB column family.
///
/// Every gauge is created by `ColumnFamilyMetrics::new` with a single
/// `cf_name` label; the help text attached to each one there describes what
/// the value means.
#[derive(Debug)]
pub struct ColumnFamilyMetrics {
    pub rocksdb_total_sst_files_size: IntGaugeVec,
    pub rocksdb_total_blob_files_size: IntGaugeVec,
    pub rocksdb_total_num_files: IntGaugeVec,
    pub rocksdb_num_level0_files: IntGaugeVec,
    pub rocksdb_current_size_active_mem_tables: IntGaugeVec,
    pub rocksdb_size_all_mem_tables: IntGaugeVec,
    pub rocksdb_num_snapshots: IntGaugeVec,
    pub rocksdb_oldest_snapshot_time: IntGaugeVec,
    pub rocksdb_actual_delayed_write_rate: IntGaugeVec,
    pub rocksdb_is_write_stopped: IntGaugeVec,
    pub rocksdb_block_cache_capacity: IntGaugeVec,
    pub rocksdb_block_cache_usage: IntGaugeVec,
    pub rocksdb_block_cache_pinned_usage: IntGaugeVec,
    pub rocksdb_estimate_table_readers_mem: IntGaugeVec,
    pub rocksdb_num_immutable_mem_tables: IntGaugeVec,
    pub rocksdb_mem_table_flush_pending: IntGaugeVec,
    pub rocksdb_compaction_pending: IntGaugeVec,
    pub rocksdb_estimate_pending_compaction_bytes: IntGaugeVec,
    pub rocksdb_num_running_compactions: IntGaugeVec,
    pub rocksdb_num_running_flushes: IntGaugeVec,
    pub rocksdb_estimate_oldest_key_time: IntGaugeVec,
    pub rocksdb_background_errors: IntGaugeVec,
    pub rocksdb_estimated_num_keys: IntGaugeVec,
    pub rocksdb_base_level: IntGaugeVec,
}
112
113impl ColumnFamilyMetrics {
114 pub(crate) fn new(registry: &Registry) -> Self {
115 ColumnFamilyMetrics {
116 rocksdb_total_sst_files_size: register_int_gauge_vec_with_registry!(
117 "rocksdb_total_sst_files_size",
118 "The storage size occupied by the sst files in the column family",
119 &["cf_name"],
120 registry,
121 )
122 .unwrap(),
123 rocksdb_total_blob_files_size: register_int_gauge_vec_with_registry!(
124 "rocksdb_total_blob_files_size",
125 "The storage size occupied by the blob files in the column family",
126 &["cf_name"],
127 registry,
128 )
129 .unwrap(),
130 rocksdb_total_num_files: register_int_gauge_vec_with_registry!(
131 "rocksdb_total_num_files",
132 "Total number of files used in the column family",
133 &["cf_name"],
134 registry,
135 )
136 .unwrap(),
137 rocksdb_num_level0_files: register_int_gauge_vec_with_registry!(
138 "rocksdb_num_level0_files",
139 "Number of level 0 files in the column family",
140 &["cf_name"],
141 registry,
142 )
143 .unwrap(),
144 rocksdb_current_size_active_mem_tables: register_int_gauge_vec_with_registry!(
145 "rocksdb_current_size_active_mem_tables",
146 "The current approximate size of active memtable (bytes).",
147 &["cf_name"],
148 registry,
149 )
150 .unwrap(),
151 rocksdb_size_all_mem_tables: register_int_gauge_vec_with_registry!(
152 "rocksdb_size_all_mem_tables",
153 "The memory size occupied by the column family's in-memory buffer",
154 &["cf_name"],
155 registry,
156 )
157 .unwrap(),
158 rocksdb_num_snapshots: register_int_gauge_vec_with_registry!(
159 "rocksdb_num_snapshots",
160 "Number of snapshots held for the column family",
161 &["cf_name"],
162 registry,
163 )
164 .unwrap(),
165 rocksdb_oldest_snapshot_time: register_int_gauge_vec_with_registry!(
166 "rocksdb_oldest_snapshot_time",
167 "Unit timestamp of the oldest unreleased snapshot",
168 &["cf_name"],
169 registry,
170 )
171 .unwrap(),
172 rocksdb_actual_delayed_write_rate: register_int_gauge_vec_with_registry!(
173 "rocksdb_actual_delayed_write_rate",
174 "The current actual delayed write rate. 0 means no delay",
175 &["cf_name"],
176 registry,
177 )
178 .unwrap(),
179 rocksdb_is_write_stopped: register_int_gauge_vec_with_registry!(
180 "rocksdb_is_write_stopped",
181 "A flag indicating whether writes are stopped on this column family. 1 indicates writes have been stopped.",
182 &["cf_name"],
183 registry,
184 )
185 .unwrap(),
186 rocksdb_block_cache_capacity: register_int_gauge_vec_with_registry!(
187 "rocksdb_block_cache_capacity",
188 "The block cache capacity of the column family.",
189 &["cf_name"],
190 registry,
191 )
192 .unwrap(),
193 rocksdb_block_cache_usage: register_int_gauge_vec_with_registry!(
194 "rocksdb_block_cache_usage",
195 "The memory size used by the column family in the block cache.",
196 &["cf_name"],
197 registry,
198 )
199 .unwrap(),
200 rocksdb_block_cache_pinned_usage: register_int_gauge_vec_with_registry!(
201 "rocksdb_block_cache_pinned_usage",
202 "The memory size used by the column family in the block cache where entries are pinned",
203 &["cf_name"],
204 registry,
205 )
206 .unwrap(),
207 rocksdb_estimate_table_readers_mem: register_int_gauge_vec_with_registry!(
208 "rocksdb_estimate_table_readers_mem",
209 "The estimated memory size used for reading SST tables in this column
210 family such as filters and index blocks. Note that this number does not
211 include the memory used in block cache.",
212 &["cf_name"],
213 registry,
214 )
215 .unwrap(),
216 rocksdb_num_immutable_mem_tables: register_int_gauge_vec_with_registry!(
217 "rocksdb_num_immutable_mem_tables",
218 "The number of immutable memtables that have not yet been flushed.",
219 &["cf_name"],
220 registry,
221 )
222 .unwrap(),
223 rocksdb_mem_table_flush_pending: register_int_gauge_vec_with_registry!(
224 "rocksdb_mem_table_flush_pending",
225 "A 1 or 0 flag indicating whether a memtable flush is pending.
226 If this number is 1, it means a memtable is waiting for being flushed,
227 but there might be too many L0 files that prevents it from being flushed.",
228 &["cf_name"],
229 registry,
230 )
231 .unwrap(),
232 rocksdb_compaction_pending: register_int_gauge_vec_with_registry!(
233 "rocksdb_compaction_pending",
234 "A 1 or 0 flag indicating whether a compaction job is pending.
235 If this number is 1, it means some part of the column family requires
236 compaction in order to maintain shape of LSM tree, but the compaction
237 is pending because the desired compaction job is either waiting for
238 other dependent compactions to be finished or waiting for an available
239 compaction thread.",
240 &["cf_name"],
241 registry,
242 )
243 .unwrap(),
244 rocksdb_estimate_pending_compaction_bytes: register_int_gauge_vec_with_registry!(
245 "rocksdb_estimate_pending_compaction_bytes",
246 "Estimated total number of bytes compaction needs to rewrite to get all levels down
247 to under target size. Not valid for other compactions than level-based.",
248 &["cf_name"],
249 registry,
250 )
251 .unwrap(),
252 rocksdb_num_running_compactions: register_int_gauge_vec_with_registry!(
253 "rocksdb_num_running_compactions",
254 "The number of compactions that are currently running for the column family.",
255 &["cf_name"],
256 registry,
257 )
258 .unwrap(),
259 rocksdb_num_running_flushes: register_int_gauge_vec_with_registry!(
260 "rocksdb_num_running_flushes",
261 "The number of flushes that are currently running for the column family.",
262 &["cf_name"],
263 registry,
264 )
265 .unwrap(),
266 rocksdb_estimate_oldest_key_time: register_int_gauge_vec_with_registry!(
267 "rocksdb_estimate_oldest_key_time",
268 "Estimation of the oldest key timestamp in the DB. Only available
269 for FIFO compaction with compaction_options_fifo.allow_compaction = false.",
270 &["cf_name"],
271 registry,
272 )
273 .unwrap(),
274 rocksdb_estimated_num_keys: register_int_gauge_vec_with_registry!(
275 "rocksdb_estimated_num_keys",
276 "The estimated number of keys in the table",
277 &["cf_name"],
278 registry,
279 )
280 .unwrap(),
281 rocksdb_background_errors: register_int_gauge_vec_with_registry!(
282 "rocksdb_background_errors",
283 "The accumulated number of RocksDB background errors.",
284 &["cf_name"],
285 registry,
286 )
287 .unwrap(),
288 rocksdb_base_level: register_int_gauge_vec_with_registry!(
289 "rocksdb_base_level",
290 "The number of level to which L0 data will be compacted.",
291 &["cf_name"],
292 registry,
293 )
294 .unwrap(),
295 }
296 }
297}
298
/// Prometheus collectors describing individual RocksDB operations
/// (iteration, get, multiget, put, delete, batch commit) plus the number of
/// open DB handles.
///
/// All collectors are created and registered by `OperationMetrics::new`; the
/// label of each one (`cf_name` or `db_name`) is fixed there.
#[derive(Debug)]
pub struct OperationMetrics {
    pub rocksdb_iter_latency_seconds: HistogramVec,
    pub rocksdb_iter_bytes: HistogramVec,
    pub rocksdb_iter_keys: HistogramVec,
    pub rocksdb_get_latency_seconds: HistogramVec,
    pub rocksdb_get_bytes: HistogramVec,
    pub rocksdb_multiget_latency_seconds: HistogramVec,
    pub rocksdb_multiget_bytes: HistogramVec,
    pub rocksdb_put_latency_seconds: HistogramVec,
    pub rocksdb_put_bytes: HistogramVec,
    pub rocksdb_batch_put_bytes: HistogramVec,
    pub rocksdb_delete_latency_seconds: HistogramVec,
    pub rocksdb_deletes: IntCounterVec,
    // Registered under the name "rocksdb_write_batch_commit_latency_seconds".
    pub rocksdb_batch_commit_latency_seconds: HistogramVec,
    pub rocksdb_batch_commit_bytes: HistogramVec,
    pub rocksdb_num_active_db_handles: IntGaugeVec,
    // Registered under the name "rocksdb_num_very_slow_batch_writes".
    pub rocksdb_very_slow_batch_writes_count: IntCounterVec,
    // Registered under the name "rocksdb_very_slow_batch_writes_duration".
    pub rocksdb_very_slow_batch_writes_duration_ms: IntCounterVec,
    // Registered under the name "rocksdb_num_very_slow_puts".
    pub rocksdb_very_slow_puts_count: IntCounterVec,
    // Registered under the name "rocksdb_very_slow_puts_duration".
    pub rocksdb_very_slow_puts_duration_ms: IntCounterVec,
}
321
322impl OperationMetrics {
323 pub(crate) fn new(registry: &Registry) -> Self {
324 OperationMetrics {
325 rocksdb_iter_latency_seconds: register_histogram_vec_with_registry!(
326 "rocksdb_iter_latency_seconds",
327 "Rocksdb iter latency in seconds",
328 &["cf_name"],
329 LATENCY_SEC_BUCKETS.to_vec(),
330 registry,
331 )
332 .unwrap(),
333 rocksdb_iter_bytes: register_histogram_vec_with_registry!(
334 "rocksdb_iter_bytes",
335 "Rocksdb iter size in bytes",
336 &["cf_name"],
337 prometheus::exponential_buckets(1.0, 4.0, 15)
338 .unwrap()
339 .to_vec(),
340 registry,
341 )
342 .unwrap(),
343 rocksdb_iter_keys: register_histogram_vec_with_registry!(
344 "rocksdb_iter_keys",
345 "Rocksdb iter num keys",
346 &["cf_name"],
347 registry,
348 )
349 .unwrap(),
350 rocksdb_get_latency_seconds: register_histogram_vec_with_registry!(
351 "rocksdb_get_latency_seconds",
352 "Rocksdb get latency in seconds",
353 &["cf_name"],
354 LATENCY_SEC_BUCKETS.to_vec(),
355 registry,
356 )
357 .unwrap(),
358 rocksdb_get_bytes: register_histogram_vec_with_registry!(
359 "rocksdb_get_bytes",
360 "Rocksdb get call returned data size in bytes",
361 &["cf_name"],
362 prometheus::exponential_buckets(1.0, 4.0, 15)
363 .unwrap()
364 .to_vec(),
365 registry
366 )
367 .unwrap(),
368 rocksdb_multiget_latency_seconds: register_histogram_vec_with_registry!(
369 "rocksdb_multiget_latency_seconds",
370 "Rocksdb multiget latency in seconds",
371 &["cf_name"],
372 LATENCY_SEC_BUCKETS.to_vec(),
373 registry,
374 )
375 .unwrap(),
376 rocksdb_multiget_bytes: register_histogram_vec_with_registry!(
377 "rocksdb_multiget_bytes",
378 "Rocksdb multiget call returned data size in bytes",
379 &["cf_name"],
380 prometheus::exponential_buckets(1.0, 4.0, 15)
381 .unwrap()
382 .to_vec(),
383 registry,
384 )
385 .unwrap(),
386 rocksdb_put_latency_seconds: register_histogram_vec_with_registry!(
387 "rocksdb_put_latency_seconds",
388 "Rocksdb put latency in seconds",
389 &["cf_name"],
390 LATENCY_SEC_BUCKETS.to_vec(),
391 registry,
392 )
393 .unwrap(),
394 rocksdb_put_bytes: register_histogram_vec_with_registry!(
395 "rocksdb_put_bytes",
396 "Rocksdb put call puts data size in bytes",
397 &["cf_name"],
398 prometheus::exponential_buckets(1.0, 4.0, 15)
399 .unwrap()
400 .to_vec(),
401 registry,
402 )
403 .unwrap(),
404 rocksdb_batch_put_bytes: register_histogram_vec_with_registry!(
405 "rocksdb_batch_put_bytes",
406 "Rocksdb batch put call puts data size in bytes",
407 &["cf_name"],
408 prometheus::exponential_buckets(1.0, 4.0, 15)
409 .unwrap()
410 .to_vec(),
411 registry,
412 )
413 .unwrap(),
414 rocksdb_delete_latency_seconds: register_histogram_vec_with_registry!(
415 "rocksdb_delete_latency_seconds",
416 "Rocksdb delete latency in seconds",
417 &["cf_name"],
418 LATENCY_SEC_BUCKETS.to_vec(),
419 registry,
420 )
421 .unwrap(),
422 rocksdb_deletes: register_int_counter_vec_with_registry!(
423 "rocksdb_deletes",
424 "Rocksdb delete calls",
425 &["cf_name"],
426 registry
427 )
428 .unwrap(),
429 rocksdb_batch_commit_latency_seconds: register_histogram_vec_with_registry!(
430 "rocksdb_write_batch_commit_latency_seconds",
431 "Rocksdb schema batch commit latency in seconds",
432 &["db_name"],
433 LATENCY_SEC_BUCKETS.to_vec(),
434 registry,
435 )
436 .unwrap(),
437 rocksdb_batch_commit_bytes: register_histogram_vec_with_registry!(
438 "rocksdb_batch_commit_bytes",
439 "Rocksdb schema batch commit size in bytes",
440 &["db_name"],
441 prometheus::exponential_buckets(1.0, 4.0, 15)
442 .unwrap()
443 .to_vec(),
444 registry,
445 )
446 .unwrap(),
447 rocksdb_num_active_db_handles: register_int_gauge_vec_with_registry!(
448 "rocksdb_num_active_db_handles",
449 "Number of active db handles",
450 &["db_name"],
451 registry,
452 )
453 .unwrap(),
454 rocksdb_very_slow_batch_writes_count: register_int_counter_vec_with_registry!(
455 "rocksdb_num_very_slow_batch_writes",
456 "Number of batch writes that took more than 1 second",
457 &["db_name"],
458 registry,
459 )
460 .unwrap(),
461 rocksdb_very_slow_batch_writes_duration_ms: register_int_counter_vec_with_registry!(
462 "rocksdb_very_slow_batch_writes_duration",
463 "Total duration of batch writes that took more than 1 second",
464 &["db_name"],
465 registry,
466 )
467 .unwrap(),
468 rocksdb_very_slow_puts_count: register_int_counter_vec_with_registry!(
469 "rocksdb_num_very_slow_puts",
470 "Number of puts that took more than 1 second",
471 &["cf_name"],
472 registry,
473 )
474 .unwrap(),
475 rocksdb_very_slow_puts_duration_ms: register_int_counter_vec_with_registry!(
476 "rocksdb_very_slow_puts_duration",
477 "Total duration of puts that took more than 1 second",
478 &["cf_name"],
479 registry,
480 )
481 .unwrap(),
482 }
483 }
484}
485
486pub struct RocksDBPerfContext;
487
488impl Default for RocksDBPerfContext {
489 fn default() -> Self {
490 set_perf_stats(PerfStatsLevel::EnableTime);
491 PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context| {
492 perf_context.borrow_mut().reset();
493 });
494 RocksDBPerfContext {}
495 }
496}
497
498impl Drop for RocksDBPerfContext {
499 fn drop(&mut self) {
500 set_perf_stats(PerfStatsLevel::Disable);
501 }
502}
503
/// Counters fed from the thread-local RocksDB perf context on the read path.
///
/// `ReadPerfContextMetrics::new` registers each counter with a `cf_name`
/// label, and `ReadPerfContextMetrics::report_metrics` adds the current perf
/// context readings to them.
#[derive(Debug)]
pub struct ReadPerfContextMetrics {
    pub user_key_comparison_count: IntCounterVec,
    pub block_cache_hit_count: IntCounterVec,
    pub block_read_count: IntCounterVec,
    pub block_read_byte: IntCounterVec,
    pub block_read_nanos: IntCounterVec,
    pub block_checksum_nanos: IntCounterVec,
    pub block_decompress_nanos: IntCounterVec,
    pub get_read_bytes: IntCounterVec,
    pub multiget_read_bytes: IntCounterVec,
    pub get_snapshot_nanos: IntCounterVec,
    pub get_from_memtable_nanos: IntCounterVec,
    pub get_from_memtable_count: IntCounterVec,
    pub get_post_process_nanos: IntCounterVec,
    pub get_from_output_files_nanos: IntCounterVec,
    pub db_mutex_lock_nanos: IntCounterVec,
    pub db_condition_wait_nanos: IntCounterVec,
    pub merge_operator_nanos: IntCounterVec,
    pub read_index_block_nanos: IntCounterVec,
    pub read_filter_block_nanos: IntCounterVec,
    pub new_table_block_iter_nanos: IntCounterVec,
    pub block_seek_nanos: IntCounterVec,
    pub find_table_nanos: IntCounterVec,
    pub bloom_memtable_hit_count: IntCounterVec,
    pub bloom_memtable_miss_count: IntCounterVec,
    pub bloom_sst_hit_count: IntCounterVec,
    pub bloom_sst_miss_count: IntCounterVec,
    pub key_lock_wait_time: IntCounterVec,
    pub key_lock_wait_count: IntCounterVec,
    pub internal_delete_skipped_count: IntCounterVec,
    pub internal_skipped_count: IntCounterVec,
}
537
538impl ReadPerfContextMetrics {
539 pub(crate) fn new(registry: &Registry) -> Self {
540 ReadPerfContextMetrics {
541 user_key_comparison_count: register_int_counter_vec_with_registry!(
542 "user_key_comparison_count",
543 "Helps us figure out whether too many comparisons in binary search can be a problem,
544 especially when a more expensive comparator is used. Moreover, since number of comparisons
545 is usually uniform based on the memtable size, the SST file size for Level 0 and size of other
546 levels, an significant increase of the counter can indicate unexpected LSM-tree shape.
547 You may want to check whether flush/compaction can keep up with the write speed",
548 &["cf_name"],
549 registry,
550 )
551 .unwrap(),
552 block_cache_hit_count: register_int_counter_vec_with_registry!(
553 "block_cache_hit_count",
554 "Tells us how many times we read data blocks from block cache, and block_read_count tells us how many
555 times we have to read blocks from the file system (either block cache is disabled or it is a cache miss).
556 We can evaluate the block cache efficiency by looking at the two counters over time.",
557 &["cf_name"],
558 registry,
559 )
560 .unwrap(),
561 block_read_count: register_int_counter_vec_with_registry!(
562 "block_read_count",
563 "Tells us how many times we have to read blocks from the file system (either block cache is disabled or it is a cache miss)",
564 &["cf_name"],
565 registry,
566 )
567 .unwrap(),
568 block_read_byte: register_int_counter_vec_with_registry!(
569 "block_read_byte",
570 "Tells us how many total bytes we read from the file system. It can tell us whether a slow query can be caused by reading
571 large blocks from the file system. Index and bloom filter blocks are usually large blocks. A large block can also be the result
572 of a very large key or value",
573 &["cf_name"],
574 registry,
575 )
576 .unwrap(),
577 block_read_nanos: register_int_counter_vec_with_registry!(
578 "block_read_nanos",
579 "Total nanos spent on block reads",
580 &["cf_name"],
581 registry,
582 )
583 .unwrap(),
584 block_checksum_nanos: register_int_counter_vec_with_registry!(
585 "block_checksum_nanos",
586 "Total nanos spent on verifying block checksum",
587 &["cf_name"],
588 registry,
589 )
590 .unwrap(),
591 block_decompress_nanos: register_int_counter_vec_with_registry!(
592 "block_decompress_nanos",
593 "Total nanos spent on decompressing a block",
594 &["cf_name"],
595 registry,
596 )
597 .unwrap(),
598 get_read_bytes: register_int_counter_vec_with_registry!(
599 "get_read_bytes",
600 "Total bytes for values returned by Get",
601 &["cf_name"],
602 registry,
603 )
604 .unwrap(),
605 multiget_read_bytes: register_int_counter_vec_with_registry!(
606 "multiget_read_bytes",
607 "Total bytes for values returned by MultiGet.",
608 &["cf_name"],
609 registry,
610 )
611 .unwrap(),
612 get_snapshot_nanos: register_int_counter_vec_with_registry!(
613 "get_snapshot_nanos",
614 "Time spent in getting snapshot.",
615 &["cf_name"],
616 registry,
617 )
618 .unwrap(),
619 get_from_memtable_nanos: register_int_counter_vec_with_registry!(
620 "get_from_memtable_nanos",
621 "Time spent on reading data from memtable.",
622 &["cf_name"],
623 registry,
624 )
625 .unwrap(),
626 get_from_memtable_count: register_int_counter_vec_with_registry!(
627 "get_from_memtable_count",
628 "Number of memtables queried",
629 &["cf_name"],
630 registry,
631 )
632 .unwrap(),
633 get_post_process_nanos: register_int_counter_vec_with_registry!(
634 "get_post_process_nanos",
635 "Total nanos spent after Get() finds a key",
636 &["cf_name"],
637 registry,
638 )
639 .unwrap(),
640 get_from_output_files_nanos: register_int_counter_vec_with_registry!(
641 "get_from_output_files_nanos",
642 "Total nanos reading from output files",
643 &["cf_name"],
644 registry,
645 )
646 .unwrap(),
647 db_mutex_lock_nanos: register_int_counter_vec_with_registry!(
648 "db_mutex_lock_nanos",
649 "Time spent on acquiring db mutex",
650 &["cf_name"],
651 registry,
652 )
653 .unwrap(),
654 db_condition_wait_nanos: register_int_counter_vec_with_registry!(
655 "db_condition_wait_nanos",
656 "Time spent waiting with a condition variable created with DB Mutex.",
657 &["cf_name"],
658 registry,
659 )
660 .unwrap(),
661 merge_operator_nanos: register_int_counter_vec_with_registry!(
662 "merge_operator_nanos",
663 "Time spent on merge operator.",
664 &["cf_name"],
665 registry,
666 )
667 .unwrap(),
668 read_index_block_nanos: register_int_counter_vec_with_registry!(
669 "read_index_block_nanos",
670 "Time spent on reading index block from block cache or SST file",
671 &["cf_name"],
672 registry,
673 )
674 .unwrap(),
675 read_filter_block_nanos: register_int_counter_vec_with_registry!(
676 "read_filter_block_nanos",
677 "Time spent on reading filter block from block cache or SST file",
678 &["cf_name"],
679 registry,
680 )
681 .unwrap(),
682 new_table_block_iter_nanos: register_int_counter_vec_with_registry!(
683 "new_table_block_iter_nanos",
684 "Time spent on creating data block iterator",
685 &["cf_name"],
686 registry,
687 )
688 .unwrap(),
689 block_seek_nanos: register_int_counter_vec_with_registry!(
690 "block_seek_nanos",
691 "Time spent on seeking a key in data/index blocks",
692 &["cf_name"],
693 registry,
694 )
695 .unwrap(),
696 find_table_nanos: register_int_counter_vec_with_registry!(
697 "find_table_nanos",
698 "Time spent on finding or creating a table reader",
699 &["cf_name"],
700 registry,
701 )
702 .unwrap(),
703 bloom_memtable_hit_count: register_int_counter_vec_with_registry!(
704 "bloom_memtable_hit_count",
705 "Total number of mem table bloom hits",
706 &["cf_name"],
707 registry,
708 )
709 .unwrap(),
710 bloom_memtable_miss_count: register_int_counter_vec_with_registry!(
711 "bloom_memtable_miss_count",
712 "Total number of mem table bloom misses",
713 &["cf_name"],
714 registry,
715 )
716 .unwrap(),
717 bloom_sst_hit_count: register_int_counter_vec_with_registry!(
718 "bloom_sst_hit_count",
719 "Total number of SST table bloom hits",
720 &["cf_name"],
721 registry,
722 )
723 .unwrap(),
724 bloom_sst_miss_count: register_int_counter_vec_with_registry!(
725 "bloom_sst_miss_count",
726 "Total number of SST table bloom misses",
727 &["cf_name"],
728 registry,
729 )
730 .unwrap(),
731 key_lock_wait_time: register_int_counter_vec_with_registry!(
732 "key_lock_wait_time",
733 "Time spent waiting on key locks in transaction lock manager",
734 &["cf_name"],
735 registry,
736 )
737 .unwrap(),
738 key_lock_wait_count: register_int_counter_vec_with_registry!(
739 "key_lock_wait_count",
740 "Number of times acquiring a lock was blocked by another transaction",
741 &["cf_name"],
742 registry,
743 )
744 .unwrap(),
745 internal_delete_skipped_count: register_int_counter_vec_with_registry!(
746 "internal_delete_skipped_count",
747 "Total number of deleted keys skipped during iteration",
748 &["cf_name"],
749 registry,
750 )
751 .unwrap(),
752 internal_skipped_count: register_int_counter_vec_with_registry!(
753 "internal_skipped_count",
754 "Totall number of internal keys skipped during iteration",
755 &["cf_name"],
756 registry,
757 )
758 .unwrap(),
759 }
760 }
761
762 pub fn report_metrics(&self, cf_name: &str) {
763 PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context_cell| {
764 set_perf_stats(PerfStatsLevel::Disable);
765 let perf_context = perf_context_cell.borrow();
766 self.user_key_comparison_count
767 .with_label_values(&[cf_name])
768 .inc_by(perf_context.metric(PerfMetric::UserKeyComparisonCount));
769 self.block_cache_hit_count
770 .with_label_values(&[cf_name])
771 .inc_by(perf_context.metric(PerfMetric::BlockCacheHitCount));
772 self.block_read_count
773 .with_label_values(&[cf_name])
774 .inc_by(perf_context.metric(PerfMetric::BlockReadCount));
775 self.block_read_byte
776 .with_label_values(&[cf_name])
777 .inc_by(perf_context.metric(PerfMetric::BlockReadByte));
778 self.block_read_nanos
779 .with_label_values(&[cf_name])
780 .inc_by(perf_context.metric(PerfMetric::BlockReadTime));
781 self.block_read_count
782 .with_label_values(&[cf_name])
783 .inc_by(perf_context.metric(PerfMetric::BlockReadCount));
784 self.block_checksum_nanos
785 .with_label_values(&[cf_name])
786 .inc_by(perf_context.metric(PerfMetric::BlockChecksumTime));
787 self.block_decompress_nanos
788 .with_label_values(&[cf_name])
789 .inc_by(perf_context.metric(PerfMetric::BlockDecompressTime));
790 self.get_read_bytes
791 .with_label_values(&[cf_name])
792 .inc_by(perf_context.metric(PerfMetric::GetReadBytes));
793 self.multiget_read_bytes
794 .with_label_values(&[cf_name])
795 .inc_by(perf_context.metric(PerfMetric::MultigetReadBytes));
796 self.get_snapshot_nanos
797 .with_label_values(&[cf_name])
798 .inc_by(perf_context.metric(PerfMetric::GetSnapshotTime));
799 self.get_from_memtable_nanos
800 .with_label_values(&[cf_name])
801 .inc_by(perf_context.metric(PerfMetric::GetFromMemtableTime));
802 self.get_from_memtable_count
803 .with_label_values(&[cf_name])
804 .inc_by(perf_context.metric(PerfMetric::GetFromMemtableCount));
805 self.get_post_process_nanos
806 .with_label_values(&[cf_name])
807 .inc_by(perf_context.metric(PerfMetric::GetPostProcessTime));
808 self.get_from_output_files_nanos
809 .with_label_values(&[cf_name])
810 .inc_by(perf_context.metric(PerfMetric::GetFromOutputFilesTime));
811 self.db_mutex_lock_nanos
812 .with_label_values(&[cf_name])
813 .inc_by(perf_context.metric(PerfMetric::DbMutexLockNanos));
814 self.db_condition_wait_nanos
815 .with_label_values(&[cf_name])
816 .inc_by(perf_context.metric(PerfMetric::DbConditionWaitNanos));
817 self.merge_operator_nanos
818 .with_label_values(&[cf_name])
819 .inc_by(perf_context.metric(PerfMetric::MergeOperatorTimeNanos));
820 self.read_index_block_nanos
821 .with_label_values(&[cf_name])
822 .inc_by(perf_context.metric(PerfMetric::ReadIndexBlockNanos));
823 self.read_filter_block_nanos
824 .with_label_values(&[cf_name])
825 .inc_by(perf_context.metric(PerfMetric::ReadFilterBlockNanos));
826 self.new_table_block_iter_nanos
827 .with_label_values(&[cf_name])
828 .inc_by(perf_context.metric(PerfMetric::NewTableBlockIterNanos));
829 self.block_seek_nanos
830 .with_label_values(&[cf_name])
831 .inc_by(perf_context.metric(PerfMetric::BlockSeekNanos));
832 self.find_table_nanos
833 .with_label_values(&[cf_name])
834 .inc_by(perf_context.metric(PerfMetric::FindTableNanos));
835 self.bloom_memtable_hit_count
836 .with_label_values(&[cf_name])
837 .inc_by(perf_context.metric(PerfMetric::BloomMemtableHitCount));
838 self.bloom_memtable_miss_count
839 .with_label_values(&[cf_name])
840 .inc_by(perf_context.metric(PerfMetric::BloomMemtableMissCount));
841 self.bloom_sst_hit_count
842 .with_label_values(&[cf_name])
843 .inc_by(perf_context.metric(PerfMetric::BloomSstHitCount));
844 self.bloom_sst_miss_count
845 .with_label_values(&[cf_name])
846 .inc_by(perf_context.metric(PerfMetric::BloomSstMissCount));
847 self.key_lock_wait_time
848 .with_label_values(&[cf_name])
849 .inc_by(perf_context.metric(PerfMetric::KeyLockWaitTime));
850 self.key_lock_wait_count
851 .with_label_values(&[cf_name])
852 .inc_by(perf_context.metric(PerfMetric::KeyLockWaitCount));
853 self.internal_delete_skipped_count
854 .with_label_values(&[cf_name])
855 .inc_by(perf_context.metric(PerfMetric::InternalDeleteSkippedCount));
856 self.internal_skipped_count
857 .with_label_values(&[cf_name])
858 .inc_by(perf_context.metric(PerfMetric::InternalKeySkippedCount));
859 });
860 }
861}
862
/// Counters fed from the thread-local RocksDB perf context on the write path.
///
/// `WritePerfContextMetrics::new` registers each counter, and
/// `WritePerfContextMetrics::report_metrics` adds the current perf context
/// readings to them.
#[derive(Debug)]
pub struct WritePerfContextMetrics {
    pub write_wal_nanos: IntCounterVec,
    pub write_memtable_nanos: IntCounterVec,
    pub write_delay_nanos: IntCounterVec,
    pub write_pre_and_post_process_nanos: IntCounterVec,
    pub write_db_mutex_lock_nanos: IntCounterVec,
    pub write_db_condition_wait_nanos: IntCounterVec,
    // Registered under the name "write_key_lock_wait_time".
    pub write_key_lock_wait_nanos: IntCounterVec,
    pub write_key_lock_wait_count: IntCounterVec,
}
874
875impl WritePerfContextMetrics {
876 pub(crate) fn new(registry: &Registry) -> Self {
877 WritePerfContextMetrics {
878 write_wal_nanos: register_int_counter_vec_with_registry!(
879 "write_wal_nanos",
880 "Total nanos spent on writing to WAL",
881 &["cf_name"],
882 registry,
883 )
884 .unwrap(),
885 write_memtable_nanos: register_int_counter_vec_with_registry!(
886 "write_memtable_nanos",
887 "Total nanos spent on writing to memtable",
888 &["cf_name"],
889 registry,
890 )
891 .unwrap(),
892 write_delay_nanos: register_int_counter_vec_with_registry!(
893 "write_delay_nanos",
894 "Total nanos spent on delaying or throttling write",
895 &["cf_name"],
896 registry,
897 )
898 .unwrap(),
899 write_pre_and_post_process_nanos: register_int_counter_vec_with_registry!(
900 "write_pre_and_post_process_nanos",
901 "Total nanos spent on writing a record, excluding the above four things",
902 &["cf_name"],
903 registry,
904 )
905 .unwrap(),
906 write_db_mutex_lock_nanos: register_int_counter_vec_with_registry!(
907 "write_db_mutex_lock_nanos",
908 "Time spent on acquiring db mutex",
909 &["cf_name"],
910 registry,
911 )
912 .unwrap(),
913 write_db_condition_wait_nanos: register_int_counter_vec_with_registry!(
914 "write_db_condition_wait_nanos",
915 "Time spent waiting with a condition variable created with DB Mutex.",
916 &["cf_name"],
917 registry,
918 )
919 .unwrap(),
920 write_key_lock_wait_nanos: register_int_counter_vec_with_registry!(
921 "write_key_lock_wait_time",
922 "Time spent waiting on key locks in transaction lock manager",
923 &["cf_name"],
924 registry,
925 )
926 .unwrap(),
927 write_key_lock_wait_count: register_int_counter_vec_with_registry!(
928 "write_key_lock_wait_count",
929 "Number of times acquiring a lock was blocked by another transaction",
930 &["cf_name"],
931 registry,
932 )
933 .unwrap(),
934 }
935 }
936 pub fn report_metrics(&self, db_name: &str) {
937 PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context_cell| {
938 set_perf_stats(PerfStatsLevel::Disable);
939 let perf_context = perf_context_cell.borrow();
940 self.write_wal_nanos
941 .with_label_values(&[db_name])
942 .inc_by(perf_context.metric(PerfMetric::WriteWalTime));
943 self.write_memtable_nanos
944 .with_label_values(&[db_name])
945 .inc_by(perf_context.metric(PerfMetric::WriteMemtableTime));
946 self.write_delay_nanos
947 .with_label_values(&[db_name])
948 .inc_by(perf_context.metric(PerfMetric::WriteDelayTime));
949 self.write_pre_and_post_process_nanos
950 .with_label_values(&[db_name])
951 .inc_by(perf_context.metric(PerfMetric::WritePreAndPostProcessTime));
952 self.write_db_mutex_lock_nanos
953 .with_label_values(&[db_name])
954 .inc_by(perf_context.metric(PerfMetric::DbMutexLockNanos));
955 self.write_db_condition_wait_nanos
956 .with_label_values(&[db_name])
957 .inc_by(perf_context.metric(PerfMetric::DbConditionWaitNanos));
958 self.write_key_lock_wait_nanos
959 .with_label_values(&[db_name])
960 .inc_by(perf_context.metric(PerfMetric::KeyLockWaitTime));
961 self.write_key_lock_wait_count
962 .with_label_values(&[db_name])
963 .inc_by(perf_context.metric(PerfMetric::KeyLockWaitCount));
964 });
965 }
966}
967
/// Bundle of all RocksDB-related metric groups defined in this file.
///
/// A single process-wide instance is handed out by `DBMetrics::init` and
/// `DBMetrics::get`.
#[derive(Debug)]
pub struct DBMetrics {
    /// Per-operation histograms, counters and the DB-handle gauge.
    pub op_metrics: OperationMetrics,
    /// Column-family state gauges.
    pub cf_metrics: ColumnFamilyMetrics,
    /// Read-path perf context counters.
    pub read_perf_ctx_metrics: ReadPerfContextMetrics,
    /// Write-path perf context counters.
    pub write_perf_ctx_metrics: WritePerfContextMetrics,
}
975
976static ONCE: OnceCell<Arc<DBMetrics>> = OnceCell::new();
977
978impl DBMetrics {
979 fn new(registry: &Registry) -> Self {
980 DBMetrics {
981 op_metrics: OperationMetrics::new(registry),
982 cf_metrics: ColumnFamilyMetrics::new(registry),
983 read_perf_ctx_metrics: ReadPerfContextMetrics::new(registry),
984 write_perf_ctx_metrics: WritePerfContextMetrics::new(registry),
985 }
986 }
987 pub fn init(registry: &Registry) -> &'static Arc<DBMetrics> {
988 let _ = ONCE
996 .set(Arc::new(DBMetrics::new(registry)))
997 .tap_err(|_| warn!("DBMetrics registry overwritten"));
999 ONCE.get().unwrap()
1000 }
1001 pub fn increment_num_active_dbs(&self, db_name: &str) {
1002 self.op_metrics
1003 .rocksdb_num_active_db_handles
1004 .with_label_values(&[db_name])
1005 .inc();
1006 }
1007 pub fn decrement_num_active_dbs(&self, db_name: &str) {
1008 self.op_metrics
1009 .rocksdb_num_active_db_handles
1010 .with_label_values(&[db_name])
1011 .dec();
1012 }
1013 pub fn get() -> &'static Arc<DBMetrics> {
1014 ONCE.get()
1015 .unwrap_or_else(|| DBMetrics::init(prometheus::default_registry()))
1016 }
1017}