1use std::{
6 cell::RefCell,
7 sync::{
8 Arc,
9 atomic::{AtomicU64, Ordering},
10 },
11 time::Duration,
12};
13
14use once_cell::sync::OnceCell;
15use prometheus::{
16 HistogramVec, IntCounterVec, IntGaugeVec, Registry, register_histogram_vec_with_registry,
17 register_int_counter_vec_with_registry, register_int_gauge_vec_with_registry,
18};
19use rocksdb::{PerfContext, PerfMetric, PerfStatsLevel, perf::set_perf_stats};
20use tap::TapFallible;
21use tracing::warn;
22
23thread_local! {
24 static PER_THREAD_ROCKS_PERF_CONTEXT: std::cell::RefCell<rocksdb::PerfContext> = RefCell::new(PerfContext::default());
25}
26
/// Bucket upper bounds, in seconds, shared by the latency histograms registered in this file.
const LATENCY_SEC_BUCKETS: &[f64] = &[
    0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 20.0, 30.0, 60.0, 90.0,
];
30
/// Configuration and shared state used to decide which operations are sampled.
///
/// See `SamplingInterval::sample` for how the fields are combined. Cloning shares the
/// same `counter` (the `Arc` is cloned, not the atomic).
#[derive(Debug, Clone)]
pub struct SamplingInterval {
    /// Period at which the background task spawned by `new` may reset `counter` to zero.
    /// When this is zero no task is spawned and sampling is purely count-based.
    pub once_every_duration: Duration,
    /// Count threshold: in count-based mode one call in every `after_num_ops + 1` is sampled;
    /// in time-based mode the counter is only reset once it exceeds this value.
    pub after_num_ops: u64,
    /// Shared counter incremented by every `sample` call; it starts at 1.
    pub counter: Arc<AtomicU64>,
}
42
43impl Default for SamplingInterval {
44 fn default() -> Self {
45 SamplingInterval::new(Duration::from_secs(60), 0)
47 }
48}
49
50impl SamplingInterval {
51 pub fn new(once_every_duration: Duration, after_num_ops: u64) -> Self {
52 let counter = Arc::new(AtomicU64::new(1));
53 if !once_every_duration.is_zero() {
54 let counter = counter.clone();
55 tokio::task::spawn(async move {
56 loop {
57 if counter.load(Ordering::SeqCst) > after_num_ops {
58 counter.store(0, Ordering::SeqCst);
59 }
60 tokio::time::sleep(once_every_duration).await;
61 }
62 });
63 }
64 SamplingInterval {
65 once_every_duration,
66 after_num_ops,
67 counter,
68 }
69 }
70 pub fn new_from_self(&self) -> SamplingInterval {
71 SamplingInterval::new(self.once_every_duration, self.after_num_ops)
72 }
73 pub fn sample(&self) -> bool {
74 if self.once_every_duration.is_zero() {
75 self.counter.fetch_add(1, Ordering::Relaxed) % (self.after_num_ops + 1) == 0
76 } else {
77 self.counter.fetch_add(1, Ordering::Relaxed) == 0
78 }
79 }
80}
81
/// Integer gauges describing the state of a RocksDB column family.
///
/// `ColumnFamilyMetrics::new` registers each gauge under the field's own name with a
/// single `cf_name` label; the help text for each gauge is given there.
#[derive(Debug)]
pub struct ColumnFamilyMetrics {
    pub rocksdb_total_sst_files_size: IntGaugeVec,
    pub rocksdb_total_blob_files_size: IntGaugeVec,
    pub rocksdb_total_num_files: IntGaugeVec,
    pub rocksdb_num_level0_files: IntGaugeVec,
    pub rocksdb_current_size_active_mem_tables: IntGaugeVec,
    pub rocksdb_size_all_mem_tables: IntGaugeVec,
    pub rocksdb_num_snapshots: IntGaugeVec,
    pub rocksdb_oldest_snapshot_time: IntGaugeVec,
    pub rocksdb_actual_delayed_write_rate: IntGaugeVec,
    pub rocksdb_is_write_stopped: IntGaugeVec,
    pub rocksdb_block_cache_capacity: IntGaugeVec,
    pub rocksdb_block_cache_usage: IntGaugeVec,
    pub rocksdb_block_cache_pinned_usage: IntGaugeVec,
    pub rocksdb_estimate_table_readers_mem: IntGaugeVec,
    pub rocksdb_num_immutable_mem_tables: IntGaugeVec,
    pub rocksdb_mem_table_flush_pending: IntGaugeVec,
    pub rocksdb_compaction_pending: IntGaugeVec,
    pub rocksdb_estimate_pending_compaction_bytes: IntGaugeVec,
    pub rocksdb_num_running_compactions: IntGaugeVec,
    pub rocksdb_num_running_flushes: IntGaugeVec,
    pub rocksdb_estimate_oldest_key_time: IntGaugeVec,
    pub rocksdb_background_errors: IntGaugeVec,
    pub rocksdb_estimated_num_keys: IntGaugeVec,
    pub rocksdb_base_level: IntGaugeVec,
}
109
110impl ColumnFamilyMetrics {
111 pub(crate) fn new(registry: &Registry) -> Self {
112 ColumnFamilyMetrics {
113 rocksdb_total_sst_files_size: register_int_gauge_vec_with_registry!(
114 "rocksdb_total_sst_files_size",
115 "The storage size occupied by the sst files in the column family",
116 &["cf_name"],
117 registry,
118 )
119 .unwrap(),
120 rocksdb_total_blob_files_size: register_int_gauge_vec_with_registry!(
121 "rocksdb_total_blob_files_size",
122 "The storage size occupied by the blob files in the column family",
123 &["cf_name"],
124 registry,
125 )
126 .unwrap(),
127 rocksdb_total_num_files: register_int_gauge_vec_with_registry!(
128 "rocksdb_total_num_files",
129 "Total number of files used in the column family",
130 &["cf_name"],
131 registry,
132 )
133 .unwrap(),
134 rocksdb_num_level0_files: register_int_gauge_vec_with_registry!(
135 "rocksdb_num_level0_files",
136 "Number of level 0 files in the column family",
137 &["cf_name"],
138 registry,
139 )
140 .unwrap(),
141 rocksdb_current_size_active_mem_tables: register_int_gauge_vec_with_registry!(
142 "rocksdb_current_size_active_mem_tables",
143 "The current approximate size of active memtable (bytes).",
144 &["cf_name"],
145 registry,
146 )
147 .unwrap(),
148 rocksdb_size_all_mem_tables: register_int_gauge_vec_with_registry!(
149 "rocksdb_size_all_mem_tables",
150 "The memory size occupied by the column family's in-memory buffer",
151 &["cf_name"],
152 registry,
153 )
154 .unwrap(),
155 rocksdb_num_snapshots: register_int_gauge_vec_with_registry!(
156 "rocksdb_num_snapshots",
157 "Number of snapshots held for the column family",
158 &["cf_name"],
159 registry,
160 )
161 .unwrap(),
162 rocksdb_oldest_snapshot_time: register_int_gauge_vec_with_registry!(
163 "rocksdb_oldest_snapshot_time",
164 "Unit timestamp of the oldest unreleased snapshot",
165 &["cf_name"],
166 registry,
167 )
168 .unwrap(),
169 rocksdb_actual_delayed_write_rate: register_int_gauge_vec_with_registry!(
170 "rocksdb_actual_delayed_write_rate",
171 "The current actual delayed write rate. 0 means no delay",
172 &["cf_name"],
173 registry,
174 )
175 .unwrap(),
176 rocksdb_is_write_stopped: register_int_gauge_vec_with_registry!(
177 "rocksdb_is_write_stopped",
178 "A flag indicating whether writes are stopped on this column family. 1 indicates writes have been stopped.",
179 &["cf_name"],
180 registry,
181 )
182 .unwrap(),
183 rocksdb_block_cache_capacity: register_int_gauge_vec_with_registry!(
184 "rocksdb_block_cache_capacity",
185 "The block cache capacity of the column family.",
186 &["cf_name"],
187 registry,
188 )
189 .unwrap(),
190 rocksdb_block_cache_usage: register_int_gauge_vec_with_registry!(
191 "rocksdb_block_cache_usage",
192 "The memory size used by the column family in the block cache.",
193 &["cf_name"],
194 registry,
195 )
196 .unwrap(),
197 rocksdb_block_cache_pinned_usage: register_int_gauge_vec_with_registry!(
198 "rocksdb_block_cache_pinned_usage",
199 "The memory size used by the column family in the block cache where entries are pinned",
200 &["cf_name"],
201 registry,
202 )
203 .unwrap(),
204 rocksdb_estimate_table_readers_mem: register_int_gauge_vec_with_registry!(
205 "rocksdb_estimate_table_readers_mem",
206 "The estimated memory size used for reading SST tables in this column
207 family such as filters and index blocks. Note that this number does not
208 include the memory used in block cache.",
209 &["cf_name"],
210 registry,
211 )
212 .unwrap(),
213 rocksdb_num_immutable_mem_tables: register_int_gauge_vec_with_registry!(
214 "rocksdb_num_immutable_mem_tables",
215 "The number of immutable memtables that have not yet been flushed.",
216 &["cf_name"],
217 registry,
218 )
219 .unwrap(),
220 rocksdb_mem_table_flush_pending: register_int_gauge_vec_with_registry!(
221 "rocksdb_mem_table_flush_pending",
222 "A 1 or 0 flag indicating whether a memtable flush is pending.
223 If this number is 1, it means a memtable is waiting for being flushed,
224 but there might be too many L0 files that prevents it from being flushed.",
225 &["cf_name"],
226 registry,
227 )
228 .unwrap(),
229 rocksdb_compaction_pending: register_int_gauge_vec_with_registry!(
230 "rocksdb_compaction_pending",
231 "A 1 or 0 flag indicating whether a compaction job is pending.
232 If this number is 1, it means some part of the column family requires
233 compaction in order to maintain shape of LSM tree, but the compaction
234 is pending because the desired compaction job is either waiting for
235 other dependent compactions to be finished or waiting for an available
236 compaction thread.",
237 &["cf_name"],
238 registry,
239 )
240 .unwrap(),
241 rocksdb_estimate_pending_compaction_bytes: register_int_gauge_vec_with_registry!(
242 "rocksdb_estimate_pending_compaction_bytes",
243 "Estimated total number of bytes compaction needs to rewrite to get all levels down
244 to under target size. Not valid for other compactions than level-based.",
245 &["cf_name"],
246 registry,
247 )
248 .unwrap(),
249 rocksdb_num_running_compactions: register_int_gauge_vec_with_registry!(
250 "rocksdb_num_running_compactions",
251 "The number of compactions that are currently running for the column family.",
252 &["cf_name"],
253 registry,
254 )
255 .unwrap(),
256 rocksdb_num_running_flushes: register_int_gauge_vec_with_registry!(
257 "rocksdb_num_running_flushes",
258 "The number of flushes that are currently running for the column family.",
259 &["cf_name"],
260 registry,
261 )
262 .unwrap(),
263 rocksdb_estimate_oldest_key_time: register_int_gauge_vec_with_registry!(
264 "rocksdb_estimate_oldest_key_time",
265 "Estimation of the oldest key timestamp in the DB. Only available
266 for FIFO compaction with compaction_options_fifo.allow_compaction = false.",
267 &["cf_name"],
268 registry,
269 )
270 .unwrap(),
271 rocksdb_estimated_num_keys: register_int_gauge_vec_with_registry!(
272 "rocksdb_estimated_num_keys",
273 "The estimated number of keys in the table",
274 &["cf_name"],
275 registry,
276 )
277 .unwrap(),
278 rocksdb_background_errors: register_int_gauge_vec_with_registry!(
279 "rocksdb_background_errors",
280 "The accumulated number of RocksDB background errors.",
281 &["cf_name"],
282 registry,
283 )
284 .unwrap(),
285 rocksdb_base_level: register_int_gauge_vec_with_registry!(
286 "rocksdb_base_level",
287 "The number of level to which L0 data will be compacted.",
288 &["cf_name"],
289 registry,
290 )
291 .unwrap(),
292 }
293 }
294}
295
/// Histograms, counters and a gauge describing RocksDB operations.
///
/// `OperationMetrics::new` registers each metric with a single label, either `cf_name`
/// or `db_name` depending on the metric.
#[derive(Debug)]
pub struct OperationMetrics {
    pub rocksdb_iter_latency_seconds: HistogramVec,
    pub rocksdb_iter_bytes: HistogramVec,
    pub rocksdb_iter_keys: HistogramVec,
    pub rocksdb_get_latency_seconds: HistogramVec,
    pub rocksdb_get_bytes: HistogramVec,
    pub rocksdb_multiget_latency_seconds: HistogramVec,
    pub rocksdb_multiget_bytes: HistogramVec,
    pub rocksdb_put_latency_seconds: HistogramVec,
    pub rocksdb_put_bytes: HistogramVec,
    pub rocksdb_batch_put_bytes: HistogramVec,
    pub rocksdb_delete_latency_seconds: HistogramVec,
    pub rocksdb_deletes: IntCounterVec,
    // Registered under the name `rocksdb_write_batch_commit_latency_seconds`.
    pub rocksdb_batch_commit_latency_seconds: HistogramVec,
    pub rocksdb_batch_commit_bytes: HistogramVec,
    pub rocksdb_num_active_db_handles: IntGaugeVec,
    // Registered under the name `rocksdb_num_very_slow_batch_writes`.
    pub rocksdb_very_slow_batch_writes_count: IntCounterVec,
    // Registered under the name `rocksdb_very_slow_batch_writes_duration`.
    pub rocksdb_very_slow_batch_writes_duration_ms: IntCounterVec,
    // Registered under the name `rocksdb_num_very_slow_puts`.
    pub rocksdb_very_slow_puts_count: IntCounterVec,
    // Registered under the name `rocksdb_very_slow_puts_duration`.
    pub rocksdb_very_slow_puts_duration_ms: IntCounterVec,
}
318
319impl OperationMetrics {
320 pub(crate) fn new(registry: &Registry) -> Self {
321 OperationMetrics {
322 rocksdb_iter_latency_seconds: register_histogram_vec_with_registry!(
323 "rocksdb_iter_latency_seconds",
324 "Rocksdb iter latency in seconds",
325 &["cf_name"],
326 LATENCY_SEC_BUCKETS.to_vec(),
327 registry,
328 )
329 .unwrap(),
330 rocksdb_iter_bytes: register_histogram_vec_with_registry!(
331 "rocksdb_iter_bytes",
332 "Rocksdb iter size in bytes",
333 &["cf_name"],
334 prometheus::exponential_buckets(1.0, 4.0, 15)
335 .unwrap()
336 .to_vec(),
337 registry,
338 )
339 .unwrap(),
340 rocksdb_iter_keys: register_histogram_vec_with_registry!(
341 "rocksdb_iter_keys",
342 "Rocksdb iter num keys",
343 &["cf_name"],
344 registry,
345 )
346 .unwrap(),
347 rocksdb_get_latency_seconds: register_histogram_vec_with_registry!(
348 "rocksdb_get_latency_seconds",
349 "Rocksdb get latency in seconds",
350 &["cf_name"],
351 LATENCY_SEC_BUCKETS.to_vec(),
352 registry,
353 )
354 .unwrap(),
355 rocksdb_get_bytes: register_histogram_vec_with_registry!(
356 "rocksdb_get_bytes",
357 "Rocksdb get call returned data size in bytes",
358 &["cf_name"],
359 prometheus::exponential_buckets(1.0, 4.0, 15)
360 .unwrap()
361 .to_vec(),
362 registry
363 )
364 .unwrap(),
365 rocksdb_multiget_latency_seconds: register_histogram_vec_with_registry!(
366 "rocksdb_multiget_latency_seconds",
367 "Rocksdb multiget latency in seconds",
368 &["cf_name"],
369 LATENCY_SEC_BUCKETS.to_vec(),
370 registry,
371 )
372 .unwrap(),
373 rocksdb_multiget_bytes: register_histogram_vec_with_registry!(
374 "rocksdb_multiget_bytes",
375 "Rocksdb multiget call returned data size in bytes",
376 &["cf_name"],
377 prometheus::exponential_buckets(1.0, 4.0, 15)
378 .unwrap()
379 .to_vec(),
380 registry,
381 )
382 .unwrap(),
383 rocksdb_put_latency_seconds: register_histogram_vec_with_registry!(
384 "rocksdb_put_latency_seconds",
385 "Rocksdb put latency in seconds",
386 &["cf_name"],
387 LATENCY_SEC_BUCKETS.to_vec(),
388 registry,
389 )
390 .unwrap(),
391 rocksdb_put_bytes: register_histogram_vec_with_registry!(
392 "rocksdb_put_bytes",
393 "Rocksdb put call puts data size in bytes",
394 &["cf_name"],
395 prometheus::exponential_buckets(1.0, 4.0, 15)
396 .unwrap()
397 .to_vec(),
398 registry,
399 )
400 .unwrap(),
401 rocksdb_batch_put_bytes: register_histogram_vec_with_registry!(
402 "rocksdb_batch_put_bytes",
403 "Rocksdb batch put call puts data size in bytes",
404 &["cf_name"],
405 prometheus::exponential_buckets(1.0, 4.0, 15)
406 .unwrap()
407 .to_vec(),
408 registry,
409 )
410 .unwrap(),
411 rocksdb_delete_latency_seconds: register_histogram_vec_with_registry!(
412 "rocksdb_delete_latency_seconds",
413 "Rocksdb delete latency in seconds",
414 &["cf_name"],
415 LATENCY_SEC_BUCKETS.to_vec(),
416 registry,
417 )
418 .unwrap(),
419 rocksdb_deletes: register_int_counter_vec_with_registry!(
420 "rocksdb_deletes",
421 "Rocksdb delete calls",
422 &["cf_name"],
423 registry
424 )
425 .unwrap(),
426 rocksdb_batch_commit_latency_seconds: register_histogram_vec_with_registry!(
427 "rocksdb_write_batch_commit_latency_seconds",
428 "Rocksdb schema batch commit latency in seconds",
429 &["db_name"],
430 LATENCY_SEC_BUCKETS.to_vec(),
431 registry,
432 )
433 .unwrap(),
434 rocksdb_batch_commit_bytes: register_histogram_vec_with_registry!(
435 "rocksdb_batch_commit_bytes",
436 "Rocksdb schema batch commit size in bytes",
437 &["db_name"],
438 prometheus::exponential_buckets(1.0, 4.0, 15)
439 .unwrap()
440 .to_vec(),
441 registry,
442 )
443 .unwrap(),
444 rocksdb_num_active_db_handles: register_int_gauge_vec_with_registry!(
445 "rocksdb_num_active_db_handles",
446 "Number of active db handles",
447 &["db_name"],
448 registry,
449 )
450 .unwrap(),
451 rocksdb_very_slow_batch_writes_count: register_int_counter_vec_with_registry!(
452 "rocksdb_num_very_slow_batch_writes",
453 "Number of batch writes that took more than 1 second",
454 &["db_name"],
455 registry,
456 )
457 .unwrap(),
458 rocksdb_very_slow_batch_writes_duration_ms: register_int_counter_vec_with_registry!(
459 "rocksdb_very_slow_batch_writes_duration",
460 "Total duration of batch writes that took more than 1 second",
461 &["db_name"],
462 registry,
463 )
464 .unwrap(),
465 rocksdb_very_slow_puts_count: register_int_counter_vec_with_registry!(
466 "rocksdb_num_very_slow_puts",
467 "Number of puts that took more than 1 second",
468 &["cf_name"],
469 registry,
470 )
471 .unwrap(),
472 rocksdb_very_slow_puts_duration_ms: register_int_counter_vec_with_registry!(
473 "rocksdb_very_slow_puts_duration",
474 "Total duration of puts that took more than 1 second",
475 &["cf_name"],
476 registry,
477 )
478 .unwrap(),
479 }
480 }
481}
482
/// Scope guard for RocksDB perf-stat collection.
///
/// Creating one through `Default` enables perf stats at `PerfStatsLevel::EnableTime` and
/// resets the thread-local perf context; dropping it sets the level back to
/// `PerfStatsLevel::Disable`.
pub struct RocksDBPerfContext;
484
485impl Default for RocksDBPerfContext {
486 fn default() -> Self {
487 set_perf_stats(PerfStatsLevel::EnableTime);
488 PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context| {
489 perf_context.borrow_mut().reset();
490 });
491 RocksDBPerfContext {}
492 }
493}
494
495impl Drop for RocksDBPerfContext {
496 fn drop(&mut self) {
497 set_perf_stats(PerfStatsLevel::Disable);
498 }
499}
500
/// Counters fed from the thread-local RocksDB perf context on the read path.
///
/// Each counter is registered by `ReadPerfContextMetrics::new` under the field's own name
/// with a single `cf_name` label, and is incremented by `report_metrics`.
#[derive(Debug)]
pub struct ReadPerfContextMetrics {
    pub user_key_comparison_count: IntCounterVec,
    pub block_cache_hit_count: IntCounterVec,
    pub block_read_count: IntCounterVec,
    pub block_read_byte: IntCounterVec,
    pub block_read_nanos: IntCounterVec,
    pub block_checksum_nanos: IntCounterVec,
    pub block_decompress_nanos: IntCounterVec,
    pub get_read_bytes: IntCounterVec,
    pub multiget_read_bytes: IntCounterVec,
    pub get_snapshot_nanos: IntCounterVec,
    pub get_from_memtable_nanos: IntCounterVec,
    pub get_from_memtable_count: IntCounterVec,
    pub get_post_process_nanos: IntCounterVec,
    pub get_from_output_files_nanos: IntCounterVec,
    pub db_mutex_lock_nanos: IntCounterVec,
    pub db_condition_wait_nanos: IntCounterVec,
    pub merge_operator_nanos: IntCounterVec,
    pub read_index_block_nanos: IntCounterVec,
    pub read_filter_block_nanos: IntCounterVec,
    pub new_table_block_iter_nanos: IntCounterVec,
    pub block_seek_nanos: IntCounterVec,
    pub find_table_nanos: IntCounterVec,
    pub bloom_memtable_hit_count: IntCounterVec,
    pub bloom_memtable_miss_count: IntCounterVec,
    pub bloom_sst_hit_count: IntCounterVec,
    pub bloom_sst_miss_count: IntCounterVec,
    pub key_lock_wait_time: IntCounterVec,
    pub key_lock_wait_count: IntCounterVec,
    pub internal_delete_skipped_count: IntCounterVec,
    pub internal_skipped_count: IntCounterVec,
}
534
535impl ReadPerfContextMetrics {
536 pub(crate) fn new(registry: &Registry) -> Self {
537 ReadPerfContextMetrics {
538 user_key_comparison_count: register_int_counter_vec_with_registry!(
539 "user_key_comparison_count",
540 "Helps us figure out whether too many comparisons in binary search can be a problem,
541 especially when a more expensive comparator is used. Moreover, since number of comparisons
542 is usually uniform based on the memtable size, the SST file size for Level 0 and size of other
543 levels, an significant increase of the counter can indicate unexpected LSM-tree shape.
544 You may want to check whether flush/compaction can keep up with the write speed",
545 &["cf_name"],
546 registry,
547 )
548 .unwrap(),
549 block_cache_hit_count: register_int_counter_vec_with_registry!(
550 "block_cache_hit_count",
551 "Tells us how many times we read data blocks from block cache, and block_read_count tells us how many
552 times we have to read blocks from the file system (either block cache is disabled or it is a cache miss).
553 We can evaluate the block cache efficiency by looking at the two counters over time.",
554 &["cf_name"],
555 registry,
556 )
557 .unwrap(),
558 block_read_count: register_int_counter_vec_with_registry!(
559 "block_read_count",
560 "Tells us how many times we have to read blocks from the file system (either block cache is disabled or it is a cache miss)",
561 &["cf_name"],
562 registry,
563 )
564 .unwrap(),
565 block_read_byte: register_int_counter_vec_with_registry!(
566 "block_read_byte",
567 "Tells us how many total bytes we read from the file system. It can tell us whether a slow query can be caused by reading
568 large blocks from the file system. Index and bloom filter blocks are usually large blocks. A large block can also be the result
569 of a very large key or value",
570 &["cf_name"],
571 registry,
572 )
573 .unwrap(),
574 block_read_nanos: register_int_counter_vec_with_registry!(
575 "block_read_nanos",
576 "Total nanos spent on block reads",
577 &["cf_name"],
578 registry,
579 )
580 .unwrap(),
581 block_checksum_nanos: register_int_counter_vec_with_registry!(
582 "block_checksum_nanos",
583 "Total nanos spent on verifying block checksum",
584 &["cf_name"],
585 registry,
586 )
587 .unwrap(),
588 block_decompress_nanos: register_int_counter_vec_with_registry!(
589 "block_decompress_nanos",
590 "Total nanos spent on decompressing a block",
591 &["cf_name"],
592 registry,
593 )
594 .unwrap(),
595 get_read_bytes: register_int_counter_vec_with_registry!(
596 "get_read_bytes",
597 "Total bytes for values returned by Get",
598 &["cf_name"],
599 registry,
600 )
601 .unwrap(),
602 multiget_read_bytes: register_int_counter_vec_with_registry!(
603 "multiget_read_bytes",
604 "Total bytes for values returned by MultiGet.",
605 &["cf_name"],
606 registry,
607 )
608 .unwrap(),
609 get_snapshot_nanos: register_int_counter_vec_with_registry!(
610 "get_snapshot_nanos",
611 "Time spent in getting snapshot.",
612 &["cf_name"],
613 registry,
614 )
615 .unwrap(),
616 get_from_memtable_nanos: register_int_counter_vec_with_registry!(
617 "get_from_memtable_nanos",
618 "Time spent on reading data from memtable.",
619 &["cf_name"],
620 registry,
621 )
622 .unwrap(),
623 get_from_memtable_count: register_int_counter_vec_with_registry!(
624 "get_from_memtable_count",
625 "Number of memtables queried",
626 &["cf_name"],
627 registry,
628 )
629 .unwrap(),
630 get_post_process_nanos: register_int_counter_vec_with_registry!(
631 "get_post_process_nanos",
632 "Total nanos spent after Get() finds a key",
633 &["cf_name"],
634 registry,
635 )
636 .unwrap(),
637 get_from_output_files_nanos: register_int_counter_vec_with_registry!(
638 "get_from_output_files_nanos",
639 "Total nanos reading from output files",
640 &["cf_name"],
641 registry,
642 )
643 .unwrap(),
644 db_mutex_lock_nanos: register_int_counter_vec_with_registry!(
645 "db_mutex_lock_nanos",
646 "Time spent on acquiring db mutex",
647 &["cf_name"],
648 registry,
649 )
650 .unwrap(),
651 db_condition_wait_nanos: register_int_counter_vec_with_registry!(
652 "db_condition_wait_nanos",
653 "Time spent waiting with a condition variable created with DB Mutex.",
654 &["cf_name"],
655 registry,
656 )
657 .unwrap(),
658 merge_operator_nanos: register_int_counter_vec_with_registry!(
659 "merge_operator_nanos",
660 "Time spent on merge operator.",
661 &["cf_name"],
662 registry,
663 )
664 .unwrap(),
665 read_index_block_nanos: register_int_counter_vec_with_registry!(
666 "read_index_block_nanos",
667 "Time spent on reading index block from block cache or SST file",
668 &["cf_name"],
669 registry,
670 )
671 .unwrap(),
672 read_filter_block_nanos: register_int_counter_vec_with_registry!(
673 "read_filter_block_nanos",
674 "Time spent on reading filter block from block cache or SST file",
675 &["cf_name"],
676 registry,
677 )
678 .unwrap(),
679 new_table_block_iter_nanos: register_int_counter_vec_with_registry!(
680 "new_table_block_iter_nanos",
681 "Time spent on creating data block iterator",
682 &["cf_name"],
683 registry,
684 )
685 .unwrap(),
686 block_seek_nanos: register_int_counter_vec_with_registry!(
687 "block_seek_nanos",
688 "Time spent on seeking a key in data/index blocks",
689 &["cf_name"],
690 registry,
691 )
692 .unwrap(),
693 find_table_nanos: register_int_counter_vec_with_registry!(
694 "find_table_nanos",
695 "Time spent on finding or creating a table reader",
696 &["cf_name"],
697 registry,
698 )
699 .unwrap(),
700 bloom_memtable_hit_count: register_int_counter_vec_with_registry!(
701 "bloom_memtable_hit_count",
702 "Total number of mem table bloom hits",
703 &["cf_name"],
704 registry,
705 )
706 .unwrap(),
707 bloom_memtable_miss_count: register_int_counter_vec_with_registry!(
708 "bloom_memtable_miss_count",
709 "Total number of mem table bloom misses",
710 &["cf_name"],
711 registry,
712 )
713 .unwrap(),
714 bloom_sst_hit_count: register_int_counter_vec_with_registry!(
715 "bloom_sst_hit_count",
716 "Total number of SST table bloom hits",
717 &["cf_name"],
718 registry,
719 )
720 .unwrap(),
721 bloom_sst_miss_count: register_int_counter_vec_with_registry!(
722 "bloom_sst_miss_count",
723 "Total number of SST table bloom misses",
724 &["cf_name"],
725 registry,
726 )
727 .unwrap(),
728 key_lock_wait_time: register_int_counter_vec_with_registry!(
729 "key_lock_wait_time",
730 "Time spent waiting on key locks in transaction lock manager",
731 &["cf_name"],
732 registry,
733 )
734 .unwrap(),
735 key_lock_wait_count: register_int_counter_vec_with_registry!(
736 "key_lock_wait_count",
737 "Number of times acquiring a lock was blocked by another transaction",
738 &["cf_name"],
739 registry,
740 )
741 .unwrap(),
742 internal_delete_skipped_count: register_int_counter_vec_with_registry!(
743 "internal_delete_skipped_count",
744 "Total number of deleted keys skipped during iteration",
745 &["cf_name"],
746 registry,
747 )
748 .unwrap(),
749 internal_skipped_count: register_int_counter_vec_with_registry!(
750 "internal_skipped_count",
751 "Totall number of internal keys skipped during iteration",
752 &["cf_name"],
753 registry,
754 )
755 .unwrap(),
756 }
757 }
758
759 pub fn report_metrics(&self, cf_name: &str) {
760 PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context_cell| {
761 set_perf_stats(PerfStatsLevel::Disable);
762 let perf_context = perf_context_cell.borrow();
763 self.user_key_comparison_count
764 .with_label_values(&[cf_name])
765 .inc_by(perf_context.metric(PerfMetric::UserKeyComparisonCount));
766 self.block_cache_hit_count
767 .with_label_values(&[cf_name])
768 .inc_by(perf_context.metric(PerfMetric::BlockCacheHitCount));
769 self.block_read_count
770 .with_label_values(&[cf_name])
771 .inc_by(perf_context.metric(PerfMetric::BlockReadCount));
772 self.block_read_byte
773 .with_label_values(&[cf_name])
774 .inc_by(perf_context.metric(PerfMetric::BlockReadByte));
775 self.block_read_nanos
776 .with_label_values(&[cf_name])
777 .inc_by(perf_context.metric(PerfMetric::BlockReadTime));
778 self.block_read_count
779 .with_label_values(&[cf_name])
780 .inc_by(perf_context.metric(PerfMetric::BlockReadCount));
781 self.block_checksum_nanos
782 .with_label_values(&[cf_name])
783 .inc_by(perf_context.metric(PerfMetric::BlockChecksumTime));
784 self.block_decompress_nanos
785 .with_label_values(&[cf_name])
786 .inc_by(perf_context.metric(PerfMetric::BlockDecompressTime));
787 self.get_read_bytes
788 .with_label_values(&[cf_name])
789 .inc_by(perf_context.metric(PerfMetric::GetReadBytes));
790 self.multiget_read_bytes
791 .with_label_values(&[cf_name])
792 .inc_by(perf_context.metric(PerfMetric::MultigetReadBytes));
793 self.get_snapshot_nanos
794 .with_label_values(&[cf_name])
795 .inc_by(perf_context.metric(PerfMetric::GetSnapshotTime));
796 self.get_from_memtable_nanos
797 .with_label_values(&[cf_name])
798 .inc_by(perf_context.metric(PerfMetric::GetFromMemtableTime));
799 self.get_from_memtable_count
800 .with_label_values(&[cf_name])
801 .inc_by(perf_context.metric(PerfMetric::GetFromMemtableCount));
802 self.get_post_process_nanos
803 .with_label_values(&[cf_name])
804 .inc_by(perf_context.metric(PerfMetric::GetPostProcessTime));
805 self.get_from_output_files_nanos
806 .with_label_values(&[cf_name])
807 .inc_by(perf_context.metric(PerfMetric::GetFromOutputFilesTime));
808 self.db_mutex_lock_nanos
809 .with_label_values(&[cf_name])
810 .inc_by(perf_context.metric(PerfMetric::DbMutexLockNanos));
811 self.db_condition_wait_nanos
812 .with_label_values(&[cf_name])
813 .inc_by(perf_context.metric(PerfMetric::DbConditionWaitNanos));
814 self.merge_operator_nanos
815 .with_label_values(&[cf_name])
816 .inc_by(perf_context.metric(PerfMetric::MergeOperatorTimeNanos));
817 self.read_index_block_nanos
818 .with_label_values(&[cf_name])
819 .inc_by(perf_context.metric(PerfMetric::ReadIndexBlockNanos));
820 self.read_filter_block_nanos
821 .with_label_values(&[cf_name])
822 .inc_by(perf_context.metric(PerfMetric::ReadFilterBlockNanos));
823 self.new_table_block_iter_nanos
824 .with_label_values(&[cf_name])
825 .inc_by(perf_context.metric(PerfMetric::NewTableBlockIterNanos));
826 self.block_seek_nanos
827 .with_label_values(&[cf_name])
828 .inc_by(perf_context.metric(PerfMetric::BlockSeekNanos));
829 self.find_table_nanos
830 .with_label_values(&[cf_name])
831 .inc_by(perf_context.metric(PerfMetric::FindTableNanos));
832 self.bloom_memtable_hit_count
833 .with_label_values(&[cf_name])
834 .inc_by(perf_context.metric(PerfMetric::BloomMemtableHitCount));
835 self.bloom_memtable_miss_count
836 .with_label_values(&[cf_name])
837 .inc_by(perf_context.metric(PerfMetric::BloomMemtableMissCount));
838 self.bloom_sst_hit_count
839 .with_label_values(&[cf_name])
840 .inc_by(perf_context.metric(PerfMetric::BloomSstHitCount));
841 self.bloom_sst_miss_count
842 .with_label_values(&[cf_name])
843 .inc_by(perf_context.metric(PerfMetric::BloomSstMissCount));
844 self.key_lock_wait_time
845 .with_label_values(&[cf_name])
846 .inc_by(perf_context.metric(PerfMetric::KeyLockWaitTime));
847 self.key_lock_wait_count
848 .with_label_values(&[cf_name])
849 .inc_by(perf_context.metric(PerfMetric::KeyLockWaitCount));
850 self.internal_delete_skipped_count
851 .with_label_values(&[cf_name])
852 .inc_by(perf_context.metric(PerfMetric::InternalDeleteSkippedCount));
853 self.internal_skipped_count
854 .with_label_values(&[cf_name])
855 .inc_by(perf_context.metric(PerfMetric::InternalKeySkippedCount));
856 });
857 }
858}
859
/// Counters fed from the thread-local RocksDB perf context on the write path.
///
/// Registered by `WritePerfContextMetrics::new` and incremented by `report_metrics`.
// NOTE(review): the label is registered as `cf_name`, but `report_metrics` fills it with
// its `db_name` argument. Confirm which of the two is intended.
#[derive(Debug)]
pub struct WritePerfContextMetrics {
    pub write_wal_nanos: IntCounterVec,
    pub write_memtable_nanos: IntCounterVec,
    pub write_delay_nanos: IntCounterVec,
    pub write_pre_and_post_process_nanos: IntCounterVec,
    pub write_db_mutex_lock_nanos: IntCounterVec,
    pub write_db_condition_wait_nanos: IntCounterVec,
    // Registered under the name `write_key_lock_wait_time`.
    pub write_key_lock_wait_nanos: IntCounterVec,
    pub write_key_lock_wait_count: IntCounterVec,
}
871
872impl WritePerfContextMetrics {
873 pub(crate) fn new(registry: &Registry) -> Self {
874 WritePerfContextMetrics {
875 write_wal_nanos: register_int_counter_vec_with_registry!(
876 "write_wal_nanos",
877 "Total nanos spent on writing to WAL",
878 &["cf_name"],
879 registry,
880 )
881 .unwrap(),
882 write_memtable_nanos: register_int_counter_vec_with_registry!(
883 "write_memtable_nanos",
884 "Total nanos spent on writing to memtable",
885 &["cf_name"],
886 registry,
887 )
888 .unwrap(),
889 write_delay_nanos: register_int_counter_vec_with_registry!(
890 "write_delay_nanos",
891 "Total nanos spent on delaying or throttling write",
892 &["cf_name"],
893 registry,
894 )
895 .unwrap(),
896 write_pre_and_post_process_nanos: register_int_counter_vec_with_registry!(
897 "write_pre_and_post_process_nanos",
898 "Total nanos spent on writing a record, excluding the above four things",
899 &["cf_name"],
900 registry,
901 )
902 .unwrap(),
903 write_db_mutex_lock_nanos: register_int_counter_vec_with_registry!(
904 "write_db_mutex_lock_nanos",
905 "Time spent on acquiring db mutex",
906 &["cf_name"],
907 registry,
908 )
909 .unwrap(),
910 write_db_condition_wait_nanos: register_int_counter_vec_with_registry!(
911 "write_db_condition_wait_nanos",
912 "Time spent waiting with a condition variable created with DB Mutex.",
913 &["cf_name"],
914 registry,
915 )
916 .unwrap(),
917 write_key_lock_wait_nanos: register_int_counter_vec_with_registry!(
918 "write_key_lock_wait_time",
919 "Time spent waiting on key locks in transaction lock manager",
920 &["cf_name"],
921 registry,
922 )
923 .unwrap(),
924 write_key_lock_wait_count: register_int_counter_vec_with_registry!(
925 "write_key_lock_wait_count",
926 "Number of times acquiring a lock was blocked by another transaction",
927 &["cf_name"],
928 registry,
929 )
930 .unwrap(),
931 }
932 }
933 pub fn report_metrics(&self, db_name: &str) {
934 PER_THREAD_ROCKS_PERF_CONTEXT.with(|perf_context_cell| {
935 set_perf_stats(PerfStatsLevel::Disable);
936 let perf_context = perf_context_cell.borrow();
937 self.write_wal_nanos
938 .with_label_values(&[db_name])
939 .inc_by(perf_context.metric(PerfMetric::WriteWalTime));
940 self.write_memtable_nanos
941 .with_label_values(&[db_name])
942 .inc_by(perf_context.metric(PerfMetric::WriteMemtableTime));
943 self.write_delay_nanos
944 .with_label_values(&[db_name])
945 .inc_by(perf_context.metric(PerfMetric::WriteDelayTime));
946 self.write_pre_and_post_process_nanos
947 .with_label_values(&[db_name])
948 .inc_by(perf_context.metric(PerfMetric::WritePreAndPostProcessTime));
949 self.write_db_mutex_lock_nanos
950 .with_label_values(&[db_name])
951 .inc_by(perf_context.metric(PerfMetric::DbMutexLockNanos));
952 self.write_db_condition_wait_nanos
953 .with_label_values(&[db_name])
954 .inc_by(perf_context.metric(PerfMetric::DbConditionWaitNanos));
955 self.write_key_lock_wait_nanos
956 .with_label_values(&[db_name])
957 .inc_by(perf_context.metric(PerfMetric::KeyLockWaitTime));
958 self.write_key_lock_wait_count
959 .with_label_values(&[db_name])
960 .inc_by(perf_context.metric(PerfMetric::KeyLockWaitCount));
961 });
962 }
963}
964
/// Bundle of all metric groups defined in this file.
///
/// A shared instance is obtained through `DBMetrics::init` or `DBMetrics::get`.
#[derive(Debug)]
pub struct DBMetrics {
    /// Per-operation histograms, counters and the active-handle gauge.
    pub op_metrics: OperationMetrics,
    /// Column-family state gauges.
    pub cf_metrics: ColumnFamilyMetrics,
    /// Read-path perf-context counters.
    pub read_perf_ctx_metrics: ReadPerfContextMetrics,
    /// Write-path perf-context counters.
    pub write_perf_ctx_metrics: WritePerfContextMetrics,
}
972
/// Process-wide `DBMetrics` instance, installed by `DBMetrics::init` and read by
/// `DBMetrics::get`. Once set it is never replaced.
static ONCE: OnceCell<Arc<DBMetrics>> = OnceCell::new();
974
975impl DBMetrics {
976 fn new(registry: &Registry) -> Self {
977 DBMetrics {
978 op_metrics: OperationMetrics::new(registry),
979 cf_metrics: ColumnFamilyMetrics::new(registry),
980 read_perf_ctx_metrics: ReadPerfContextMetrics::new(registry),
981 write_perf_ctx_metrics: WritePerfContextMetrics::new(registry),
982 }
983 }
984 pub fn init(registry: &Registry) -> &'static Arc<DBMetrics> {
985 let _ = ONCE
993 .set(Arc::new(DBMetrics::new(registry)))
994 .tap_err(|_| warn!("DBMetrics registry overwritten"));
996 ONCE.get().unwrap()
997 }
998 pub fn increment_num_active_dbs(&self, db_name: &str) {
999 self.op_metrics
1000 .rocksdb_num_active_db_handles
1001 .with_label_values(&[db_name])
1002 .inc();
1003 }
1004 pub fn decrement_num_active_dbs(&self, db_name: &str) {
1005 self.op_metrics
1006 .rocksdb_num_active_db_handles
1007 .with_label_values(&[db_name])
1008 .dec();
1009 }
1010 pub fn get() -> &'static Arc<DBMetrics> {
1011 ONCE.get()
1012 .unwrap_or_else(|| DBMetrics::init(prometheus::default_registry()))
1013 }
1014}