Fix bug that skips collecting zfs_raw_size for pool, add more info to readme

2023-01-16 19:54:38 -05:00 · 2023-01-16 19:54:38 -05:00 · 57c6b3f2b2
parent d4cee37c5a
commit 57c6b3f2b2
2 changed files with 29 additions and 6 deletions
--- a/README.md
+++ b/README.md
@ -13,4 +13,27 @@ Options:
      --log-level <LOG_LEVEL>        The lowest log level (off, error, warn, info, debug, or trace) [default: info]
  -h, --help                         Print help
  -V, --version                      Print version
-```
+```
+
+## What Metrics Are Exported?
+* `zfs_health` (`enum`): Represents the device's health, which can be `online`, `degraded`, `faulted`, `offline`, `available`, `unavailable` or `removed`. The state is stored in the `state` label; the value is `1` when the device is in that state and `0` when it is not.
+* `zfs_read_errors` (int counter): The number of read errors for this device.
+* `zfs_write_errors` (int counter): The number of write errors for this device.
+* `zfs_checksum_errors` (int counter): The number of checksum errors for this device.
+* `zfs_disk_count` (int counter): The number of disks in this pool or vdev.
+* `zfs_vdev_count` (int counter): The number of vdevs in the pool.
+* `zfs_spare_count` (int counter): The number of spare drives in the pool.
+* `zfs_raw_size` (int counter): The raw size (in bytes) of the device. This is not the actual capacity.
+* `zfs_capacity` (int counter): The capacity (in bytes) of the device.
+* `zfs_available` (int counter): The available bytes of the device.
+* `zfs_read_operations` (int counter): The number of read operations on this device.
+* `zfs_write_operations` (int counter): The number of write operations on this device.
+* `zfs_read_bandwidth` (int counter): The read bandwidth for this device in bytes per second.
+* `zfs_write_bandwidth` (int counter): The write bandwidth for this device in bytes per second.
+
+**Note: the `zpool status` commands use multiples of 1024, not 1000, for unit prefixes (e.g. 1K = 1024).**
+
+There are some common labels for the metrics:
+* `device_name`: The name of the device that this metric is related to.
+* `device_type`: The type of the device. Can be `pool`, `vdev` or `disk`.
+* `pool`: The ZFS pool that this device (`vdev` or `disk`) is a part of.
--- a/src/main.rs
+++ b/src/main.rs
@ -109,7 +109,7 @@ fn register_vdev_stats(vdev: &Vdev, vdev_device: &Device, vdev_name: String, sta
    vdev_device.io_stats.collect_metrics(&vdev_reg)?;
    register_error_stats(&vdev_reg, vdev.error_statistics().clone())?;
    
-    register_intcounter(&vdev_reg, "drive_count", "Total count of drives in this pool or vdev", vdev.disks().len() as u64)?;
+    register_intcounter(&vdev_reg, "disk_count", "Total count of drives in this pool or vdev", vdev.disks().len() as u64)?;

    Ok(vdev_reg)
 }
@ -153,11 +153,11 @@ async fn metrics_endpoint() -> impl Responder {
        register_intcounter(&pool_reg, "spare_count", "The amount of spare drives", pool.spares().len() as u64).unwrap();

        // Calculate the total drive count and register it as a metric.
-        let total_drive_count = IntCounter::new("drive_count", "Total count of drives in this pool or vdev").unwrap();
+        let total_disk_count = IntCounter::new("disk_count", "Total count of drives in this pool or vdev").unwrap();
        for vdev in pool.vdevs().iter() {
-            total_drive_count.inc_by(vdev.disks().len() as u64);
+            total_disk_count.inc_by(vdev.disks().len() as u64);
        }
-        pool_reg.register(Box::new(total_drive_count)).unwrap();
+        pool_reg.register(Box::new(total_disk_count)).unwrap();

        // Register pool health
        registries.extend(register_health(labels.clone(), pool.health().clone()).unwrap());
@ -195,7 +195,7 @@ async fn metrics_endpoint() -> impl Responder {
            // Get the raw size of the pool.
            let output = String::from_utf8(
                Command::new("zpool")
-                    .args(["list", pool.name().as_str(), "-Hp"])
+                    .args(["list", "-Hp", pool.name().as_str()])
                    .output()
                    .expect(&format!("Failure to execute `zpool iostat {} -v 1 2`", pool.name()))
                .stdout).expect(&format!("Failure to convert output of `zpool iostat {} -v 1 2` to utf8.", pool.name()));