Fix bug that skips collecting zfs_raw_size for pool, add more info to readme

This commit is contained in:
SeanOMik 2023-01-16 19:54:38 -05:00
parent d4cee37c5a
commit 57c6b3f2b2
Signed by: SeanOMik
GPG Key ID: 568F326C7EB33ACB
2 changed files with 29 additions and 6 deletions

View File

@ -13,4 +13,27 @@ Options:
--log-level <LOG_LEVEL> The lowest log level (off, error, warn, info, debug, or trace) [default: info]
-h, --help Print help
-V, --version Print version
```
```
## What Metrics Are Exported?
* `zfs_health` (`enum`): Represents the device's health. Possible states are `online`, `degraded`, `faulted`, `offline`, `available`, `unavailable` and `removed`. The state is stored in the `state` label; the value is `1` if the device is in that state and `0` if it is not.
* `zfs_read_errors` (int counter): The number of read errors for this device.
* `zfs_write_errors` (int counter): The number of write errors for this device.
* `zfs_checksum_errors` (int counter): The number of checksum errors for this device.
* `zfs_disk_count` (int counter): The number of disks in this pool or vdev.
* `zfs_vdev_count` (int counter): The number of vdevs in the pool.
* `zfs_spare_count` (int counter): The number of spares in the pool.
* `zfs_raw_size` (int counter): The raw size (in bytes) of the device. This is not the actual capacity.
* `zfs_capacity` (int counter): The capacity (in bytes) of the device.
* `zfs_available` (int counter): The available bytes of the device.
* `zfs_read_operations` (int counter): The amount of read operations on this device.
* `zfs_write_operations` (int counter): The amount of write operations on this device.
* `zfs_read_bandwidth` (int counter): The read bandwidth for this device in bytes per second.
* `zfs_write_bandwidth` (int counter): The write bandwidth for this device in bytes per second.
**Note: the `zpool status` commands use multiples of 1024 for sizes, not 1000.**
There are some common labels for the metrics:
* `device_name`: The name of the device that this metric is related to.
* `device_type`: The type of the device. Can be `pool`, `vdev` or `disk`.
* `pool`: The ZFS pool that this device (`vdev` or `disk`) is a part of.

View File

@ -109,7 +109,7 @@ fn register_vdev_stats(vdev: &Vdev, vdev_device: &Device, vdev_name: String, sta
vdev_device.io_stats.collect_metrics(&vdev_reg)?;
register_error_stats(&vdev_reg, vdev.error_statistics().clone())?;
register_intcounter(&vdev_reg, "drive_count", "Total count of drives in this pool or vdev", vdev.disks().len() as u64)?;
register_intcounter(&vdev_reg, "disk_count", "Total count of drives in this pool or vdev", vdev.disks().len() as u64)?;
Ok(vdev_reg)
}
@ -153,11 +153,11 @@ async fn metrics_endpoint() -> impl Responder {
register_intcounter(&pool_reg, "spare_count", "The amount of spare drives", pool.spares().len() as u64).unwrap();
// Calculate the total drive count and register it as a metric.
let total_drive_count = IntCounter::new("drive_count", "Total count of drives in this pool or vdev").unwrap();
let total_disk_count = IntCounter::new("disk_count", "Total count of drives in this pool or vdev").unwrap();
for vdev in pool.vdevs().iter() {
total_drive_count.inc_by(vdev.disks().len() as u64);
total_disk_count.inc_by(vdev.disks().len() as u64);
}
pool_reg.register(Box::new(total_drive_count)).unwrap();
pool_reg.register(Box::new(total_disk_count)).unwrap();
// Register pool health
registries.extend(register_health(labels.clone(), pool.health().clone()).unwrap());
@ -195,7 +195,7 @@ async fn metrics_endpoint() -> impl Responder {
// Get the raw size of the pool.
let output = String::from_utf8(
Command::new("zpool")
.args(["list", pool.name().as_str(), "-Hp"])
.args(["list", "-Hp", pool.name().as_str()])
.output()
.expect(&format!("Failure to execute `zpool iostat {} -v 1 2`", pool.name()))
.stdout).expect(&format!("Failure to convert output of `zpool iostat {} -v 1 2` to utf8.", pool.name()));