Compare metrics between two guardrail evaluations of the same dataset.
Examples
# Two sample inputs, each paired with a logical `expected` label.
eval_data <- data.frame(
  input = c("hello", "DROP TABLE users"),
  expected = c(TRUE, FALSE)
)

# Version 1 returns FALSE for any text containing the literal string "DROP".
guard_v1 <- function(text) {
  !grepl("DROP", text, fixed = TRUE)
}

# Version 2 looks for the longer literal string "DROP TABLE" instead.
guard_v2 <- function(text) {
  !grepl("DROP TABLE", text, fixed = TRUE)
}

# Evaluate each guard on the same data, then compare the two results.
eval_v1 <- guardrail_eval(guard_v1, eval_data)
eval_v2 <- guardrail_eval(guard_v2, eval_data)
guardrail_compare(eval_v1, eval_v2)
#> $delta_precision
#> [1] 0
#>
#> $delta_recall
#> [1] 0
#>
#> $delta_f1
#> [1] 0
#>
#> $delta_accuracy
#> [1] 0
#>
#> $improved
#> [1] 0
#>
#> $regressed
#> [1] 0
#>
#> $unchanged
#> [1] 2
#>