easystats · strengejacke · Aug 28, 2024 · Aug 18, 2024 · Aug 18, 2024 · Aug 18, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: performance
 Title: Assessment of Regression Models Performance
-Version: 0.12.2.12
+Version: 0.12.2.13
 Authors@R:
     c(person(given = "Daniel",
              family = "Lüdecke",
@@ -52,7 +52,11 @@ Authors@R:
              "Bacher", ,
              "[email protected]",
              role = "ctb",
-             comment = c(ORCID = "0000-0002-9271-5075")))
+             comment = c(ORCID = "0000-0002-9271-5075")),
+      person(given = "Joseph",
+             family = "Luchman",
+             role = "ctb",
+             comment = c(ORCID = "0000-0002-8886-9717")))
 Maintainer: Daniel Lüdecke <[email protected]>
 Description: Utilities for computing measures to assess model quality,
     which are not directly provided by R's 'base' or 'stats' packages.

diff --git a/NAMESPACE b/NAMESPACE
@@ -486,6 +486,7 @@ S3method(r2_mcfadden,serp)
 S3method(r2_mcfadden,truncreg)
 S3method(r2_mcfadden,vglm)
 S3method(r2_mckelvey,default)
+S3method(r2_mlm,mlm)
 S3method(r2_nagelkerke,BBreg)
 S3method(r2_nagelkerke,DirichletRegModel)
 S3method(r2_nagelkerke,bife)
@@ -610,6 +611,7 @@ export(r2_loo)
 export(r2_loo_posterior)
 export(r2_mcfadden)
 export(r2_mckelvey)
+export(r2_mlm)
 export(r2_nagelkerke)
 export(r2_nakagawa)
 export(r2_posterior)

diff --git a/R/print-methods.R b/R/print-methods.R
@@ -37,7 +37,7 @@
  }

  # separate lines for multiple R2
  out <- paste0(out, collapse = "\n")

  cat(out)
  cat("\n")
@@ -68,23 +68,39 @@
 #' @export
 print.r2_mlm <- function(x, digits = 3, ...) {
   model_type <- attr(x, "model_type")
-  if (!is.null(model_type)) {
-    insight::print_color(sprintf("# R2 for %s Regression\n\n", model_type), "blue")
+  is_multivar_r2 <- all(names(x) == c("Symmetric Rxy", "Asymmetric Pxy"))
+  if (!is.null(model_type) && !is_multivar_r2) {
+    insight::print_color(
+      sprintf("# R2 for %s Regression\n\n", model_type),
+      "blue"
+    )
+  } else if (!is.null(model_type) && is_multivar_r2) {
+    insight::print_color(
+      sprintf("# Multivariate R2 for %s Regression\n", model_type),
+      "blue"
+    )
   } else {
     insight::print_color("# R2\n\n", "blue")
   }
 
-  for (i in names(x)) {
-    insight::print_color(sprintf("## %s\n", i), "cyan")
-    out <- paste0(
-      c(
-        sprintf("        R2: %.*f", digits, x[[i]]$R2),
-        sprintf("   adj. R2: %.*f", digits, x[[i]]$R2_adjusted)
-      ),
-      collapse = "\n"
-    )
-    cat(out)
+  if (is_multivar_r2) {
+    cat(sprintf(" Symmetric Rxy: %.*f", digits, x[["Symmetric Rxy"]]))
+    cat("\n")
+    cat(sprintf("Asymmetric Pxy: %.*f", digits, x[["Asymmetric Pxy"]]))
     cat("\n\n")
+  } else {
+    for (i in names(x)) {
+      insight::print_color(sprintf("## %s\n", i), "cyan")
+      out <- paste(
+        c(
+          sprintf("        R2: %.*f", digits, x[[i]]$R2),
+          sprintf("   adj. R2: %.*f", digits, x[[i]]$R2_adjusted)
+        ),
+        collapse = "\n"
+      )
+      cat(out)
+      cat("\n\n")
+    }
   }
   invisible(x)
 }
@@ -110,7 +126,7 @@
      ci = attributes(x)$CI$R2_Bayes_marginal$CI,
      digits = digits
    )
    out <- paste0(c(out, sprintf("     Marginal R2: %.*f (%s)", digits, x$R2_Bayes_marginal, r2_marginal_ci)), collapse = "\n")
  }

  cat(out)
@@ -139,7 +155,7 @@
      ci = attributes(x)$CI$R2_loo_marginal$CI,
      digits = digits
    )
    out <- paste0(c(out, sprintf("     Marginal R2: %.*f (%s)", digits, x$R2_loo_marginal, r2_marginal_ci)), collapse = "\n")
  }

  cat(out)

diff --git a/R/r2.R b/R/r2.R
@@ -10,6 +10,9 @@
 #' (`TRUE`) or not (`FALSE`)?
 #' @param ci Confidence interval level, as scalar. If `NULL` (default), no
 #' confidence intervals for R2 are calculated.
+#' @param multivariate Logical. Should multiple R2 values be reported as
+#' separated by response (FALSE) or should a single R2 be reported as
+#' combined across responses computed by [`r2_mlm`] (TRUE).
 #' @param ... Arguments passed down to the related r2-methods.
 #' @inheritParams r2_nakagawa
 #'
@@ -31,7 +34,7 @@
 #' @seealso
 #' [`r2_bayes()`], [`r2_coxsnell()`], [`r2_kullback()`], [`r2_loo()`],
 #' [`r2_mcfadden()`], [`r2_nagelkerke()`], [`r2_nakagawa()`], [`r2_tjur()`],
-#' [`r2_xu()`] and [`r2_zeroinflated()`].
+#' [`r2_xu()`], [`r2_zeroinflated()`], and [`r2_mlm()`].
 #'
 #' @examplesIf require("lme4")
 #' # Pseudo r-quared for GLM
@@ -245,24 +248,29 @@ r2.aov <- function(model, ci = NULL, ...) {
   structure(class = "r2_generic", out)
 }
 
-
+#' @rdname r2
 #' @export
-r2.mlm <- function(model, ...) {
-  model_summary <- summary(model)
+r2.mlm <- function(model, multivariate = TRUE, ...) {
 
-  out <- lapply(names(model_summary), function(i) {
-    tmp <- list(
-      R2 = model_summary[[i]]$r.squared,
-      R2_adjusted = model_summary[[i]]$adj.r.squared,
-      Response = sub("Response ", "", i, fixed = TRUE)
-    )
-    names(tmp$R2) <- "R2"
-    names(tmp$R2_adjusted) <- "adjusted R2"
-    names(tmp$Response) <- "Response"
-    tmp
-  })
+  if (multivariate) {
+    out <- r2_mlm(model)
+  } else {
+    model_summary <- summary(model)
+
+    out <- lapply(names(model_summary), function(i) {
+      tmp <- list(
+        R2 = model_summary[[i]]$r.squared,
+        R2_adjusted = model_summary[[i]]$adj.r.squared,
+        Response = sub("Response ", "", i, fixed = TRUE)
+      )
+      names(tmp$R2) <- "R2"
+      names(tmp$R2_adjusted) <- "adjusted R2"
+      names(tmp$Response) <- "Response"
+      tmp
+    })
 
-  names(out) <- names(model_summary)
+    names(out) <- names(model_summary)
+  }
 
   attr(out, "model_type") <- "Multivariate Linear"
   structure(class = "r2_mlm", out)

diff --git a/R/r2_mlm.R b/R/r2_mlm.R
@@ -0,0 +1,85 @@
+#' @title Multivariate R2
+#' @name r2_mlm
+#'
+#' @description
+#' Calculates two multivariate R2 values for multivariate linear regression.
+#'
+#' @param model Multivariate linear regression model.
+#' @param ... Currently not used.
+#'
+#' @details
+#' The two indexes returned summarize model fit for the set of predictors
+#' given the system of responses. As compared to the default
+#' [r2][performance::r2] index for multivariate linear models, the indexes
+#' returned by this function provide a single fit value collapsed across
+#' all responses.
+#'
+#' The two returned indexes were proposed by *Van den Burg and Lewis (1988)*
+#' as an extension of the metrics proposed by *Cramer and Nicewander (1979)*.
+#' Of the numerous indexes proposed across these two papers, only two metrics,
+#' the \eqn{R_{xy}} and \eqn{P_{xy}}, are recommended for use
+#' by *Azen and Budescu (2006)*.
+#'
+#' For a multivariate linear regression with \eqn{p} predictors and
+#' \eqn{q} responses where \eqn{p > q}, the \eqn{R_{xy}} index is
+#' computed as:
+#'
+#' \deqn{R_{xy} = 1 - \prod_{i=1}^p (1 - \rho_i^2)}
+#'
+#' Where \eqn{\rho} is a canonical variate from a
+#' [canonical correlation][cancor] between the predictors and responses.
+#' This metric is symmetric and its value does not change when the roles of
+#' the variables as predictors or responses are swapped.
+#'
+#' The \eqn{P_{xy}} is computed as:
+#'
+#' \deqn{P_{xy} = \frac{q - trace(\bf{S}_{\bf{YY}}^{-1}\bf{S}_{\bf{YY|X}})}{q}}
+#'
+#' Where \eqn{\bf{S}_{\bf{YY}}} is the matrix of response covariances and
+#' \eqn{\bf{S}_{\bf{YY|X}}} is the matrix of residual covariances given
+#' the predictors. This metric is asymmetric and can change
+#' depending on which variables are considered predictors versus responses.
+#'
+#' @return A named vector with the R2 values.
+#'
+#' @examples
+#' model <- lm(cbind(qsec, drat) ~ wt + mpg + cyl, data = mtcars)
+#' r2_mlm(model)
+#'
+#' model_swap <- lm(cbind(wt, mpg, cyl) ~ qsec + drat, data = mtcars)
+#' r2_mlm(model_swap)
+#'
+#' @references
+#' - Azen, R., & Budescu, D. V. (2006). Comparing predictors in
+#'   multivariate regression models: An extension of dominance analysis.
+#'   Journal of Educational and Behavioral Statistics, 31(2), 157-180.
+#'-  Cramer, E. M., & Nicewander, W. A. (1979). Some symmetric,
+#'   invariant measures of multivariate association. Psychometrika, 44, 43-54.
+#' - Van den Burg, W., & Lewis, C. (1988). Some properties of two
+#'   measures of multivariate association. Psychometrika, 53, 109-122.
+#'
+#' @author Joseph Luchman
+#'
+#' @export
+r2_mlm <- function(model, ...) {
+  UseMethod("r2_mlm")
+}
+
+# methods ---------------------------
+
+#' @export
+r2_mlm.mlm <- function(model, verbose = TRUE, ...) {
+  rho2_vec <- 1 - stats::cancor(
+    insight::get_predictors(model),
+    insight::get_response(model)
+  )$cor^2
+  R_xy  <- 1 - Reduce(`*`, rho2_vec, 1)
+
+  resid_cov <- stats::cov(residuals(model))
+  resp_cov <- stats::cov(insight::get_response(model))
+  qq <- ncol(insight::get_response(model))
+  V_xy <- qq - sum(diag(solve(resp_cov) %*% resid_cov))
+  P_xy <- V_xy / qq
+
+  c("Symmetric Rxy" = R_xy, "Asymmetric Pxy" = P_xy)
+}
diff --git a/inst/WORDLIST b/inst/WORDLIST
@@ -9,6 +9,7 @@ Ankerst
 Archimbaud
 Arel
 Asq
+Azen
 BCI
 BFBayesFactor
 BMJ
@@ -27,6 +28,7 @@ Breunig
 Breusch
 BRM
 Bryk
+Budescu
 Bundock
 Burnham
 Byrne
@@ -40,6 +42,7 @@ CochransQ
 CompQuadForm
 Concurvity
 Confounder
+Cramer
 Cribari
 Cronbach's
 Crujeiras
@@ -162,6 +165,7 @@ Neto's
 Nondegenerate
 Nordhausen
 Normed
+Nicewander
 ORCID
 OSF
 OSX

diff --git a/man/performance-package.Rd b/man/performance-package.Rd
diff --git a/man/r2.Rd b/man/r2.Rd
diff --git a/man/r2_mlm.Rd b/man/r2_mlm.Rd
diff --git a/tests/testthat/test-r2_mlm.R b/tests/testthat/test-r2_mlm.R
@@ -0,0 +1,9 @@
+test_that("r2_mlm_Rxy", {
+  model <- lm(cbind(qsec, drat) ~ wt + mpg, data = mtcars)
+  expect_equal(r2_mlm(model)[["Symmetric Rxy"]], 0.68330688076502, tolerance = 1e-3)
+})
+
+test_that("r2_mlm_Pxy", {
+  model <- lm(cbind(qsec, drat) ~ wt + mpg, data = mtcars)
+  expect_equal(r2_mlm(model)[["Asymmetric Pxy"]], 0.407215267524997, tolerance = 1e-3)
+})