paul-buerkner · saudiwin · Jan 21, 2026 · Jan 22, 2026 · Jan 22, 2026 · Jan 22, 2026
diff --git a/NAMESPACE b/NAMESPACE
@@ -631,6 +631,11 @@ export(waic)
 export(weibull)
 export(wiener)
 export(xbeta)
+export(ordbeta)
+export(dordbeta)
+export(pordbeta)
+export(qordbeta)
+export(rordbeta)
 export(zero_inflated_beta)
 export(zero_inflated_beta_binomial)
 export(zero_inflated_binomial)

diff --git a/R/distributions.R b/R/distributions.R
@@ -1742,6 +1742,200 @@ rxbeta <- function(...) {
   betareg::rxbeta(...)
 }
 
+#' Ordered Beta Distribution
+#'
+#' Density, distribution function, quantile function and random generation
+#' for the ordered beta distribution as described in Kubinec (2023).
+#'
+#' @name OrdBeta
+#'
+#' @param x,q vector of quantiles
+#' @param p vector of probabilities
+#' @param n number of observations
+#' @param mu mean parameter (on response scale, i.e., in (0, 1))
+#' @param phi precision parameter of the beta distribution
+#' @param xi cutpoint of boundary at 0 (latent scale)
+#' @param kappa offset to xi for cutpoint of boundary at 1 (i.e., cutpoint = xi + kappa)
+#' @param log,log.p logical; if TRUE, probabilities are given as log(p)
+#' @param lower.tail logical; if TRUE (default), probabilities are
+#'   \eqn{P(X \le x)}, otherwise, \eqn{P(X > x)}
+#'
+#' @details
+#' The ordered beta distribution combines a beta distribution for values
+#' in (0, 1) with point masses at 0 and 1. The probability of each component
+#' is determined by cutpoint thresholds: P(Y=0) = logit^-1(xi - logit(mu)),
+#' P(Y=1) = 1 - logit^-1((xi + kappa) - logit(mu)), and P(0 < Y < 1) is the remaining
+#' probability. For the continuous component, the mean is mu and precision is phi.
+#'
+#' @references
+#' Kubinec, R. (2023). Ordered Beta Regression: A Parsimonious, Well-Fitting
+#' Model for Continuous Data with Lower and Upper Bounds.
+#' \emph{Political Analysis}, 31(4), 519-536.
+#' \doi{10.1017/pan.2022.20}
+#'
+NULL
+
+#' @rdname OrdBeta
+#' @export
+dordbeta <- function(x, mu, phi, xi, coi, log = FALSE) {
+  # Determine output length
+  n <- max(length(x), length(mu))
+  x <- rep_len(x, n)
+  mu <- rep_len(mu, n)
+  phi <- rep_len(phi, n)
+  xi <- rep_len(xi, n)
+  coi <- rep_len(coi, n)
+  # Transform mu to latent scale for threshold comparison
+  mu_latent <- qlogis(mu)
+  # probability of each component
+  pr_zero <- plogis(xi - mu_latent)
+  pr_one <- 1 - plogis(coi - mu_latent)
+  pr_cont <- plogis(coi - mu_latent) - plogis(xi - mu_latent)
+  # compute log-density
+  out <- rep(NA_real_, n)
+  is_zero <- x == 0
+  is_one <- x == 1
+  is_cont <- x > 0 & x < 1
+  if (any(is_zero)) {
+    out[is_zero] <- log(pr_zero[is_zero])
+  }
+  if (any(is_one)) {
+    out[is_one] <- log(pr_one[is_one])
+  }
+  if (any(is_cont)) {
+    out[is_cont] <- log(pr_cont[is_cont]) +
+      dbeta(x[is_cont], mu[is_cont] * phi[is_cont],
+            (1 - mu[is_cont]) * phi[is_cont], log = TRUE)
+  }
+  if (!log) {
+    out <- exp(out)
+  }
+  out
+}
+
+#' @rdname OrdBeta
+#' @export
+pordbeta <- function(q, mu, phi, xi, coi,
+                     lower.tail = TRUE, log.p = FALSE) {
+  # Determine output length
+  n <- max(length(q), length(mu))
+  q <- rep_len(q, n)
+  mu <- rep_len(mu, n)
+  phi <- rep_len(phi, n)
+  xi <- rep_len(xi, n)
+  coi <- rep_len(coi, n)
+  # Transform mu to latent scale for threshold comparison
+  mu_latent <- qlogis(mu)
+  # probability of each component
+  pr_zero <- plogis(xi - mu_latent)
+  pr_one <- 1 - plogis(coi - mu_latent)
+  pr_cont <- plogis(coi - mu_latent) - plogis(xi - mu_latent)
+  # compute CDF
+  out <- rep(NA_real_, n)
+  is_neg <- q < 0
+  is_zero <- q == 0
+  is_cont <- q > 0 & q < 1
+  is_one <- q >= 1
+  if (any(is_neg)) {
+    out[is_neg] <- 0
+  }
+  if (any(is_zero)) {
+    out[is_zero] <- pr_zero[is_zero]
+  }
+  if (any(is_cont)) {
+    out[is_cont] <- pr_zero[is_cont] + pr_cont[is_cont] *
+      pbeta(q[is_cont], mu[is_cont] * phi[is_cont],
+            (1 - mu[is_cont]) * phi[is_cont])
+  }
+  if (any(is_one)) {
+    out[is_one] <- 1
+  }
+  if (!lower.tail) {
+    out <- 1 - out
+  }
+  if (log.p) {
+    out <- log(out)
+  }
+  out
+}
+
+#' @rdname OrdBeta
+#' @export
+qordbeta <- function(p, mu, phi, xi, coi,
+                     lower.tail = TRUE, log.p = FALSE) {
+  if (log.p) {
+    p <- exp(p)
+  }
+  if (!lower.tail) {
+    p <- 1 - p
+  }
+  # Determine output length
+  n <- max(length(p), length(mu))
+  p <- rep_len(p, n)
+  mu <- rep_len(mu, n)
+  phi <- rep_len(phi, n)
+  xi <- rep_len(xi, n)
+  coi <- rep_len(coi, n)
+  # Transform mu to latent scale for threshold comparison
+  mu_latent <- qlogis(mu)
+  # probability of each component
+  pr_zero <- plogis(xi - mu_latent)
+  pr_one <- 1 - plogis(coi - mu_latent)
+  pr_cont <- plogis(coi - mu_latent) - plogis(xi - mu_latent)
+  # compute quantile
+  out <- rep(NA_real_, n)
+  is_zero <- p <= pr_zero
+  is_one <- p >= 1 - pr_one
+  is_cont <- !is_zero & !is_one
+  if (any(is_zero)) {
+    out[is_zero] <- 0
+  }
+  if (any(is_one)) {
+    out[is_one] <- 1
+  }
+  if (any(is_cont)) {
+    # rescale p to be within the continuous component
+    p_rescaled <- (p[is_cont] - pr_zero[is_cont]) / pr_cont[is_cont]
+    out[is_cont] <- qbeta(p_rescaled, mu[is_cont] * phi[is_cont],
+                          (1 - mu[is_cont]) * phi[is_cont])
+  }
+  out
+}
+
+#' @rdname OrdBeta
+#' @export
+rordbeta <- function(n, mu, phi, xi, coi) {
+  # recycle parameters to length n
+  mu <- rep_len(mu, n)
+  phi <- rep_len(phi, n)
+  xi <- rep_len(xi, n)
+  coi <- rep_len(coi, n)
+  # Transform mu to latent scale for threshold comparison
+  mu_latent <- qlogis(mu)
+  # probability of each component
+  pr_zero <- plogis(xi - mu_latent)
+  pr_one <- 1 - plogis(coi - mu_latent)
+  pr_cont <- plogis(coi - mu_latent) - plogis(xi - mu_latent)
+  # sample component indicators
+  u <- runif(n)
+  out <- rep(NA_real_, n)
+  is_zero <- u < pr_zero
+  is_one <- u >= 1 - pr_one
+  is_cont <- !is_zero & !is_one
+  if (any(is_zero)) {
+    out[is_zero] <- 0
+  }
+  if (any(is_one)) {
+    out[is_one] <- 1
+  }
+  if (any(is_cont)) {
+    out[is_cont] <- rbeta(sum(is_cont),
+                          mu[is_cont] * phi[is_cont],
+                          (1 - mu[is_cont]) * phi[is_cont])
+  }
+  out
+}
+
 #' Zero-Inflated Distributions
 #'
 #' Density and distribution functions for zero-inflated distributions.

diff --git a/R/families.R b/R/families.R
@@ -23,7 +23,8 @@
 #'   \code{hurdle_gamma}, \code{hurdle_lognormal}, \code{hurdle_cumulative},
 #'   \code{zero_inflated_binomial}, \code{zero_inflated_beta_binomial},
 #'   \code{zero_inflated_beta}, \code{zero_inflated_negbinomial},
-#'   \code{zero_inflated_poisson}, \code{zero_one_inflated_beta}, and \code{xbeta}.
+#'   \code{zero_inflated_poisson}, \code{zero_one_inflated_beta}, \code{xbeta},
+#'   and \code{ordbeta}.
 #' @param link A specification for the model link function. This can be a
 #'   name/expression or character string. See the 'Details' section for more
 #'   information on link functions supported by each family.
@@ -106,6 +107,13 @@
 #'   provide more flexibility.  For details see Kosmidis & Zeileis
 #'   (2024).}
 #'
+#'   \item{Family \code{ordbeta} ('ordered beta') provides an alternative
+#'   approach to handling \code{[0, 1]} responses with exact \code{0}s
+#'   and \code{1}s. It models the response as a mixture of a degenerate
+#'   distribution at 0, a beta distribution for (0,1), and a degenerate
+#'   distribution at 1. The cutpoint parameters control the probability
+#'   of each component. For details see Kubinec (2023).}
+#'
 #'   \item{Family \code{asym_laplace} allows for quantile regression when fixing
 #'   the auxiliary \code{quantile} parameter to the quantile of interest.}
 #'
@@ -193,6 +201,10 @@
 #' Kosmidis I, Zeileis A (2024). Extended-Support Beta Regression for [0, 1] Responses.
 #' \emph{arXiv Preprint}. \doi{10.48550/arXiv.2409.07233}
 #'
+#' Kubinec R (2023). Ordered Beta Regression: A Parsimonious, Well-Fitting
+#' Model for Continuous Data with Lower and Upper Bounds.
+#' \emph{Political Analysis}, 31(4), 519-536. \doi{10.1017/pan.2022.20}
+#'
 #' @seealso
 #'   \code{\link[brms:brm]{brm}},
 #'   \code{\link[stats:family]{family}},
@@ -641,6 +653,15 @@ xbeta <- function(link = "logit", link_phi = "log",
               link_phi = link_phi, link_kappa = link_kappa)
 }
 
+#' @rdname brmsfamily
+#' @export
+ordbeta <- function(link = "logit", link_phi = "log",
+                    link_xi = "identity", link_kappa = "log") {
+  slink <- substitute(link)
+  .brmsfamily("ordbeta", link = link, slink = slink,
+              link_phi = link_phi, link_xi = link_xi, link_kappa = link_kappa)
+}
+
 #' @rdname brmsfamily
 #' @export
 dirichlet <- function(link = "logit", link_phi = "log", refcat = NULL) {
@@ -1622,6 +1643,10 @@ is_cox <- function(family) {
   "cox" %in% family_info(family, "specials")
 }
 
+is_ordbeta <- function(family) {
+  "ordbeta" %in% family_info(family, "specials")
+}
+
 # has joint link function over multiple inputs
 has_joint_link <- function(family) {
   "joint_link" %in% family_info(family, "specials")

diff --git a/R/family-lists.R b/R/family-lists.R
@@ -87,6 +87,38 @@
   )
 }
 
+.family_ordbeta <- function() {
+  list(
+    links = c("logit", "probit", "probit_approx", "cloglog", "cauchit"),
+    dpars = c("mu", "phi", "xi", "kappa"),
+    type = "real",
+    ybounds = c(0, 1),
+    closed = c(TRUE, TRUE),
+    ad = c("weights", "subset", "index"),
+    include = "fun_ordbeta.stan",
+    prior = function(dpar, link = "identity", ...) {
+      if (dpar == "xi") {
+
+        return("student_t(3, 0, 2.5)")
+
+      }
+      if (dpar == "kappa") {
+
+        if(link=='identity') {
+
+          return("gamma(0.01, 0.01)")
+
+        } else {
+          return("student_t(3, 0, 2.5)")
+      } 
+    }  
+      NULL 
+    },
+    specials = c("ordbeta"),
+    normalized = ""
+  )
+}
+
 .family_beta_binomial <- function() {
   list(
     links = c(

diff --git a/R/log_lik.R b/R/log_lik.R
@@ -643,6 +643,19 @@ log_lik_xbeta <- function(i, prep) {
   log_lik_weight(out, i = i, prep = prep)
 }
 
+log_lik_ordbeta <- function(i, prep) {
+  # mu is already on response scale (0-1) after brms applies link function
+  mu <- get_dpar(prep, "mu", i = i)
+  phi <- get_dpar(prep, "phi", i = i)
+  xi <- get_dpar(prep, "xi", i = i)
+  kappa <- get_dpar(prep, "kappa", i = i)
+  # coi = xi + kappa (ensures ordering)
+  coi <- xi + kappa
+  y <- prep$data$Y[i]
+  out <- dordbeta(x = y, mu = mu, phi = phi, xi = xi, coi = coi, log = TRUE)
+  log_lik_weight(out, i = i, prep = prep)
+}
+
 log_lik_von_mises <- function(i, prep) {
   args <- list(
     mu = get_dpar(prep, "mu", i),

diff --git a/R/posterior_epred.R b/R/posterior_epred.R
@@ -465,6 +465,24 @@ posterior_epred_xbeta <- function(prep) {
   1 + t1 - t2 - t3
 }
 
+posterior_epred_ordbeta <- function(prep) {
+  # Based on Kubinec (2023): https://doi.org/10.1017/pan.2022.20
+  # mu is already on response scale (0-1) after brms applies link function
+  mu <- get_dpar(prep, "mu")
+  xi <- get_dpar(prep, "xi")
+  kappa <- get_dpar(prep, "kappa")
+  # coi = xi + kappa (ensures ordering)
+  coi <- xi + kappa
+  # Transform mu to latent scale for threshold comparison
+  mu_latent <- qlogis(mu)
+  # probability of each component
+  pr_zero <- plogis(xi - mu_latent)
+  pr_one <- 1 - plogis(coi - mu_latent)
+  pr_cont <- plogis(coi - mu_latent) - plogis(xi - mu_latent)
+  # expected value: E[Y] = 0 * pr_zero + mu * pr_cont + 1 * pr_one
+  pr_cont * mu + pr_one
+}
+
 posterior_epred_von_mises <- function(prep) {
   prep$dpars$mu
 }

diff --git a/R/posterior_predict.R b/R/posterior_predict.R
@@ -695,6 +695,20 @@ posterior_predict_xbeta <- function(i, prep, ntrys = 5, ...) {
   )
 }
 
+posterior_predict_ordbeta <- function(i, prep, ...) {
+  # mu is already on response scale (0-1) after brms applies link function
+  mu <- get_dpar(prep, "mu", i = i)
+  phi <- get_dpar(prep, "phi", i = i)
+  xi <- get_dpar(prep, "xi", i = i)
+  kappa <- get_dpar(prep, "kappa", i = i)
+  # coi = xi + kappa (ensures ordering)
+  coi <- xi + kappa
+  rordbeta(
+    n = prep$ndraws,
+    mu = mu, phi = phi, xi = xi, coi = coi
+  )
+}
+
 posterior_predict_von_mises <- function(i, prep, ntrys = 5, ...) {
   rcontinuous(
     n = prep$ndraws, dist = "von_mises",

diff --git a/R/stan-likelihood.R b/R/stan-likelihood.R
@@ -723,6 +723,11 @@ stan_log_lik_xbeta <- function(bterms, ...) {
   sdist("xbeta", p$mu, p$phi, p$kappa)
 }
 
+stan_log_lik_ordbeta <- function(bterms, ...) {
+  p <- stan_log_lik_dpars(bterms, reqn = TRUE)
+  sdist("ordbeta", p$mu, p$phi, p$xi, p$kappa, vec = FALSE)
+}
+
 stan_log_lik_von_mises <- function(bterms, ...) {
   p <- stan_log_lik_dpars(bterms)
   sdist("von_mises", p$mu, p$kappa)