Skip to content
Open
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,11 @@ export(waic)
export(weibull)
export(wiener)
export(xbeta)
export(ordbeta)
export(dordbeta)
export(pordbeta)
export(qordbeta)
export(rordbeta)
export(zero_inflated_beta)
export(zero_inflated_beta_binomial)
export(zero_inflated_binomial)
Expand Down
194 changes: 194 additions & 0 deletions R/distributions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1742,6 +1742,200 @@ rxbeta <- function(...) {
betareg::rxbeta(...)
}

#' Ordered Beta Distribution
#'
#' Density, distribution function, quantile function and random generation
#' for the ordered beta distribution as described in Kubinec (2023).
#'
#' @name OrdBeta
#'
#' @param x,q vector of quantiles
#' @param p vector of probabilities
#' @param n number of observations
#' @param mu mean parameter (on response scale, i.e., in (0, 1))
#' @param phi precision parameter of the beta distribution
#' @param xi cutpoint of boundary at 0 (latent scale)
#' @param kappa offset to xi for cutpoint of boundary at 1 (i.e., cutpoint = xi + kappa)
#' @param log,log.p logical; if TRUE, probabilities are given as log(p)
#' @param lower.tail logical; if TRUE (default), probabilities are
#' \eqn{P(X \le x)}, otherwise, \eqn{P(X > x)}
#'
#' @details
#' The ordered beta distribution combines a beta distribution for values
#' in (0, 1) with point masses at 0 and 1. The probability of each component
#' is determined by cutpoint thresholds: P(Y=0) = logit^-1(xi - logit(mu)),
#' P(Y=1) = 1 - logit^-1((xi + kappa) - logit(mu)), and P(0 < Y < 1) is the remaining
#' probability. For the continuous component, the mean is mu and precision is phi.
#'
#' @references
#' Kubinec, R. (2023). Ordered Beta Regression: A Parsimonious, Well-Fitting
#' Model for Continuous Data with Lower and Upper Bounds.
#' \emph{Political Analysis}, 31(4), 519-536.
#' \doi{10.1017/pan.2022.20}
#'
NULL

#' @rdname OrdBeta
#' @export
dordbeta <- function(x, mu, phi, xi, coi, log = FALSE) {
# Determine output length
n <- max(length(x), length(mu))
x <- rep_len(x, n)
mu <- rep_len(mu, n)
phi <- rep_len(phi, n)
xi <- rep_len(xi, n)
coi <- rep_len(coi, n)
# Transform mu to latent scale for threshold comparison
mu_latent <- qlogis(mu)
# probability of each component
pr_zero <- plogis(xi - mu_latent)
pr_one <- 1 - plogis(coi - mu_latent)
pr_cont <- plogis(coi - mu_latent) - plogis(xi - mu_latent)
# compute log-density
out <- rep(NA_real_, n)
is_zero <- x == 0
is_one <- x == 1
is_cont <- x > 0 & x < 1
if (any(is_zero)) {
out[is_zero] <- log(pr_zero[is_zero])
}
if (any(is_one)) {
out[is_one] <- log(pr_one[is_one])
}
if (any(is_cont)) {
out[is_cont] <- log(pr_cont[is_cont]) +
dbeta(x[is_cont], mu[is_cont] * phi[is_cont],
(1 - mu[is_cont]) * phi[is_cont], log = TRUE)
}
if (!log) {
out <- exp(out)
}
out
}

#' @rdname OrdBeta
#' @export
pordbeta <- function(q, mu, phi, xi, coi,
lower.tail = TRUE, log.p = FALSE) {
# Determine output length
n <- max(length(q), length(mu))
q <- rep_len(q, n)
mu <- rep_len(mu, n)
phi <- rep_len(phi, n)
xi <- rep_len(xi, n)
coi <- rep_len(coi, n)
# Transform mu to latent scale for threshold comparison
mu_latent <- qlogis(mu)
# probability of each component
pr_zero <- plogis(xi - mu_latent)
pr_one <- 1 - plogis(coi - mu_latent)
pr_cont <- plogis(coi - mu_latent) - plogis(xi - mu_latent)
# compute CDF
out <- rep(NA_real_, n)
is_neg <- q < 0
is_zero <- q == 0
is_cont <- q > 0 & q < 1
is_one <- q >= 1
if (any(is_neg)) {
out[is_neg] <- 0
}
if (any(is_zero)) {
out[is_zero] <- pr_zero[is_zero]
}
if (any(is_cont)) {
out[is_cont] <- pr_zero[is_cont] + pr_cont[is_cont] *
pbeta(q[is_cont], mu[is_cont] * phi[is_cont],
(1 - mu[is_cont]) * phi[is_cont])
}
if (any(is_one)) {
out[is_one] <- 1
}
if (!lower.tail) {
out <- 1 - out
}
if (log.p) {
out <- log(out)
}
out
}

#' @rdname OrdBeta
#' @export
qordbeta <- function(p, mu, phi, xi, coi,
lower.tail = TRUE, log.p = FALSE) {
if (log.p) {
p <- exp(p)
}
if (!lower.tail) {
p <- 1 - p
}
# Determine output length
n <- max(length(p), length(mu))
p <- rep_len(p, n)
mu <- rep_len(mu, n)
phi <- rep_len(phi, n)
xi <- rep_len(xi, n)
coi <- rep_len(coi, n)
# Transform mu to latent scale for threshold comparison
mu_latent <- qlogis(mu)
# probability of each component
pr_zero <- plogis(xi - mu_latent)
pr_one <- 1 - plogis(coi - mu_latent)
pr_cont <- plogis(coi - mu_latent) - plogis(xi - mu_latent)
# compute quantile
out <- rep(NA_real_, n)
is_zero <- p <= pr_zero
is_one <- p >= 1 - pr_one
is_cont <- !is_zero & !is_one
if (any(is_zero)) {
out[is_zero] <- 0
}
if (any(is_one)) {
out[is_one] <- 1
}
if (any(is_cont)) {
# rescale p to be within the continuous component
p_rescaled <- (p[is_cont] - pr_zero[is_cont]) / pr_cont[is_cont]
out[is_cont] <- qbeta(p_rescaled, mu[is_cont] * phi[is_cont],
(1 - mu[is_cont]) * phi[is_cont])
}
out
}

#' @rdname OrdBeta
#' @export
rordbeta <- function(n, mu, phi, xi, coi) {
# recycle parameters to length n
mu <- rep_len(mu, n)
phi <- rep_len(phi, n)
xi <- rep_len(xi, n)
coi <- rep_len(coi, n)
# Transform mu to latent scale for threshold comparison
mu_latent <- qlogis(mu)
# probability of each component
pr_zero <- plogis(xi - mu_latent)
pr_one <- 1 - plogis(coi - mu_latent)
pr_cont <- plogis(coi - mu_latent) - plogis(xi - mu_latent)
# sample component indicators
u <- runif(n)
out <- rep(NA_real_, n)
is_zero <- u < pr_zero
is_one <- u >= 1 - pr_one
is_cont <- !is_zero & !is_one
if (any(is_zero)) {
out[is_zero] <- 0
}
if (any(is_one)) {
out[is_one] <- 1
}
if (any(is_cont)) {
out[is_cont] <- rbeta(sum(is_cont),
mu[is_cont] * phi[is_cont],
(1 - mu[is_cont]) * phi[is_cont])
}
out
}

#' Zero-Inflated Distributions
#'
#' Density and distribution functions for zero-inflated distributions.
Expand Down
27 changes: 26 additions & 1 deletion R/families.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
#' \code{hurdle_gamma}, \code{hurdle_lognormal}, \code{hurdle_cumulative},
#' \code{zero_inflated_binomial}, \code{zero_inflated_beta_binomial},
#' \code{zero_inflated_beta}, \code{zero_inflated_negbinomial},
#' \code{zero_inflated_poisson}, \code{zero_one_inflated_beta}, and \code{xbeta}.
#' \code{zero_inflated_poisson}, \code{zero_one_inflated_beta}, \code{xbeta},
#' and \code{ordbeta}.
#' @param link A specification for the model link function. This can be a
#' name/expression or character string. See the 'Details' section for more
#' information on link functions supported by each family.
Expand Down Expand Up @@ -106,6 +107,13 @@
#' provide more flexibility. For details see Kosmidis & Zeileis
#' (2024).}
#'
#' \item{Family \code{ordbeta} ('ordered beta') provides an alternative
#' approach to handling \code{[0, 1]} responses with exact \code{0}s
#' and \code{1}s. It models the response as a mixture of a degenerate
#' distribution at 0, a beta distribution for (0,1), and a degenerate
#' distribution at 1. The cutpoint parameters control the probability
#' of each component. For details see Kubinec (2023).}
#'
#' \item{Family \code{asym_laplace} allows for quantile regression when fixing
#' the auxiliary \code{quantile} parameter to the quantile of interest.}
#'
Expand Down Expand Up @@ -193,6 +201,10 @@
#' Kosmidis I, Zeileis A (2024). Extended-Support Beta Regression for [0, 1] Responses.
#' \emph{arXiv Preprint}. \doi{10.48550/arXiv.2409.07233}
#'
#' Kubinec R (2023). Ordered Beta Regression: A Parsimonious, Well-Fitting
#' Model for Continuous Data with Lower and Upper Bounds.
#' \emph{Political Analysis}, 31(4), 519-536. \doi{10.1017/pan.2022.20}
#'
#' @seealso
#' \code{\link[brms:brm]{brm}},
#' \code{\link[stats:family]{family}},
Expand Down Expand Up @@ -641,6 +653,15 @@ xbeta <- function(link = "logit", link_phi = "log",
link_phi = link_phi, link_kappa = link_kappa)
}

#' @rdname brmsfamily
#' @export
ordbeta <- function(link = "logit", link_phi = "log",
link_xi = "identity", link_kappa = "log") {
slink <- substitute(link)
.brmsfamily("ordbeta", link = link, slink = slink,
link_phi = link_phi, link_xi = link_xi, link_kappa = link_kappa)
}

#' @rdname brmsfamily
#' @export
dirichlet <- function(link = "logit", link_phi = "log", refcat = NULL) {
Expand Down Expand Up @@ -1622,6 +1643,10 @@ is_cox <- function(family) {
"cox" %in% family_info(family, "specials")
}

is_ordbeta <- function(family) {
"ordbeta" %in% family_info(family, "specials")
}

# has joint link function over multiple inputs
has_joint_link <- function(family) {
"joint_link" %in% family_info(family, "specials")
Expand Down
32 changes: 32 additions & 0 deletions R/family-lists.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,38 @@
)
}

.family_ordbeta <- function() {
list(
links = c("logit", "probit", "probit_approx", "cloglog", "cauchit"),
dpars = c("mu", "phi", "xi", "kappa"),
type = "real",
ybounds = c(0, 1),
closed = c(TRUE, TRUE),
ad = c("weights", "subset", "index"),
include = "fun_ordbeta.stan",
prior = function(dpar, link = "identity", ...) {
if (dpar == "xi") {

return("student_t(3, 0, 2.5)")

}
if (dpar == "kappa") {

if(link=='identity') {

return("gamma(0.01, 0.01)")

} else {
return("student_t(3, 0, 2.5)")
}
}
NULL
},
specials = c("ordbeta"),
normalized = ""
)
}

.family_beta_binomial <- function() {
list(
links = c(
Expand Down
13 changes: 13 additions & 0 deletions R/log_lik.R
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,19 @@ log_lik_xbeta <- function(i, prep) {
log_lik_weight(out, i = i, prep = prep)
}

log_lik_ordbeta <- function(i, prep) {
# mu is already on response scale (0-1) after brms applies link function
mu <- get_dpar(prep, "mu", i = i)
phi <- get_dpar(prep, "phi", i = i)
xi <- get_dpar(prep, "xi", i = i)
kappa <- get_dpar(prep, "kappa", i = i)
# coi = xi + kappa (ensures ordering)
coi <- xi + kappa
y <- prep$data$Y[i]
out <- dordbeta(x = y, mu = mu, phi = phi, xi = xi, coi = coi, log = TRUE)
log_lik_weight(out, i = i, prep = prep)
}

log_lik_von_mises <- function(i, prep) {
args <- list(
mu = get_dpar(prep, "mu", i),
Expand Down
18 changes: 18 additions & 0 deletions R/posterior_epred.R
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,24 @@ posterior_epred_xbeta <- function(prep) {
1 + t1 - t2 - t3
}

posterior_epred_ordbeta <- function(prep) {
# Based on Kubinec (2023): https://doi.org/10.1017/pan.2022.20
# mu is already on response scale (0-1) after brms applies link function
mu <- get_dpar(prep, "mu")
xi <- get_dpar(prep, "xi")
kappa <- get_dpar(prep, "kappa")
# coi = xi + kappa (ensures ordering)
coi <- xi + kappa
# Transform mu to latent scale for threshold comparison
mu_latent <- qlogis(mu)
# probability of each component
pr_zero <- plogis(xi - mu_latent)
pr_one <- 1 - plogis(coi - mu_latent)
pr_cont <- plogis(coi - mu_latent) - plogis(xi - mu_latent)
# expected value: E[Y] = 0 * pr_zero + mu * pr_cont + 1 * pr_one
pr_cont * mu + pr_one
}

posterior_epred_von_mises <- function(prep) {
prep$dpars$mu
}
Expand Down
14 changes: 14 additions & 0 deletions R/posterior_predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,20 @@ posterior_predict_xbeta <- function(i, prep, ntrys = 5, ...) {
)
}

posterior_predict_ordbeta <- function(i, prep, ...) {
# mu is already on response scale (0-1) after brms applies link function
mu <- get_dpar(prep, "mu", i = i)
phi <- get_dpar(prep, "phi", i = i)
xi <- get_dpar(prep, "xi", i = i)
kappa <- get_dpar(prep, "kappa", i = i)
# coi = xi + kappa (ensures ordering)
coi <- xi + kappa
rordbeta(
n = prep$ndraws,
mu = mu, phi = phi, xi = xi, coi = coi
)
}

posterior_predict_von_mises <- function(i, prep, ntrys = 5, ...) {
rcontinuous(
n = prep$ndraws, dist = "von_mises",
Expand Down
5 changes: 5 additions & 0 deletions R/stan-likelihood.R
Original file line number Diff line number Diff line change
Expand Up @@ -723,6 +723,11 @@ stan_log_lik_xbeta <- function(bterms, ...) {
sdist("xbeta", p$mu, p$phi, p$kappa)
}

stan_log_lik_ordbeta <- function(bterms, ...) {
p <- stan_log_lik_dpars(bterms, reqn = TRUE)
sdist("ordbeta", p$mu, p$phi, p$xi, p$kappa, vec = FALSE)
}

stan_log_lik_von_mises <- function(bterms, ...) {
p <- stan_log_lik_dpars(bterms)
sdist("von_mises", p$mu, p$kappa)
Expand Down
Loading