% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/freq_by.R
\name{freq_by}
\alias{freq_by}
\title{Frequency Table by Group (wide): n (\%) with flexible ordering and formats}
\usage{
freq_by(
  data,
  denom_data = NULL,
  main_group,
  last_group,
  label,
  sec_ord,
  fmt = NULL,
  use_sas_round = FALSE,
  indent = 2,
  id_var = "USUBJID",
  include_all_fmt_levels = TRUE,
  na_to_code = NULL
)
}
\arguments{
\item{data}{A data frame containing at least \code{main_group}, \code{last_group}, and an ID column.}

\item{denom_data}{Optional data frame used to derive denominators (N per treatment).
Defaults to \code{data}.}

\item{main_group}{Character scalar. The treatment or grouping variable name (columns in output),
e.g., \code{"TRTAN"}.}

\item{last_group}{Character scalar. The categorical \strong{code} variable to tabulate (rows).
Numeric and character values are both accepted; values are converted to character for display/ordering.}

\item{label}{Character scalar. A header row displayed on top (unindented).}

\item{sec_ord}{Integer scalar carried through for downstream table sorting.}

\item{fmt}{Optional. Either:
\itemize{
\item a \strong{named character vector} like \code{c("1"="<1","2"="1-<4",...)} (names = codes, values = labels), or
\item a \strong{data.frame/tibble} with columns \code{value} (codes) and \code{raw} (labels), or
\item a \strong{string} naming an object (in the parent frame) that resolves to either of the above.
}

If \code{NULL} (default), labels are derived from unique values of \code{data[[last_group]]}.}

\item{use_sas_round}{Logical; if \code{TRUE}, percent is rounded with SAS-compatible
“round halves away from zero” via \code{sas_round()}. Default \code{FALSE}.}

\item{indent}{Integer number of \strong{leading spaces} applied to all category rows
(the first \code{label} row is not indented). Default \code{2}.}

\item{id_var}{Character; the subject identifier column. Default \code{"USUBJID"}. If it is not found in \code{data},
the function tries common alternatives (e.g., \code{USUBJID} or \code{SUBJID}).}

\item{include_all_fmt_levels}{Logical; if \code{TRUE} (default), the row order is built from the
\strong{union of format codes and data codes} (numeric sort). When \code{fmt = NULL},
this effectively reduces to observed data codes only.}

\item{na_to_code}{Optional character scalar (e.g., \code{"4"}). If supplied, NA values in
\code{last_group} are \strong{counted under that code} before tabulation.}
}
\value{
A tibble with:
\itemize{
\item \code{stat} (character), \code{sort_ord} (integer), \code{sec_ord} (integer),
\item One column per treatment arm (e.g., \code{trt1}, \code{trt2}, …), with \code{"n (pct)"} or \code{"0"}.
}
}
\description{
\code{freq_by()} produces a one-level frequency table by treatment (wide layout)
where each row is a category of \code{last_group} (e.g., a bucketed lab value),
and each treatment column shows \strong{n (\%)} using distinct subject counts.

If \code{fmt} is \strong{not provided} (\code{NULL}), labels are derived from the \strong{unique
values present in \code{data[[last_group]]}} (after \code{na_to_code} mapping, if used).

It supports:
\itemize{
\item \strong{SAS-style rounding} (\code{use_sas_round = TRUE}) for the percent.
\item Format mapping via either a \strong{named vector} or a \strong{tibble/data.frame} with
columns \code{value} (codes) and \code{raw} (labels).
\item \strong{Ordering} by the \strong{numeric value} of \code{last_group} found in the data,
or optionally the \strong{union} of format + data codes (\code{include_all_fmt_levels}).
\item Counting \strong{NA} under a chosen code/label using \code{na_to_code} (e.g., code \code{"4"} = \code{"MISSING"}).
\item Auto-detecting the subject ID column when \code{id_var} is not found in \code{data}.
}
}
\details{
\itemize{
\item Counting uses \code{n_distinct(id_var)} within each \verb{(main_group, last_group)} cell.
\item Percent is \code{100 * n / N} where \code{N} = distinct subjects in \code{denom_data} by \code{main_group}.
\item When \code{fmt = NULL}, both \strong{codes} and \strong{labels} are taken from the observed values
of \code{last_group} (after applying \code{na_to_code} mapping), ordered numerically where possible.
\item Output treatment columns are normalized to \code{trtXX} if original names start with digits.
\item Missing treatment arms are added as \code{"0"}.
}
}
\examples{
# Fix the RNG seed so the simulated data below is reproducible.
set.seed(1)

# Toy subject-level dataset: 60 subjects with a two-level treatment code (TRTAN),
# age, sex and ethnicity. ETHNIC is sampled from a pool that includes NA.
# AGEGR1 is then derived from AGE as a three-level age-group category.
toy_adsl <- tibble::tibble(
  USUBJID = sprintf("ID\%03d", 1:60),
  TRTAN   = sample(c(1, 2), size = 60, replace = TRUE),
  AGE     = sample(18:85, size = 60, replace = TRUE),
  SEX     = sample(c("Male", "Female"), size = 60, replace = TRUE),
  ETHNIC  = sample(
    c("Hispanic or Latino",
      "Not Hispanic or Latino",
      "Unknown",
      NA_character_),
    size = 60, replace = TRUE
  )
) |>
  dplyr::mutate(
    AGEGR1 = dplyr::case_when(
      AGE < 65            ~ "<65 years",
      AGE >= 65 & AGE < 75 ~ "65–<75 years",
      AGE >= 75           ~ ">=75 years"
    )
  )

# Denominator dataset: subject ID and treatment only.
toy_dm <- toy_adsl |>
  dplyr::select(USUBJID, TRTAN)

# Example 1 - age group: fmt = NULL, so row labels are derived from the
# values observed in AGEGR1.
freq_by(
  data       = toy_adsl,
  denom_data = toy_dm,
  main_group = "TRTAN",
  last_group = "AGEGR1",
  label      = "Age group, n (\%)",
  sec_ord    = 1,
  fmt        = NULL,
  na_to_code = NULL
)

# Example 2 - sex: fmt = NULL again, with na_to_code supplied.
# SEX is sampled without NA above, so there are no missing values to remap here.
freq_by(
  data       = toy_adsl,
  denom_data = toy_dm,
  main_group = "TRTAN",
  last_group = "SEX",
  label      = "Sex, n (\%)",
  sec_ord    = 2,
  fmt        = NULL,
  na_to_code = "99"
)

# Format supplied as a named character vector: names are codes, values are
# display labels. Code 99 is the code that NA values are mapped to below.
fmt_ethnic <- c(
  "Hispanic or Latino"         = "Hispanic or Latino",
  "Not Hispanic or Latino"     = "Not Hispanic or Latino",
  "Unknown"                    = "Unknown",
  "99"                         = "Missing"
)

# Example 3 - ethnic group: NA values in ETHNIC are counted under code 99,
# which fmt_ethnic labels as Missing. With include_all_fmt_levels = TRUE the
# row order is built from the union of format codes and data codes.
freq_by(
  data       = toy_adsl,
  denom_data = toy_dm,
  main_group = "TRTAN",
  last_group = "ETHNIC",
  label      = "Ethnic group, n (\%)",
  sec_ord    = 3,
  fmt        = fmt_ethnic,
  include_all_fmt_levels = TRUE,
  na_to_code = "99"
)

}
