% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data_tools.R
\name{to_integer}
\alias{to_integer}
\title{Fast transform of any type of vector(s) into an integer vector}
\usage{
to_integer(
  ...,
  inputs = NULL,
  sorted = FALSE,
  add_items = FALSE,
  items.list = FALSE,
  multi.df = FALSE,
  multi.join = "_",
  na.valid = FALSE,
  internal = FALSE
)
}
\arguments{
\item{...}{Vectors of any type, to be transformed into a single integer vector ranging
from 1 to the number of unique elements.}

\item{inputs}{A list of inputs, by default it is \code{NULL}. If provided, it completely
replaces the elements in \code{...}.}

\item{sorted}{Logical, default is \code{FALSE}. Whether the integer vector should refer
to the sorted unique values.}

\item{add_items}{Logical, default is \code{FALSE}. Whether to add the unique values of the
original vector(s). If requested, an attribute \code{items} is created containing the
values (alternatively, they can appear in a list if \code{items.list=TRUE}).}

\item{items.list}{Logical, default is \code{FALSE}. Only used if \code{add_items=TRUE}. If \code{TRUE},
then a list of length 2 is returned with \code{x} the integer vector and \code{items} the vector of items.}

\item{multi.df}{Logical, default is \code{FALSE}. If \code{TRUE} then the unique elements
are returned in the form of a data.frame. Ignored if \code{add_items = FALSE}.}

\item{multi.join}{Character scalar used to join the items of multiple vectors.
The default is \code{"_"}. Ignored if \code{add_items = FALSE}.}

\item{na.valid}{Logical, default is \code{FALSE}. Whether to consider NAs as regular values.
If \code{TRUE}, the returned index will not contain any NA value.}

\item{internal}{Logical, default is \code{FALSE}. For programming only. If this function
is used within another function, setting \code{internal = TRUE} is needed to make the
evaluation of \code{...} valid. End users of \code{to_integer} can ignore this argument.}
}
\value{
Returns an integer vector of the same length as the input vectors.
If \code{add_items=TRUE} and \code{items.list=TRUE}, a list of two elements is returned: \code{x}
being the integer vector and \code{items} being the unique values to which the values
in \code{x} make reference.
}
\description{
Tool to transform any type of vector, or even combination of vectors, into an integer vector
ranging from 1 to the number of unique values. In effect, this creates a unique identifier vector.
}
\examples{

x1 = iris$Species
x2 = as.integer(iris$Sepal.Length)

# transforms the species vector into integers
to_integer(x1)

# To obtain the "items":
to_integer(x1, add_items = TRUE)
# same but in list form
to_integer(x1, add_items = TRUE, items.list = TRUE)

# transforms x2 into an integer vector from 1 to 4
to_integer(x2, add_items = TRUE)

# To have the sorted items:
to_integer(x2, add_items = TRUE, sorted = TRUE)

# placing the three side by side
head(cbind(x2, as_index = to_integer(x2), 
           as_index_sorted = to_integer(x2, sorted = TRUE)))

# The result can safely be used as an index
res = to_integer(x2, add_items = TRUE, sorted = TRUE, items.list = TRUE)
all(res$items[res$x] == x2)


#
# Multiple vectors
#

to_integer(x1, x2, add_items = TRUE)

# You can use multi.join to handle the join of the items:
to_integer(x1, x2, add_items = TRUE, multi.join = "; ")

# alternatively, return the items as a data.frame
to_integer(x1, x2, add_items = TRUE, multi.df = TRUE)

#
# NA values
#

x1_na = c("a", "a", "b", NA, NA, "b", "a", "c", NA)
x2_na = c(NA,    1,  NA,  1,  1,   1,   2,   2,  2)

# by default the NAs are propagated
to_integer(x1_na, x2_na, add_items = TRUE)

# but you can treat them as valid values with na.valid = TRUE
to_integer(x1_na, x2_na, add_items = TRUE, na.valid = TRUE)

#
# programmatic use
#

# the argument `inputs` can be used for easy programmatic use
all_vars = list(x1_na, x2_na)
to_integer(inputs = all_vars)

}
\author{
Laurent Berge
}
