% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/auto_vif.R
\name{auto_vif}
\alias{auto_vif}
\title{Multicollinearity reduction via Variance Inflation Factor}
\usage{
auto_vif(
  x = NULL,
  preference.order = NULL,
  vif.threshold = 5,
  verbose = TRUE
)
}
\arguments{
\item{x}{A data frame with predictors or the result of \code{\link[=auto_cor]{auto_cor()}}. Default: \code{NULL}.}

\item{preference.order}{Character vector with column names of \code{x} ordered by user preference. Default: \code{NULL}.}

\item{vif.threshold}{Numeric between 2.5 and 10 defining the selection threshold for the VIF analysis. Higher numbers result in a more relaxed variable selection. Default: 5.}

\item{verbose}{Logical. If \code{TRUE}, describes the function operations to the user. Default: \code{TRUE}.}
}
\value{
List with three slots:
\itemize{
\item \code{vif}: data frame with the names of the selected variables and their respective VIF scores.
\item \code{selected.variables}: character vector with the names of the selected variables.
\item \code{selected.variables.df}: data frame with the selected variables.
}
}
\description{
Selects predictors that are not linear combinations of other predictors by computing their variance inflation factors (VIF). Allows the user to define an order of preference for the selection of predictors. \strong{Warning}: variables in \code{preference.order} that are not in \code{colnames(x)}, and non-numeric columns, are removed silently from \code{x} and \code{preference.order}. The same happens with rows having NA values (\code{\link[=na.omit]{na.omit()}} is applied). The function issues a warning if zero-variance columns are found.
}
\details{
This function has two modes of operation:
\itemize{
\item 1. When the argument \code{preference.order} is \code{NULL}, the function removes on each iteration the variable with the highest VIF until all VIF values are lower than \code{vif.threshold}.
\item 2. When \code{preference.order} is provided, the variables are selected by giving them priority according to their order in \code{preference.order}. If there are variables not in \code{preference.order}, these are selected as in option 1. Once both groups of variables have been processed, all variables are put together and selected by giving priority to the ones in \code{preference.order}. This method preserves the variables desired by the user as much as possible.
}
This function can be chained together with \code{\link[=auto_cor]{auto_cor()}} through pipes (see the examples below).
}
\examples{
if(interactive()){

#loading data
data(plant_richness_df)

#on a data frame
out <- auto_vif(x = plant_richness_df[, 5:21])

#getting out the vif data frame
out$vif

#getting the names of the selected variables
out$selected.variables

#getting the data frame of selected variables
out$selected.variables.df

#on the result of auto_cor
out <- auto_cor(x = plant_richness_df[, 5:21])
out <- auto_vif(x = out)

#with pipes
out <- plant_richness_df[, 5:21] \%>\%
 auto_cor() \%>\%
 auto_vif()

}
}
\seealso{
\code{\link[=auto_cor]{auto_cor()}}
}
