The goal of kerasnip is to provide a seamless bridge
between the keras and tidymodels frameworks.
It allows for the dynamic creation of parsnip model
specifications for Keras models, making them fully compatible with
tidymodels workflows.
You can install the development version of kerasnip from
GitHub with:
# install.packages("pak")
pak::pak("davidrsch/kerasnip")This example shows the core workflow for building a simple, linear
This example shows the core workflow for building a simple, linear stack of layers using create_keras_sequential_spec().
library(kerasnip)
library(tidymodels)
library(keras3)
# 1. Define Keras layer blocks
# The first block initializes the model.
input_block <- function(model, input_shape) {
  keras_model_sequential(input_shape = input_shape)
}
# Subsequent blocks add layers.
dense_block <- function(model, units = 32) {
  model |> layer_dense(units = units, activation = "relu")
}
# The final block creates the output layer.
output_block <- function(model) {
  model |>
    layer_dense(units = 1)
}
# 2. Create a spec from the layer blocks
# This creates a new model function, `basic_mlp()`, in your environment.
create_keras_sequential_spec(
  model_name = "basic_mlp",
  layer_blocks = list(
    input = input_block,
    dense = dense_block,
    output = output_block
  ),
  mode = "regression"
)
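# As a quick sanity check (plain base R, nothing kerasnip-specific), you can
# confirm that the generator was created and inspect the arguments it exposes.
# The exact argument names depend on the blocks supplied above.
exists("basic_mlp")
names(formals(basic_mlp))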
# 3. Use the generated spec to define a model.
# We can set the number of dense layers (`num_dense`) and the units in each (`dense_units`).
spec <- basic_mlp(
  num_dense = 2,
  dense_units = 64,
  fit_epochs = 10,
  learn_rate = 0.01
) |>
  set_engine("keras")
# 4. Fit the model within a tidymodels workflow
rec <- recipe(mpg ~ ., data = mtcars) |>
  step_normalize(all_numeric_predictors())
wf <- workflow(rec, spec)
set.seed(123)
fit_obj <- fit(wf, data = mtcars)
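# Optionally, pull out the engine-level fit. `extract_fit_engine()` is standard
# tidymodels tooling; for a kerasnip model it is expected to give access to the
# underlying keras3 model (an assumption about kerasnip's internals, not
# something shown in this example).
extract_fit_engine(fit_obj)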
# 5. Make predictions
predict(fit_obj, new_data = mtcars[1:5, ])
#> # A tibble: 5 × 1
#> .pred
#> <dbl>
#> 1 21.3
#> 2 21.3
#> 3 22.8
#> 4 21.4
#> 5 18.7
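Because predict() returns a standard tibble, the usual tidymodels tooling applies downstream. As a purely illustrative (in-sample) check, you could compute metrics with yardstick, which tidymodels already loads:
preds <- predict(fit_obj, new_data = mtcars) |>
  bind_cols(mtcars["mpg"])
metrics(preds, truth = mpg, estimate = .pred)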
For complex, non-linear architectures, use create_keras_functional_spec(). This example builds a model where the input is forked into two paths, which are then concatenated.
library(kerasnip)
library(tidymodels)
library(keras3)
# 1. Define blocks. For the functional API, blocks are nodes in a graph.
input_block <- function(input_shape) layer_input(shape = input_shape)
path_block <- function(tensor, units = 16) tensor |> layer_dense(units = units)
concat_block <- function(input_a, input_b) layer_concatenate(list(input_a, input_b))
output_block <- function(tensor) layer_dense(tensor, units = 1)
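# Note on wiring, read off the calls below rather than from kerasnip's docs:
# `inp_spec()` wraps a block and maps named upstream blocks to its arguments.
# A single string (e.g. "main_input") feeds that block's output into the
# wrapped block's first argument; a named vector such as c(path_a = "input_a")
# feeds the output of the `path_a` block into the `input_a` argument.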
# 2. Create the spec. The graph is defined by block names and their arguments.
create_keras_functional_spec(
  model_name = "forked_mlp",
  layer_blocks = list(
    main_input = input_block,
    path_a = inp_spec(path_block, "main_input"),
    path_b = inp_spec(path_block, "main_input"),
    concatenated = inp_spec(concat_block, c(path_a = "input_a", path_b = "input_b")),
    # The output block must be named 'output'.
    output = inp_spec(output_block, "concatenated")
  ),
  mode = "regression"
)
# 3. Use the new spec. Arguments are prefixed with their block name.
spec <- forked_mlp(path_a_units = 16, path_b_units = 8, fit_epochs = 10) |>
  set_engine("keras")
# Fit and predict as usual
set.seed(123)
fit(spec, mpg ~ ., data = mtcars) |>
  predict(new_data = mtcars[1:5, ])
#> # A tibble: 5 × 1
#> .pred
#> <dbl>
#> 1 19.4
#> 2 19.5
#> 3 21.9
#> 4 18.6
#> 5 17.9
This example tunes the number of dense layers, the units in each, and the rate of a final dropout layer, showing how architecture and block hyperparameters can be tuned simultaneously.
library(kerasnip)
library(tidymodels)
library(keras3)
# 1. Define Keras layer blocks for a tunable MLP
input_block <- function(model, input_shape) {
  keras_model_sequential(input_shape = input_shape)
}
dense_block <- function(model, units = 32) {
  model |> layer_dense(units = units, activation = "relu")
}
dropout_block <- function(model, rate = 0.2) {
  model |> layer_dropout(rate = rate)
}
output_block <- function(model) {
  model |> layer_dense(units = 1)
}
# 2. Create a spec from the layer blocks
create_keras_sequential_spec(
  model_name = "tunable_mlp",
  layer_blocks = list(
    input = input_block,
    dense = dense_block,
    dropout = dropout_block,
    output = output_block
  ),
  mode = "regression"
)
# 3. Define a tunable model specification
tune_spec <- tunable_mlp(
  num_dense = tune(),
  dense_units = tune(),
  num_dropout = 1,
  dropout_rate = tune(),
  fit_epochs = 10
) |>
  set_engine("keras")
# 4. Set up and run a tuning workflow
rec <- recipe(mpg ~ ., data = mtcars) |>
  step_normalize(all_numeric_predictors())
wf_tune <- workflow(rec, tune_spec)
# Define the tunable parameter ranges and the grid.
params <- extract_parameter_set_dials(wf_tune) |>
  update(
    num_dense = dials::num_terms(c(1, 3)),
    dense_units = dials::hidden_units(c(8, 64)),
    dropout_rate = dials::dropout(c(0.1, 0.5))
  )
grid <- grid_regular(params, levels = 2)
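# With three tunable parameters at two levels each, this regular grid contains
# 2^3 = 8 candidate models.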
# 5. Run the tuning
set.seed(456)
folds <- vfold_cv(mtcars, v = 3)
tune_res <- tune_grid(
  wf_tune,
  resamples = folds,
  grid = grid,
  control = control_grid(verbose = FALSE)
)
# 6. Show the best architecture
show_best(tune_res, metric = "rmse")
#> # A tibble: 5 × 7
#> num_dense dense_units dropout_rate .metric .estimator .mean .config
#> <int> <int> <dbl> <chr> <chr> <dbl> <chr>
#> 1 1 64 0.1 rmse standard 2.92 Preprocessor1_Model02
#> 2 1 64 0.5 rmse standard 3.02 Preprocessor1_Model08
#> 3 3 64 0.1 rmse standard 3.15 Preprocessor1_Model04
#> 4 1 8 0.1 rmse standard 3.20 Preprocessor1_Model01
#> 5 3 8 0.1 rmse standard 3.22 Preprocessor1_Model03
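From here, the standard tune workflow applies: select the best configuration and refit on the full data. A minimal sketch, using only ordinary tidymodels functions (nothing kerasnip-specific):
best <- select_best(tune_res, metric = "rmse")
final_wf <- finalize_workflow(wf_tune, best)
final_fit <- fit(final_wf, data = mtcars)
predict(final_fit, new_data = mtcars[1:5, ])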