library(metasplines)
library(splines2)
# Estimate a linear regression model using an individual participant data (IPD):
res <- lm(
Petal.Width ~
Species +
nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5)),
data=iris)
summary(res)
#>
#> Call:
#> lm(formula = Petal.Width ~ Species + nsk(Sepal.Length, Boundary.knots = c(4.5,
#> 7.5), knots = c(5, 6, 6.5)), data = iris)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.55321 -0.10548 -0.00895 0.10758 0.51982
#>
#> Coefficients:
#> Estimate
#> (Intercept) 0.18478
#> Speciesversicolor 0.92487
#> Speciesvirginica 1.52223
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 0.05965
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 0.21911
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 0.35234
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 0.38634
#> Std. Error
#> (Intercept) 0.05507
#> Speciesversicolor 0.05720
#> Speciesvirginica 0.06811
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 0.06102
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 0.08139
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 0.08618
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 0.09927
#> t value
#> (Intercept) 3.355
#> Speciesversicolor 16.170
#> Speciesvirginica 22.348
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 0.978
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 2.692
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 4.088
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 3.892
#> Pr(>|t|)
#> (Intercept) 0.001016
#> Speciesversicolor < 2e-16
#> Speciesvirginica < 2e-16
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 0.329970
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 0.007951
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 7.22e-05
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 0.000152
#>
#> (Intercept) **
#> Speciesversicolor ***
#> Speciesvirginica ***
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 **
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 ***
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.19 on 143 degrees of freedom
#> Multiple R-squared: 0.9404, Adjusted R-squared: 0.9379
#> F-statistic: 375.9 on 6 and 143 DF, p-value: < 2.2e-16
# How is the estimated correlation matrix like?
cor.m <- cov2cor(vcov(res))
colnames(cor.m) <- NULL
round(cor.m, 3)
#> [,1]
#> (Intercept) 1.000
#> Speciesversicolor -0.024
#> Speciesvirginica -0.041
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 -0.857
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 -0.604
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 -0.618
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 -0.524
#> [,2]
#> (Intercept) -0.024
#> Speciesversicolor 1.000
#> Speciesvirginica 0.768
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 -0.112
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 -0.662
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 -0.615
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 -0.517
#> [,3]
#> (Intercept) -0.041
#> Speciesversicolor 0.768
#> Speciesvirginica 1.000
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 -0.059
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 -0.622
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 -0.659
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 -0.649
#> [,4]
#> (Intercept) -0.857
#> Speciesversicolor -0.112
#> Speciesvirginica -0.059
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 1.000
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 0.549
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 0.617
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 0.511
#> [,5]
#> (Intercept) -0.604
#> Speciesversicolor -0.662
#> Speciesvirginica -0.622
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 0.549
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 1.000
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 0.886
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 0.757
#> [,6]
#> (Intercept) -0.618
#> Speciesversicolor -0.615
#> Speciesvirginica -0.659
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 0.617
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 0.886
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 1.000
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 0.780
#> [,7]
#> (Intercept) -0.524
#> Speciesversicolor -0.517
#> Speciesvirginica -0.649
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))1 0.511
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))2 0.757
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))3 0.780
#> nsk(Sepal.Length, Boundary.knots = c(4.5, 7.5), knots = c(5, 6, 6.5))4 1.000
Note that the selection of the left boundary knot (in this example
4.5) should be the same as the one used on the estimates obtained from
literature. This is because the reference point (where the value of the
spline is set to zero for identifiability) in splines is generally the
left boundary knot. The spline parameter values correspond to the spline
function values at the internal (5, 6 and 6.5) and right boundary (7.5)
knots, when the nsk
splines are used. Other spline families
are not supported at the moment.
# "Literature-based" (LB) estimates:
lb.df <- read.table(text=
"variable, cov.value, est, est.var
Sepal.Length, 4.5, 0, 0
Sepal.Length, 5, 0.15, 0.01
Sepal.Length, 5.5, 0.25, 0.01
Sepal.Length, 6, 0.4, 0.01
Sepal.Length, 6.5, 0.5, 0.01
Sepal.Length, 8, 0.25, 0.04
", sep=",", header=TRUE)
lb.df
#> variable cov.value est est.var
#> 1 Sepal.Length 4.5 0.00 0.00
#> 2 Sepal.Length 5.0 0.15 0.01
#> 3 Sepal.Length 5.5 0.25 0.01
#> 4 Sepal.Length 6.0 0.40 0.01
#> 5 Sepal.Length 6.5 0.50 0.01
#> 6 Sepal.Length 8.0 0.25 0.04
# Output table with the point estimates and the estimated variances:
pool_splines(v="Sepal.Length", meta.df=lb.df, glm.res=res)
#> $pooled
#> stat V1 V2 V3 V4
#> 1 est.lb 0.140485675 0.390851632 0.503442966 0.375372517
#> 2 est.ipd 0.059649933 0.219106883 0.352341484 0.386343207
#> 3 var.lb 0.009617833 0.009609035 0.010122903 0.021873413
#> 4 var.ipd 0.003723764 0.006624838 0.007427437 0.009854545
#> 5 est.pool 0.082211943 0.289193741 0.416288778 0.382935765
#> 6 var.pool 0.002684427 0.003921326 0.004284089 0.006793773
#> 7 pval.Q 0.484026476 0.177675852 0.254044526 0.950889039
#>
#> attr(,"idx.param")
#> [1] 4 5 6 7
# Full output containing also spline estimates and covariance matrix:
pooled <- pool_splines(v="Sepal.Length", meta.df=lb.df, glm.res=res, full.output = TRUE)
str(pooled)
#> List of 4
#> $ pooled :'data.frame': 7 obs. of 5 variables:
#> ..$ stat: chr [1:7] "est.lb" "est.ipd" "var.lb" "var.ipd" ...
#> ..$ V1 : num [1:7] 0.14049 0.05965 0.00962 0.00372 0.08221 ...
#> ..$ V2 : num [1:7] 0.39085 0.21911 0.00961 0.00662 0.28919 ...
#> ..$ V3 : num [1:7] 0.50344 0.35234 0.01012 0.00743 0.41629 ...
#> ..$ V4 : num [1:7] 0.37537 0.38634 0.02187 0.00985 0.38294 ...
#> $ log.hr :'data.frame': 150 obs. of 5 variables:
#> ..$ model : chr [1:150] "ipd" "ipd" "ipd" "ipd" ...
#> ..$ cov.value: num [1:150] 4.5 4.57 4.64 4.71 4.79 ...
#> ..$ est : num [1:150] 0 0.00865 0.01729 0.02589 0.03444 ...
#> ..$ ci.low : num [1:150] 0 -0.0125 -0.0245 -0.0355 -0.045 ...
#> ..$ ci.upp : num [1:150] 0 0.0298 0.0591 0.0873 0.1138 ...
#> $ meta.df :'data.frame': 6 obs. of 4 variables:
#> ..$ variable : chr [1:6] "Sepal.Length" "Sepal.Length" "Sepal.Length" "Sepal.Length" ...
#> ..$ cov.value: num [1:6] 4.5 5 5.5 6 6.5 8
#> ..$ est : num [1:6] 0 0.15 0.25 0.4 0.5 0.25
#> ..$ est.var : num [1:6] 0 0.01 0.01 0.01 0.01 0.04
#> $ meta.vcov: num [1:4, 1:4] 0.00962 0.00528 0.00608 0.00741 0.00528 ...
The default output of the pool_splines
function is a
list with one element:
pooled
is a data frame containing the point estimates
and the estimated variances of the spline parameters of the internal and
right boundary knot. The IPD, LB and pooled estimates are included, and
also the p-value of the Q heterogeneity test.With the full.output = TRUE
option, more output is
included:
log.hr
contains the spline function estimates with 95%
confidence interval limits at a grid of covariate values, ranging
between the boundary knots. Also here the values are provided for the
IPD, LB and pooled estimates.meta.df
contains the LB estimates provided in the
meta.df
argument.meta.vcov
contains the estimated covariance matrix of
the pooled spline parameters.The LB estimates given in the lb.df
data frame above are
presented as green dots, and the corresponding 95% confidence intervals
as vertical dashed lines.
The IPD, LB and pooled point estimates of the spline functions are
the smooth curves, and the corresponding dashed lines are the 95%
confidence intervals. Note that the LB spline estimates are close to
those estimates reported in lb.df
, but in the points which
do not equal the knots of the spline, there are small differences. Note
also, that as the left boundary knot is at 4.5, the values of the
splines are zero, and the widths of the confidence intervals are
zero.
Here we can see that the pooled spline estimate is between the IPD estimate and the LB estimate, and the confidence interval is narrower than the confidence intervals based on the IPD or literature.
plot(0, 0, type="n", xlim=range(pooled$log.hr$cov.value), ylim=c(-0.2, 0.7),
xlab="Covariate value", ylab="")
col.v <- rainbow(3)
names(col.v) <- unique(pooled$log.hr$model)
for (i in unique(pooled$log.hr$model)) {
with(subset(pooled$log.hr, model==i), {
lines(cov.value, est, col=col.v[i], lwd=2)
lines(cov.value, ci.low, col=col.v[i], lty=2)
lines(cov.value, ci.upp, col=col.v[i], lty=2)
})
}
with(lb.df, {
points(cov.value, est, pch=15, col=col.v["lb"])
for (j in 2:nrow(lb.df))
lines(rep(cov.value[j], 2), est[j] + 1.96 * c(-1, 1) * sqrt(est.var[j]),
col=col.v["lb"], lty=2)
})
legend("bottomleft", legend=names(col.v), col=col.v, lwd=c(2,2,2))