lgspline {lgspline} | R Documentation |
Fit Lagrangian Multiplier Smoothing Splines
Description
A comprehensive software package for fitting a variant of smoothing splines as a constrained optimization problem, avoiding the need to algebraically disentangle a spline basis after fitting, and allowing for interpretable interactions and non-spline effects to be included.
lgspline
fits piecewise polynomial regression splines constrained to be smooth where
they meet, penalized by the integrated squared second derivative of the
estimated function with respect to predictors.
The method of Lagrangian multipliers is used to enforce the following:
Equivalent fitted values at knots
Equivalent first derivatives at knots, with respect to predictors
Equivalent second derivatives at knots, with respect to predictors
The coefficients are penalized by an analytical form of the traditional cubic smoothing spline penalty, as well as tunable modifications that allow for unique penalization of multiple predictors and partitions.
This package supports model fitting for multiple spline and non-spline effects, GLM families, Weibull accelerated failure time (AFT) models, correlation structures, quadratic programming constraints, and extensive customization for user-defined models.
In addition, parallel processing capabilities and comprehensive tools for visualization and for frequentist and Bayesian inference are provided.
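Schematically, for the Gaussian identity-link case, the fit solves a penalized least-squares problem with linear equality constraints (a sketch in the notation of the Value section below; the internal parameterization additionally standardizes expansions):
\hat{\boldsymbol{\beta}} = \text{argmin}_{\boldsymbol{\beta}} \; \|\textbf{y} - \textbf{X}\boldsymbol{\beta}\|^2 + \boldsymbol{\beta}^T\boldsymbol{\Lambda}\boldsymbol{\beta} \quad \text{subject to} \quad \textbf{A}^T\boldsymbol{\beta} = \textbf{0}
where the columns of \textbf{A} encode the matching conditions listed above and \boldsymbol{\Lambda} collects the smoothing spline and ridge penalties.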
Usage
lgspline(predictors = NULL, y = NULL, formula = NULL, response = NULL,
standardize_response = TRUE, standardize_predictors_for_knots = TRUE,
standardize_expansions_for_fitting = TRUE, family = gaussian(),
glm_weight_function = function(mu, y, order_indices, family, dispersion,
observation_weights, ...) {
if(any(!is.null(observation_weights))){
family$variance(mu) * observation_weights
} else {
family$variance(mu)
}
}, shur_correction_function = function(X, y, B, dispersion, order_list, K,
family, observation_weights, ...) {
lapply(1:(K+1), function(k) 0)
}, need_dispersion_for_estimation = FALSE,
dispersion_function = function(mu, y, order_indices, family,
observation_weights, ...) { 1 },
K = NULL, custom_knots = NULL, cluster_on_indicators = FALSE,
make_partition_list = NULL, previously_tuned_penalties = NULL,
smoothing_spline_penalty = NULL, opt = TRUE, use_custom_bfgs = TRUE,
delta = NULL, tol = 10*sqrt(.Machine$double.eps),
invsoftplus_initial_wiggle = c(-25, 20, -15, -10, -5),
invsoftplus_initial_flat = c(-14, -7), wiggle_penalty = 2e-07,
flat_ridge_penalty = 0.5, unique_penalty_per_partition = TRUE,
unique_penalty_per_predictor = TRUE, meta_penalty = 1e-08,
predictor_penalties = NULL, partition_penalties = NULL,
include_quadratic_terms = TRUE, include_cubic_terms = TRUE,
include_quartic_terms = NULL, include_2way_interactions = TRUE,
include_3way_interactions = TRUE, include_quadratic_interactions = FALSE,
offset = c(), just_linear_with_interactions = NULL,
just_linear_without_interactions = NULL, exclude_interactions_for = NULL,
exclude_these_expansions = NULL, custom_basis_fxn = NULL,
include_constrain_fitted = TRUE, include_constrain_first_deriv = TRUE,
include_constrain_second_deriv = TRUE,
include_constrain_interactions = TRUE, cl = NULL, chunk_size = NULL,
parallel_eigen = TRUE, parallel_trace = FALSE, parallel_aga = FALSE,
parallel_matmult = FALSE, parallel_unconstrained = TRUE,
parallel_find_neighbors = FALSE, parallel_penalty = FALSE,
parallel_make_constraint = FALSE,
unconstrained_fit_fxn = unconstrained_fit_default,
keep_weighted_Lambda = FALSE, iterate_tune = TRUE,
iterate_final_fit = TRUE, blockfit = FALSE,
qp_score_function = function(X, y, mu, order_list, dispersion,
VhalfInv, observation_weights, ...) {
if(!is.null(observation_weights)) {
crossprod(X, cbind((y - mu)*observation_weights))
} else {
crossprod(X, cbind(y - mu))
}
}, qp_observations = NULL, qp_Amat = NULL, qp_bvec = NULL, qp_meq = 0,
qp_positive_derivative = FALSE, qp_negative_derivative = FALSE,
qp_monotonic_increase = FALSE, qp_monotonic_decrease = FALSE,
qp_range_upper = NULL, qp_range_lower = NULL, qp_Amat_fxn = NULL,
qp_bvec_fxn = NULL, qp_meq_fxn = NULL, constraint_values = cbind(),
constraint_vectors = cbind(), return_G = TRUE, return_Ghalf = TRUE,
return_U = TRUE, estimate_dispersion = TRUE, unbias_dispersion = NULL,
return_varcovmat = TRUE, custom_penalty_mat = NULL,
cluster_args = c(custom_centers = NA, nstart = 10),
dummy_dividor = 1.2345672152894e-22,
dummy_adder = 2.234567210529e-18, verbose = FALSE,
verbose_tune = FALSE, expansions_only = FALSE,
observation_weights = NULL, do_not_cluster_on_these = c(),
neighbor_tolerance = 1 + 1e-08, null_constraint = NULL,
critical_value = qnorm(1 - 0.05/2), data = NULL, weights = NULL,
no_intercept = FALSE, correlation_id = NULL, spacetime = NULL,
correlation_structure = NULL, VhalfInv = NULL, Vhalf = NULL,
VhalfInv_fxn = NULL, Vhalf_fxn = NULL, VhalfInv_par_init = c(),
REML_grad = NULL, custom_VhalfInv_loss = NULL, VhalfInv_logdet = NULL,
include_warnings = TRUE, ...)
Arguments
predictors |
Default: NULL. Numeric matrix or data frame of predictor variables. Supports direct matrix input or formula interface when used with 'data' argument. Must contain numeric predictors, with categorical variables pre-converted to numeric indicators. |
y |
Default: NULL. Numeric response variable vector representing the target/outcome/dependent variable etc. to be modeled. |
formula |
Default: NULL. Optional statistical formula for model specification, serving as an alternative to direct matrix input. Supports standard R formula syntax with special 'spl()' function for defining spline terms. |
response |
Default: NULL. Alternative name for the response variable, providing compatibility with different naming conventions. Used only if 'y' is not supplied. |
standardize_response |
Default: TRUE. Logical indicator controlling whether the response variable should be centered by mean and scaled by standard deviation before model fitting. When TRUE, tends to improve numerical stability. Only offered for identity link functions (when family$link == 'identity') |
standardize_predictors_for_knots |
Default: TRUE. Logical flag determining whether predictor variables should be standardized before knot placement. Ensures consistent knot selection across different predictor scales, and that no one predictor dominates in terms of influence on knot placement. For all expansions (x), standardization corresponds to dividing by the difference between the 69th and 31st percentiles = x / (quantile(x, 0.69) - quantile(x, 0.31)). |
standardize_expansions_for_fitting |
Default: TRUE. Logical switch to standardize polynomial basis expansions during model fitting. Provides computational stability during penalty tuning without affecting statistical inference, as design matrices, variance-covariance matrices, and coefficient estimates are systematically backtransformed after fitting to account for the standardization. If TRUE, |
family |
Default: gaussian(). Generalized linear model (GLM) distribution family specifying the error distribution and link function for model fitting. Defaults to Gaussian distribution with identity link. Supports custom family specifications, including user-defined link functions and optional custom tuning loss criteria. Minimally requires 1) family name (family) 2) link name (link) 3) linkfun (link function) 4) linkinv (link function inverse) 5) variance (mean variance relationship function, |
glm_weight_function |
Default: function that returns family$variance(mu) * observation_weights if weights exist, family$variance(mu) otherwise. Codes the mean-variance relationship of a GLM or GLM-like model, the diagonal |
shur_correction_function |
Default: function that returns list of zeros. Advanced function for computing Schur complements |
need_dispersion_for_estimation |
Default: FALSE. Logical indicator specifying whether a dispersion parameter is required for coefficient estimation. This is not needed for canonical regular exponential family models, but is often needed otherwise (such as fitting Weibull AFT models). |
dispersion_function |
Default: function that returns 1. Custom function for estimating the dispersion parameter. Unless |
K |
Default: NULL. Integer specifying the number of knot locations for spline partitions. This can intuitively be considered the total number of partitions - 1. |
custom_knots |
Default: NULL. Optional matrix providing user-specified knot locations in 1-D. |
cluster_on_indicators |
Default: FALSE. Logical flag determining whether indicator variables should be used for clustering knot locations. |
make_partition_list |
Default: NULL. Optional list allowing direct specification of custom partition assignments. The intent is that the make_partition_list returned by one model can be supplied here to keep the same knot locations for another. |
previously_tuned_penalties |
Default: NULL. Optional list of pre-computed penalty components from previous model fits. |
smoothing_spline_penalty |
Default: NULL. Optional custom smoothing spline penalty matrix for fine-tuned complexity control. |
opt |
Default: TRUE. Logical switch controlling whether model penalties should be automatically optimized via generalized cross-validation. Turn this off if previously_tuned_penalties are supplied and you want them used; otherwise, the previously_tuned_penalties will be ignored. |
use_custom_bfgs |
Default: TRUE. Logical indicator selecting between a native implementation of damped BFGS with analytical gradients (TRUE) and base R's BFGS implementation with finite-difference approximation of gradients (FALSE). |
delta |
Default: NULL. Numeric pseudocount used for stabilizing optimization in non-identity link function scenarios. |
tol |
Default: 10*sqrt(.Machine$double.eps). Numeric convergence tolerance controlling the precision of optimization algorithms. |
invsoftplus_initial_wiggle |
Default: c(-25, 20, -15, -10, -5). Numeric vector of initial grid points for wiggle penalty optimization, specified on the inverse-softplus (log(exp(x) - 1)) scale. |
invsoftplus_initial_flat |
Default: c(-14, -7). Numeric vector of initial grid points for ridge penalty optimization, specified on the inverse-softplus (log(exp(x) - 1)) scale. |
wiggle_penalty |
Default: 2e-7. Numeric penalty controlling the integrated squared second derivative, governing function smoothness. Applied to both smoothing spline penalty (alone) and is multiplied by |
flat_ridge_penalty |
Default: 0.5. Numeric flat ridge penalty for additional regularization on only intercepts and linear terms (won't affect interactions or quadratic/cubic/quartic terms by default). If |
unique_penalty_per_partition |
Default: TRUE. Logical flag allowing the magnitude of the smoothing spline penalty to differ across partitions. |
unique_penalty_per_predictor |
Default: TRUE. Logical flag allowing the magnitude of the smoothing spline penalty to differ between predictors. |
meta_penalty |
Default: 1e-8. Numeric "meta-penalty" applied to predictor and partition penalties during tuning. The minimization of GCV is modified to be a penalized minimization problem, with penalty |
predictor_penalties |
Default: NULL. Optional list of custom penalties specified per predictor. |
partition_penalties |
Default: NULL. Optional list of custom penalties specified per partition. |
include_quadratic_terms |
Default: TRUE. Logical switch to include squared predictor terms in basis expansions. |
include_cubic_terms |
Default: TRUE. Logical switch to include cubic predictor terms in basis expansions. |
include_quartic_terms |
Default: NULL. Logical switch to include quartic predictor terms in basis expansions. This is highly recommended for fitting models with multiple predictors to avoid over-specified constraints. When NULL (by default), will internally set to FALSE if only one predictor present, and TRUE otherwise. |
include_2way_interactions |
Default: TRUE. Logical switch to include linear two-way interactions between predictors. |
include_3way_interactions |
Default: TRUE. Logical switch to include three-way interactions between predictors. |
include_quadratic_interactions |
Default: FALSE. Logical switch to include linear-quadratic interaction terms. |
offset |
Default: Empty vector. When non-empty, this is a vector of column indices/names to include as offsets. |
just_linear_with_interactions |
Default: NULL. Integer vector specifying columns to retain linear terms with interactions. |
just_linear_without_interactions |
Default: NULL. Integer vector specifying columns to retain only linear terms without interactions. |
exclude_interactions_for |
Default: NULL. Integer vector indicating columns to exclude from all interaction terms. |
exclude_these_expansions |
Default: NULL. Character vector specifying basis expansions to be excluded from the model. These must be named columns of the data, or in the form "_1_", "_2_", "_1_x_2_", "_2_^2" etc. where "1" and "2" indicate column indices of predictor matrix input. |
custom_basis_fxn |
Default: NULL. Optional user-defined function for generating custom basis expansions. See |
include_constrain_fitted |
Default: TRUE. Logical switch to constrain fitted values at knot points. |
include_constrain_first_deriv |
Default: TRUE. Logical switch to constrain first derivatives at knot points. |
include_constrain_second_deriv |
Default: TRUE. Logical switch to constrain second derivatives at knot points. |
include_constrain_interactions |
Default: TRUE. Logical switch to constrain interaction terms at knot points. |
cl |
Default: NULL. Parallel processing cluster object for distributed computation (use |
chunk_size |
Default: NULL. Integer specifying custom fixed chunk size for parallel processing. |
parallel_eigen |
Default: TRUE. Logical flag to enable parallel processing for eigenvalue decomposition computations. |
parallel_trace |
Default: FALSE. Logical flag to enable parallel processing for trace computation. |
parallel_aga |
Default: FALSE. Logical flag to enable parallel processing for specific matrix operations involving G and A. |
parallel_matmult |
Default: FALSE. Logical flag to enable parallel processing for block-diagonal matrix multiplication. |
parallel_unconstrained |
Default: TRUE. Logical flag to enable parallel processing for unconstrained maximum likelihood estimation. |
parallel_find_neighbors |
Default: FALSE. Logical flag to enable parallel processing for neighbor identification (which partitions are neighbors). |
parallel_penalty |
Default: FALSE. Logical flag to enable parallel processing for penalty matrix construction. |
parallel_make_constraint |
Default: FALSE. Logical flag to enable parallel processing for constraint matrix generation. |
unconstrained_fit_fxn |
Default: unconstrained_fit_default. |
keep_weighted_Lambda |
Default: FALSE. Logical flag to retain generalized linear model weights in penalty constraints using Tikhonov parameterization. It is advised to turn this to TRUE when fitting non-canonical GLMs. The default |
iterate_tune |
Default: TRUE. Logical switch to use iterative optimization during penalty tuning. If FALSE, |
iterate_final_fit |
Default: TRUE. Logical switch to use iterative optimization for final model fitting. If FALSE, |
blockfit |
Default: FALSE. Logical switch to abandon per-partition fitting for non-spline effects without interactions, collapse all matrices into block-diagonal single-matrix form, and fit agnostic to partition. This would be more efficient for many non-spline effects without interactions and relatively few spline effects or non-spline effects with interactions. Ignored if |
qp_score_function |
Default: a function returning crossprod(X, (y - mu) * observation_weights) when observation weights are supplied, and crossprod(X, y - mu) otherwise (see Usage). |
qp_observations |
Default: NULL. Numeric vector of observations to apply constraints to for monotonic and range quadratic programming constraints. Useful for saving computational resources. |
qp_Amat |
Default: NULL. Constraint matrix for quadratic programming formulation. The |
qp_bvec |
Default: NULL. Constraint vector for quadratic programming formulation. The |
qp_meq |
Default: 0. Number of equality constraints in quadratic programming setup. The |
qp_positive_derivative, qp_monotonic_increase |
Default: FALSE. Logical flags to constrain the function to have positive first derivatives/be monotonically increasing using quadratic programming with respect to the order (ascending rows) of the input data set. |
qp_negative_derivative, qp_monotonic_decrease |
Default: FALSE. Logical flags to constrain the function to have negative first derivatives/be monotonically decreasing using quadratic programming with respect to the order (ascending rows) of the input data set. |
qp_range_upper |
Default: NULL. Numeric upper bound for constrained fitted values using quadratic programming. |
qp_range_lower |
Default: NULL. Numeric lower bound for constrained fitted values using quadratic programming. |
qp_Amat_fxn |
Default: NULL. Custom function for generating Amat matrix in quadratic programming. |
qp_bvec_fxn |
Default: NULL. Custom function for generating bvec vector in quadratic programming. |
qp_meq_fxn |
Default: NULL. Custom function for determining meq equality constraints in quadratic programming. |
constraint_values |
Default: cbind(). |
constraint_vectors |
Default: cbind(). |
return_G |
Default: TRUE. Logical switch to return the unscaled variance-covariance matrix without smoothing constraints (G, one matrix per partition). |
return_Ghalf |
Default: TRUE. Logical switch to return the matrix square root of the unscaled variance-covariance matrix without smoothing constraints (Ghalf, one matrix per partition). |
return_U |
Default: TRUE. Logical switch to return the constraint projection matrix U. |
estimate_dispersion |
Default: TRUE. Logical flag to estimate the dispersion parameter after fitting. |
unbias_dispersion |
Default: NULL. Logical switch to multiply final dispersion estimates by N/(N - trace(XUGX^T)). |
return_varcovmat |
Default: TRUE. Logical switch to return the variance-covariance matrix of the estimated coefficients. This is needed for performing Wald inference. |
custom_penalty_mat |
Default: NULL. Optional |
cluster_args |
Default: c(custom_centers = NA, nstart = 10) (see Usage). |
dummy_dividor |
Default: 1.2345672152894e-22. Small numeric constant to prevent division by zero in computational routines. |
dummy_adder |
Default: 2.234567210529e-18. Small numeric constant to prevent division by zero in computational routines. |
verbose |
Default: FALSE. Logical flag to print general progress messages during model fitting (does not include during tuning). |
verbose_tune |
Default: FALSE. Logical flag to print detailed progress messages during penalty tuning specifically. |
expansions_only |
Default: FALSE. Logical switch to return only basis expansions without full model fitting. Useful for setting up custom constraints and penalties. |
observation_weights |
Default: NULL. Numeric vector of observation-specific weights for generalized least squares estimation. |
do_not_cluster_on_these |
Default: c(). Vector specifying predictor columns to exclude from clustering procedures, in addition to the non-spline effects by default. |
neighbor_tolerance |
Default: 1 + 1e-8. Numeric tolerance for determining neighboring partitions using k-means clustering. Greater values means more partitions are likely to be considered neighbors. Intended for internal use only (modify at your own risk!). |
null_constraint |
Default: NULL. Alternative parameterization of constraint values. |
critical_value |
Default: qnorm(1 - 0.05/2). Critical value used for constructing confidence intervals. |
data |
Default: NULL. Optional data frame providing context for formula-based model specification. |
weights |
Default: NULL. Alternative name for observation weights, maintained for interface compatibility. |
no_intercept |
Default: FALSE. Logical flag to remove the intercept, constraining it to 0. The function automatically constructs constraint_vectors and constraint_values to achieve this. Calling formulas with a "0+" in them, like response ~ 0 + spl(x), will also constrain the intercept to 0 (see Examples). |
correlation_id , spacetime |
Default: NULL. N-length vector and N-row matrix of cluster (or subject, group etc.) ids and longitudinal/spatial variables respectively, whereby observations within each grouping of correlation_id are modeled as correlated. |
correlation_structure |
Default: NULL. Native implementations of popular variance-covariance structures. Offers options for "exchangeable", "spatial-exponential", "squared-exponential", "ar(1)", "spherical", "gaussian-cosine", "gamma-cosine", and "matern", along with their aliases. The eponymous correlation structure is fit along with coefficients and dispersion, with correlation estimated using a REML objective. See section "Correlation Structure Estimation" for more details. |
VhalfInv |
Default: NULL. Matrix representing a fixed, custom square-root-inverse covariance structure for the response variable of longitudinal and spatial modeling. Must be an N x N matrix. |
Vhalf |
Default: NULL. Matrix representing a fixed, custom square-root covariance structure for the response variable of longitudinal and spatial modeling. Must be an N x N matrix. |
VhalfInv_fxn |
Default: NULL. Function for parametric modeling of the covariance structure |
Vhalf_fxn |
Default: NULL. Function for efficient computation of V^{1/2} (the inverse of the VhalfInv_fxn output). |
VhalfInv_par_init |
Default: c(). Numeric vector of initial parameter values for VhalfInv_fxn. |
REML_grad |
Default: NULL. Function for evaluating the gradient of the objective function (negative REML or custom loss) with respect to the parameters of VhalfInv_fxn. |
custom_VhalfInv_loss |
Default: NULL. Alternative to negative REML for serving as the objective function for optimizing correlation parameters. Must take the same "par" argument as VhalfInv_fxn. |
VhalfInv_logdet |
Default: NULL. Function for efficient computation of log|V^{-1/2}|, avoiding an explicit N x N determinant computation. |
include_warnings |
Default: TRUE. Logical switch to control display of warning messages during model fitting. |
... |
Additional arguments passed to the unconstrained model fitting function. |
Details
A flexible and interpretable implementation of smoothing splines including:
Multiple predictors and interaction terms
Various GLM families and link functions
Correlation structures for longitudinal/clustered data
Shape constraints via quadratic programming
Parallel computation for large datasets
Comprehensive inference tools
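A minimal sketch combining the formula interface, a spline term, and a non-spline linear effect (illustrative only; fully worked versions appear under Examples):
set.seed(1)
x1 <- runif(300, -5, 5)
x2 <- rbinom(300, 1, 0.5)
yy <- sin(x1) + 0.5*x2 + rnorm(300, 0, 0.5)
fit <- lgspline(yy ~ spl(x1) + x2,
                data = data.frame(yy = yy, x1 = x1, x2 = x2),
                K = 3)
summary(fit)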
Value
A list containing the following components:
- y
Original response vector.
- ytilde
Fitted/predicted values on the scale of the response.
- X
List of design matrices \textbf{X}_k for each partition k, containing basis expansions including intercept, linear, quadratic, cubic, and interaction terms as specified.
- A
Constraint matrix \textbf{A} encoding smoothness constraints at knot points and any user-specified linear constraints.
- B
List of fitted coefficients \boldsymbol{\beta}_k for each partition k on the original, unstandardized scale of the predictors and response.
- B_raw
List of fitted coefficients for each partition on the predictor-and-response standardized scale.
- K
Number of interior knots with one predictor (number of partitions minus 1 with > 1 predictor).
- p
Number of basis expansions of predictors per partition.
- q
Number of predictor variables.
- P
Total number of coefficients (p \times (K+1)).
- N
Number of observations.
- penalties
List containing optimized penalty matrices and components:
Lambda: Combined penalty matrix (\boldsymbol{\Lambda}), includes \textbf{L}_{\text{predictor\_list}} contributions but not \textbf{L}_{\text{partition\_list}}.
L1: Smoothing spline penalty matrix (\textbf{L}_1).
L2: Ridge penalty matrix (\textbf{L}_2).
L_predictor_list: Predictor-specific penalty matrices (\textbf{L}_{\text{predictor\_list}}).
L_partition_list: Partition-specific penalty matrices (\textbf{L}_{\text{partition\_list}}).
- knot_scale_transf
Function for transforming predictors to standardized scale used for knot placement.
- knot_scale_inv_transf
Function for transforming standardized predictors back to original scale.
- knots
Matrix of knot locations on the original, unstandardized predictor scale for one predictor.
- partition_codes
Vector assigning observations to partitions.
- partition_bounds
Vector or matrix specifying the boundaries between partitions.
- knot_expand_function
Internal function for expanding data according to partition structure.
- predict
Function for generating predictions on new data.
- assign_partition
Function for assigning new observations to partitions.
- family
GLM family object specifying the error distribution and link function.
- estimate_dispersion
Logical indicating whether dispersion parameter was estimated.
- unbias_dispersion
Logical indicating whether dispersion estimates should be unbiased.
- backtransform_coefficients
Function for converting standardized coefficients to original scale.
- forwtransform_coefficients
Function for converting coefficients to standardized scale.
- mean_y, sd_y
Mean and standard deviation of response if standardized.
- og_order
Original ordering of observations before partitioning.
- order_list
List containing observation indices for each partition.
- constraint_values, constraint_vectors
Matrices specifying linear equality constraints if provided.
- make_partition_list
List containing partition information for > 1-D cases.
- expansion_scales
Vector of scaling factors used for standardizing basis expansions.
- take_derivative, take_interaction_2ndderivative
Functions for computing derivatives of basis expansions.
- get_all_derivatives_insample
Function for computing all derivatives on training data.
- numerics
Indices of numeric predictors used in basis expansions.
- power1_cols, power2_cols, power3_cols, power4_cols
Column indices for linear through quartic terms.
- quad_cols
Column indices for all quadratic terms (including interactions).
- interaction_single_cols, interaction_quad_cols
Column indices for linear-linear and linear-quadratic interactions.
- triplet_cols
Column indices for three-way interactions.
- nonspline_cols
Column indices for terms excluded from spline expansion.
- return_varcovmat
Logical indicating whether variance-covariance matrix was computed.
- raw_expansion_names
Names of basis expansion terms.
- std_X, unstd_X
Functions for standardizing/unstandardizing design matrices.
- parallel_cluster_supplied
Logical indicating whether a parallel cluster was supplied.
- weights
List of observation weights per partition.
- G
List of unscaled unconstrained variance-covariance matrices \textbf{G}_k per partition k if return_G=TRUE. Computed as (\textbf{X}_k^T\textbf{X}_k + \boldsymbol{\Lambda}_\text{eff})^{-1} for partition k.
- Ghalf
List of \textbf{G}_k^{1/2} matrices if return_Ghalf=TRUE.
- U
Constraint projection matrix \textbf{U} if return_U=TRUE. For K=0 and no constraints, returns the identity. Otherwise, returns \textbf{U} = \textbf{I} - \textbf{G}\textbf{A}(\textbf{A}^T\textbf{G}\textbf{A})^{-1}\textbf{A}^T. Used for computing the variance-covariance matrix \sigma^2 \textbf{U}\textbf{G}.
- sigmasq_tilde
Estimated (or fixed) dispersion parameter \tilde{\sigma}^2.
- trace_XUGX
Effective degrees of freedom (\text{trace}(\textbf{X}\textbf{U}\textbf{G}\textbf{X}^{T})).
- varcovmat
Variance-covariance matrix of coefficient estimates if return_varcovmat=TRUE.
- VhalfInv
The \textbf{V}^{-1/2} matrix used for implementing correlation structures, if specified.
- VhalfInv_fxn, Vhalf_fxn, VhalfInv_logdet, REML_grad
Functions for generating \textbf{V}^{-1/2}, \textbf{V}^{1/2}, \log|\textbf{V}^{-1/2}|, and the gradient of REML, if provided.
- VhalfInv_params_estimates
Vector of estimated correlation parameters when using VhalfInv_fxn.
- VhalfInv_params_vcov
Approximate variance-covariance matrix of estimated correlation parameters from BFGS optimization.
- wald_univariate
Function for computing univariate Wald statistics and confidence intervals.
- critical_value
Critical value used for confidence interval construction.
- generate_posterior
Function for drawing from the posterior distribution of coefficients.
- find_extremum
Function for optimizing the fitted function.
- plot
Function for visualizing fitted curves.
- quadprog_list
List containing quadratic programming components if applicable.
When expansions_only=TRUE is used, a reduced list is returned containing only the following, prior to any fitting or tuning:
- X
Design matrices \textbf{X}_k
- y
Response vectors \textbf{y}_k
- A
Constraint matrix \textbf{A}
- penalties
Penalty matrices
- order_list, og_order
Ordering information
- expansion_scales, colnm_expansions
Scaling and naming information
- K, knots
Knot information
- make_partition_list, partition_codes, partition_bounds
Partition information
- constraint_vectors, constraint_values
Constraint information
- quadprog_list
Quadratic programming components if applicable
The returned object has class "lgspline" and provides comprehensive tools for
model interpretation, inference, prediction, and visualization. All
coefficients and predictions can be transformed between standardized and
original scales using the provided transformation functions. The object includes
both frequentist and Bayesian inference capabilities through Wald statistics
and posterior sampling. Advanced customization options are available for
analyzing arbitrarily complex study designs.
See Details for descriptions of the model fitting process.
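A brief sketch of pulling a few of the components listed above from a small illustrative fit (component names as documented):
tt <- runif(200, -5, 5)
yy <- sin(tt) + rnorm(200, 0, 0.25)
fit <- lgspline(tt, yy)
fit$knots          # knot locations on the original predictor scale
fit$K              # number of knots (partitions minus 1)
fit$sigmasq_tilde  # estimated dispersion
fit$trace_XUGX     # effective degrees of freedom
length(fit$B)      # one coefficient vector per partition (K + 1)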
See Also
- solve.QP for quadratic programming optimization
- plot_ly for interactive plotting
- kmeans for k-means clustering
- optim for general purpose optimization routines
Examples
## ## ## ## Simple Examples ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
## Simulate some data, fit using default settings, and plot
set.seed(1234)
t <- runif(2500, -10, 10)
y <- 2*sin(t) + -0.06*t^2 + rnorm(length(t))
model_fit <- lgspline(t, y)
plot(t, y, main = 'Observed Data vs. Fitted Function, Colored by Partition')
plot(model_fit, add = TRUE)
## Repeat using logistic regression, with univariate inference shown
# and alternative function call
y <- rbinom(length(y), 1, 1/(1+exp(-std(y))))
df <- data.frame(t = t, y = y)
model_fit <- lgspline(y ~ spl(t),
df,
opt = FALSE, # no tuning penalties
family = quasibinomial())
plot(t, y, main = 'Observed Data vs Fitted Function with Formulas and Derivatives',
ylim = c(-0.5, 1.05), cex.main = 0.8)
plot(model_fit,
show_formulas = TRUE,
text_size_formula = 0.65,
legend_pos = 'bottomleft',
legend_args = list(y.intersp = 1.1),
add = TRUE)
## Notice how the coefficients match the formula, and expansions are
# homogeneous across partitions without reparameterization
print(summary(model_fit))
## Overlay first and second derivatives of fitted function respectively
derivs <- predict(model_fit,
new_predictors = sort(t),
take_first_derivatives = TRUE,
take_second_derivatives = TRUE)
points(sort(t), derivs$first_deriv, col = 'gold', type = 'l')
points(sort(t), derivs$second_deriv, col = 'goldenrod', type = 'l')
legend('bottomright',
col = c('gold','goldenrod'),
lty = 1,
legend = c('First Derivative', 'Second Derivative'))
## Simple 2D example - including a non-spline effect
z <- seq(-2, 2, length.out = length(y))
df <- data.frame(Predictor1 = t,
Predictor2 = z,
Response = sin(y)+0.1*z)
model_fit <- lgspline(Response ~ spl(Predictor1) + Predictor1*Predictor2,
df)
## Notice, while spline effects change over partitions,
# interactions and non-spline effects are constrained to remain the same
coefficients <- Reduce('cbind', coef(model_fit))
colnames(coefficients) <- paste0('Partition ', 1:(model_fit$K+1))
print(coefficients)
## One or two variables can be selected for plotting at a time
# even when >= 3 predictors are present
plot(model_fit,
custom_title = 'Marginal Relationship of Predictor 1 and Response',
vars = 'Predictor1',
custom_response_lab = 'Response',
show_formulas = TRUE,
legend_pos = 'bottomright',
digits = 4,
text_size_formula = 0.5)
## 3D plots are implemented as well, retaining analytical formulas
my_plot <- plot(model_fit,
show_formulas = TRUE,
custom_response_lab = 'Response')
my_plot
## ## ## ## More Detailed 1D Example ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
## 1D data generating functions
t <- seq(-9, 9, length.out = 1000)
slinky <- function(x) {
(50 * cos(x * 2) -2 * x^2 + (0.25 * x)^4 + 80)
}
coil <- function(x) {
(100 * cos(x * 2) +-1.5 * x^2 + (0.1 * x)^4 +
(0.05 * x^3) + (-0.01 * x^5) +
(0.00002 * x^6) -(0.000001 * x^7) + 100)
}
exponential_log <- function(x) {
unlist(c(sapply(x, function(xx) {
if (xx <= 1) {
100 * (exp(xx) - exp(1))
} else {
100 * (log(xx))
}
})))
}
scaled_abs_gamma <- function(x) {
2*sqrt(gamma(abs(x)))
}
## Composite function
fxn <- function(x)(slinky(x) +
coil(x) +
exponential_log(x) +
scaled_abs_gamma(x))
## Bind together with random noise
dat <- cbind(t, fxn(t) + rnorm(length(t), 0, 50))
colnames(dat) <- c('t', 'y')
x <- dat[,'t']
y <- dat[,'y']
## Fit Model, 4 equivalent ways are shown below
model_fit <- lgspline(t, y)
model_fit <- lgspline(y ~ spl(t), as.data.frame(dat))
model_fit <- lgspline(response = y, predictors = t)
model_fit <- lgspline(data = as.data.frame(dat), formula = y ~ .)
# This is not valid: lgspline(y ~ ., t)
# This is not valid: lgspline(y, data = as.data.frame(dat))
# Do not put operations in formulas, not valid: lgspline(y ~ log(t) + spl(t))
## Basic Functionality
predict(model_fit, new_predictors = rnorm(1)) # make prediction on new data
head(leave_one_out(model_fit)) # leave-one-out cross-validated predictions
coef(model_fit) # extract coefficients
summary(model_fit) # model information and Wald inference
generate_posterior(model_fit) # generate draws of parameters from posterior distribution
find_extremum(model_fit, minimize = TRUE) # find the minimum of the fitted function
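## A hedged sketch: refit re-using the knots and tuned penalties from the fit above
# (custom_knots accepts the returned 'knots' matrix in 1-D, previously_tuned_penalties
# accepts the returned 'penalties' list, and opt = FALSE keeps those penalties fixed)
refit <- lgspline(t, y,
                  custom_knots = model_fit$knots,
                  previously_tuned_penalties = model_fit$penalties,
                  opt = FALSE)
print(model_fit$knots) # knot locations from the original fit
print(refit$knots)     # should carry over to the refit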
## Incorporate range constraints, custom knots, keep penalization identical
# across partitions
model_fit <- lgspline(y ~ spl(t),
unique_penalty_per_partition = FALSE,
custom_knots = cbind(c(-2, -1, 0, 1, 2)),
data = data.frame(t = t, y = y),
qp_range_lower = -150,
qp_range_upper = 150)
## Plotting the constraints and knots
plot(model_fit,
custom_title = 'Fitted Function Constrained to Lie Between (-150, 150)',
cex.main = 0.75)
# knot locations
abline(v = model_fit$knots)
# lower bound from quadratic program
abline(h = -150, lty = 2)
# upper bound from quadratic program
abline(h = 150, lty = 2)
# observed data
points(t, y, cex = 0.24)
## Enforce monotonic increasing constraints on fitted values
# K = 4 => 5 partitions
t <- seq(-10, 10, length.out = 100)
y <- 5*sin(t) + t + 2*rnorm(length(t))
model_fit <- lgspline(t,
y,
K = 4,
qp_monotonic_increase = TRUE)
plot(t, y, main = 'Monotonic Increasing Function with Respect to Fitted Values')
plot(model_fit,
add = TRUE,
show_formulas = TRUE,
legend_pos = 'bottomright',
custom_predictor_lab = 't',
custom_response_lab = 'y')
## ## ## ## 2D Example using Volcano Dataset ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
## Prep
data('volcano')
volcano_long <-
Reduce('rbind', lapply(1:nrow(volcano), function(i){
t(sapply(1:ncol(volcano), function(j){
c(i, j, volcano[i,j])
}))
}))
colnames(volcano_long) <- c('Length', 'Width', 'Height')
## Fit, with 50 partitions
# When fitting with > 1 predictor and large K, including quartic terms
# is highly recommended, and/or dropping the second-derivative constraint.
# Otherwise, the constraints can impose all partitions to be equal, with one
# cubic function fit for all (there aren't enough degrees of freedom to fit
# unique cubic functions due to the massive amount of constraints).
# Below, quartic terms are included and the constraint of second-derivative
# smoothness at knots is ignored.
model_fit <- lgspline(volcano_long[,c(1, 2)],
volcano_long[,3],
include_quadratic_interactions = TRUE,
K = 49,
opt = FALSE,
return_U = FALSE,
return_varcovmat = FALSE,
estimate_dispersion = TRUE,
return_Ghalf = FALSE,
return_G = FALSE,
include_constrain_second_deriv = FALSE,
unique_penalty_per_predictor = FALSE,
unique_penalty_per_partition = FALSE,
wiggle_penalty = 1e-5, # the fixed wiggle penalty
flat_ridge_penalty = 1e-4) # the ridge penalty
## Plotting on new data with interactive visual + formulas
new_input <- expand.grid(seq(min(volcano_long[,1]),
max(volcano_long[,1]),
length.out = 250),
seq(min(volcano_long[,2]),
max(volcano_long[,2]),
length.out = 250))
model_fit$plot(new_predictors = new_input,
show_formulas = TRUE,
custom_response_lab = "Height",
custom_title = 'Volcano 3-D Map',
digits = 2)
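## A brief hedged check of in-sample fit quality using the documented
# 'y' and 'ytilde' components returned by lgspline
print(cor(model_fit$y, model_fit$ytilde)^2) # in-sample R-squared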
## ## ## ## Advanced Techniques using Trees Dataset ## ## ## ## ## ## ## ## ## ## ## ## ##
## Goal here is to introduce how lgspline works with non-canonical GLMs and
# demonstrate some custom features
data('trees')
## L1-regularization constraint function on standardized coefficients
# Bound all coefficients to be less than a certain value (l1_bound) in absolute
# magnitude such that | B^{(j)}_k | < l1_bound for all j = 1,...,p coefficients,
# and k = 1,...,K+1 partitions.
l1_constraint_matrix <- function(p, K) {
## Total number of coefficients
P <- p * (K + 1)
## Create diagonal matrices for L1 constraint
# First block: beta > -bound
# Second block: -beta > -bound
first_diag <- diag(P)
second_diag <- -diag(P)
## Combine matrices
l1_Amat <- cbind(first_diag, second_diag)
return(l1_Amat)
}
## Bounds absolute value of coefficients to be < l1_bound
l1_bound_vector <- function(qp_Amat,
scales,
l1_bound) {
## Scale bounds so they apply to coefficients on the original (unstandardized) scale
l1_bvec <- rep(-l1_bound, ncol(qp_Amat)) * c(1, scales)
return(l1_bvec)
}
## Fit model, using predictor-response formulation, assuming
# Gamma-distributed response, and custom quadratic-programming constraints,
# with qp_score_function/glm_weight_function updated for non-canonical GLMs
# as well as quartic terms, keeping the effect of height constant across
# partitions, and 3 partitions in total. Hence, this is an advanced-usage
# case.
# You can modify this code for performing l1-regularization in general.
# For canonical GLMs, the default qp_score_function/glm_weight_function are
# correct and do not need to be changed
# (custom functionality is not needed for canonical GLMs).
model_fit <- lgspline(
Volume ~ spl(Girth) + Height*Girth,
data = with(trees, cbind(Girth, Height, Volume)),
family = Gamma(link = 'log'),
keep_weighted_Lambda = TRUE,
glm_weight_function = function(
mu,
y,
order_indices,
family,
dispersion,
observation_weights,
...){
rep(1/dispersion, length(y))
},
dispersion_function = function(
mu,
y,
order_indices,
family,
observation_weights,
...){
mean(
mu^2/((y-mu)^2)
)
}, # = biased estimate of 1/shape parameter
need_dispersion_for_estimation = TRUE,
unbias_dispersion = TRUE, # multiply dispersion by N/(N-trace(XUGX^{T}))
K = 2, # 3 partitions
opt = FALSE, # keep penalties fixed
unique_penalty_per_partition = FALSE,
unique_penalty_per_predictor = FALSE,
flat_ridge_penalty = 1e-64,
wiggle_penalty = 1e-64,
qp_score_function = function(X, y, mu, order_list, dispersion, VhalfInv,
observation_weights, ...){
t(X) %**% diag(c(1/mu * 1/dispersion)) %**% cbind(y - mu)
}, # updated score for gamma regression with log link
qp_Amat_fxn = function(N, p, K, X, colnm, scales, deriv_fxn, ...) {
l1_constraint_matrix(p, K)
},
qp_bvec_fxn = function(qp_Amat, N, p, K, X, colnm, scales, deriv_fxn, ...) {
l1_bound_vector(qp_Amat, scales, 25)
},
qp_meq_fxn = function(qp_Amat, N, p, K, X, colnm, scales, deriv_fxn, ...) 0
)
## Notice, interaction effect is constant across partitions as is the effect
# of Height alone
Reduce('cbind', coef(model_fit))
print(summary(model_fit))
## Plot results
plot(model_fit, custom_predictor_lab1 = 'Girth',
custom_predictor_lab2 = 'Height',
custom_response_lab = 'Volume',
custom_title = 'Girth and Height Predicting Volume of Trees',
show_formulas = TRUE)
## Verify magnitude of unstandardized coefficients does not exceed bound (25)
print(max(abs(unlist(model_fit$B))))
## Find height and girth where tree volume is closest to 42
# Uses custom objective that minimizes MSE discrepancy between predicted
# value and 42.
# The vanilla find_extremum function can be thought of as
# using "function(mu)mu" aka the identity function as the
# objective, where mu = "f(t)", our estimated function. The derivative is then
# d_mu = "f'(t)" with respect to predictors t.
# But with more creative objectives, and since we have machinery for
# f'(t) already available, we can compute gradients for (and optimize)
# arbitrary differentiable functions of our predictors too.
# For any objective, differentiate w.r.t. mu, then multiply by d_mu to
# satisfy chain rule.
# Here, we have objective function: 0.5*(mu-42)^2
# and gradient : (mu-42)*d_mu
# and L-BFGS-B will be used to find the height and girth that most closely
# yields a prediction of 42 within the bounds of the observed data.
# The d_mu also takes into account link function transforms automatically
# for most common link functions, and will return warning + instructions
# on how to program the link-function derivatives otherwise.
## Custom acquisition functions for Bayesian optimization could be coded here.
find_extremum(
model_fit,
minimize = TRUE,
custom_objective_function = function(mu, sigma, ybest, ...){
0.5*(mu - 42)^2
},
custom_objective_derivative = function(mu, sigma, ybest, d_mu, ...){
(mu - 42) * d_mu
}
)
## ## ## ## How to Use Formulas in lgspline ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
## Demonstrates splines with multiple mixed predictors and interactions
## Generate data
n <- 2500
x <- rnorm(n)
y <- rnorm(n)
z <- sin(x)*mean(abs(y))/2
## Categorical predictors
cat1 <- rbinom(n, 1, 0.5)
cat2 <- rbinom(n, 1, 0.5)
cat3 <- rbinom(n, 1, 0.5)
## Response with mix of effects
response <- y + z + 0.1*(2*cat1 - 1)
## Continuous predictors re-named
continuous1 <- x
continuous2 <- z
## Combine data
dat <- data.frame(
response = response,
continuous1 = continuous1,
continuous2 = continuous2,
cat1 = cat1,
cat2 = cat2,
cat3 = cat3
)
## Example 1: Basic Model with Default Terms, No Intercept
# standardize_response = FALSE often needed when constraining intercepts to 0
fit1 <- lgspline(
formula = response ~ 0 + spl(continuous1, continuous2) +
cat1*cat2*continuous1 + cat3,
K = 2,
standardize_response = FALSE,
data = dat
)
## Examine coefficients included
rownames(fit1$B$partition1)
## Verify intercept term is near 0 up to some numeric tolerance
abs(fit1$B[[1]][1]) < 1e-8
## Example 2: Similar Model with Intercept, Other Terms Excluded
fit2 <- lgspline(
formula = response ~ spl(continuous1, continuous2) +
cat1*cat2*continuous1 + cat3,
K = 1,
standardize_response = FALSE,
include_cubic_terms = FALSE,
exclude_these_expansions = c( # Not all need to actually be present
'_batman_x_robin_',
'_3_x_4_', # no cat1 x cat2 interaction, coded using column indices
'continuous1xcontinuous2', # no continuous1 x continuous2 interaction
'thejoker'
),
data = dat
)
## Examine coefficients included
rownames(Reduce('cbind',coef(fit2)))
# Intercept will probably be present and non-0 now
abs(fit2$B[[1]][1]) < 1e-8
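## A hedged sketch: use expansions_only = TRUE to list the generated expansion
# names (colnm_expansions, per the reduced list in 'Value') before deciding
# what to pass to exclude_these_expansions
setup <- lgspline(
  formula = response ~ spl(continuous1, continuous2) +
    cat1*cat2*continuous1 + cat3,
  K = 1,
  data = dat,
  expansions_only = TRUE
)
print(setup$colnm_expansions)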
## ## ## ## Compare Inference to survreg for Weibull AFT Model Validation ##
# Only linear predictors, no knots, no penalties, using Weibull AFT Model
# The goal here is to ensure that for the special case of no spline effects
# and no knots, this implementation will be consistent with other model
# implementations.
# Also note that when using models (like Weibull AFT) where dispersion is
# being estimated and is required for estimating beta coefficients,
# we use a Schur complement correction function to adjust (or "correct") our
# variance-covariance matrix for both estimation and inference to account for
# uncertainty in estimating the dispersion.
# Typically the shur_correction_function would return a negative-definite
# matrix, as its output is elementwise added to the information matrix prior
# to inversion.
require(survival)
df <- data.frame(na.omit(
pbc[,c('time','trt','stage','hepato','bili','age','status')]
))
## Weibull AFT using lgspline, showing how some custom options can be used to
# fit more complicated models
model_fit <- lgspline(time ~ trt + stage + hepato + bili + age,
df,
family = weibull_family(),
need_dispersion_for_estimation = TRUE,
dispersion_function = weibull_dispersion_function,
glm_weight_function = weibull_glm_weight_function,
shur_correction_function = weibull_shur_correction,
unconstrained_fit_fxn = unconstrained_fit_weibull,
opt = FALSE,
wiggle_penalty = 0,
flat_ridge_penalty = 0,
K = 0,
status = pbc$status!=0)
print(summary(model_fit))
## Survreg results match closely on estimates and inference for coefficients
survreg_fit <- survreg(Surv(time, status!=0) ~ trt + stage + hepato + bili + age,
df)
print(summary(survreg_fit))
## sigmasq_tilde = scale^2 of survreg
print(c(sqrt(model_fit$sigmasq_tilde), survreg_fit$scale))
## ## ## ## Modelling Correlation Structures ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
## Setup
n_blocks <- 150 # Number of correlation_ids (subjects)
block_size <- 5 # Size of each correlation_id block (number of repeated measures per subj.)
N <- n_blocks * block_size # total sample size (balanced here)
rho_true <- 0.25 # True correlation
## Generate predictors and mean structure
t <- seq(-9, 9, length.out = N)
true_mean <- sin(t)
## Create block compound symmetric errors = I(1-rho) + J*rho
errors <- Reduce('rbind',
lapply(1:n_blocks,
function(i){
sigma <- diag(block_size) + rho_true *
(matrix(1, block_size, block_size) -
diag(block_size))
matsqrt(sigma) %*% rnorm(block_size)
}))
## Generate response with correlated errors
y <- true_mean + errors * 0.5
## Fit model with correlation structure
# include_warnings = FALSE is a good idea here, since many proposed
# correlations won't work
model_fit <- lgspline(t,
y,
K = 4,
correlation_id = rep(1:n_blocks, each = block_size),
correlation_structure = 'exchangeable',
include_warnings = FALSE
)
## Assess overall fit
plot(t, y, main = 'Sinusoidal Fit Under Correlation Structure')
plot(model_fit, add = TRUE, show_formulas = TRUE, custom_predictor_lab = 't')
## Compare estimated vs true correlation
rho_est <- tanh(model_fit$VhalfInv_params_estimates)
print(c("True correlation:" = rho_true,
"Estimated correlation:" = rho_est))
## Quantify uncertainty in correlation estimate with 95% confidence interval
se <- c(sqrt(diag(model_fit$VhalfInv_params_vcov))) / sqrt(model_fit$N)
ci <- tanh(model_fit$VhalfInv_params_estimates + c(-1.96, 1.96)*se)
print("95% CI for correlation:")
print(ci)
## Also check SD (should be close to 0.5)
print(sqrt(model_fit$sigmasq_tilde))
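## A hedged sketch: supply the same exchangeable structure as a fixed V^{-1/2}
# via the 'VhalfInv' argument instead of estimating it (rows of t, y are already
# ordered by block here; matinvsqrt is the helper used elsewhere in these examples)
sigma_block <- diag(block_size) + rho_true *
  (matrix(1, block_size, block_size) - diag(block_size))
VhalfInv_fixed <- kronecker(diag(n_blocks), matinvsqrt(sigma_block))
fixed_fit <- lgspline(t, y, K = 4, VhalfInv = VhalfInv_fixed)
print(sqrt(fixed_fit$sigmasq_tilde)) # should again be near 0.5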
## Toeplitz Simulation Setup, with demonstration of custom functions
# and boilerplate. Toeplitz is not implemented by default, because it makes
# strong assumptions on the study design and missingness that are rarely met,
# with non-obvious workarounds.
# If a GLM were to be fit, you'd also supply a function "Vhalf_fxn" analogous
# to VhalfInv_fxn with same argument (par) and an output of an N x N matrix
# that yields the inverse of VhalfInv_fxn output.
n_blocks <- 150 # Number of correlation_ids
block_size <- 4 # Observations per correlation_id
N <- n_blocks * block_size # total sample size
rho_true <- 0.5 # True correlation within correlation_ids
true_intercept <- 2 # True intercept
true_slope <- 0.5 # True slope for covariate
## Create design matrix with meaningful predictors
Tmat <- matrix(0, N, 2)
Tmat[,1] <- 1 # Intercept
Tmat[,2] <- cos(rnorm(N)) # Continuous predictor
## True coefficients
beta <- c(true_intercept, true_slope)
## Create time and correlation_id variables
time_var <- rep(1:block_size, n_blocks)
correlation_id_var <- rep(1:n_blocks, each = block_size)
## Create block compound symmetric errors
errors <- Reduce('rbind',
lapply(1:n_blocks, function(i) {
sigma <- diag(block_size) + rho_true *
(matrix(1, block_size, block_size) -
diag(block_size))
matsqrt(sigma) %*% rnorm(block_size)
}))
## Generate response with correlated errors and covariate effect
y <- Tmat %*% beta + errors * 2
## Toeplitz correlation function
VhalfInv_fxn <- function(par) {
# Initialize correlation matrix
corr <- matrix(0, block_size, block_size)
# Construct Toeplitz matrix with correlation by lag
for(i in 1:block_size) {
for(j in 1:block_size) {
lag <- abs(time_var[i] - time_var[j])
if(lag == 0) {
corr[i,j] <- 1
} else if(lag <= length(par)) {
# Use tanh to bound correlations between -1 and 1
corr[i,j] <- tanh(par[lag])
}
}
}
## Matrix square root inverse
corr_inv_sqrt <- matinvsqrt(corr)
## Expand to full matrix using Kronecker product
kronecker(diag(n_blocks), corr_inv_sqrt)
}
## Determinant function (for efficiency)
# This avoids taking determinant of N by N matrix
VhalfInv_logdet <- function(par) {
# Initialize correlation matrix
corr <- matrix(0, block_size, block_size)
# Construct Toeplitz matrix
for(i in 1:block_size) {
for(j in 1:block_size) {
lag <- abs(time_var[i] - time_var[j])
if(lag == 0) {
corr[i,j] <- 1
} else if(lag <= length(par)) {
corr[i,j] <- tanh(par[lag])
}
}
}
# Compute log determinant
log_det_invsqrt_corr <- -0.5 * determinant(corr, logarithm=TRUE)$modulus[1]
return(n_blocks * log_det_invsqrt_corr)
}
## REML gradient function
REML_grad <- function(par, model_fit, ...) {
## Initialize gradient vector
n_par <- length(par)
gradient <- numeric(n_par)
## Get dimensions and organize data
nr <- nrow(model_fit$X[[1]])
## Process derivatives one parameter at a time
for(p in 1:n_par) {
## Initialize derivative matrix
dV <- matrix(0, nrow(model_fit$VhalfInv), ncol(model_fit$VhalfInv))
V <- matrix(0, nrow(model_fit$VhalfInv), ncol(model_fit$VhalfInv))
## Compute full correlation matrix and its derivative for parameter p
for(clust in unique(correlation_id_var)) {
inds <- which(correlation_id_var == clust)
block_size <- length(inds)
## Initialize block matrices
V_block <- matrix(0, block_size, block_size)
dV_block <- matrix(0, block_size, block_size)
## Construct Toeplitz matrix and its derivative
for(i in 1:block_size) {
for(j in 1:block_size) {
## Compute lag between observations
lag <- abs(time_var[i] - time_var[j])
## Diagonal is always 1
if(i == j) {
V_block[i,j] <- 1
dV_block[i,j] <- 0
} else {
## Correlation for off-diagonal depends on lag
if(lag <= length(par)) {
## Correlation via tanh parameterization
V_block[i,j] <- tanh(par[lag])
## Derivative for the relevant parameter
if(lag == p) {
## Chain rule for tanh: d/dx tanh(x) = 1 - tanh^2(x)
dV_block[i,j] <- 1 - tanh(par[p])^2
}
}
}
}
}
## Assign blocks to full matrices
V[inds, inds] <- V_block
dV[inds, inds] <- dV_block
}
## GLM Weights based on current model fit (all 1s for normal)
glm_weights <- rep(1, model_fit$N)
## Quadratic form contribution
resid <- model_fit$y - model_fit$ytilde
VinvResid <- model_fit$VhalfInv %**% cbind(resid) / glm_weights
quad_term <- -0.5 * ((t(VinvResid) %**% dV) %**% VinvResid) /
model_fit$sigmasq_tilde
## Log|V| contribution - trace term
trace_term <- 0.5 * sum(diag(model_fit$VhalfInv %**%
model_fit$VhalfInv %**%
dV))
## Information matrix contribution
U <- t(t(model_fit$U) * rep(c(1, model_fit$expansion_scales),
model_fit$K + 1)) /
model_fit$sd_y
VhalfInvX <- model_fit$VhalfInv %**%
collapse_block_diagonal(model_fit$X)[unlist(
model_fit$og_order
),] %**%
U
## Lambda computation for GLMs
if(length(model_fit$penalties$L_partition_list) != (model_fit$K + 1)){
model_fit$penalties$L_partition_list <- lapply(
1:(model_fit$K + 1), function(k)0
)
}
Lambda <- U %**% collapse_block_diagonal(
lapply(1:(model_fit$K + 1),
function(k)
c(1, model_fit$expansion_scales) * (
model_fit$penalties$L_partition_list[[k]] +
model_fit$penalties$Lambda) %**%
diag(c(1, model_fit$expansion_scales)) /
model_fit$sd_y^2
)
) %**% t(U)
XVinvX_inv <- invert(gramMatrix(t(t(VhalfInvX)*c(glm_weights))) +
Lambda)
VInvX <- model_fit$VhalfInv %**% VhalfInvX
sc <- sqrt(norm(VInvX, '2'))
VInvX <- VInvX/sc
dXVinvX <-
(XVinvX_inv %**% t(VInvX)) %**%
(dV %**% VInvX)
XVinvX_term <- -0.5 * colSums(cbind(c(diag(dXVinvX) * sc))) * sc
## Store gradient component (all three terms)
gradient[p] <- as.numeric(quad_term + trace_term + XVinvX_term)
}
## Return normalized gradient
return(gradient / model_fit$N)
}
## Visualization
plot(time_var, y, col = correlation_id_var,
main = "Simulated Data with Toeplitz Correlation")
## Fit model with custom Toeplitz (takes ~ 5-10 minutes on my laptop)
# Note, for GLMs, efficiency would be improved by supplying a Vhalf_fxn
# although strictly only VhalfInv_fxn and VhalfInv_par_init are needed
model_fit <- lgspline(
response = y,
predictors = Tmat[,2],
K = 4,
VhalfInv_fxn = VhalfInv_fxn,
VhalfInv_logdet = VhalfInv_logdet,
REML_grad = REML_grad,
VhalfInv_par_init = c(0, 0, 0),
include_warnings = FALSE
)
## Print comparison of true and estimated correlations
rho_true <- rep(0.5, 3)
rho_est <- tanh(model_fit$VhalfInv_params_estimates)
cat("Correlation Estimates:\n")
print(data.frame(
"True Correlation" = rho_true,
"Estimated Correlation" = rho_est
))
## Should be ~ 2
print(sqrt(model_fit$sigmasq_tilde))
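## A hedged sketch: approximate 95% intervals for the Toeplitz correlation
# parameters, mirroring the exchangeable example above (same tanh transform
# and VhalfInv_params_vcov component)
se <- sqrt(diag(model_fit$VhalfInv_params_vcov)) / sqrt(model_fit$N)
ci_lower <- tanh(model_fit$VhalfInv_params_estimates - 1.96*se)
ci_upper <- tanh(model_fit$VhalfInv_params_estimates + 1.96*se)
print(cbind(ci_lower, rho_est, ci_upper))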
## ## ## ## Parallelism ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ## ##
## Data generating function
a <- runif(500000, -9, 9)
b <- runif(500000, -9, 9)
c <- rnorm(500000)
d <- rpois(500000, 1)
y <- sin(a) + cos(b) - 0.2*sqrt(a^2 + b^2) +
abs(a) + b +
0.5*(a^2 + b^2) +
(1/6)*(a^3 + b^3) +
a*b*c -
c +
d +
rnorm(500000, 0, 5)
## Set up cores
cl <- parallel::makeCluster(1)
on.exit(parallel::stopCluster(cl))
## This example shows some options for what operations can be parallelized
# By default, only parallel_eigen and parallel_unconstrained are TRUE
# G, G^{-1/2}, and G^{1/2} are computed in parallel across each of the
# K+1 partitions.
# However, parallel_unconstrained only affects GLMs without corr. components
# - it does not affect fitting here
system.time({
parfit <- lgspline(y ~ spl(a, b) + a*b*c + d,
data = data.frame(y = y,
a = a,
b = b,
c = c,
d = d),
cl = cl,
K = 1,
parallel_eigen = TRUE,
parallel_unconstrained = TRUE,
parallel_aga = FALSE,
parallel_find_neighbors = FALSE,
parallel_trace = FALSE,
parallel_matmult = FALSE,
parallel_make_constraint = FALSE,
parallel_penalty = FALSE)
})
parallel::stopCluster(cl)
print(summary(parfit))