Title: | Linear and Smooth Predictor Modelling with Penalisation and Variable Selection |
---|---|
Description: | Fit a model with potentially many linear and smooth predictors. Interaction effects can also be quantified. Variable selection is done using penalisation. For l1-type penalties we use iterative steps alternating between using linear predictors (lasso) and smooth predictors (generalised additive model). |
Authors: | Indrayudh Ghosal [aut, cre], Matthias Kormaksson [aut] |
Maintainer: | Indrayudh Ghosal <[email protected]> |
License: | GPL-2 |
Version: | 0.2.0 |
Built: | 2024-11-16 04:04:48 UTC |
Source: | https://github.com/cran/plsmselect |
Undocumented function. Do not use directly
cbh(lp, event.time, status)
cbh(lp, event.time, status)
lp |
The linear predictor to be used as offset |
event.time |
The event times |
status |
Status indicating the complement of censoring |
Undocumented function. Do not use directly
create_dataset()
create_dataset()
This is only used when with family="cox"
cumbasehaz(object)
cumbasehaz(object)
object |
fitted model object of the class |
This function returns the cumulative baseline hazard function of
a gamlasso
object if fitted using family = "cox"
. More
specifically, cumbasehaz(object) is the cumulative baseline hazard function
corresponding to the linear predictor predict(object).
library(plsmselect) data(simData) ## Fit Cox gamlasso model using the formula approach: ## (L1-penalty both on X terms and smooth terms (bs="ts")) simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1] cfit = gamlasso(time ~ X + s(z1, bs="ts", k=5) + s(z2, bs="ts", k=5) + s(z3, bs="ts", k=5) + s(z4, bs="ts", k=5), data = simData, family = "cox", weights="status", seed=1) ## Obtain and plot predicted cumulative baseline hazard: H0.pred <- cumbasehaz(cfit) time.seq <- seq(0, 60, by=1) plot(time.seq, H0.pred(time.seq), type="l", ylab="Predicted Cumulative Baseline Hazard") ## Obtain predicted survial probabilities at month 1 and 2 (days 30 & 60): lp <- predict(cfit) # estimated linear predictor S.pred <- cbind(exp(-H0.pred(30)*exp(lp)), exp(-H0.pred(60)*exp(lp))) ## Obtain predicted survival at month 1 and 2 directly: S.pred2 <- predict(cfit, type="response", new.event.times=c(30,60)) ## Confirm that the two arrived at the same values: all.equal(S.pred, S.pred2) # See ?gamlasso for an example fitting a gaussian response model # See ?summary.gamlasso for an example fitting a binomial response model # See ?predict.gamlasso for an example fitting a poisson response model
library(plsmselect) data(simData) ## Fit Cox gamlasso model using the formula approach: ## (L1-penalty both on X terms and smooth terms (bs="ts")) simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1] cfit = gamlasso(time ~ X + s(z1, bs="ts", k=5) + s(z2, bs="ts", k=5) + s(z3, bs="ts", k=5) + s(z4, bs="ts", k=5), data = simData, family = "cox", weights="status", seed=1) ## Obtain and plot predicted cumulative baseline hazard: H0.pred <- cumbasehaz(cfit) time.seq <- seq(0, 60, by=1) plot(time.seq, H0.pred(time.seq), type="l", ylab="Predicted Cumulative Baseline Hazard") ## Obtain predicted survial probabilities at month 1 and 2 (days 30 & 60): lp <- predict(cfit) # estimated linear predictor S.pred <- cbind(exp(-H0.pred(30)*exp(lp)), exp(-H0.pred(60)*exp(lp))) ## Obtain predicted survival at month 1 and 2 directly: S.pred2 <- predict(cfit, type="response", new.event.times=c(30,60)) ## Confirm that the two arrived at the same values: all.equal(S.pred, S.pred2) # See ?gamlasso for an example fitting a gaussian response model # See ?summary.gamlasso for an example fitting a binomial response model # See ?predict.gamlasso for an example fitting a poisson response model
Undocumented function. Do not use directly
find_family(fam)
find_family(fam)
fam |
Family in character form |
Undocumented function. Do not use directly
formula_setup( formula = NULL, response.name = NULL, linear.name = NULL, smooth.name = NULL, family = NULL, smooth.penalty = NULL, num.knots = NULL, offset.name = NULL, interactions = F )
formula_setup( formula = NULL, response.name = NULL, linear.name = NULL, smooth.name = NULL, family = NULL, smooth.penalty = NULL, num.knots = NULL, offset.name = NULL, interactions = F )
formula |
A formula to be parsed |
response.name |
The name of the response variable. Vector of two if
|
linear.name |
The names of the variables to be used as linear predictors |
smooth.name |
The names of the variables to be used as smoothers |
family |
The family describing the error distribution and link function
to be used in the model. A character string which can only be
|
smooth.penalty |
The penalty used on the smoothers. Can be 1 or 2 |
num.knots |
Number of knots for each smoothers. Can be a single integer (recycled for each smoother variable) or a vector of integers the same length as the number of smoothers. |
offset.name |
The name of the offset variable. |
interactions |
logical. Should interactions be included. |
This function will fit a gamlasso model with the given penalties. For some
special cases using gam
or glmnet
might be more efficient and/or flexible
## S3 method for class 'formula' gamlasso( formula, data, family = "gaussian", linear.penalty = "l1", smooth.penalty = "l2", num.knots = 5, offset = NULL, weights = NULL, interactions = F, seed = .Random.seed[1], num.iter = 100, tolerance = 1e-04, ... ) ## Default S3 method: gamlasso( response, linear.terms, smooth.terms, data, family = "gaussian", linear.penalty = "l1", smooth.penalty = "l2", num.knots = 5, offset = NULL, weights = NULL, interactions = F, seed = .Random.seed[1], num.iter = 100, tolerance = 1e-04, prompts = F, verbose = T, ... )
## S3 method for class 'formula' gamlasso( formula, data, family = "gaussian", linear.penalty = "l1", smooth.penalty = "l2", num.knots = 5, offset = NULL, weights = NULL, interactions = F, seed = .Random.seed[1], num.iter = 100, tolerance = 1e-04, ... ) ## Default S3 method: gamlasso( response, linear.terms, smooth.terms, data, family = "gaussian", linear.penalty = "l1", smooth.penalty = "l2", num.knots = 5, offset = NULL, weights = NULL, interactions = F, seed = .Random.seed[1], num.iter = 100, tolerance = 1e-04, prompts = F, verbose = T, ... )
formula |
A formula describing the model to be fitted |
response |
The name of the response variable. Could be two variables in case of a general binomial fit (see details below) |
linear.terms |
The names of the variables to be used as linear predictors |
smooth.terms |
The names of the variables to be used as smoothers |
data |
The data with which to fit the model |
family |
The family describing the error distribution and link function
to be used in the model. A character string which can only be
|
linear.penalty |
The penalty used on the linear predictors. A character
string which can be |
smooth.penalty |
The penalty used on the smoothers. A character
string which can be |
num.knots |
Number of knots for each smoothers. Can be a single integer (recycled for each smoother variable) or a vector of integers the same length as the number of smoothers. |
offset |
The name of the offset variable. |
weights |
The name of the weights variable. |
interactions |
logical. Should interactions be included as covariates.
If |
seed |
The random seed can be specified for reproducibility. This is used for fitting the gam and lasso models, or fixed before each loop of gamlasso. |
num.iter |
Number of iterations for the gamlasso loop |
tolerance |
Tolerance for covergence of the gamlasso loop |
prompts |
logical. Should |
verbose |
logical. Should there be "progress reports" printed to the console while fitting the model. |
... |
Additional arguments |
gamlasso
allows for specifying models in two ways:
1) with the the formula approach, and 2) with the term specification approach.
The formula approach is appropriate for when the user wants an L1-penalty on the
linear terms of the model, in which case the user is required to specify the linear terms
in a model matrix named "X" appended to the input data frame. A typical formula specification
would be "y ~ X + s(z) + ...
" where "X
" corresponds to the model-matrix of
linear terms subject to an L1-penalty, while everything to the right of "X
" is
considered part of the gam formula (i.e. all smooth terms). In light of the above formula,
gamlasso iterates (until convergence) between the following two lines of pseudo code:
model.cv.glmnet <- cv.glmnet(y=y, x=X, offset="model.gam fitted values")
model.gam <- gam(y ~ s(z) + ..., offset="model.cv.glmnet fitted values")
The term specification approach can fit the same type of models as the formula approach
(i.e. models with L1-penalty on the linear terms). However, it is more flexible in terms
of penalty-structure and can be useful if the user has big data sets with lots of variables
making the formula specification cumbersome. In the term specification approach
the user simply specifies the names of the data columns corresponding to the
response
, linear.terms
and smooth.terms
and then specifies
whether to put a linear.penalty="l1"
, "l2"
or "none"
(on linear.terms
) and whether to put a smooth.penalty="l1"
or
"l2"
(on smooth.terms
).
While fitting a binomial model for binary responses (0/1) include the response
variable before "~" if using the formula approach or when using the term-
specification approach the response
argument will be a single variable name.
In general if the responses are success/failure counts then the formula should
start with something similar to cbind(success,failure) ~ ...
and for
using the term-specification approach the response
argument should be a
vector of length two giving the success and failure variable names.
If family="cox"
then the weights
argument must be provided
and should correspond to a status variable (1-censor). For other models
it should correspond to a custom weights variables to be used for the
weighted log-likelihood, for example the total counts for fitting a
binomial model. (weights for families other than "cox" currently not
implemented)
Both the formula and term-specification approaches can fit interaction models as
well. There are three kinds of interactions - those between two linear predictors,
between two smooth predictors and between linear and smooth predictors. For the
formula approach the first type of interaction must be included as additional
columns in the "X
" matrix and the other two types must be mentioned in the
smooth terms part of the formula. For the term-specification approach the argument
interaction
must be TRUE
in which case all the pairwise
interactions are used as predictors and variable selection is done on all of them.
If the arguments fail the basic checking by gamlassoChecks
then returns NULL
. Else the function calls gamlassoFit
which
returns a list of two models, gam
and cv.glmnet
.
Either of these could be NULL
but if both are non-null then
convergence
, a matrix of values determining the convergence
of the gamlasso loop is also returned.
gamlassoFit
also returns inherit
, a list of select
arguments used to fit the gamlasso
model and some more values needed
for prediction.
The default values of num.iter
and tolerance
are
essentially arbitrary. Also for each step when we check for convergence
between the new and old predictions by the gam and lasso predictions,
we use the following distance metric
library(plsmselect) data(simData) ## Fit gaussian gamlasso model using the formula approach: ## (L1-penalty both on model matrix (X) and smooth terms (bs="ts")) simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1] gfit = gamlasso(Yg ~ X + s(z1, k=5, bs="ts") + s(z2, k=5, bs="ts") + s(z3, k=5, bs="ts") + s(z4, k=5, bs="ts"), data = simData, seed=1) ## Equivalently with term specification approach: gfit = gamlasso(response="Yg", linear.terms=paste0("x",1:10), smooth.terms=paste0("z",1:4), data=simData, linear.penalty = "l1", smooth.penalty = "l1", num.knots = 5, seed=1) ## The two main components of gfit are ## gfit$cv.glmnet (LASSO component) and gfit$gam (GAM components): ## Extract lasso estimates of linear terms: coef(gfit$cv.glmnet, s="lambda.min") ## Plot the estimates of the smooth effects: plot(gfit$gam, pages=1) # See ?summary.gamlasso for an example fitting a binomial response model # See ?predict.gamlasso for an example fitting a poisson response model # See ?cumbasehaz for an example fitting a survival response model
library(plsmselect) data(simData) ## Fit gaussian gamlasso model using the formula approach: ## (L1-penalty both on model matrix (X) and smooth terms (bs="ts")) simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1] gfit = gamlasso(Yg ~ X + s(z1, k=5, bs="ts") + s(z2, k=5, bs="ts") + s(z3, k=5, bs="ts") + s(z4, k=5, bs="ts"), data = simData, seed=1) ## Equivalently with term specification approach: gfit = gamlasso(response="Yg", linear.terms=paste0("x",1:10), smooth.terms=paste0("z",1:4), data=simData, linear.penalty = "l1", smooth.penalty = "l1", num.knots = 5, seed=1) ## The two main components of gfit are ## gfit$cv.glmnet (LASSO component) and gfit$gam (GAM components): ## Extract lasso estimates of linear terms: coef(gfit$cv.glmnet, s="lambda.min") ## Plot the estimates of the smooth effects: plot(gfit$gam, pages=1) # See ?summary.gamlasso for an example fitting a binomial response model # See ?predict.gamlasso for an example fitting a poisson response model # See ?cumbasehaz for an example fitting a survival response model
This function checks if the arguments entered for fitting a gamlasso model
are compatible with each other. Not recommended to call directly. Only use
if cleaning data prior to fitting gamlassoFit
gamlassoChecks( data, response.name, linear.name, smooth.name, family, linear.penalty, smooth.penalty, offset.name, weights.name, num.knots, num.iter, tolerance, seed, prompts )
gamlassoChecks( data, response.name, linear.name, smooth.name, family, linear.penalty, smooth.penalty, offset.name, weights.name, num.knots, num.iter, tolerance, seed, prompts )
data |
The training data for fitting the model |
response.name |
The name of the response variable. Vector of two if
|
linear.name |
The names of the variables to be used as linear predictors |
smooth.name |
The names of the variables to be used as smoothers |
family |
The family describing the error distribution and link function
to be used in the model. A character string which can only be
|
linear.penalty |
The penalty used on the linear predictors. Can be 0, 1 or 2 |
smooth.penalty |
The penalty used on the smoothers. Can be 1 or 2 |
offset.name |
The name of the offset variable. |
weights.name |
The name of the weights variable. |
num.knots |
Number of knots for each smoothers. Can be a single integer (recycled for each smoother variable) or a vector of integers the same length as the number of smoothers. |
num.iter |
Number of iterations for the gamlasso loop |
tolerance |
Tolerance for covergence of the gamlasso loop |
seed |
The random seed can be specified for reproducibility. This is used for fitting the gam and lasso models, or fixed before each loop of gamlasso. |
prompts |
logical. Should |
gamlassoChecks
produces a series of logical values:
allcheck
indicating if the arguments passed all the checks,
fit.smoothgam
indicating if there aren't any linear predictors and
a model with only smoothers should be fitted, fit.glmnet
is the counterpart for smooth predictors. It also returns the cleaned
(if needed) arguments as a list named cleandata
who's elements are:
train.data |
The training data with unnecessary columns deleted |
linear.name , smooth.name , num.knots |
The changed variable names and number of knots |
linear.penalty , smooth.penalty |
The changed penalties for linear and smooth terms. Reset to their default values only in the rare case of too few predictors |
The arguments offset.name
, num.iter
, tolerance
and seed
are not currently not being used in testing.
## Usage similar to gamlassoFit
## Usage similar to gamlassoFit
This function is the workhorse for fitting a gamlasso model. Not recommended
to call directly. It is slightly more efficient than gamlasso.default
since
it doesn't perform any quality checks. Only use if the data has been cleaned
and no errors are expected to occur.
gamlassoFit( data, formula = NULL, response.name = NULL, linear.name = NULL, smooth.name = NULL, family = "gaussian", linear.penalty = 0, smooth.penalty = 2, offset.name = NULL, weights.name = NULL, num.knots = 5, num.iter = 100, interactions = F, tolerance = 1e-04, seed = .Random.seed[1], verbose = TRUE )
gamlassoFit( data, formula = NULL, response.name = NULL, linear.name = NULL, smooth.name = NULL, family = "gaussian", linear.penalty = 0, smooth.penalty = 2, offset.name = NULL, weights.name = NULL, num.knots = 5, num.iter = 100, interactions = F, tolerance = 1e-04, seed = .Random.seed[1], verbose = TRUE )
data |
The training data for fitting the model |
formula |
A formula describing the model to be fitted |
response.name |
The name of the response variable. Vector of two if
|
linear.name |
The names of the variables to be used as linear predictors |
smooth.name |
The names of the variables to be used as smoothers |
family |
The family describing the error distribution and link function
to be used in the model. A character string which can only be
|
linear.penalty |
The penalty used on the linear predictors. Can be 0, 1 or 2 |
smooth.penalty |
The penalty used on the smoothers. Can be 1 or 2 |
offset.name |
The name of the offset variable. |
weights.name |
The name of the weights variable. |
num.knots |
Number of knots for each smoothers. Can be a single integer (recycled for each smoother variable) or a vector of integers the same length as the number of smoothers. |
num.iter |
Number of iterations for the gamlasso loop |
interactions |
logical. Should interactions be included. |
tolerance |
Tolerance for covergence of the gamlasso loop |
seed |
The random seed can be specified for reproducibility. This is used for fitting the gam and lasso models, or fixed before each loop of gamlasso. |
verbose |
logical. Should there be "progress reports" printed to the console while fitting the model. |
See gamlasso
## Not recommended to use directly. Please see examples of gamlasso
## Not recommended to use directly. Please see examples of gamlasso
Undocumented function. Do not use directly
lasso_gam_loop( data, response.name, families, formulae, num.iter, tolerance, offset.name, weights, seed )
lasso_gam_loop( data, response.name, families, formulae, num.iter, tolerance, offset.name, weights, seed )
data |
The data with value for all the linear and smooth predictors |
response.name |
The name of the response variable. Vector of two if
|
families |
List of two families as returned by |
formulae |
List of formulae as returned by |
num.iter |
Number of iterations for the gamlasso loop |
tolerance |
Tolerance for covergence of the gamlasso loop |
offset.name |
The name of the offset variable. |
weights |
Vector with values of the weights variable if it exists.
|
seed |
The random seed can be specified for reproducibility. This is used for fitting the gam and lasso models, or fixed before each loop of gamlasso. |
Undocumented function. Do not use directly
meandist(x, y)
meandist(x, y)
x , y
|
Vectors of the same length |
Undocumented function. Do not use directly
nzeros(x, y = NULL)
nzeros(x, y = NULL)
x , y
|
Vectors of the same length |
Takes a fitted gamlasso
object produced by gamlasso
and
returns predictions given a new set of values of the linear and smooth
variables.
## S3 method for class 'gamlasso' predict( object, newdata = NULL, type = "link", s = "lambda.min", new.event.times = NULL, ... )
## S3 method for class 'gamlasso' predict( object, newdata = NULL, type = "link", s = "lambda.min", new.event.times = NULL, ... )
object |
fitted model object of the class |
newdata |
A data frame with the values of the linear and smooth
variables for which predictions are to be made. If not provided then
predictions corresponding to the original data used to fit |
type |
When this has the value |
s |
Value of the lasso penalty parameter |
new.event.times |
A vector of new event times to be used for predicting
survival times when |
... |
Other arguments |
Lasso models do not have standard errors so predict.gamlasso
does not provide them either. The standard errors for the gam part of the
model can be accesed by using mgcv::predict.gam
with suitable options.
Offsets are always included in the prediction if present in the original call
to gamlasso
. Also if type
is anything other than "link"
or "response"
then the function throws an error.
Returns a vector of the same length as nrow(newdata)
with
the values of the linear predictor or on the response scale depending
on type
. For type = "link"
the value is simply the elementwise
sum of the predictions from the gam and lasso models in object
.
For type = "response"
the values are on the response scale, for
example exponential of the linear response is returned if
object$inherit$family = "poisson"
gamlasso
, predict.gam
,
predict.glmnet
.
library(plsmselect) data(simData) ## Fit poisson gamlasso model using the term specification approach: ## (L2-penalty on linear terms & L2-penalty on smooth terms) pfit = gamlasso(response="Yp", linear.terms=paste0("x",1:10), smooth.terms=paste0("z",1:4), data=simData, linear.penalty = "l2", smooth.penalty = "l2", family="poisson", num.knots = 5, seed=1) ## fitted values (of linear predictor): fitted.values <- predict(pfit) ## predicted values on response scale: pred.response <- predict(pfit, type="response", newdata=simData) ## For same model as above, but with L1-penalty on linear terms ## i.e. L1-penalty on the model matrix (X) we can use formula approach: simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1] pfit = gamlasso(Yp ~ X + s(z1, k=5) + # L2-penalty (bs="tp") is default (see ?mgcv::s) s(z2, k=5) + s(z3, k=5) + s(z4, k=5), family="poisson", data = simData, seed=1) # See ?gamlasso for an example fitting a gaussian response model # See ?summary.gamlasso for an example fitting a binomial response model # See ?cumbasehaz for an example fitting a survival response model
library(plsmselect) data(simData) ## Fit poisson gamlasso model using the term specification approach: ## (L2-penalty on linear terms & L2-penalty on smooth terms) pfit = gamlasso(response="Yp", linear.terms=paste0("x",1:10), smooth.terms=paste0("z",1:4), data=simData, linear.penalty = "l2", smooth.penalty = "l2", family="poisson", num.knots = 5, seed=1) ## fitted values (of linear predictor): fitted.values <- predict(pfit) ## predicted values on response scale: pred.response <- predict(pfit, type="response", newdata=simData) ## For same model as above, but with L1-penalty on linear terms ## i.e. L1-penalty on the model matrix (X) we can use formula approach: simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1] pfit = gamlasso(Yp ~ X + s(z1, k=5) + # L2-penalty (bs="tp") is default (see ?mgcv::s) s(z2, k=5) + s(z3, k=5) + s(z4, k=5), family="poisson", data = simData, seed=1) # See ?gamlasso for an example fitting a gaussian response model # See ?summary.gamlasso for an example fitting a binomial response model # See ?cumbasehaz for an example fitting a survival response model
The default print method for a gamlasso
object
## S3 method for class 'gamlasso' print(x, ...)
## S3 method for class 'gamlasso' print(x, ...)
x |
fitted model object of the class |
... |
Other arguments |
Outputs a list of two. lasso
prints the lasso
model (the same output as print(object$cv.glmnet$glmnet.fit)
) if
it is non-null and gam
prints the gam model (the same output
as print(object$gam)
) if it is non-null.
gamlasso
, summary.gamlasso
,
print.gam
, print.glmnet
.
## Please see the examples in ?gamlasso
## Please see the examples in ?gamlasso
The package includes a simulated dataset that we will use for the examples.
data(simData)
data(simData)
A 100-by-23 data frame. There are 10 variables
(x1
,...,x10
) corresponding to the linear predictors and 4
(z1
,...,z4
) corresponding to the smooth predictors. There are
7 response variables corresponding to the different models fitted -
Yg
for the Gaussian response
Yb
as Bernoulli and success
and failure
as
Binomial count responses
Yp
as the Poisson response
time
and status
as the survival model responses
The variables starting with X
are the same as the linear predictors but are
concatenated into a matrix X
to be used for the formula implementation of
gamlasso
The code for creating this simulated dataset is included in the vignette of this package.
## Please see examples in ?gamlasso
## Please see examples in ?gamlasso
Default sumary method for a gamlasso
object
## S3 method for class 'gamlasso' summary(object, s = "lambda.min", ...)
## S3 method for class 'gamlasso' summary(object, s = "lambda.min", ...)
object |
fitted model object of the class |
s |
Value of the lasso penalty parameter |
... |
Other arguments |
Outputs a list of two. gam
prints a summary of the
gam model (the same output as summary(object$gam)
) if it
is non-null. Objects of the class cv.glmnet
do not have a
default summary method, so the list item lasso
produces the
coefficients of the cross-vaidated lasso fit corresponding to the
lowest value of the used ( the same output as
coef(object$cv.glmnet, s = "lambda.min")
if it is non-null).
gamlasso
, summary.gam
,
coef.cv.glmnet
.
library(plsmselect) data(simData) ## Fit binomial gamlasso model using the term specification ## approach with binomial counts response ## (L2-penalty on linear terms & L1-penalty on smooth terms) bfit = gamlasso(c("success","failure"), linear.terms=paste0("x",1:10), smooth.terms=paste0("z",1:4), data=simData, family = "binomial", linear.penalty = "l2", smooth.penalty = "l1", num.knots = 5, seed=1) ## Since the above model has linear.penalty = "l2" it is ## a pure GAM model (i.e. no LASSO component): bfit$cv.glmnet ## Summary of model (here essentially the same as summary(bfit$gam) ## because there is no LASSO component, i.e. linear.penalty="l2") summary(bfit) ## We could use the formula approach below to fit the same model as above: simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1] bfit = gamlasso(cbind(success,failure) ~ X + s(z1, bs="ts") + s(z2, bs="ts") + s(z3, bs="ts") + s(z4, bs="ts"), data = simData, family = "binomial", linear.penalty = "l2", smooth.penalty = "l1", seed=1) ## For a binary responses we only need one response variable in the formula bfit2 = gamlasso(Yb ~ X + s(z1, bs="ts") + s(z2, bs="ts") + s(z3, bs="ts") + s(z4, bs="ts"), data = simData, family = "binomial", seed=1) # See ?gamlasso for an example fitting a gaussian response model # See ?predict.gamlasso for an example fitting a poisson response model # See ?cumbasehaz for an example fitting a survival response model
library(plsmselect) data(simData) ## Fit binomial gamlasso model using the term specification ## approach with binomial counts response ## (L2-penalty on linear terms & L1-penalty on smooth terms) bfit = gamlasso(c("success","failure"), linear.terms=paste0("x",1:10), smooth.terms=paste0("z",1:4), data=simData, family = "binomial", linear.penalty = "l2", smooth.penalty = "l1", num.knots = 5, seed=1) ## Since the above model has linear.penalty = "l2" it is ## a pure GAM model (i.e. no LASSO component): bfit$cv.glmnet ## Summary of model (here essentially the same as summary(bfit$gam) ## because there is no LASSO component, i.e. linear.penalty="l2") summary(bfit) ## We could use the formula approach below to fit the same model as above: simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1] bfit = gamlasso(cbind(success,failure) ~ X + s(z1, bs="ts") + s(z2, bs="ts") + s(z3, bs="ts") + s(z4, bs="ts"), data = simData, family = "binomial", linear.penalty = "l2", smooth.penalty = "l1", seed=1) ## For a binary responses we only need one response variable in the formula bfit2 = gamlasso(Yb ~ X + s(z1, bs="ts") + s(z2, bs="ts") + s(z3, bs="ts") + s(z4, bs="ts"), data = simData, family = "binomial", seed=1) # See ?gamlasso for an example fitting a gaussian response model # See ?predict.gamlasso for an example fitting a poisson response model # See ?cumbasehaz for an example fitting a survival response model