### de Castro, Cancho, and Rodrigues (2010), ### A hands-on approach for fitting long-term survival models ### under the GAMLSS framework ### Computer Methods and Programs in Biomedicine 97(2), 168-177. ### doi:10.1016/j.cmpb.2009.08.002 ### Example. Melanoma data (timereg package) source("models-cmpb.R") # Long-term survival models ## Data data(melanoma, package = "timereg") datam = melanoma datam$days = datam$days / 365 # Time in years datam$thick = datam$thick / 100 datam$status[datam$status > 1] = 0 datam$ulc = factor(datam$ulc, labels = c("Ausent", "Present")) datam$sex = factor(datam$sex, labels = c("Female", "Male")) ## Tumor thickness as two binary variables (cut point = 2.0mm) # This parametrization is used to obtain confidence intervals for p0 thickness.le.2 = 1 - model.matrix(~ cut(datam$thick,c(0, 2, max(datam$thick))))[,2] # <= 2 mm thickness.gt.2 = 1 - thickness.le.2 # > 2 mm ## Kaplan-Meier estimates mKM = survfit(Surv(days, status) ~ thickness.gt.2, data = datam, se.fit = FALSE) ## Time to event ~ Weibull (WEI4) ## Covariate thick linked to nu = p0 # Competing causes ~ negative binomial m4 = gamlss(Surv(days, status) ~ 1, family = cens(NBWEI4), nu.formula = ~ thickness.le.2 + thickness.gt.2 - 1, data = datam, c.crit = 0.001, n.cyc = 1000) summary(m4) plot(m4, xvar = m4$nu.fv) wp(m4) SpNB4 = pNBWEI4(datam$days, mu = m4$mu.fv, sigma = m4$sigma.fv, nu = m4$nu.fv, tau = m4$tau.fv, lower.tail = FALSE) # Spop # Competing causes ~ Bernoulli m4b = gamlss(Surv(days, status) ~ 1, family = cens(NBWEI4), nu.formula = ~ thickness.le.2 + thickness.gt.2 - 1, data = datam, c.crit = 0.001, n.cyc = 1000, tau.start = -1, tau.fix = TRUE) summary(m4b) plot(m4b, xvar = m4$nu.fv) wp(m4b) SpB4 = pNBWEI4(datam$days, mu = m4b$mu.fv, sigma = m4b$sigma.fv, nu = m4b$nu.fv, tau = -1, lower.tail = FALSE) # Spop # Competing causes ~ Poisson m4p = gamlss(Surv(days, status) ~ 1, family = cens(POWEI4), nu.formula = ~ thickness.le.2 + thickness.gt.2 - 1, data = datam, c.crit = 0.001, n.cyc = 1000) summary(m4p) plot(m4p, xvar = m4p$nu.fv) wp(m4p) SpP4 = pPOWEI4(datam$days, mu = m4p$mu.fv, sigma = m4p$sigma.fv, nu = m4p$nu.fv, lower.tail = FALSE) # Spop # Promotion time cure model as a particular case of the negat. bin. model m4p0 = gamlss(Surv(days, status) ~ 1, family = cens(NBWEI4), nu.formula = ~ thickness.le.2 + thickness.gt.2 - 1, data = datam, c.crit = 0.001, n.cyc = 1000, tau.start = 0.0, tau.fix = TRUE) summary(m4p0) plot(m4p0, xvar = m4p0$nu.fv) wp(m4p0) ## Comparison between m4, m4b, and m4p layout(rbind(c(0,1,1,0), c(2,2,3,3))) par(mai = c(0.7,0.0.65,0.30,0.05)) # Margins: inf, left, sup and right wp(m4b, pch = 20, col = "black") mtext("(a)", cex = 1.3) wp(m4p, pch = 20, col = "black") mtext("(b)", cex = 1.3) wp(m4, pch = 20, col = "black") mtext("(c)", cex = 1.3) ## Fitting statistics sumtable = cbind(c(m4$G.dev, m4b$G.dev, m4p$G.dev), c(m4$aic, m4b$aic, m4p$aic), c(m4$sbc, m4b$sbc, m4p$sbc)) colnames(sumtable) = c("Global deviance", "AIC", "SBC") rownames(sumtable) = c("Neg. binomial WEI4", "Bernoulli WEI4", "Poisson WEI4") sumtable = sumtable[order(sumtable[,"AIC"]),] print(round(sumtable, 1)) # Survival function (tumor thickness as categorical) Sp1 = SpB4; Sp2 = SpP4; Sp3 = SpNB4 layout(rbind(c(0,1,1,0), c(2,2,3,3))) par(mai = c(0.85,0.85,0.30,0.05)) # Margins: inf, left, sup and right plot(mKM, conf.int = FALSE, mark.time = FALSE, xlab = "Time (years)", cex.main = 1.5, ylab = "Survival function", cex.axis = 1.5, cex.lab = 1.5, main = "(a)") points(datam$days, Sp1, pch = 20) plot(mKM, conf.int = FALSE, mark.time = FALSE, xlab = "Time (years)", cex.main = 1.5, ylab = "Survival function", cex.axis = 1.5, cex.lab = 1.5, main = "(b)") points(datam$days, Sp2, pch = 20) plot(mKM, conf.int = FALSE, mark.time = FALSE, xlab = "Time (years)", cex.main = 1.5, ylab = "Survival function", cex.axis = 1.5, cex.lab = 1.5, main = "(c)") points(datam$days, Sp3, pch = 20) ## Asymptotic confidence interval (c. i.) for the cured fraction conf = 0.95 tconf = qt((1+conf)/2, m4$df.residual) # thickness <= 2mm cierr = tconf * sqrt(vcov(m4)[3,3]) cibeta = c(m4$nu.coeff[1] - cierr, m4$nu.coeff[1] + cierr) cip0.le.2 = 1 / (1 + exp(-cibeta)) # thickness > 2mm cierr = tconf * sqrt(vcov(m4)[4,4]) cibeta = c(m4$nu.coeff[2] - cierr, m4$nu.coeff[2] + cierr) cip0.gt.2 = 1 / (1 + exp(-cibeta)) cat("\n", conf * 100, "% asymptotic c.i. for p0 (thickness <= 2mm):", cip0.le.2, "\n") cat("\n", conf * 100, "% asymptotic c.i. for p0 (thickness > 2mm):", cip0.gt.2, "\n") ## Profile c.i. for the cured fraction (time consuming) ## prof.term function kindly provided by an anonymous referee # thickness <= 2mm m4x = quote(gamlss(Surv(days, status) ~ 1, family = cens(NBWEI4), nu.formula = ~ thickness.gt.2 + offset(this * thickness.le.2) - 1, data = datam, c.crit = 0.001, n.cyc = 1000)) m4prof = prof.term(m4x, min = -5, max = 2.3, step = 0.1, perc = conf * 100, criterion = "GD") # c.i. for beta cip0d.le.2 = 1 / (1 + exp(-m4prof$ci)) # thickness > 2mm m4x = quote(gamlss(Surv(days, status) ~ 1, family = cens(NBWEI4), nu.formula = ~ thickness.le.2 + offset(this * thickness.gt.2) - 1, data = datam, c.crit = 0.001, n.cyc = 1000)) m4prof = prof.term(m4x, min = -5.5, max = 0.6, step = 0.1, perc = conf * 100, criterion = "GD") # c.i. for beta cip0d.gt.2 = 1 / (1 + exp(-m4prof$ci)) cat("\n", conf * 100, "% deviance c.i. for p0 (thickness <= 2mm):", cip0d.le.2, "\n") cat("\n", conf * 100, "% deviance c.i. for p0 (thickness > 2mm):", cip0d.gt.2, "\n") ## Bootstrapping # Cured fraction and P(Y > ymin) = Spop(ymin) # Cured fractions according to tumor thickness categories cured.fun = function(data) { mboot = gamlss(Surv(days, status) ~ 1, family = cens(NBWEI4), nu.formula = ~ thickness.le.2 + thickness.gt.2 - 1, data = data, c.crit = 0.001, n.cyc = 1000, start.from = m4) Spopt0.le.2 = pNBWEI4(ymin, mu = mboot$mu.fv[ind.le.2], sigma = mboot$sigma.fv[ind.le.2], nu = mboot$nu.fv[ind.le.2], tau = mboot$tau.fv[ind.le.2], lower.tail = FALSE) Spopt0.gt.2 = pNBWEI4(ymin, mu = mboot$mu.fv[ind.gt.2], sigma = mboot$sigma.fv[ind.gt.2], nu = mboot$nu.fv[ind.gt.2], tau = mboot$tau.fv[ind.gt.2], lower.tail = FALSE) c(mboot$nu.fv[ind.le.2], mboot$nu.fv[ind.gt.2], Spopt0.le.2, Spopt0.gt.2) } ind.le.2 = which(thickness.le.2 == 1)[[1]] # thickness <= 2 (first index) ind.gt.2 = which(thickness.le.2 == 0)[[1]] # thickness > 2 (first index) ymin = 5 # Estimation of Spop(ymin), ymin in years # Resampling Brep = 500 # Number of bootstrap replicates # (2.5 hours on a PC Intel Core2 2.13 GHz and 2GB RAM) ord.boot = censboot(datam, cured.fun, R = Brep, strata = thickness.le.2, sim = "ordinary") ord.boot # Bootstrap confidence intervals (c.i.) for p0 and Spop(ymin) # according to tumor thickness categories bootnames = c("p0 (<= 2mm)", "p0 (> 2mm)", "Spop(ymin) (<= 2mm)", "Spop(ymin) (> 2mm)") cilim = matrix(0, 4, 2) for (i in 1:4) { cat("\n Estimate of", bootnames[i], ": ", ord.boot$t0[i], "\n") cat("\n", conf * 100, "% c.i. for", bootnames[i], "\n") ci = boot.ci(ord.boot, conf = conf, index = i, type = "perc") print(ci) cilim[i,] = ci$percent[4:5] } # Density functions and c.i. from resampled p0^ and Spop^(ymin) par(mfcol = c(2,2)) par(mai = c(0.85,0.85,0.30,0.05)) # Margins: inf, left, sup and right rangex = range(ord.boot$t[,1:2]) plot(density(ord.boot$t[,1]), main = "(a)", xlab = expression(hat(p[0])), ylab = "Density", xlim = rangex, cex.axis = 1.5, cex.lab = 1.5) rug(ord.boot$t[,1]) rug(cilim[1,], col = "red", lwd = 2) plot(density(ord.boot$t[,2]), main = "(b)", xlab = expression(hat(p[0])), ylab = "Density", xlim = rangex, cex.axis = 1.5, cex.lab = 1.5) rug(ord.boot$t[,2]) rug(cilim[2,], col = "red", lwd = 2) rangex = range(ord.boot$t[,3:4]) plot(density(ord.boot$t[,3]), main = "(c)", xlab = expression(hat(S)[pop](5)), ylab = "Density", xlim = rangex, cex.axis = 1.5, cex.lab = 1.5) rug(ord.boot$t[,3]) rug(cilim[3,], col = "red", lwd = 2) plot(density(ord.boot$t[,4]), main = "(d)", xlab = expression(hat(S)[pop](5)), ylab = "Density", xlim = rangex, cex.axis = 1.5, cex.lab = 1.5) rug(ord.boot$t[,4]) rug(cilim[4,], col = "red", lwd = 2) ## A more complex model cat.thickness = cut(datam$thick, c(0, 2, max(datam$thick)), labels = c(" <= 2 mm", " > 2 mm")) m4f = gamlss(Surv(days, status) ~ 1, family = cens(NBWEI4), nu.formula = ~ ulc + cat.thickness + sex, tau.formula = ~ ulc, data = datam, c.crit = 0.001, n.cyc = 1000, start.from = m4) summary(m4f) plot(m4f, xvar = m4f$nu.fv) wp(m4f) stepGAIC(m4f, what = "nu")