#############################################################################
#                                                                           #
#                 CUMULATIVE INCIDENCE CURVES IN R                          #
#                                                                           #
# Written by Luca Scrucca                                                   #
#                                                                           #
# Reference:                                                                #
# Scrucca L., Santucci A., Aversa F. (2007) Competing risks analysis using  #
#   R: an easy guide for clinicians. Bone Marrow Transplantation, 40,       #
#   381--387.                                                               #
#############################################################################
# ver. 1.1 Feb 2008
#          - allow group to be missing
#          - if t is provided both computation and plots use t as time points
#          - allow col, lwd to be used for curves with confidence bands
#          - fix some bugs in the legend
#          - added help on source code
# ver. 1.0 May 2007
#          - Version appearing in the BMT paper
#############################################################################
#
# Usage:
# 
#   CumIncidence(ftime, fstatus, group, t, strata, rho = 0, cencode = 0,
#             	 subset, na.action = na.omit, level, 
#                xlab = "Time", ylab = "Probability", 
#                col, lty, lwd, digits = 4)
# 
# Arguments:
# 
# ftime = failure time variable.
# fstatus = variable with distinct codes for different causes of 
#           failure and also a distinct code for censored observations.
# group	= estimates will be calculated within groups given by distinct 
#         values of this variable. Tests will compare these groups. If 
#         missing then treated as all one group (no test statistics).
# t = a vector of time points where the cumulative incidence function 
#     should be evaluated.
# strata = stratification variable. Has no effect on estimates. Tests 
#          will be stratified on this variable. (all data in 1 stratum,
#          if missing).
# rho = power of the weight function used in the tests. By default is 
#       set to 0.
# cencode = value of fstatus variable which indicates the failure time
#           is censored.
# subset = a logical vector specifying a subset of cases to include in 
#          the analysis.
# na.action = a function specifying the action to take for any cases 
#             missing any of ftime, fstatus, group, strata, or subset. 
#             By default missing cases are omitted.
# level = a value in the range [0,1] specifying the level for pointwise
#         confidence bands.
# xlab = text for the x-axis label.
# ylab = text for the y-axis label.
# col = color(s) used for plotting curves (see plot.default).
# lty = line type(s) used for plotting curves (see plot.default).
# lwd = line width(s) used for plotting curves (see plot.default).
# digits = number of significant digits used for printing values. By 
#          default set at 4.
# 
#############################################################################

"CumIncidence" <- function(ftime, fstatus, group, t, strata, rho = 0, 
	                         cencode = 0, subset, na.action = na.omit, level,
	                         xlab = "Time", ylab = "Probability", 
	                         col, lty, lwd, digits = 4)
{
  # check for the required package
  if(!require("cmprsk"))
    { stop("Package `cmprsk' is required and must be installed.\n 
           See help(install.packages) or write the following command at prompt
           and then follow the instructions:\n
           > install.packages(\"cmprsk\")") } 
  # collect data
  mf  <- match.call(expand.dots = FALSE)
  mf[[1]] <- as.name("list")
  mf$t <- mf$digits <- mf$col <- mf$lty <- mf$lwd <- mf$level <- 
  mf$xlab <- mf$ylab <- NULL
  mf <- eval(mf, parent.frame())
  g <- max(1, length(unique(mf$group)))
  s <- length(unique(mf$fstatus))
  if(missing(t)) 
    { time <- pretty(c(0, max(mf$ftime)), 6)
      ttime <- time <- time[time < max(mf$ftime)] }
  else { ttime <- time <- t }
  # fit model and estimates at time points
  fit   <- do.call("cuminc", mf)
  tfit <- timepoints(fit, time)
  # print result
  cat("\n+", paste(rep("-", 67), collapse=""), "+", sep ="")
  cat("\n| Cumulative incidence function estimates from competing risks data |")
  cat("\n+", paste(rep("-", 67), collapse=""), "+\n", sep ="")
  tests <- NULL
  if(g > 1)
    { tests <- fit$Tests
	    colnames(tests) <- c("Statistic", "p-value", "df")
      cat("Test equality across groups:\n")
      print(tests, digits = digits) }
  cat("\nEstimates at time points:\n")
  print(tfit$est, digits = digits)
  cat("\nStandard errors:\n")
  print(sqrt(tfit$var), digits = digits)
  #
  if(missing(level))
    { # plot cumulative incidence functions
      if(missing(t))
        { time <- sort(unique(c(ftime, time)))
          x <- timepoints(fit, time) }
      else x <- tfit
      col <- if(missing(col)) rep(1:(s-1), rep(g,(s-1))) else col
      lty <- if(missing(lty)) rep(1:g, s-1) else lty
      lwd <- if(missing(lwd)) rep(1, g*(s-1)) else lwd      
      matplot(time, base::t(x$est), type="s", ylim = c(0,1), 
              xlab = xlab, ylab = ylab, xaxs="i", yaxs="i", 
              col = col, lty = lty, lwd = lwd)
      legend("topleft", legend =  rownames(x$est), x.intersp = 2, 
             bty = "n", xjust = 1, col = col, lty = lty, lwd = lwd)
      out <- list(test = tests, est = tfit$est, se = sqrt(tfit$var))
    }
  else
    { if(level < 0 | level > 1) 
        error("level must be a value in the range [0,1]")
      # compute pointwise confidence intervals
      oldpar <- par(ask=TRUE)
      on.exit(par(oldpar))
      if(missing(t))
        { time <- sort(unique(c(ftime, time)))
          x <- timepoints(fit, time) }
      else x <- tfit
      z <- qnorm(1-(1-level)/2)
      lower <- x$est ^ exp(-z*sqrt(x$var)/(x$est*log(x$est)))
      upper <- x$est ^ exp(z*sqrt(x$var)/(x$est*log(x$est)))
      col <- if(missing(col)) rep(1:(s-1), rep(g,(s-1))) 
             else             rep(col, g*(s-1))
      lwd <- if(missing(lwd)) rep(1, g*(s-1)) 
             else             rep(lwd, g*(s-1))      
      # plot pointwise confidence intervals
      for(j in 1:nrow(x$est))
         { matplot(time, cbind(x$est[j,], lower[j,], upper[j,]), type="s", 
                   xlab = xlab, ylab = ylab, xaxs="i", yaxs="i", 
                   ylim = c(0,1), col = col[j], lwd = lwd[j], lty = c(1,3,3))
           legend("topleft", legend =  rownames(x$est)[j], bty = "n", xjust = 1) }
      # print pointwise confidence intervals
      i <- match(ttime, time)
      ci <- array(NA, c(2, length(i), nrow(lower)))
      ci[1,,] <- base::t(lower[,i])
      ci[2,,] <- base::t(upper[,i])
      dimnames(ci) <- list(c("lower", "upper"), ttime, rownames(lower))
      cat(paste("\n", level*100, "% pointwise confidence intervals:\n\n", sep=""))
      print(ci, digits = digits)
      out <- list(test = tests, est = x$est, se = sqrt(tfit$var), ci = ci)
    }
  # return results
  invisible(out)
}