Tópico 1 - Dados de Sobrevivência Intervalar

1.1 Ilustração - Estimador de Turnbull

require(survival)
source("https://docs.ufpr.br/~giolo/Livro/ApendiceE/Turnbull.R")
left<-c(0,1,4,5,5)
right<-c(5,8,9,8,9)
dat<-as.data.frame(cbind(left,right))
attach(dat)
right[is.na(right)] <- Inf
tau <- cria.tau(dat)
p  <- S.ini(tau=tau)
A  <- cria.A(data=dat,tau=tau)
tb <- Turnbull(p,A,dat)
tb
cbind(tb$time,tb$surv)

1.2 Estimador de Turnbull - Dados de Câncer de Mama

rm(list=ls())
require(survival)
source("https://docs.ufpr.br/~giolo/Livro/ApendiceE/Turnbull.R")
dat <- read.table('https://docs.ufpr.br/~giolo/CE063/Dados/breast.txt',header=T)
attach(dat)
dat1 <- dat[dat$ther==1,]
dat1$right[is.na(dat1$right)] <- Inf
tau <- cria.tau(dat1)
p <- S.ini(tau=tau)
A <- cria.A(data=dat1,tau=tau)
tb1 <- Turnbull(p,A,dat1)
tb1

dat1 <- dat[dat$ther==0,]
dat1$right[is.na(dat1$right)] <- Inf
tau <- cria.tau(dat1)
p <- S.ini(tau=tau)
A <- cria.A(data=dat1,tau=tau)
tb2 <- Turnbull(p,A,dat1)
tb2

plot(tb1$time,tb1$surv, lty=1, type="s", ylim=c(0,1), xlim=c(0,50), xlab="Tempo (meses)", ylab="S(t)")
lines(tb2$time,tb2$surv, lty=4, type="s")
legend(1,0.3, lty=c(1,4),c("Radioterapia","Radioterapia + Quimioterapia"), bty="n", cex=0.8)

pm<-ifelse(dat$cens==1,(dat$left+dat$right)/2,dat$left)
ekm<-survfit(Surv(pm,cens)~ther, type=c("kaplan-meier"), data=dat)
summary(ekm)

ekm<-survfit(Surv(pm,cens)~ther, type=c("kaplan-meier"), data=dat)
plot(tb1$time,tb1$surv, lty=1, type="s", ylim=c(0,1), xlim=c(0,50), xlab="Tempos (meses)", ylab="S(t)")
lines(tb2$time,tb2$surv,lty=2,type="s")
lines(ekm[1]$time,ekm[1]$surv,type="s",lty=3)
lines(ekm[2]$time,ekm[2]$surv,type="s",lty=3)
legend(1,0.3,lty=c(1,2), c("Radioterapia","Radioterapia + Quimioterapia"), bty="n", cex=0.8)
legend(1,0.21,lty=3, "Usando Ponto Médio dos Intervalos", bty="n",cex=0.8)

1.2.1 Testes para comparação de curvas de sobrevivência (package FHtest)

install.packages("FHtest")     
source("https://bioconductor.org/biocLite.R")
biocLite("Icens")
require(FHtest)
dat <- read.table('https://docs.ufpr.br/~giolo/CE063/Dados/breast.txt', header=T)

# two-sample test
FHtesticp(Surv(left, right, type="interval2")~ther, rho=0, data=dat) 
FHtesticp(Surv(left, right, type="interval2")~ther, rho=1, data=dat)
FHtesticp(Surv(left, right, type="interval2")~ther, data=dat, exact=TRUE)

1.3 Modelos de regressão paramétricos - Contexto de sobrevivência intervalar

breast<-read.table("https://docs.ufpr.br/~giolo/CE063/Dados/breast.txt", h=T)  
ei<-rnorm(94,0,0.001)
left<-abs(breast$left+ei)
breast$left<-left
attach(breast)
require(survival)
fit1<-survreg(Surv(left,right,type="interval2")~ther, breast, dist="weibull")
summary(fit1)

# Gráfico de S(t) obtida via Turnbull em 1.2 e S(t) obtida via modelo de regressão Weibull em 1.3  
plot(tb1$time,tb1$surv, lty=1,type="s", ylim=c(0,1), xlim=c(0,50), xlab="Tempo (meses)", ylab="S(t)")
lines(tb2$time,tb2$surv, lty=2, type="s")
t1<-0:50
b0<-fit1$coefficients[1]
b1<-fit1$coefficients[2]
s<- 1/fit1$scale
a1<- exp(b0+b1)
st1<-exp(-(t1/a1)^s)
t2<-0:50
a2<- exp(b0)
st2<-exp(-(t2/a2)^s)
lines(t1,st1, type="l", lty=4)
lines(t2,st2, type="l", lty=4)
legend(1,0.2, lty=c(1,2,4), c("Turnbull: Radioterapia","Turnbull: Radioterapia + Quimioterapia","Weibull"),
       lwd=1, bty="n", cex=0.8)

1.4 Modelo semiparamétrico de Cox - Contexto de dados de sobrevivência intervalar

 breast<-read.table("https://docs.ufpr.br/~giolo/Livro/ApendiceA/breast.txt", h=T) 
 attach(breast)
 require(icenReg)    # instalar previamente o pacote 
 fit1<- ic_sp(cbind(left, right) ~ ther, model = 'ph', bs_samples = 100, data = breast)
 summary(fit1)

1.5 Contexto de dados de sobrevivência grupados

1.5.1 Análise exploratória - dados mangueiras

mang<-read.table("https://docs.ufpr.br/~giolo/Livro/ApendiceA/mang.txt",h=T)  
attach(mang)
require(survival)

# K-M usando imputação: ti = limite superior se falha e ti = li se censura
ekm<-survfit(Surv(ti,cens)~1,conf.type="none")
summary(ekm)
plot(ekm)

# K-M usando imputação: ti = ponto médio se falha e ti = li se censura
ti1<-ifelse(is.na(ui),li,(li+ui)/2)
ekm1<-survfit(Surv(ti1,cens)~1,conf.type="none")
summary(ekm1)
 
source("https://docs.ufpr.br/~giolo/Livro/ApendiceE/Turnbull.R") 
mang$ui[is.na(mang$ui)] <- Inf

mang$right<-ui
mang$left<-li              
tau <- cria.tau(mang)

p <- S.ini(tau=tau)
A <- cria.A(data=mang,tau=tau)
tb1 <- Turnbull(p,A,mang)
tb1

plot(ekm1, col=2, lty=2, mark.time=F)
lines(c(tb1$time,21),c(tb1$surv[1:11],0.175, 0.175), type="s", lty=2, col=4)
legend(1,0.3, lty=c(2,2), col=c(2,4), c("Kaplan-Meier usando ponto médio","Turnbull"), bty="n", cex=0.8)

1.5.2 Modelos de regressão discretos - Dados Mangueiras

## Modelo de riscos proporcionais
mang1<-read.table("https://docs.ufpr.br/~giolo/CE063/Dados/dadmang.txt",h=T) 
attach(mang1)
require(survival)
fit1<-glm(y~-1+int1+int2+int3+int4+int5+int6+int7+int8+int9+int10+int11+int12+
                       factor(bloco,levels=5:1)+ factor(copa)+ factor(cavalo)+
                       factor(copa)*factor(cavalo), family=binomial(link="cloglog"))
anova(fit1,test="Chisq")
fit1$aic

fit2<-glm(y~-1+int1+int2+int3+int4+int5+int6+int7+int8+int9+int10+ int11+int12+
          factor(bloco,levels=5:1)+factor(copa), family=binomial(link="cloglog"))
anova(fit2, test="Chisq")
summary(fit2)
fit2$aic

## AUC
require(ROCR) 
pred <- prediction(fit2$fitted.values,y)
perf <- performance(pred,"tpr","fpr")      
plot(perf); abline(c(0,0), c(1,1), lty=3)
area <- performance(pred,"auc"); area

## Modelo Logístico
fit1 <-glm(y ~ -1+int1+int2+int3+int4+int5+int6+int7+int8+int9+int10+int11+int12+
                        factor(bloco,levels=5:1)+ factor(copa)+ factor(cavalo)+
                     factor(copa)*factor(cavalo), family=binomial(link="logit"))
anova(fit1, test="Chisq")

fit2<-glm(y ~ -1+int1+int2+int3+int4+int5+int6+int7+int8+int9+int10+int11+int12+
          factor(bloco,levels=5:1)+factor(copa), family=binomial(link="logit"))
anova(fit2, test="Chisq")
summary(fit2)
fit2$aic

## AUC
require(ROCR) 
pred <- prediction(fit2$fitted.values, y)
perf <- performance(pred,"tpr","fpr")      
plot(perf); abline(c(0,0), c(1,1), lty=3)
area <- performance(pred,"auc"); area

## Figura 8.6
mang1<-read.table("https://docs.ufpr.br/~giolo/CE063/Dados/dadmang.txt",h=T) 
attach(mang1)
fit1<-glm(y~-1+int1+int2+int3+int4+int5+int6+int7+int8+int9+int10+int11+int12+
      factor(bloco,levels=5:1)+as.factor(copa),family=binomial(link="cloglog"))
cf<-as.vector(fit1$coefficients[1:12])  #(gama_i)*
gi<-exp(-exp(cf))                       #(gama_i)
S0<-gi
 for(i in 1:11){
  S0[i+1]<-prod(gi[1:(i+1)])}
S0<-c(1,S0)                             # So(t)
cf1<-fit1$coefficients[18:22]
Sc1<-S0
Sc2<-(S0)^exp(cf1[1])
Sc3<-(S0)^exp(cf1[2])
Sc4<-(S0)^exp(cf1[3])
Sc5<-(S0)^exp(cf1[4])
Sc6<-(S0)^exp(cf1[5])
t<-c(0,2,3,4,10,12,14,15,16,17,18,19,21)
cbind(t,Sc1,Sc2,Sc3,Sc4,Sc5,Sc6)
plot(t,Sc1,type="s",lty=1,ylim=range(c(0,1)),xlab="Tempo de Vida (anos)",ylab="S(t) Estimada")
points(t,Sc1,pch=21)
lines(t,Sc2, type="s", lty=2); points(t,Sc2, pch=15)
lines(t,Sc3, type="s", lty=3); points(t,Sc3, pch=14)
lines(t,Sc4, type="s", lty=4); points(t,Sc4, pch=8) 
lines(t,Sc5, type="s", lty=5); points(t,Sc5, pch=16)
lines(t,Sc6, type="s", lty=6); points(t,Sc6, pch=17)
legend(1,0.5, lty=c(1,2,3,4,5,6), pch=c(21,15,14,8,16,17), c("Copa 1-Extrema","Copa 2-Oliveira",
       "Copa 3-Pahiri","Copa 4-Imperial","Copa 5-Carlota","Copa 6-Bourbon"), bty="n",cex=0.9)
title("Modelo de Riscos Proporcionais")

## Figura 8.7
mang1<-read.table("https://docs.ufpr.br/~giolo/CE063/Dados/dadmang.txt",h=T)
attach(mang1)
fit2<-glm(y~-1+int1+int2+int3+int4+int5+int6+int7+int8+int9+int10+int11+int12+
      factor(bloco,levels=5:1)+as.factor(copa), family=binomial(link="logit"))
cf<-fit2$coefficients[1:12]        #(gama_i)*
gi<-exp(cf)                        #(gama_i)
cf1<-fit2$coefficients[18:22]
qi1<-(1/(1+gi))
Slc1<-qi1
for(i in 1:11){
  Slc1[i+1]<-prod(qi1[1:(i+1)])}
Slc1<-c(1,Slc1)                   # S(t) copa 1
qi2<-(1/(1+gi*exp(cf1[1])))
Slc2<-qi2
for(i in 1:11){
  Slc2[i+1]<-prod(qi2[1:(i+1)])}
Slc2<-c(1,Slc2)                   # S(t) copa 2
qi3<-(1/(1+gi*exp(cf1[2])))
Slc3<-qi3
for(i in 1:11){
  Slc3[i+1]<-prod(qi3[1:(i+1)])}
Slc3<-c(1,Slc3)                   # S(t) copa 3
qi4<-(1/(1+gi*exp(cf1[3])))
Slc4<-qi4
for(i in 1:11){
  Slc4[i+1]<-prod(qi4[1:(i+1)])}
Slc4<-c(1,Slc4)                   # S(t) copa 4
qi5<-(1/(1+gi*exp(cf1[4])))
Slc5<-qi5
for(i in 1:11){
  Slc5[i+1]<-prod(qi5[1:(i+1)])}
Slc5<-c(1,Slc5)                   # S(t) copa 5
qi6<-(1/(1+gi*exp(cf1[5])))
Slc6<-qi6
for(i in 1:11){
  Slc6[i+1]<-prod(qi6[1:(i+1)])}
Slc6<-c(1,Slc6)                   # S(t) copa 6
t<-c(0,2,3,4,10,12,14,15,16,17,18,19,21)
cbind(t,Slc1,Slc2,Slc3,Slc4,Slc5,Slc6)
plot(t,Slc1, type="s", lty=1, ylim=range(c(0,1)), xlab="Tempo de Vida (anos)", ylab="S(t) Estimada")
points(t,Slc1,pch=21)
lines(t,Slc2, type="s", lty=2); points(t,Slc2, pch=15)
lines(t,Slc3, type="s", lty=3); points(t,Slc3, pch=14)
lines(t,Slc4, type="s", lty=4); points(t,Slc4, pch=8)
lines(t,Slc5, type="s", lty=5); points(t,Slc5, pch=16)
lines(t,Slc6, type="s", lty=6); points(t,Slc6, pch=17)
legend(1,0.5, lty=c(1,2,3,4,5,6), pch=c(21,15,14,8,16,17), c("Copa 1-Extrema","Copa 2-Oliveira",
       "Copa 3-Pahiri","Copa 4-Imperial","Copa 5-Carlota","Copa 6-Bourbon"), bty="n", cex=0.9)
title("Modelo Logístico") 

Tópico 2 - Modelos de Efeitos Mistos em Análise de Sobrevivência

2.1 Modelo semiparamétrico de fragilidade gama - contexto univariado

require(survival)
leucc<-read.table("https://docs.ufpr.br/~giolo/Livro/ApendiceA/leucc.txt",h=T) 
attach(leucc)
require(survival)
id<-1:103
fit3a<-coxph(Surv(tempos,cens)~leuinic + idadec + zpesoc + pasc + vacc + frailty(id, dist="gamma"),
             data=leucc, x = T, method="breslow")
fit3a$loglik
summary(fit3a)
 
par(mfrow=c(1,2))
rd<-resid(fit3a, type="deviance")       # resíduos deviance
rm<-resid(fit3a, type="martingale")     # resíduos martingal
pl<-fit3a$linear.predictors
plot(pl, rm, xlab="Preditor linear", ylab="Resíduo martingal", ylim=c(-2.5,2.5), pch=16)
plot(pl, rd, xlab="Preditor linear", ylab="Resíduo deviance" , ylim=c(-2.5,2.5), pch=16)
 
wi<-fit3a$frail
zi<-exp(wi)
plot(id,zi, xlab="Crianças (1 a 103)", ylab="zi estimados", pch=16)
abline(h=1,lty=2)

# AUC do modelo com fragilidade
require(survival)
require(survivalROC)
mod1<-coxph(Surv(tempos,cens)~leuinic+idadec+zpesoc+pasc+vacc+frailty(id,dist="gamma"), data=leucc, method="breslow")
pi_cox<-mod1$linear.predictors              # marcador Mi(t)
cut<-c(0.5,1.0,1.5,2.5,3.5,4.0)             # tempos fixados para predição das AUC
AUC<-matrix(0,6,3)                          # matriz com estimativas AUC(t)
par(mfrow=c(1,6))
for(i in 1:6){
 cutoff <- cut[i]
 ic.1= survivalROC(Stime=leucc$tempos, status=leucc$cens, marker = pi_cox, 
                   predict.time = cutoff, method="NNE", lambda=0.02)
 AUC[i,1]<-ic.1$AUC
 ic.2= survivalROC(Stime=leucc$tempos, status=leucc$cens, marker = pi_cox, 
                   predict.time = cutoff, method="KM")
 AUC[i,2]<-ic.2$AUC; AUC[i,3]<-cut[i]
 plot(ic.2$FP,ic.2$TP,  type="l", xlim=c(0,1), ylim=c(0,1))          # ROC método KM
 lines(ic.1$FP,ic.1$TP, type="l", xlim=c(0,1), ylim=c(0,1), col=2)   # ROC método NNE
}
colnames(AUC)<-c("AUC_NNE","AUC_KM","t"); AUC
 
# AUC do modelo sem fragilidade
mod1<-coxph(Surv(tempos,cens)~leuinic + idadec + zpesoc + pasc + vacc, data=leucc, method="breslow")
pi_cox<-mod1$linear.predictors              # marcador Mi(t)
cut<-c(0.5,1.0,1.5,2.5,3.5,4.0)             # tempos fixados para predição das AUC
AUC<-matrix(0,6,3)                          # matriz com estimativas AUC(t)
par(mfrow=c(1,6))
for(i in 1:6){
cutoff <- cut[i]
ic.1= survivalROC(Stime=leucc$tempos, status=leucc$cens, marker = pi_cox, 
                  predict.time = cutoff, method="NNE", lambda=0.02)
AUC[i,1]<-ic.1$AUC
ic.2= survivalROC(Stime=leucc$tempos, status=leucc$cens, marker = pi_cox, 
                  predict.time = cutoff, method="KM")
AUC[i,2]<-ic.2$AUC; AUC[i,3]<-cut[i]
plot(ic.2$FP,ic.2$TP,  type="l", xlim=c(0,1), ylim=c(0,1))            # ROC método KM
lines(ic.1$FP,ic.1$TP, type="l", xlim=c(0,1), ylim=c(0,1), col=2)     # ROC método NNE
}
colnames(AUC)<-c("AUC_NNE","AUC_KM","t") 
AUC

2.2 Modelo semiparamétrico de fragilidade gama - contexto multivariado

cattle<-read.table("https://docs.ufpr.br/~giolo/Livro/ApendiceA/cattle.txt",h=T)
attach(cattle)
require(survival)
fit1<-coxph(Surv(tempo,censura)~factor(sex) + agedam + frailty(sire, dist="gamma"), data=cattle)
summary(fit1)
fit2<-coxph(Surv(tempo,censura)~factor(sex) + frailty(sire, dist="gamma"), data=cattle)
summary(fit2)

H0<-basehaz(fit2,centered=F)
S0<-exp(-H0$hazard)
S3m<-S0^(1.798*exp(0.797))    # machos touro 3
S3f<-S0^(1.798)               # fêmeas touro 3
S1m<-S0^(0.767*exp(0.797))    # machos touro 1
S1f<-S0^(0.767)               # fêmeas touro 1

par(mfrow=c(1,2))
t<-H0$time
plot(t,S1m, type="s", ylim=range(c(0,1)), xlab="Tempo (dias)", ylab="Sobrevivência Estimada")
lines(t,S1f,type="s", lty=4)
legend(142,0.25, lty=c(1,4),c("Machos", "Fêmeas"), bty="n", cex=0.8)
title("Touro 1")
plot(t,S3m, type="s", ylim=range(c(0,1)), xlab="Tempo (dias)", ylab="Sobrevivência Estimada")
lines(t,S3f,type="s", lty=4)
legend(142,0.25, lty=c(1,4),c("Machos", "Fêmeas"), bty="n", cex=0.8)
title("Touro 3")

## AUC do modelo ajustado
require(survivalROC)
pi_cox<-fit2$linear.predictors              # marcador Mi(t)
cut<-c(160,170,180,190,200,210)             # tempos fixados para predição das AUC
AUC<-matrix(0,6,3)                          # matriz com estimativas AUC(t)
par(mfrow=c(1,6))
for(i in 1:6){
cutoff <- cut[i]
ic.1= survivalROC(Stime=cattle$tempo, status=cattle$censura, marker = pi_cox, 
                  predict.time = cutoff, method="NNE", lambda=0.01)
AUC[i,1]<-ic.1$AUC
ic.2= survivalROC(Stime=cattle$tempo, status=cattle$censura, marker = pi_cox, 
                  predict.time = cutoff, method="KM")
AUC[i,2]<-ic.2$AUC; AUC[i,3]<-cut[i]
plot(ic.2$FP,ic.2$TP,  type="l", xlim=c(0,1), ylim=c(0,1))            # ROC método KM
lines(ic.1$FP,ic.1$TP, type="l", xlim=c(0,1), ylim=c(0,1), col=2)     # ROC método NNE
}
colnames(AUC)<-c("AUC_NNE","AUC_KM","t") 
AUC

2.3 Modelo semiparamétrico de fragilidade normal com estrutura de covariância (simples e parentesco)

library(coxme)
library(kinship2)
data(minnbreast)
names(minnbreast)
with(minnbreast, table(sex, cancer, exclude=NULL))
mped <- with(minnbreast, pedigree(id, fatherid, motherid, sex, affected=cancer, famid=famid,
             status=proband))
plot(mped["8"])
 
minnfemale <- minnbreast[minnbreast$sex == 'F' & !is.na(minnbreast$sex),]
fit1 <- coxme(Surv(endage, cancer) ~ I(parity>0) + (1|famid), data=minnfemale, subset=(proband==0))
fit1
 
kmat <- kinship(mped)        
fit2 <- coxme(Surv(endage, cancer) ~ I(parity>0) +(1|id), data=minnfemale, varlist=coxmeMlist(2*kmat,rescale=F), subset=(proband==0))
print(fit2)
plot(fit2$frail$id, pch=16); abline(h=0, lty=3)
 
# AUC do Modelo 2 - fit2 (matriz de parentesco)
a<-as.data.frame(fit2$frail$id)
a1<-dimnames(a)[1]
id.line<-as.vector(a1[[1]])
dat <- minnfemale[id.line, ]                # dat = somente os dados considerados na análise (n = 9421)
 
require(survivalROC)
pi_cox<-fit2$linear.predictor               # marcador Mi(t)
cut<-c(40,50,60,75,85,100)                  # tempos fixados para predição das AUC
AUC<-matrix(0,6,3)                          # matriz com estimativas AUC(t)
par(mfrow=c(1,6))
for(i in 1:6){
cutoff <- cut[i]
ic.1= survivalROC(Stime=dat$endage, status=dat$cancer, marker = pi_cox, predict.time = cutoff, method="NNE", lambda=0.005)
AUC[i,1]<-ic.1$AUC
ic.2= survivalROC(Stime=dat$endage, status=dat$cancer, marker = pi_cox, predict.time = cutoff, method="KM")
AUC[i,2]<-ic.2$AUC; AUC[i,3]<-cut[i]
plot(ic.2$FP,ic.2$TP,  type="l", xlim=c(0,1), ylim=c(0,1))            # ROC método KM
lines(ic.1$FP,ic.1$TP, type="l", xlim=c(0,1), ylim=c(0,1), col=2)     # ROC método NNE
}
colnames(AUC)<-c("AUC_NNE","AUC_KM","t") 
AUC
 
# AUC do Modelo 1 - fit1 (matriz simples)
require(survivalROC)
pi_cox<-fit1$linear.predictor               # marcador Mi(t)
cut<-c(40,50,60,75,85,100)                  # tempos fixados para predição das AUC
AUC<-matrix(0,6,3)                          # matriz com estimativas AUC(t)
par(mfrow=c(1,6))
for(i in 1:6){
cutoff <- cut[i]
ic.1= survivalROC(Stime=dat$endage, status=dat$cancer, marker = pi_cox, 
                  predict.time = cutoff, method="NNE", lambda=0.005)
AUC[i,1]<-ic.1$AUC
ic.2= survivalROC(Stime=dat$endage, status=dat$cancer, marker = pi_cox, 
                  predict.time = cutoff, method="KM")
AUC[i,2]<-ic.2$AUC; AUC[i,3]<-cut[i]
plot(ic.2$FP,ic.2$TP,  type="l", xlim=c(0,1), ylim=c(0,1))            # ROC método KM
lines(ic.1$FP,ic.1$TP, type="l", xlim=c(0,1), ylim=c(0,1), col=2)     # ROC método NNE
}
colnames(AUC)<-c("AUC_NNE","AUC_KM","t") 
AUC
require(survival)
leucc<-read.table("https://docs.ufpr.br/~giolo/Livro/ApendiceA/leucc.txt",h=T) 
attach(leucc)
require(survival)
id<-1:103
fit3a<-coxph(Surv(tempos,cens)~leuinic + idadec + zpesoc + pasc + vacc + frailty(id, dist="gamma"),
             data=leucc,x = T, method="breslow")
fit3a$loglik
summary(fit3a)
 
par(mfrow=c(1,2))
rd<-resid(fit3a, type="deviance")       # resíduos deviance
rm<-resid(fit3a, type="martingale")     # resíduos martingal
pl<-fit3a$linear.predictors
plot(pl, rm, xlab="Preditor linear", ylab="Resíduo martingal", ylim=c(-2.5,2.5), pch=16)
plot(pl, rd, xlab="Preditor linear", ylab="Resíduo deviance" , ylim=c(-2.5,2.5), pch=16)
 
wi<-fit3a$frail
zi<-exp(wi)
plot(id,zi, xlab="Crianças (1 a 103)", ylab="zi estimados", pch=16)
abline(h=1,lty=2)


# AUC do modelo com fragilidade
require(survival)
require(survivalROC)
mod1<-coxph(Surv(tempos,cens)~leuinic + idadec + zpesoc + pasc + vacc + frailty(id, dist="gamma"), 
            data=leucc, method="breslow")
pi_cox<-mod1$linear.predictors              # marcador Mi(t)
cut<-c(0.5,1.0,1.5,2.5,3.5,4.0)             # tempos fixados para predição das AUC
AUC<-matrix(0,6,3)                          # matriz com estimativas AUC(t)
par(mfrow=c(1,6))
for(i in 1:6){
cutoff <- cut[i]
ic.1= survivalROC(Stime=leucc$tempos, status=leucc$cens, marker = pi_cox, 
                  predict.time = cutoff, method="NNE", lambda=0.02)
AUC[i,1]<-ic.1$AUC
ic.2= survivalROC(Stime=leucc$tempos, status=leucc$cens, marker = pi_cox, 
                  predict.time = cutoff, method="KM")
AUC[i,2]<-ic.2$AUC; AUC[i,3]<-cut[i]
plot(ic.2$FP,ic.2$TP,  type="l", xlim=c(0,1), ylim=c(0,1))          # ROC método KM
lines(ic.1$FP,ic.1$TP, type="l", xlim=c(0,1), ylim=c(0,1), col=2)   # ROC método NNE
}
colnames(AUC)<-c("AUC_NNE","AUC_KM","t") 
AUC
 
# AUC do modelo sem fragilidade
mod1<-coxph(Surv(tempos,cens)~leuinic + idadec + zpesoc + pasc + vacc, data=leucc, method="breslow")
pi_cox<-mod1$linear.predictors              # marcador Mi(t)
cut<-c(0.5,1.0,1.5,2.5,3.5,4.0)             # tempos fixados para predição das AUC
AUC<-matrix(0,6,3)                          # matriz com estimativas AUC(t)
par(mfrow=c(1,6))
for(i in 1:6){
cutoff <- cut[i]
ic.1= survivalROC(Stime=leucc$tempos, status=leucc$cens, marker = pi_cox, 
                  predict.time = cutoff, method="NNE", lambda=0.02)
AUC[i,1]<-ic.1$AUC
ic.2= survivalROC(Stime=leucc$tempos, status=leucc$cens, marker = pi_cox, 
                  predict.time = cutoff, method="KM")
AUC[i,2]<-ic.2$AUC; AUC[i,3]<-cut[i]
plot(ic.2$FP,ic.2$TP, type="l", xlim=c(0,1), ylim=c(0,1))            # ROC método KM
lines(ic.1$FP,ic.1$TP,type="l", xlim=c(0,1), ylim=c(0,1), col=2)     # ROC método NNE
}
colnames(AUC)<-c("AUC_NNE","AUC_KM","t") 
AUC

Tópico 3 - Modelos para Dados com Sobreviventes de Longa Duração

3.1 Modelo tempo de promoção considerando M ~ Poisson, T~Weibull e link=logito (package gamlss)

require(timereg)
data(melanoma)
attach(melanoma)
 
Status<-ifelse(status==1, 1, 0)    
espessura<- thick/100 
esp<- ifelse(espessura > 2,1,0)        # <= 2mm = 0 e > 2mm = 1
tempos<-days/365                                    
 
data2 <- as.data.frame(cbind(ulc,esp,sex,tempos,Status))
attach(data2)
 
# Kaplan-Meier Geral
ekm <- survfit(Surv(tempos,Status) ~ 1, data=data2)
plot(ekm, mark.time=F, xlab="tempo (em anos)", ylab="S(t)-KM ")
 
# Kaplan-Meier por Covariável
ekm1 <- survfit(Surv(tempos,Status) ~ esp, data=data2)
summary(ekm1)
survdiff(Surv(tempos, Status)~ esp, rho=0)
 
par(mfrow=c(1,3))
plot(ekm1, mark.time=F,lty=c(2,1), xlab="Tempo(anos)", ylab="S(t) estimada")
legend(1,0.4, lty=c(2,1), c("esp <= 2mm","esp  > 2mm"), lwd=1, bty="n")
legend(0.6,0.25, c("Teste logrank: p < 0,001"), bty="n")
title("Espessura")
 
ekm2 <- survfit(Surv(tempos,Status) ~ ulc, data=data2)
summary(ekm2)
survdiff(Surv(tempos, Status)~ ulc, rho=0)
plot(ekm2, mark.time=F,lty=c(2,1), xlab="Tempo(anos)", ylab="S(t) estimada")
legend(1,0.4, lty=c(2,1), c("presença","ausência" ), lwd=1, bty="n")
legend(0.6,0.25, c("Teste logrank: p < 0,001"), bty="n")
title("Ulceração")
 
ekm3 <- survfit(Surv(tempos,Status) ~ sex, data=data2)
summary(ekm3)
survdiff(Surv(tempos, Status)~ sex, rho=0)
plot(ekm3, mark.time=F,lty=c(2,1), xlab="Tempo(anos)",ylab="S(t) estimada")
legend(1,0.4, lty=c(2,1), c("feminino","masculino"),lwd=1,bty="n")
legend(0.6,0.25, c("Teste logrank: p < 0,011"),bty="n")
title("Sexo")
 
# Modelo Tempo de Promoção (Poisson-Logito-Weibull)
require(gamlss)
require(gamlss.cens)
source("https://docs.ufpr.br/~giolo/CE063/Dados/models-cmpb.R.txt") 
 
fit1<-gamlss(Surv(tempos, Status) ~ ulc, family = cens(POWEI4), nu.formula = ~ ulc + esp + sex,
      data = data2, c.crit=0.001, n.cyc =1000)
summary(fit1)
plot(fit1) 
 
par(mfrow=c(1,1))
wp(fit1)
 
# fornece a probabilidade de ser imune ao evento dado zi
fit1$nu.fv                     
cbind(data2, fit1$nu.fv)
 
par(mfrow=c(1,4))
plot(fit1$nu.fv, ylim=c(0,1), pch=16, ylab="prob de cura")
plot(data2$ulc, fit1$nu.fv, pch=16, ylim=c(0,1), xlab="ulc", ylab="prob. de cura")
plot(data2$esp, fit1$nu.fv, pch=16, ylim=c(0,1), xlab="esp", ylab="prob. de cura")
plot(data2$sex, fit1$nu.fv, pch=16, ylim=c(0,1), xlab="sex", ylab="prob. de cura")
 
# média geral de proporção de imunes ao evento e IC95% 
mg<-mean(fit1$nu.fv)       
li<- mg-1.96*sqrt((mg*(1-mg))/205)
ls<- mg+1.96*sqrt((mg*(1-mg))/205)
cbind(mg,li,ls)
 
# Obs: no gamlss tem-se para a WEI4: Spop(t|x,z) = (pi(z))^[1- exp(-t^gamma1(x)*exp(gamma2)]

# Spop para ulc = 0, sex = 0 e esp = 0
par(mfrow=c(1,2))
data4<-subset(data2, ulc==0 & sex==0 & esp==0)
ekm1<- survfit(Surv(tempos,Status)~1, data=data4)
plot(ekm1, mark.time=F, conf.int=F, xlab="tempos", ylab="Sp(t|x,z)")
t<-0.01:15
Spop1<-(0.9033878)^(1-exp((-t^1.802546)*exp(-2.98163)))
lines(t,Spop1, col=2, type="l")
 
## Spop para ulc = 1, sex = 1 e esp = 1
data4<-subset(data2, ulc==1 & sex==1 & esp==1)
ekm1<- survfit(Surv(tempos,Status)~1, data=data4)
plot(ekm1, mark.time=F, conf.int=F, xlab="tempos", ylab="Sp(t|x,z)")
t<-0.01:15
Spop1<-(0.2310981)^(1-exp((-t^1.631008)*exp(-2.98163)))
lines(t,Spop1, col=2, type="l")
 
# No gamlss tem-se para WEI4:  S(t|x) = exp(-t^gamma1(x)*exp(gamma2))
par(mfrow=c(1,1))
t<-0.01:15
St1<-exp(-t^1.802546*exp(-2.98163))
plot(t,St1,type="l", col=2, xlab="Tempos", ylab="S(t| x)")
St2<-exp(-t^1.631008*exp(-2.98163))
lines(t,St2,type="l", col=4)
legend(6,0.8,c("ulc=1", "ulc=0"), lty=c(1,1), col=c(2,4), bty="n")

3.1.1 Modelo tempo de promoção considerando M ~ BN, T~Weibull e link=logito (package gamlss)

# Modelo Tempo de Promoção (BN-Logito-Weibull)
fit2<-gamlss(Surv(tempos, Status) ~ ulc, family = cens(NBWEI4), nu.formula = ~ ulc + esp + sex,
      data = data2, c.crit=0.001, n.cyc =1000)
summary(fit2)
plot(fit2) 
wp(fit2)
fit2$nu.fv          

3.1.2 Modelo de Mistura como caso particular do modelo em que M ~ BN (package gamlss)

# Modelo de Mistura (Bernoulli-Logito-Weibull)
fit3 <-gamlss(Surv(tempos, Status) ~ ulc, family = cens(NBWEI4), nu.formula = ~ ulc + esp + sex, 
       data = data2, c.crit = 0.001, n.cyc = 1000, tau.start = -1, tau.fix = TRUE)
summary(fit3)
plot(fit3) 
wp(fit3)
fit3$nu.fv        

4.1 Modelo de fragilidade

require(survival)
require(parfm)
dados<-read.table("https://docs.ufpr.br/~giolo/CE063/Dados/kidney_1.txt", h=T)
dados$sex <- dados$sex - 1   # 0 = male; 1 = female
attach(dados)
 
summary(age)
summary(time)
summary(stop)
table(disease)/2
table(sex)/2
 
ekm<-survfit(Surv(time,status)~sex, data=dados)
summary(ekm)
ekm1<- survfit(Surv(time,status)~disease, data=dados)
summary(ekm1)
 
par(mfrow=c(1,2))
plot(ekm, col=c(1,4), mark.time=F, xlab="Tempos (dias)", ylab="S(t)")
legend(300,0.8, lty=c(1,1), col=c(1,4), c("masculino","feminino"), lwd=1, bty="n", cex=1.0)
plot(ekm1, col=c(1,2,3,4), mark.time=F, xlab="Tempos (dias)", ylab="S(t)")
legend(300,0.8, lty=c(1,1,1,1), col=c(1,2,3,4), c("Other","GN","AN","PKD"), lwd=1, bty="n", cex=1.0)
 
# Modelo Semiparamétrico de fragilidade gama
mod1<-coxph(Surv(time, status) ~ sex + age + frailty(id, dist="gamma"), data=dados, method="breslow")
summary(mod1)
mod1<-coxph(Surv(time, status) ~ sex + frailty(id, dist="gamma"), data=dados, method="breslow")
summary(mod1)
aic<-mod1$loglik-2*2
 
par(mfrow=c(1,2))
rd<-resid(mod1,type="deviance")       # resíduos deviance
rm<-resid(mod1,type="martingale")     # resíduos martingal
pl<-mod1$linear.predictors
plot(pl,rm, xlab="Preditor linear", ylab="Resíduo martingal", ylim=c(-3,3), pch=16)
plot(pl,rd, xlab="Preditor linear", ylab="Resíduo deviance" , ylim=c(-3,3), pch=16)
 
wi<-mod1$frail
zi<-exp(wi)
plot(zi[order(zi)], pch=16, xlab="Pacientes", ylab="zi estimados", xaxt="n")
xlabs <- order(zi)
axis(1, c(1:38), xlabs, cex.axis= 0.8, las=3)
abline(h=1, lty=2)
 
# AUC do modelo semiparamétrico com fragilidade gama
require(survival)
require(survivalROC)
mod1<-coxph(Surv(time, status) ~ sex + frailty(id, dist = "gamma"), data = dados, method="breslow")
pi_cox<-mod1$linear.predictors              # marcador Mi(t)
cut<-c(50,100,150,200,300,400)              # tempos fixados para predição das AUC
AUC<-matrix(0,6,3)                          # matriz com estimativas AUC(t)
par(mfrow=c(1,6))
for(i in 1:6){
cutoff <- cut[i]
ic.1= survivalROC(Stime=dados$time, status=dados$status, marker = pi_cox, predict.time = cutoff, method="NNE", lambda=0.01)
AUC[i,1]<-ic.1$AUC
ic.2= survivalROC(Stime=dados$time, status=dados$status, marker = pi_cox, predict.time = cutoff, method="KM")
AUC[i,2]<-ic.2$AUC; AUC[i,3]<-cut[i]
plot(ic.2$FP,ic.2$TP, type="l", xlim=c(0,1), ylim=c(0,1))          # ROC método KM
lines(ic.1$FP,ic.1$TP,type="l", xlim=c(0,1), ylim=c(0,1), col=2)   # ROC método NNE
}
colnames(AUC)<-c("AUC_NNE","AUC_KM","t"); AUC
 
# Modelo Paramétrico

## Avaliando a distribuição de T e de Zi
kidney.parfm <- select.parfm(Surv(time, status) ~ sex, cluster = "id", data = dados, dist = c("exponential",
                "weibull", "loglogistic", "lognormal"), frailty = c("gamma", "ingau", "possta"))
kidney.parfm; plot(kidney.parfm)

## Ajustando o modelo selecionado (T~Exp e Zi~Gama)
mod<-parfm(Surv(time, status) ~ sex, cluster = "id", data = dados, dist="exponential", frailty="gamma")
mod
ci.parfm(mod, level = 0.05)["sex", ]
zi <- predict(mod)                          # frailty prediction
par(mfrow=c(1,1))
plot(zi)

4.2 Modelo AG (Andersen e Gill)

require(survival)
dados<-read.table("https://docs.ufpr.br/~giolo/CE063/Dados/kidney_1.txt", h=T)

agfit <- coxph(Surv(start, stop, status) ~ sex + age + cluster(id), data = dados)
summary(agfit)
agfit <- coxph(Surv(start, stop, status) ~ sex + cluster(id), data = dados)
summary(agfit)

par(mfrow=c(1,2))
rd<-resid(agfit, type="deviance")       # resíduos deviance
rm<-resid(agfit, type="martingale")     # resíduos martingal
pl<-agfit$linear.predictors
plot(pl,rm, xlab="Preditor linear", ylab="Resíduo martingal", ylim=c(-3,3), pch=16)
plot(pl,rd, xlab="Preditor linear", ylab="Resíduo deviance" , ylim=c(-3,3), pch=16)

H0<-basehaz(agfit, centered=F)          # risco acumulado de base
H0
tempo<- H0$time 
H0f<-H0$hazard*exp(-0.8866)
H0m<-H0$hazard                        
par(mfrow=c(1,1))
plot(tempo,H0f, lty=4, type="s", xlab="Tempos", ylab=expression(Lambda[0]*(t)), ylim=c(0,11))
lines(tempo,H0m, type="s", lty=1)
legend(100,8, lty=c(4,1), c("Feminino","Masculino"), lwd=1, bty="n", cex=1.0)

4.3 Modelo PWP (Prentice, Williams e Petersen)

# (A) Taxa de falha de base evento-específico e efeito comum das covariáveis 

pwpfit <- coxph(Surv(start, stop, status) ~ sex + age + cluster(id) + strata(even), data = dados)
summary(pwpfit)
pwpfit <- coxph(Surv(start, stop, status) ~ sex + cluster(id) + strata(even), data = dados)
summary(pwpfit)

par(mfrow=c(1,2))
rd<-resid(pwpfit, type="deviance")       # resíduos deviance
rm<-resid(pwpfit, type="martingale")     # resíduos martingal
pl<-pwpfit$linear.predictors
plot(pl,rm, xlab="Preditor linear", ylab="Resíduo martingal", ylim=c(-3,3), pch=16)
plot(pl,rd, xlab="Preditor linear", ylab="Resíduo deviance" , ylim=c(-3,3), pch=16)

H0<-basehaz(pwpfit,centered=F)           # risco acumulado de base
H0
H01<-as.matrix(H0[1:34,1])               # risco acumulado de base do estrato 1 (even=1)
H02<-as.matrix(H0[35:71,1])              # risco acumulado de base do estrato 2 (even=2)
tempo1<- H0$time[1:34]                   # tempos do estrato 1
tempo2<- H0$time[35:71]                  # tempos do estrato 2
 
par(mfrow=c(1,1))
plot(tempo2,H02, lty=4, type="s", xlab="Tempos", ylab=expression(Lambda[0]*(t)))
lines(tempo1,H01,type="s",lty=1)
legend(500,5, lty=c(1,4), c("Evento 1","Evento 2"), lwd=1, bty="n", cex=0.8)

# (B) Taxa de falha de base evento-específico e efeito das covariáveis evento-específico

pwpfit1 <- coxph(Surv(start, stop, status) ~ sex*strata(even) + cluster(id), data = dados)
summary(pwpfit1)

par(mfrow=c(1,2))
rd<-resid(pwpfit1, type="deviance")       # resíduos deviance
rm<-resid(pwpfit1, type="martingale")     # resíduos martingal
pl<-pwpfit$linear.predictors
plot(pl,rm, xlab="Preditor linear", ylab="Resíduo martingal", ylim=c(-3,3), pch=16)
plot(pl,rd, xlab="Preditor linear", ylab="Resíduo deviance" , ylim=c(-3,3), pch=16)
 
# Se gap times
pwpfit <- coxph(Surv(time, status) ~ sex + cluster(id) + strata(even), data = dados)
summary(pwpfit)

pwpfit <- coxph(Surv(time, status) ~ sex*strata(even) + cluster(id), data = dados)
summary(pwpfit)

Tópico 5 - Modelos para Dados de Sobrevivência com Estrutura de Riscos Competitivos

5.1 Estimador não paramétrico da função de incidência acumulada (FIA)

require(cmprsk)
t<-c(128,80,61,24,113,65,106,37,80,49)
cause<-c(1,1,2,2,0,3,2,0,3,1)
    
fit<-cuminc(t, cause)        # var = variância assintótica de Aalen (1978)
fit
timepoints(fit, times=c(0,24,37,49,61,65,80,106,113,128))
plot.cuminc(fit, xlab="Days", ylab="CIF", curvlab=c("Cause 1", "Cause 2", "Cause 3"))

# ou usando a função CumIncidence
require(cmprsk)
source("https://docs.ufpr.br/~giolo/CE063/Dados/CumIncidence.R.txt")

t<-c(128,80,61,24,113,65,106,37,80,49)
cause<-c(1,1,2,2,0,3,2,0,3,1)
fit=CumIncidence(t,cause, xlab="days", t=c(0,24,37,49,61,65,80,106,113,128))
fit

Ft<-fit$est[1,] + fit$est[2,] + fit$est[3,]
tt<-c(0,24,37,49,61,65,80,106,113,128)
estim<-as.data.frame(cbind(tt, Ft, fit$est[1,],fit$est[2,],fit$est[3,]))
names(estim)<-c("tt","Ft","F1","F2","F3")
estim

plot(estim$tt,  estim$Ft, type="s", col=1, xlab="Tempos", ylab="F(t) e Fk(t)")
lines(estim$tt, estim$F1, type="s", col=2, lty=2)
lines(estim$tt, estim$F2, type="s", col=4, lty=2)
lines(estim$tt, estim$F3, type="s", col=3, lty=3)
legend(10,0.8, c("qualquer causa","causa 1", "causa 2", "causa 3"), col=c(1,2,4,3), lty=c(1,2,2,3), bty="n")

5.1.1 Ilustração - Estimador não paramétrico e teste de comparação entre FIA’s

bmt<-read.table("https://docs.ufpr.br/~giolo/CE063/Dados/bmt.txt", h=T)
attach(bmt)
dis<-factor(dis, levels=c(0,1), labels= c("ALL", "AML"))
table(dis, status) 
tapply(ftime, list(dis, status), mean)

require(cmprsk) 
fit1<- cuminc(ftime, status, dis, rho=0, cencode=0)
fit1
timepoints(fit1, times=c(0,10,20,30,40,50,60,70))
plot.cuminc(fit1, xlab="Meses", ylab="FIA", lty=c(1,2,1,2), col=c(1,1,2,2), curvlab=c("ALL - TRM", 
            "AML - TRM", "ALL - Recaída", "AML - Recaída")) 

# ou usando a função CumIncidence
require(cmprsk) 
source("https://docs.ufpr.br/~giolo/CE063/Dados/CumIncidence.R.txt")
fit<-CumIncidence(ftime, status, dis, cencode=0, xlab="Meses", ylab="FIA por doença")

## curvas estimadas com intervalos de confiança
par(mfrow=c(2,2))
fit<-CumIncidence(ftime,status, dis, cencode=0, xlab="Meses", ylab="FIA", 
                  t=c(0,3,6,9,12,24,36,48,60,70), level = 0.95)

5.3 Exemplo - Dados Linfoma de células foliculares (follic.txt)

5.3.1 - Análise ecploratória

follic<-read.table("https://docs.ufpr.br/~giolo/CE063/Dados/follic.txt", h=T)
attach(follic)
require(cmprsk)
fia1<-cuminc(time, cause, rho=0, cencode=0)
fia1
plot(fia1, lty=1, col=1:2, xlab="Tempo (anos)", ylab="Fk(t)", curvlab=c("Causa 1", "Causa 2"))

fia2<-cuminc(time, cause, stage, rho=0, cencode=0)
fia2

fia3<-cuminc(time, cause, trat, rho=0, cencode=0)
fia3

agec<-ifelse(age<58,0,1)
fia4<-cuminc(time, cause, agec, rho=0, cencode=0)
fia4

hgbc<-ifelse(hgb<140,0,1)
fia5<-cuminc(time, cause, hgbc, rho=0, cencode=0)
fia5

par(mfrow=c(1,4))
plot(fia2,col=c(1,1,2,2), xlab="Tempo (anos)", ylab="Fk(t|x)", curvlab=c("Est 1 - Causa 1",
    "Est 2 - Causa 1", "Est 1 - Causa 2","Est 2 - Causa 2"), main="Estágio")
plot(fia3,col=c(1,1,2,2), xlab="Tempo (anos)", ylab="Fk(t|x)", curvlab=c("R+Q - Causa 1",
    "Rad  - Causa 1", "R+Q - Causa 2","Rad  - Causa 2"), main="Tratamento")
plot(fia4,col=c(1,1,2,2), xlab="Tempo (anos)", ylab="Fk(t|x)", curvlab=c("< 58 - Causa 1",
    "> 58 - Causa 1", "< 58 - Causa 2","> 58 - Causa 2"), main="Idade")
plot(fia5,col=c(1,1,2,2), xlab="Tempo (anos)", ylab="Fk(t|x)", curvlab=c("< 140 - Causa 1",
     "> 140 - Causa 1", "< 140 - Causa 2","> 140 - Causa 2"), main="Hemoglobina")

5.3.2 - Ajuste do modelo de Fine-Gray

x=cbind(age, stage)
mod2 <- crr(time, cause, x)
summary(mod2)

names(mod2)
mod2$loglik.null
mod2$loglik
aic<- -2*mod2$loglik + 2*length(mod2$coef); aic

par(mfrow=c(1,2))
z.p <- predict(mod2,rbind(c(58,1),c(58,2)))
plot(z.p, lty=1:2, color=1:2, ylim=c(0,1), xlab="Tempo (anos)", ylab="F(t |x) - Causa 1")
legend(2,0.9,lty=1:2,col=1:2, c("age = 58, stage = 1","age = 58, stage = 2"),bty="n")

z.p <- predict(mod2,rbind(c(58,1),c(58,2),c(20,1),c(20,2)))
plot(z.p, lty=c(1,2,1,2), col=c(2,2,4,4), ylim=c(0,1), xlab="Tempo (anos)", ylab="F(t |x) - Causa 1")
legend(2,0.99, lty=c(1,2,1,2), col=c(2,2,4,4), c("age 58, stage 1","age 58, stage 2","age 20, stage 1",
      "age 20, stage 2"), bty="n")

5.3.3 Suposição de proporcionalidade  

names(mod2$coef)=c("Idade", "Estágio Clínico") 
par(mfrow = c(1,2)) 
for(j in 1:ncol(mod2$res)) 
scatter.smooth(mod2$uft, mod2$res[,j], lpars=list(col='red', lwd=3), main = names(mod2$coef)[j],  
               xlab = 'Tempo (anos)', ylab = 'Resíduos de Schoenfeld')

Tópico 6 - Modelos para Dados de Sobrevivência Multiestados


6.1 Estimador não paramétrico - Ausência de Covariáveis

# Autor do script: Henrique Laureano 

require(mstate)
follic <- read.table("https://docs.ufpr.br/~giolo/CE063/Dados/follic1.txt", header = TRUE, sep = " ", 
          colClasses = c("numeric", "factor", rep("integer", 2), "factor", "integer", rep("factor", 3), 
         "numeric", "factor", "numeric", rep("factor",3)))
follic <- follic[, c(15, 5, 8, 1, 3, 14, 7, 10, 12, 13, 11)]
names(follic) <-c("id","estc","radq","idade","hem","resp","rec","survtime","dftime","cens","stat")
levels(follic$radq)[1] <- "N"
levels(follic$rec)[1]  <- "S"
summary(follic[ , 1:5])
summary(follic[ , 6:11])
follic[follic$id == "271", "rec"] <- "S"
follic[follic$id %in% c(72, 160, 383), "dftime"] <- follic[follic$id %in% c(72, 160, 383), "survtime"]
follic$idade2 <- as.factor(ifelse(follic$idade < median(follic$idade), "1", "2"))
follic$hem2 <- as.factor(ifelse(follic$hem < median(follic$hem), "1", "2"))

dmstate <- follic
# estado 1 (cr)
dmstate$cr <- ifelse(follic$resp == "CR", 1, 0)
# estado 2 (nr)
dmstate$nr <- ifelse(follic$resp == "NR", 1, 0)
# estado 3 (recaida local)
dmstate$l <- ifelse(follic$rec == "L", 1, 0)
# estado 4 (recaida distante)
dmstate$d <- ifelse(follic$rec == "D", 1, 0)
# estado 5 (recaida em ambos: local e distante)
dmstate$b <- ifelse(follic$rec == "B", 1, 0)
# estado 6 (morte)
dmstate$morte <- ifelse(follic$stat == "1", 1, 0)

# tmat: matriz de transições
tmat <- transMat(list(c(3, 4, 5, 6), c(6), c(6), c(6), c(6), c()), names = as.character(1:6))
# dados no format longo (msprep)
dmstate <- msprep(dmstate, trans = tmat
                  , time = c(NA, NA, "dftime", "dftime", "dftime", "survtime")
                  , status = c("cr", "nr", "l", "d", "b", "morte")
                  , start = list(state = ifelse(follic$resp == "CR", 1, 2), time = rep(0, 541))
                  , keep = c("estc", "radq", "idade2", "hem2"))
dmstate[1871,6]<- dmstate[1871,6]+0.000001

# transições observadas - frequências 
events(dmstate)

# possíveis caminhos/trajetórias
paths(tmat, start=1)  

# ajuste modelo sem covariáveis
mod0 <- coxph(Surv(Tstart, Tstop, status) ~ strata(trans), dmstate, method = "breslow")
mod0_1 <- msfit(mod0, newdata = data.frame(trans = 1:8, strata = 1:8), trans = tmat)
plot(mod0_1, xlab = "Tempo (em anos)", ylab = "Taxa de transição acumulada", 
     sub="1=CR, 2= NR, 3= Recaída Local, 4= Recaída Distante, 5= Recaída L e D, 6= Morte")

# probabilidade de transição a partir de s = 0 (ou seja, a partir do diagnóstico) para outro estado
pt0 <- probtrans(mod0_1, predt = 0)
 
par(mfrow=c(1,2))
plot(pt0, from=1, type="single", legend.pos = c(0, .65), xlab = "Tempo (em anos)", 
     ylab = "Probabilidade de Transição",
     sub="1=CR, 2= NR, 3= Recaída Local, 4= Recaída Distante, 5= Recaída L e D, 6= Morte")
title("(a) A partir do estado inicial 1")

plot(pt0, from=2, type="single", legend.pos = c(20, .6), xlab = "Tempo (em anos)", 
     ylab = "Probabilidade de Transição",
     sub="1=CR, 2= NR, 3= Recaída Local, 4= Recaída Distante, 5= Recaída L e D, 6= Morte")
title("(b) A partir do estado inicial 2")

# probabilidades de transição entre estados ??
pt0_1 <- probtrans(mod0_1, predt=0)[[1]]
pt0_2 <- probtrans(mod0_1, predt=0)[[2]]

plot(pt0_1, type="single", legend.pos = c(0, .65), xlab = "Tempo (em anos)", 
     ylab = "Probabilidade de Transição",
     sub="1=CR, 2= NR, 3= Recaída Local, 4= Recaída Distante, 5= Recaída L e D, 6= Morte")

# probabilidade de transição a partir de s = 10 anos após diagnóstico para outro estado
pt10 <- probtrans(mod0_1, predt = 10)
plot(pt10, from=1, type="single", legend.pos = c(0, .6), xlab = "Tempo (em anos)", 
     ylab = "Probabilidade de Transição",
     sub="1=CR, 2= NR, 3= Recaída Local, 4= Recaída Distante, 5= Recaída L e D, 6= Morte")

# tempos esperados de permanência, em anos
round(ELOS(pt0), 3)
# ver opção tau --> ELOS(pt0, tau=20)

#ajuste modelo com covariáveis
mod1 <- coxph(Surv(Tstart, Tstop, status) ~ radq + estc + idade2 + hem2 + strata(trans), dmstate, 
              method = "breslow")
mod2 <- coxph(Surv(Tstart, Tstop, status) ~ radq + estc + idade2 + strata(trans), dmstate, 
              method = "breslow")
anova(mod2)
summary(mod2)

6.2 Tabela de Transições + Estimação Paramétrica na Ausência de Covariáveis

# Autor do script: Henrique Laureano 

follic <- read.table("https://docs.ufpr.br/~giolo/CE063/Dados/follic1.txt", header = TRUE, sep = " ", 
          colClasses = c("numeric", "factor", rep("integer", 2), "factor", "integer", rep("factor", 3), 
         "numeric", "factor", "numeric", rep("factor",3)))
follic <- follic[, c(15, 5, 8, 1, 3, 14, 7, 10, 12, 13, 11)]
names(follic) <-c("id","estc","radq","idade","hem","resp","rec","survtime","dftime","cens","stat")
levels(follic$radq)[1] <- "N"
levels(follic$rec)[1]  <- "S"
summary(follic[ , 1:5])
summary(follic[ , 6:11])
follic[follic$id == "271", "rec"] <- "S"
follic[follic$id %in% c(72, 160, 383), "dftime"] <- follic[follic$id %in% c(72, 160, 383), "survtime"]
follic$idade2 <- as.factor(ifelse(follic$idade < median(follic$idade), "1", "2"))
follic$hem2 <- as.factor(ifelse(follic$hem < median(follic$hem), "1", "2"))

dmsm <- follic

# estados 1 e 2 (CR e NR)
dmsm$estado <- ifelse(follic$resp == "CR", 1, 2)

# estado 3, 4 e 5 (recaída Local, Distante e em amBos)
dmsm <- dmsm[rep(seq_len(nrow(dmsm)), ifelse(dmsm$rec == "S", 1, 2)), ]
dmsm$tempo <- ifelse(row.names(dmsm) == as.integer(row.names(dmsm)), 0, dmsm$dftime)
dmsm$estado <- ifelse(row.names(dmsm) != as.integer(row.names(dmsm)) & dmsm$rec == "L", 3
             , ifelse(row.names(dmsm) != as.integer(row.names(dmsm)) & dmsm$rec == "D", 4
             , ifelse(row.names(dmsm) != as.integer(row.names(dmsm)) & dmsm$rec == "B", 5
             , dmsm$estado)))

# estado 6 (morte)
dmsm <- dmsm[rep(seq_len(nrow(dmsm)), ifelse(row.names(dmsm) != as.integer(row.names(dmsm)), 2, 1)), ]
dmsm$tempo <- ifelse(is.na(as.integer(row.names(dmsm))), dmsm$survtime, dmsm$tempo)
dmsm$estado <- ifelse(is.na(as.integer(row.names(dmsm))) & dmsm$stat == 1, 6, dmsm$estado)

# pacientes que não saíram dos estados 1 e 2 ou que saíram apenas para o estado 6
dmsm <- dmsm[rep(seq_len(nrow(dmsm)), ifelse(dmsm$rec == "S", 2, 1)), ]
dmsm$tempo <- ifelse(row.names(dmsm) != as.integer(row.names(dmsm)) & dmsm$rec == "S",
                     dmsm$survtime, dmsm$tempo)
dmsm$estado <- ifelse(row.names(dmsm) != as.integer(row.names(dmsm))& dmsm$rec == "S" 
                      & dmsm$stat == 1, 6, dmsm$estado)

library(msm)
tabela <- statetable.msm(dmsm$estado, dmsm$id)
tabela

# modelo paramétrico (pacote msm)
dmsm <- follic

# estados 1 e 2 (CR e NR)
dmsm$estado <- ifelse(follic$resp == "CR", 1, 2)

# estado 3, 4 e 5 (recaída Local, Distante e em ambos)
dmsm <- dmsm[rep(seq_len(nrow(dmsm)), ifelse(dmsm$rec == "S", 1, 2)), ]
dmsm$tempo  <- ifelse(row.names(dmsm) == as.integer(row.names(dmsm)), 0, dmsm$dftime)
dmsm$estado <- ifelse(row.names(dmsm) != as.integer(row.names(dmsm)) & dmsm$rec == "L", 3
             , ifelse(row.names(dmsm) != as.integer(row.names(dmsm)) & dmsm$rec == "D", 4
             , ifelse(row.names(dmsm) != as.integer(row.names(dmsm)) & dmsm$rec == "B", 5
             , dmsm$estado)))

# estado 6 (morte)
dmsm <- dmsm[rep(seq_len(nrow(dmsm)), ifelse(row.names(dmsm) != as.integer(row.names(dmsm)), 2, 1)), ]
dmsm$tempo <- ifelse(is.na(as.integer(row.names(dmsm))), dmsm$survtime, dmsm$tempo)
dmsm$estado <- ifelse(is.na(as.integer(row.names(dmsm))) & dmsm$stat == 1, 6, dmsm$estado)

# pacientes que não saíram dos estados 1 e 2 ou que saíram apenas para o estado 6
dmsm <- dmsm[rep(seq_len(nrow(dmsm)), ifelse(dmsm$rec == "S", 2, 1)), ]
dmsm$tempo <- ifelse(row.names(dmsm) != as.integer(row.names(dmsm)) & dmsm$rec == "S", 
                     dmsm$survtime, dmsm$tempo)
dmsm$estado <- ifelse(row.names(dmsm) != as.integer(row.names(dmsm))& dmsm$rec == "S" 
                      & dmsm$stat == 1, 6, dmsm$estado)

library(msm)
tabela <- statetable.msm(dmsm$estado, dmsm$id)