Comandos em Linguagem R

Regressão Linear Simples


EXEMPLO 1 - Tempo de reação

Y = tempo de reação a certo estímulo (em segundos)
X = idade (em anos)
Arquivo: Exemplo1s.txt (Fonte: Bussab, 1988)  
rm(list=ls())
dados<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo1s.txt", h=T)
attach(dados)
head(dados)
##   tempo idade
## 1    96    20
## 2    92    20
## 3   106    20
## 4   100    20
## 5    98    25
## 6   104    25
plot(idade,tempo, pch=16, col=4)
points(c(20,25,30,35,40),c(98.5,103.25,107.75,110.75,117.25), pch=15, col=2)
cor(idade,tempo)
## [1] 0.7680814
cor.test(idade,tempo)
## 
##  Pearson's product-moment correlation
## 
## data:  idade and tempo
## t = 5.0889, df = 18, p-value = 7.662e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4931929 0.9035073
## sample estimates:
##       cor 
## 0.7680814
mod<-lm(tempo~idade, data=dados)
summary(mod)
## 
## Call:
## lm(formula = tempo ~ idade, data = dados)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.500 -4.125 -0.750  2.625 10.500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  80.5000     5.4510  14.768 1.67e-11 ***
## idade         0.9000     0.1769   5.089 7.66e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.593 on 18 degrees of freedom
## Multiple R-squared:  0.5899, Adjusted R-squared:  0.5672 
## F-statistic:  25.9 on 1 and 18 DF,  p-value: 7.662e-05
anova(mod)
## Analysis of Variance Table
## 
## Response: tempo
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## idade      1    810  810.00  25.897 7.662e-05 ***
## Residuals 18    563   31.28                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
x<-20:40
ye<-80.5+0.9*x
lines(x,ye)

e1 <- resid(mod)
round(e1,3)                        # resíduos brutos ou ordinários
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## -2.5 -6.5  7.5  1.5 -5.0  1.0  7.0 -2.0  8.5 -1.5  1.5 -7.5  0.0 -7.0  6.0 
##   16   17   18   19   20 
## -4.0 -3.5 -4.5 10.5  0.5
plot(mod$fitted,e1, pch=16)
abline(h=0, col=2, lty=2) 

sigma <- summary(mod)$sigma        # sqrt(Qmres)
sigma
## [1] 5.592654
d1 <- e1/sigma
round(d1,1)                        # resíduos padronizados
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## -0.4 -1.2  1.3  0.3 -0.9  0.2  1.3 -0.4  1.5 -0.3  0.3 -1.3  0.0 -1.3  1.1 
##   16   17   18   19   20 
## -0.7 -0.6 -0.8  1.9  0.1
plot(mod$fitted,d1, pch=16)
abline(h=0, col=2, lty=2)

r1 <- rstandard(mod)               # resíduos studentizados (internamente)
round(r1,3)
##      1      2      3      4      5      6      7      8      9     10 
## -0.485 -1.261  1.455  0.291 -0.930  0.186  1.301 -0.372  1.559 -0.275 
##     11     12     13     14     15     16     17     18     19     20 
##  0.275 -1.376  0.000 -1.301  1.115 -0.744 -0.679 -0.873  2.036  0.097
plot(mod$fitted, r1, pch=16)
abline(h=0, col=2, lty=2)

r2 <- rstudent(mod)
round(r2,3)                         # resíduos studentizados (externamente)
##      1      2      3      4      5      6      7      8      9     10 
## -0.474 -1.283  1.505  0.283 -0.926  0.181  1.329 -0.363  1.629 -0.268 
##     11     12     13     14     15     16     17     18     19     20 
##  0.268 -1.414  0.000 -1.329  1.124 -0.734 -0.668 -0.867  2.256  0.094
plot(mod$fitted, r1, pch=16)
abline(h=0, col=2, lty=2)

par(mfrow=c(2,2))
plot(mod, which=c(1:4), add.smooth=FALSE, pch=20)

shapiro.test(mod$resid)
## 
##  Shapiro-Wilk normality test
## 
## data:  mod$resid
## W = 0.93762, p-value = 0.2161
## QQplot com envelope simulado

fit.model<-mod
source("https://www.ime.usp.br/~giapaula/envel_norm")

# Obs: Como há mais de um y observado para cada valor de x (repetições), é possível utilizar o teste de Bartlett para testar a homogeneidade de variâncias.

bartlett.test(tempo~idade)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  tempo by idade
## Bartlett's K-squared = 0.29867, df = 4, p-value = 0.9899
predict(lm(tempo~idade), interval="confidence", se.fit=T, level=0.95)
## $fit
##      fit       lwr      upr
## 1   98.5  93.94935 103.0507
## 2   98.5  93.94935 103.0507
## 3   98.5  93.94935 103.0507
## 4   98.5  93.94935 103.0507
## 5  103.0  99.78220 106.2178
## 6  103.0  99.78220 106.2178
## 7  103.0  99.78220 106.2178
## 8  103.0  99.78220 106.2178
## 9  107.5 104.87268 110.1273
## 10 107.5 104.87268 110.1273
## 11 107.5 104.87268 110.1273
## 12 107.5 104.87268 110.1273
## 13 112.0 108.78220 115.2178
## 14 112.0 108.78220 115.2178
## 15 112.0 108.78220 115.2178
## 16 112.0 108.78220 115.2178
## 17 116.5 111.94935 121.0507
## 18 116.5 111.94935 121.0507
## 19 116.5 111.94935 121.0507
## 20 116.5 111.94935 121.0507
## 
## $se.fit
##  [1] 2.166026 2.166026 2.166026 2.166026 1.531611 1.531611 1.531611
##  [8] 1.531611 1.250555 1.250555 1.250555 1.250555 1.531611 1.531611
## [15] 1.531611 1.531611 2.166026 2.166026 2.166026 2.166026
## 
## $df
## [1] 18
## 
## $residual.scale
## [1] 5.592654
new <- data.frame(idade = seq(24,28,1))
new
##   idade
## 1    24
## 2    25
## 3    26
## 4    27
## 5    28
predict(lm(tempo~idade), new, interval="prediction", se.fit=T, level=0.95)
## $fit
##     fit      lwr      upr
## 1 102.1 89.85545 114.3445
## 2 103.0 90.81762 115.1824
## 3 103.9 91.76872 116.0313
## 4 104.8 92.70862 116.8914
## 5 105.7 93.63720 117.7628
## 
## $se.fit
##        1        2        3        4        5 
## 1.640088 1.531611 1.436779 1.358451 1.299615 
## 
## $df
## [1] 18
## 
## $residual.scale
## [1] 5.592654
pred<-  predict(lm(tempo~idade), interval="confidence", level=0.95)
pred1<- predict(lm(tempo~idade), interval="prediction", level=0.95)
matplot(idade, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l",
        ylab = "valores preditos de y")

EXEMPLO 2 - Tempo para estocar refrigerantes

Y = tempo necessário para um comerciante estocar uma prateleira da mercearia com refrigerantes (em minutos)
X = quantidade da mercadoria (em unidades)
Arquivo: Exemplo2s.txt (Fonte: Montgomery e Peck (1992))  
rm(list=ls())
ex2<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo2s.txt",h=T)
attach(ex2)
plot(x,y, pch=16, col=4)
cor(x,y)
cor.test(x,y)
mod1<-lm(y~x)
anova(mod1)
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)
plot(x,y, pch=16, col=4)
x1<-1:30
ye<- -0.093756+0.407107*x1
lines(x1,ye)
# OBS: o limite inferior de x é muito próximo de zero fazendo sentido que quando x = 0 se tenha y = 0.
# Foi, então, ajustado o modelo sem intercepto.

mod2<-lm(y ~ -1 + x)    # modelo sem intercepto
summary(mod2) 
1 - (sum(y^2)-(sum((y*x))^2/sum(x^2)))/sum((y-mean(y))^2)   # R^2*(0)
anova(mod2)
par(mfrow=c(2,2))
plot(mod2, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod2$resid)
fit.model<-mod2
source("https://www.ime.usp.br/~giapaula/envel_norm")
par(mfrow=c(1,1))
plot(x,y, pch=16, col=2)
x1<-min(x):max(x)
ye<- mod2$coef[1]*x1                           
lines(x1,ye)                                   
ye1<- mod1$coef[1] + mod1$coef[2]*x1
lines(x1,ye1, lty=4)  

EXEMPLO 3 - Máquinas automáticas de vendas de refrigerantes

Y  = tempo (minutos) para entregador repor e executar pequenos serviços em máquinas de vendas de refrigerantes.
X1 = quantidade de volumes repostos nas máquinas (em unidades)
Arquivo: Exemplo3s.txt (Fonte: Montgomery e Peck (1992))  
ex3<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo3s.txt", h=T)
attach(ex3)
head(ex3)
plot(X1, Y, pch=16, col=4)
cor(X1,Y)
cor.test(X1,Y)
mod1<-lm(Y~X1)
anova(mod1)
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)

x1<-2:30
ye<-3.321+2.176*x1
lines(x1,ye)

################################################
#              Transformações em Y
################################################

mod2<-lm(sqrt(Y)~X1)
par(mfrow=c(2,2))
plot(mod2, which=c(1:4), add.smooth=FALSE, pch=20)
plot(mod2$fitted.values, mod2$resid, pch=16)
summary(mod2)
shapiro.test(mod2$resid)
require(MASS)
boxcox(Y~X1,data=ex3,plotit=T)    
boxcox(Y~X1,data=ex3,lam=seq(-0.5,1.5,1/10))
boxcox(Y~X1,data=ex3,plotit=F)

EXEMPLO 4 - Renda média mensal em restaurantes

Y = Renda média mensal sobre vendas de refeições em restaurantes (em mil dólares)
X = Despesas anual com propagandas (em mil dólares)
Arquivo: Exemplo4s.txt (Fonte: Montgomery e Peck (1992))  
rm(list = ls())
ex4<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo4s.txt",h=T)
attach(ex4)
head(ex4)
plot(x,y, pch=16, col=4)
cor(x,y)
cor.test(x,y)
mod1<-lm(y~x)
anova(mod1)
summary(mod1)
shapiro.test(mod1$resid)

par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=20)
x1<-3:20
ye<-49.4434+8.0484*x1
plot(x,y, pch=16, col=4)
lines(x1,ye,type="l")

################################################
#            Transformações em Y
################################################

mod2<-lm(sqrt(y)~x)
par(mfrow=c(2,2))
plot(mod2, which=c(1:4), add.smooth=FALSE, pch=20)
mod3<-lm(log(y)~x)
par(mfrow=c(2,2))
plot(mod3, which=c(1:4), add.smooth=FALSE, pch=20)
par(mfrow=c(1,1))

require(MASS)
boxcox(y~x,data=ex4,plotit=T)          
boxcox(y~x,data=ex4,lam=seq(-0.5,1.5,1/10))
par(mfrow=c(2,2))
mod4<-lm((y^0.6)~x)
plot(mod4, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)
shapiro.test(mod2$resid)
shapiro.test(mod3$resid)
shapiro.test(mod4$resid)

EXEMPLO 5 - Dados de árvores

Girth = diâmetro das árvores (em polegadas)
Volume = volume de madeira   (em pés cúbicos)
Height = altura das árvores  (em pés)
Arquivo: dados disponíveis no R (Fonte: Ryan, T.A.; Joiner, B.L.; Ryan, B.F., 1976) 
rm(list = ls())
data(trees)
# help(trees)
attach(trees)
trees
plot(Girth,Volume)
cor(Girth,Volume)
cor.test(Girth,Volume)
fit1<-lm(Volume~Girth)
summary(fit1)
anova(fit1)
shapiro.test(fit1$res)
par(mfrow=c(2,2))
plot(fit1, which=c(1:4), add.smooth=FALSE, pch=20)
fit.model<-fit1
source("https://www.ime.usp.br/~giapaula/envel_norm")

################################################
#        Transformações em Y
################################################

fit2<-lm(sqrt(Volume)~Girth)
summary(fit2)
anova(fit2)
shapiro.test(fit2$res)
par(mfrow=c(2,2))
plot(fit2, which=c(1:4), add.smooth=FALSE, pch=20)
fit.model<-fit2
source("https://www.ime.usp.br/~giapaula/envel_norm")

require(MASS)
boxcox(Volume~Girth,data=trees,plotit=T)
boxcox(Volume~Girth,data=trees,lam=seq(-0.5,1.5,1/10))
bc<-boxcox(Volume~Girth,data=trees,plotit=F)
cbind(bc$x,bc$y)
fit3<-lm((Volume^0.4)~Girth)
par(mfrow=c(2,2))
plot(fit3, which=c(1:4), add.smooth=FALSE, pch=20)
fit.model<-fit3
source("https://www.ime.usp.br/~giapaula/envel_norm")

EXEMPLO 6 - Bactérias sobreviventes em produto alimentício enlatado

Y = número médio de bactérias sobreviventes em um produto alimentício enlatado
X = minutos de exposição a 300º F
Arquivo: Fonte: Montgomery & Peck (1992)
rm(list = ls())
y<-c(175,108,95,82,71,50,49,31,28,17,16,11)
x<-1:12
cbind(y,x)
plot(x,y, pch=16, col=2)
yt<-log(y)
plot(x,yt, pch=16, col=2)
cor.test(x,yt)
mod1<-lm(yt~x)
summary(mod1)
anova(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)

par(mfrow=c(1,2))
xe<-min(x):max(x)
yte<-mod1$coef[1] + mod1$coef[2]*xe
plot(x, yt, pch=16, col=4, ylab="ln(y)")
lines(xe, yte)
plot(x, y, pch=16, col=4, ylab="ln(y)")
ye<-exp(mod1$coef[1])*exp(mod1$coef[2]*xe)
lines(xe, ye)

EXEMPLO 7 - Energia gerada por moinho de vento

Y = energia elétrica gerada por um moinho de vento
X = velocidade do vento (em MPH)
Arquivo: Fonte: Montgomery & Peck (1992)
rm(list = ls())
ex7<-read.table("http://www.ufpr.br/~giolo/CE071/Exemplos/Exemplo7s.txt",h=T)
attach(ex7)
plot(x,y, pch=16)

## Transformação em x: procedimento de Box-Tidwell
## Chute inicial: alpha0 = 1

mod1<-lm(y~x)
summary(mod1)
w<-x*log(x)
mod2<-lm(y~x+w)
summary(mod2)
alpha1<-(mod2$coef[3]/mod1$coef[2]) + 1
alpha1
x1<-x^alpha1
mod11<-lm(y~x1)
summary(mod11)

w1<-x1*log(x1)
mod22<-lm(y~x1+w1)
summary(mod22)
alpha2<-(mod22$coef[3]/mod11$coef[2]) + alpha1
alpha2

# Transformação indicada: x^-1 = 1/x

xt<-1/x
plot(xt,y, pch=16,col=2)
mod3<-lm(y~xt)
summary(mod3)
par(mfrow=c(2,2))
plot(mod3, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod3$resid)

par(mfrow=c(1,1))
plot(xt,y, pch=16, col=4, xlab="1/x")
xe<-seq(0,0.41,by=0.05)
ye<-mod3$coef[1]+mod3$coef[2]*xe
lines(xe,ye)

EXEMPLO 8 - Mínimos Quadrados Ponderados (Dados Exemplo 4)

Y = Renda média mensal sobre vendas de refeições em restaurantes (em mil dólares)
X = Despesas anual com propagandas (em mil dólares)
Fonte: Montgomery & Peck (1992)
rm(list = ls())
ex4<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo4s.txt",h=T)
attach(ex4)
mod1<-lm(y~x)
par(mfrow=c(1,2))
plot(x, mod1$resid, pch = 20)
plot(mod1$fitted, mod1$resid, pch = 20)
m1<-mean(x[1:3])
s1<-var(y[1:3])
m2<-mean(x[4:5])
s2<-var(y[4:5])
m3<-mean(x[7:11])
s3<-var(y[7:11])
m4<-mean(x[13:16])
s4<-var(y[13:16])
m5<-mean(x[18:23])
s5<-var(y[18:23])
m6<-mean(x[24:25])
s6<-var(y[24:25])
m7<-mean(x[27:30])
s7<-var(y[27:30])
medias<-as.vector(c(m1,m2,m3,m4,m5,m6,m7))
s<-as.vector(c(s1,s2,s3,s4,s5,s6,s7))
medias
s
plot(medias, s, pch=16, col=4)
lm(s~medias)
aux<- -7.376+7.820*x     
wi<-1/aux
wi

mod3<-lm(y~x,weights=wi)
summary(mod3)
anova(mod3)
rqwi<-sqrt(wi)
we<-rqwi*mod3$resid
wx<-rqwi*x
wy<-rqwi*mod3$fitted.values
plot(wy,we, pch=16)

mod1<-lm(y~x)
mod2<-lm(sqrt(y)~x)
mod2a<-lm(log(y)~x)
mod3<-lm(y~x,weights=wi)
par(mfrow=c(1,4))
plot(mod1$fitted.values,mod1$resid, pch=16)
plot(mod2$fitted.values,mod2$resid, pch=16)
plot(mod2a$fitted.values,mod2a$resid, pch=16)
plot(wy,we, pch=16)

par(mfrow=c(1,2))
plot(x,sqrt(y), pch=16,col=2)
xe<- min(x):max(x)
ye2<- mod2$coef[1]+mod2$coef[2]*xe
ye3<- mod3$coef[1]+mod3$coef[2]*xe
lines(xe,ye2)
title("y^0.5 = 7.8079 + 0.3454*x") 
plot(x,y, pch=16,col=2)
lines(xe,ye3)
title("y = 50.975 + 7.922*x") 

predict(lm(y~x), interval="confidence", level=0.95)
(predict(lm(sqrt(y)~x), interval="confidence", level=0.95))^2
exp(predict(lm(log(y)~x), interval="confidence", level=0.95))
pred<-predict(lm(y~x, weight=wi), interval="confidence", level=0.95)
pred

Comandos em Linguagem R

Regressão Linear Múltipla


EXEMPLO 1 - Manutenção de máquinas de bebidas

Y  = tempo gasto para execução dos serviços (em minutos)  
X1 = quantidade de bebida estocada em máquinas acionadas por moeda (em unidades)  
X2 = distância percorrida pelo profissional responsavel pelos servicos (em pés)
Arquivo: Exemplo1m.txt (Fonte: Montgomery & Peck, 1992)  
rm(list=ls())
ex1<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo1m.txt", h=T)
ex1
attach(ex1)

# instalar package scatterplot3d

require(scatterplot3d)
par(mfrow=c(1,2))
scatterplot3d(X1,X2,Y, type="h", highlight.3d=TRUE, angle= 30, scale.y=0.8, pch=16)
scatterplot3d(X1,X2,Y, type="h", highlight.3d=TRUE, angle=320, scale.y=0.8, pch=16)

X<-as.matrix(cbind(X1,X2))
cor(X)

mod1 <- lm(Y~X1+X2)
anova(mod1)
summary(mod1)

mod2 <- lm(Y~X1)
anova(mod2)
summary(mod2)

mod3 <- lm(Y~X2)
anova(mod3)
summary(mod3)

source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
mod1 <- lm(Y~X1+X2)
vif(mod1)

summary(mod1)
anova(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4),pch=16, add.smooth=FALSE)
 
par(mfrow=c(1,1))
plot(mod1$residuals,pch=16,ylab="Residuals")

par(mfrow=c(1,2))
plot(X1,mod1$residuals,pch=16,ylab="Residuals")
plot(X2,mod1$residuals,pch=16,ylab="Residuals")

resparc1<-mod1$residuals + 1.615*X1
resparc2<-mod1$residuals + 0.014*X2
plot(X1,resparc1,pch=16,ylab="Parcial Residuals")
plot(X2,resparc2,pch=16,ylab="Parcial Residuals")

inflm.mod1 <- influence.measures(mod1)
inflm.mod1
summary(inflm.mod1)

ex1s9<-ex1[-9,]
detach(ex1)
attach(ex1s9)
mod1s9<-lm(Y~X1+X2)
summary(mod1s9)
anova(mod1s9)
par(mfrow=c(2,2))
plot(mod1s9, which=c(1:4),pch=16, add.smooth=FALSE)

ex1s22<-ex1[-22,]
detach(ex1s9)
attach(ex1s22)
mod1s22<-lm(Y~X1+X2)
summary(mod1s22)
anova(mod1s22)
par(mfrow=c(2,2))
plot(mod1s22, which=c(1:4),pch=16, add.smooth=FALSE)

ex1s9s22<-ex1s9[-21,]
detach(ex1s22)
attach(ex1s9s22)
mod1s9s22<-lm(Y~X1+X2)
summary(mod1s9s22)
anova(mod1s9s22)
par(mfrow=c(2,2))
plot(mod1s9s22, which=c(1:4),pch=16, add.smooth=FALSE)

detach(ex1s9s22)
attach(ex1s9)
mod2s9<-lm(Y~X1)
anova(mod2s9)
summary(mod2s9)
par(mfrow=c(2,2))
plot(mod2s9, which=c(1:4),pch=16, add.smooth=FALSE)

detach(ex1s9)
attach(ex1)
require(scatterplot3d)
par(mfrow=c(1,1))
s3d <- scatterplot3d(X1,X2,Y, type="h", highlight.3d=TRUE,
       angle=55, scale.y=0.7, pch=16)
my.lm <- lm(Y ~ X1 + X2)
s3d$plane3d(my.lm,col=4)

par(mfrow=c(1,1))
x1 <- seq(2,30,length=50)
x2 <- seq(30,1500,length=50)
      f <- function(x1,x2){
         r <- 4.447+1.498*x1+0.0103*x2
         }
y <- outer(x1,x2,f)
y[is.na(y)] <- 1
par(bg = "white")
persp(x1,x2,y, theta=30, phi=20, expand=0.5, col="blue",
      ltheta=120, shade=0.15, ticktype="detailed",
      xlab="X1", ylab="X2", zlab="Y")

new<-data.frame(cbind(8,275))
predict(mod1s9,new,interval="confidence")
predict(mod1s9,interval="confidence")

predict(mod1s9,new,interval="prediction")
predict(mod1s9,interval="prediction")

EXEMPLO 2 - Calor liberado, em calorias, por grama de cimento

Y = heat evolved in calories per gram of cement  
X1 = tricalcium aluminate  
X2 = tricalcium silicate  
X3 = tetracalcium alumino ferrite  
X4 = dicalcium silicate  
Arquivo: Exemplo2m.txt          (Fonte: Montgomery & Peck, 1992).  
rm(list=ls())
ex2<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo2m.txt", h=T)
ex2
attach(ex2)
x<-as.matrix(cbind(X1,X2,X3,X4))
rxx<-cor(x)
rxx
 
mod1<-lm(Y~X1+X2+X3+X4,data=ex2)
source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod1)

2.1 Procedimento de seleção forward (passo a frente)

modp<-lm(Y~X1+X2+X3+X4, data=ex2)
anova(modp)
mod0<-lm(Y~1, data=ex2)
step(mod0, ~X1 + X2 + X3 + X4, direction=c("forward"), scale=5.98, test="F") 

## scale = estimativa de (sigma)^2 = QMres do modelo com as 4 regressoras
## use help(step) no R para mais detalhes

2.2 Procedimento de seleção backward (passo atrás)

step(modp, ~X1 + X2 + X3 + X4, direction=c("backward"), scale=5.98, test="F")

2.3 Procedimento de seleção stepwise (passo a passo)

step(mod0, ~X1 + X2 + X3 + X4, direction=c("both"), scale=5.98, test="F")

EXEMPLO 3 - Resistência do papel kraft (Regressão Polinomial)

Y = Tensile strength (psi)  
X = Hardwood concentration (%)  
Arquivo: Exemplo3m.txt            (Fonte: Montgomery & Peck, 1992)  
rm(list=ls())
ex3<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo3m.txt",h=T)
attach(ex3)
plot(X,Y, pch=20)
cor(X,X^2)

X2<-X*X
mod1<-lm(Y~X+X2)
source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod1)
Xb<-X-mean(X)
Xb2<-(X-mean(X))^2
plot(Xb,Y, pch=20)
cor(Xb,Xb2)

mod<-lm(Y~Xb+Xb2)
vif(mod)
anova(mod)
summary(mod)

par(mfrow=c(2,2))
plot(mod, which=c(1:4),add.smooth=FALSE, pch=20)
shapiro.test(mod$resid)

influence.measures(mod)
summary(influence.measures(mod))

par(mfrow=c(1,1))
plot(Xb,Y, pch=16)
xb<--6:8
yest<-45.295+2.546*xb-0.635*(xb^2)
lines(xb,yest)

pred<-  predict(mod, interval="confidence", level=0.95)
pred1<- predict(mod, interval="prediction", level=0.95)
matplot(Xb, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l",
        ylab = "valores preditos de y")

EXEMPLO 4 - Custo de manutenção e produção de um equipamento

Y = custo anual médio de manutenção de um equipamento (em dólares)
X = produção (em unidades)
Arquivo: Exemplo4m.txt            (Fonte: Montgomery & Peck, 1992)
rm(list=ls())
ex4<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo4m.txt",h=T)
ex4
attach(ex4)
plot(x,y, pch=16)
x2<-x*x
cor(x,x2)
mod<-lm(y~x+x2)
source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod)
a1<-(x-mean(x))/25
p1<-2*a1
p1
a2<-(a1^2)-((10^2 - 1)/12)
p2<-1/2*a2
p2

mod1<-lm(y~p1+p2)
vif(mod1)
anova(mod1)
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4),add.smooth=FALSE, pch=20)
influence.measures(mod1)
summary(influence.measures(mod1))

mod1$fitted

xe<-50:275
yest<-312.7686+0.0595*(xe-162.5)+0.0022*(xe-162.5)^2
par(mfrow=c(1,1))
plot(x,y, pch=16)
lines(xe,yest)

pred<-  predict(mod1, interval="confidence", level=0.95)
pred1<- predict(mod1, interval="prediction", level=0.95)
matplot(x, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l",
        ylab = "valores preditos de y")

# usando a função poly

fit<-lm(y~poly(x,degree=2))
fit$model
summary(fit)
anova(fit)
 
par(mfrow=c(2,2))
plot(fit, which=c(1:4),add.smooth=FALSE, pch=20)

influence.measures(mod1)
summary(influence.measures(mod1))

fit$fitted

par(mfrow=c(1,1))
pred<-  predict(fit, interval="confidence", level=0.95)
pred1<- predict(fit, interval="prediction", level=0.95)
matplot(x, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l",
                                                   ylab = "valores preditos de y")

EXEMPLO 5 - Black cherry trees

Y  = Volume (volume de madeira)
X1 = Girth (diâmetro da árvore a uma determinada altura do solo)
X2 = Height (altura da árvore)
Arquivo: Exemplo5m.txt ou data(trees) no R  
rm(list=ls())

data(trees)
attach(trees)

require(scatterplot3d)
par(mfrow=c(1,2))
scatterplot3d(Girth,Height,Volume, type="h", highlight.3d=TRUE,angle=30, scale.y=0.7, pch=16)
scatterplot3d(Girth,Height,Volume, type="h", highlight.3d=TRUE,angle=320,scale.y=0.7, pch=16)

cor(Girth,Height)

mod1<-lm(Volume~Height)
anova(mod1)
summary(mod1)
par(mfrow=c(2,2))
plot(mod1,which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod1))

mod2<-lm(Volume~Girth)
anova(mod2)
summary(mod2)
par(mfrow=c(2,2))
plot(mod2, which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod2))

mod3<-lm(Volume~Height +Girth)
anova(mod3)
summary(mod3)
par(mfrow=c(2,2))
plot(mod3, which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod3))

mod4<-lm(Volume~Height+Girth+I(Girth^2))
anova(mod4)
summary(mod4)
par(mfrow=c(2,2))
plot(mod4, which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod4))

mod5<-lm(Volume~Girth+I(Girth^2)+I(Girth^3))
anova(mod5)
summary(mod5)
par(mfrow=c(2,2))
plot(mod5, which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod5))

mod6<-lm(Volume~Height+I(Height^2)+Girth+I(Girth^2))
anova(mod6)
summary(mod6)
par(mfrow=c(2,2))
plot(mod6, which=c(1:4),add.smooth=FALSE, pch=20)
summary(influence.measures(mod6))

source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod4)

Gb<-Girth-mean(Girth)
model<-lm(Volume~Height+Gb+I(Gb^2),data=trees)
vif(model)
summary(model)
anova(model)
par(mfrow=c(2,2))
plot(model, which=c(1:4),add.smooth=FALSE, pch=20)
shapiro.test(model$residuals)

pred<-  predict(mod4, interval="confidence", level=0.95)
cbind(Volume, pred)

x <- seq(63,87,length=30)
y <- seq(8,21,length=30)
z <- function(x,y){v <-9.9204 +0.3764*x -2.8851*y +0.2686*y^2}
z <- outer(x, y, z)
persp(x,y,z, theta = -50, phi = 5, 
      expand = 0.5, col = "lightblue", 
      xlab="Height",ylab="Girth", zlab="Volume", 
      scale=T, ticktype = "detailed")

#install.packages('rgl')

library(rgl)
x <- seq(63,87,length=30)
y <- seq(8,21,length=30)
z <- function(x,y){v <-9.9204 +0.3764*x -2.8851*y +0.2686*y^2}
z <- outer(x, y, z)
open3d()
persp3d(x,y,z, theta = -50, phi = 5, 
      expand = 0.5, col = "lightblue", 
      xlab="Height",ylab="Girth", zlab="Volume", 
      scale=T, ticktype = "detailed")
play3d(spin3d(axis=c(0,0,1), rpm=5), duration=13)

EXEMPLO 6 - Comparação de ferramentas

Y  = tempo efetivo de vida (horas)
X1 = velocidade do torno (rpm)
X2 = tipo de ferramenta (tipo A = 0, tipo B = 1)
Arquivo: Exemplo6m.txt      (Fonte: Montgomery & Peck, 1992)
rm(list=ls())
ex6<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo6m.txt", h=T)
attach(ex6)

par(mfrow=c(1,2))
plot(X1[X2==1],Y[X2==1],pch="B",col=2,ylim=range(Y),xlab="Velocidade",ylab="Tempo")
points(X1[X2==0],Y[X2==0],pch="A",col=4)
plot(X1[X2==1],Y[X2==1],pch=16,col=2,ylim=range(Y),xlab="Velocidade",ylab="Tempo")
points(X1[X2==0],Y[X2==0],pch=16,col=4)

Esquema 1: dummy do tipo 0 e 1

X12<-X1*X2
mod1<-lm(Y~X1+X2+X12)
summary(mod1)
anova(mod1)

mod2<-lm(Y~X1+X2)
summary(mod2)
anova(mod2)
par(mfrow=c(2,2))
plot(mod2,which=c(1:4),add.smooth=FALSE, pch=20)
shapiro.test(mod2$resid)

x1<-min(X1):max(X1)
ya<-mod2$coef[1]+ mod2$coef[2]*x1
yb<-mod2$coef[1]+ mod2$coef[3]+ mod2$coef[2]*x1
par(mfrow=c(1,1))
plot(X1[X2==1],Y[X2==1],pch=16,col=2,ylim=range(c(0,50)),xlab="Velocidade",ylab="Tempo")
points(X1[X2==0],Y[X2==0],pch=16,col=4)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
legend(850,45,lty=c(1,2),c("Ferramenta A","Ferramenta B"),bty="n",cex=0.8)

Esquema 2: dummy do tipo 1 e -1

ex6a<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo6m.txt", h=T)
attach(ex6a)
 
x2<-ifelse(X2==0,1,-1)   # A = 1 e B = -1
x12<-X1*x2
mod1<-lm(Y~X1+x2+x12)
summary(mod1)
anova(mod1)

mod2<-lm(Y~X1+x2)
summary(mod2)
anova(mod2)
par(mfrow=c(2,2))
plot(mod2,which=c(1:4),add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)
 
x1<-min(X1):max(X1)
ya<-mod2$coef[1]+ mod2$coef[2]*x1 + mod2$coef[3]
yb<-mod2$coef[1]+ mod2$coef[2]*x1 - mod2$coef[3]
par(mfrow=c(1,1))
plot(X1[x2==-1],Y[x2==-1],pch=16,col=2,ylim=range(c(0,50)),xlab="Velocidade", ylab="Tempo")
points(X1[x2==1],Y[x2==1],pch=16, col=4)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
legend(850,45,lty=c(1,2),c("Ferramenta A","Ferramenta B"),bty="n",cex=0.8)

Esquema 3: sem intercepto e dummy do tipo 0 e 1

ex6b<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exemplo6m.txt",h=T)
attach(ex6b)
 
x21<-ifelse(X2==0,1,0)
x22<-ifelse(X2==0,0,1)
cbind(x21,x22)
 
x1x21<-X1*x21
x1x22<-X1*x22
mod<-lm(Y~-1+X1+x21+x22+x1x21)  # Como x1x21 + x1x22 = x1, incluir somente x1x21 ou x1x22 
summary(mod)
anova(mod)
 
mod1<-lm(Y~-1+X1+x21+x22)
summary(mod1)
anova(mod1)
par(mfrow=c(2,2))
plot(mod1,which=c(1:4),add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)

x1<-min(X1):max(X1)
ya<-mod1$coef[1]*x1 + mod1$coef[2]
yb<-mod1$coef[1]*x1 + mod1$coef[3]
par(mfrow=c(1,1))
plot(X1[x21==1],Y[x21==1],pch=16,col=2,ylim=range(c(0,50)),xlab="Velocidade",ylab="Tempo")
points(X1[x22==1],Y[x22==1],pch=16,col=4)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
legend(850,45,lty=c(1,2),c("Ferramenta A","Ferramenta B"),bty="n",cex=0.8)

EXEMPLO 7 - Regressão segmentada com um ponto de quebra

rm(list=ls())

data<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/piecewise1.txt", h=T)
data$x2 <- with(data, ifelse(x1 <= 18.8, 0, x1 - 18.8))

attach(data)
m1<-lm(y ~ x1 + x2, data)

plot(data$x1, data$y, pch=16)
x1<-1:19
y1<-32.514 - 1.138*x1  
lines(x1, y1, col=2, lwd=2)
x1<-19:40
y1<- -3.8264 + 0.795*x1  
lines(x1, y1, col=2, lwd=2)

fit.model<-m1
source("https://www.ime.usp.br/~giapaula/envel_norm")

par(mfrow=c(2,2))
plot(m1, which=c(1:4), add.smooth=FALSE, pch=20)

require(segmented)
lin.mod<-lm(y~x1, data=data)
seg.mod <- segmented(lin.mod, seg.Z = ~x1, psi=20)

par(mfrow=c(1,1))
plot(data$x1, data$y, col=4, pch=16)
plot(seg.mod, add=T, col=2, lwd=2)

par(mfrow=c(1,2))
plot(seg.mod$fitted.values, seg.mod$residuals, pch=20)
qqnorm(seg.mod$residuals, pch=16)

EXEMPLO 8 - Regressão segmentada com dois pontos de quebra

rm(list=ls())
data<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/piecewise2.txt", h=T)

require(segmented)
lin.mod<-lm(Y~X, data=data)
seg.mod <- segmented(lin.mod, seg.Z = ~X, psi=c(15,30))
seg.mod

plot(data$X,data$Y, pch=16, col=4)
plot(seg.mod, add=T, col=2, lwd=2)

# ou alternativamente 

plot(data$X,data$Y, col=4, pch=16)
x1<-1:15.06
y1<-2.189 + 1.815*x1  
lines(x1,y1, col=2, lwd=2)
x1<-15.06:30.35
y1<- 58.049 -1.909*x1  
lines(x1,y1, col=2, lwd=2)

x1<-30.35:50
y1<- -61.337 +2.032*x1  
lines(x1, y1, col=2, lwd=2)

Comandos R - Exercícios Lista 3


EXERCÍCIO 1 - Produção de processo químico

Y  = produção de um processo químico
X1 = temperatura
X2 = concentração
Arquivo: Exercicio1m.txt (Fonte: Montgomery & Peck (1992); Charnet et al.(2008))
ex1<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio1m.txt", h=T)
attach(ex1)
 
mod1<-lm(Y~X1)
mod1
summary(mod1)
anova(mod1)

mod2<-lm(Y~X2)
mod2
summary(mod2)
anova(mod2)

mod3<-lm(Y~X1+X2)
mod3
summary(mod3)
anova(mod3)

mod4<-lm(Y~X2+X1)
mod4
summary(mod4)
anova(mod4)

X3<-X1*X2
mod5<-lm(Y~X1+X2+X3)
mod5
summary(mod5)
anova(mod5)

mod6<-lm(Y~X2+X1+X3)
mod6
summary(mod6)
anova(mod6)

plot(X1,X2)
cor(X1,X2)

X<-as.matrix(cbind(X1,X2))
rxx<- cor(X)
det(rxx)
eigen(rxx)

resid<- mod3$residuals
resid

par(mfrow=c(2,2))
plot(mod3, which=c(1:4),pch=16, add.smooth=FALSE, id.n = 0)
shapiro.test(mod3$resid)

par(mfrow=c(1,1))
plot(mod3$residuals,pch=16,ylab="Residuals")

par(mfrow=c(1,2))
plot(X1,mod3$residuals,pch=16,ylab="Residuals")
plot(X2,mod3$residuals,pch=16,ylab="Residuals")

par(mfrow=c(1,2))
resparc1<-mod3$residuals + 0.87*X1
resparc2<-mod3$residuals + 2.424*X2
plot(X1,resparc1,pch=16,ylab="Parcial Residuals")
plot(X2,resparc2,pch=16,ylab="Parcial Residuals") 

par(mfrow=c(1,1))
plot(X1*X2,mod3$residuals,pch=16,ylab="Residuals")
 
inflm.mod3 <- influence.measures(mod3)
inflm.mod3
summary(inflm.mod3)

summary(mod3)
anova(mod3)

obspred<-cbind(Y,mod3$fitted.values)
obspred

x1<-c(0,200)
y<-c(117.42, 291.42)
plot(x1,y,type="l",lty=1,ylim=range(c(100,300)),ylab="Y=Produção",
xlab = "X1 = Temperatura", bty="n")
x1<-c(0,200)
y<-c(153.78, 327.78)
lines(x1,y, lty=2)
legend(150,150,lty=c(1,2),c("X2=10","X2=25"), lwd=1, bty="n")
 
x2<-c(0,25)
y<-c(180.18, 240.78)
plot(x2,y,type="l",lty=1,ylim=range(c(100,300)),ylab="Y=Produção",
xlab = "X2 = Concentracao", bty="n")
x2<-c(0,25)
y<-c(206.28, 266.88)
lines(x2,y, lty=2)
legend(17,150,lty=c(1,2),c("X1=100","X1=130"), lwd=1, bty="n")

new<-data.frame(cbind(80,10))
predict(mod3, new,interval="confidence")
predict(mod3, interval="confidence")

require(scatterplot3d)
s3d <- scatterplot3d(X1,X2,Y,type="h",highlight.3d=TRUE,
ngle=55, scale.y =0.7, pch=16)
my.lm <- lm(Y~X1+X2)
s3d$plane3d(my.lm, col=4)

x1 <- seq(80, 160, length=50)
x2 <- seq(10,25,length=50)
 f <- function(x1,x2){r <- 93.18+0.87*x1+2.424*x2}
 y <- outer(x1,x2,f)
 y[is.na(y)] <- 1
par(bg = "white")
persp(x1,x2,y,theta = 30, phi = 20,expand = 0.5,col = "blue",
      ltheta = 120, shade = 0.75, ticktype = "detailed",
      xlab = "X1", ylab = "X2", zlab = "Y")

EXERCÍCIO 2 - Estudo sobre comparação de ferramentas

 Y  = tempo de vida efetivo (em horas)
 X1 = velocidade do torno (rpm)
 X2 = tipo da ferramenta
 Arquivos: (a) Exercicio2ma.txt  (b) Exercicio2mb.txt  (c) Exercicio2mc.txt  
## (a) Considerando ferramentas: A, B, C
 
rm(list=ls())

ex2a<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio2ma.txt", h=T)
attach(ex2a)
 
plot(X1[X2=="B"],Y[X2=="B"],pch="B",col=2,ylim=range(Y),xlab="Velocidade",ylab="Tempo")
points(X1[X2=="A"],Y[X2=="A"],pch="A",col=4)
points(X1[X2=="C"],Y[X2=="C"],pch="C",col=6)
 
mod1<-lm(Y~X1+factor(X2))
summary(mod1)
anova(mod1)

par(mfrow=c(2,2))
plot(mod1, which=c(1:4),pch=16, add.smooth=FALSE)
shapiro.test(mod1$resid)
 
mod2<-lm(Y~X1+factor(X2)+X1*factor(X2)) ## verificando a significancia da interacao
summary(mod2)
 
inflm.mod1 <- influence.measures(mod1)
inflm.mod1
summary(inflm.mod1)
 
x1<-min(X1):max(X1)
ya<-mod1$coef[1]+ mod1$coef[2]*x1
yb<-mod1$coef[1]+ mod1$coef[3]+ mod1$coef[2]*x1
yc<-mod1$coef[1]+ mod1$coef[4]+ mod1$coef[2]*x1

par(mfrow=c(1,1))
plot(X1[X2=="B"],Y[X2=="B"],pch="B",col=2,ylim=range(c(0,50)),xlab="Velocidade",ylab="Tempo")
points(X1[X2=="A"],Y[X2=="A"],pch="A",col=4)
points(X1[X2=="C"],Y[X2=="C"],pch="C",col=6)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
lines(x1,yc,lty=3)
legend(850,45,lty=c(1,2,3),c("Ferramenta A","Ferramenta B","Ferramenta C"),bty="n",cex=0.8)

## (b) Considerando ferramentas: A, B, D
 
ex2b<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio2mb.txt", h=T)
attach(ex2b)
 
plot(X1[X2=="B"],Y[X2=="B"],pch="B",col=2,ylim=range(Y),xlab="Velocidade",ylab="Tempo")
points(X1[X2=="A"],Y[X2=="A"],pch="A",col=4)
points(X1[X2=="D"],Y[X2=="D"],pch="D",col=6)
 
mod1<-lm(Y~X1+factor(X2)+X1*factor(X2))
summary(mod1)

par(mfrow=c(2,2))
plot(mod1, which=c(1:4),pch=16, add.smooth=FALSE)
shapiro.test(mod1$resid)
 
inflm.mod1 <- influence.measures(mod1)
inflm.mod1
summary(inflm.mod1)
 
x1<-min(X1):max(X1)
ya<-mod1$coef[1]+ mod1$coef[2]*x1
yb<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[3]+ mod1$coef[5]*x1
yd<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[4]+ mod1$coef[6]*x1

par(mfrow=c(1,1))
plot(X1[X2=="B"],Y[X2=="B"],pch="B",col=2,ylim=range(c(0,50)),xlab="Velocidade",ylab="Tempo")
points(X1[X2=="A"],Y[X2=="A"],pch="A",col=4)
points(X1[X2=="D"],Y[X2=="D"],pch="D",col=6)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
lines(x1,yd,lty=3)
legend(850,45,lty=c(1,2,3),c("Ferramenta A","Ferramenta B","Ferramenta D"),bty="n",cex=0.8)
 
## (c) Considerando ferramentas: A, B, C, D
 
ex2c<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio2mc.txt",h=T)
attach(ex2c)
 
mod1<-lm(Y~X1+factor(X2)+X1*factor(X2))
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4),pch=16, add.smooth=FALSE)
shapiro.test(mod1$resid)

inflm.mod1 <- influence.measures(mod1)
inflm.mod1
summary(inflm.mod1)

x1<-min(X1):max(X1)
ya<-mod1$coef[1]+ mod1$coef[2]*x1
yb<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[3]+ mod1$coef[6]*x1
yc<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[4]+ mod1$coef[7]*x1
yd<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[5]+ mod1$coef[8]*x1

par(mfrow=c(1,1))
plot(X1[X2=="B"],Y[X2=="B"],pch="B",col=2,ylim=range(c(0,50)),xlab="Velocidade",ylab="Tempo")
points(X1[X2=="A"],Y[X2=="A"],pch="A",col=4)
points(X1[X2=="C"],Y[X2=="C"],pch="C",col=1)
points(X1[X2=="D"],Y[X2=="D"],pch="D",col=3)
lines(x1,ya,lty=1)
lines(x1,yb,lty=2)
lines(x1,yc,lty=3)
lines(x1,yd,lty=4)
legend(850,45,lty=c(1,2,3,4),c("Ferramenta A","Ferramenta B","Ferramenta C",
                               "Ferramenta D"),bty="n",cex=0.8)

EXERCÍCIO 3 - Performance para certo trabalho

Y = performance para o trabalho sob avaliação,
X1 = teste para verificar as habilidades relacionadas ao trabalho
X2 = raça (branca = 0 e outras = 1)
Arquivo: Exercicio3m.txt          Fonte: Montgomery & Peck (1992)            
rm(list=ls())

ex3<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio3m.txt", h=T)
attach(ex3)
 
plot(X1[X2==1],Y[X2==1],pch=16,col=2,ylim=range(c(0,9)),xlab="Teste",ylab="Aptidão")
points(X1[X2==0],Y[X2==0],pch=16,col=4)
legend(0.5,7,pch=c(16,16),col=c(4,2),c("Branca","Outras"),bty="n",cex=0.8)

mod1<-lm(Y~X1+factor(X2)+X1*(factor(X2)))
summary(mod1)

par(mfrow=c(2,2))
plot(mod1, which=c(1:4),pch=16, add.smooth=FALSE, id.n = 0)
shapiro.test(mod1$resid)

inflm.mod1 <- influence.measures(mod1)
inflm.mod1
summary(inflm.mod1)

x1<-0:3
yb<-mod1$coef[1]+ mod1$coef[2]*x1
yo<-mod1$coef[1]+ mod1$coef[2]*x1 + mod1$coef[3] + mod1$coef[4]*x1
par(mfrow=c(1,1))
plot(X1[X2==1],Y[X2==1],pch=16,col=2,ylim=range(c(0,9)),xlab="Teste",ylab="Aptidão")
points(X1[X2==0],Y[X2==0],pch=16,col=4)
lines(x1,yb,lty=1)
lines(x1,yo,lty=2)
legend(0.5,7,lty=c(1,2),c("Branca","Outras"),bty="n",cex=0.8)

EXERCÍCIO 4 - Estudo sobre salário

Y: Salário anual (SAL) [em dólares]
X1: Anos de Experiência (EXP)
X2: Nível Educacional (EDUC) [1 = básico, 2 = médio, 3 = superior]
X3: Função administrativa (MGT) [0 = subordinado, 1 = chefe]
Arquivo: Exercicio4m.txt     Fonte: Neter & Wassermam (1990?)
rm(list=ls())

ex4<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio4m.txt",h=T)
attach(ex4)
 
table(EDUC,MGT)
par(mfrow=c(1,2))
plot(EXP, SAL,pch=16,col=1, xlab="Experiencia (anos)",ylab="Salario Anual")
 
plot(EXP[EDUC==1 & MGT==1],SAL[EDUC==1 & MGT==1],pch="1",col=1,ylim=range(SAL),
               xlim=range(EXP), xlab="Experiencia (anos)",ylab="Salario Anual")
points(EXP[EDUC==2 & MGT==1],SAL[EDUC==2 & MGT==1],pch="2",col=2)
points(EXP[EDUC==3 & MGT==1],SAL[EDUC==3 & MGT==1],pch="3",col=3)
points(EXP[EDUC==1 & MGT==0],SAL[EDUC==1 & MGT==0],pch="4",col=4)
points(EXP[EDUC==2 & MGT==0],SAL[EDUC==2 & MGT==0],pch="5",col=5)
points(EXP[EDUC==3 & MGT==0],SAL[EDUC==3 & MGT==0],pch="6",col=6)

mod1<-lm(SAL~EXP+factor(EDUC)+factor(MGT))
summary(mod1)
par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)
influence.measures(mod1)
summary(influence.measures(mod1))

x1<-min(EXP):max(EXP)
y10<-mod1$coef[1]+mod1$coef[2]*x1                           #EDUC=1 e MGT=0
y20<-mod1$coef[1]+mod1$coef[2]*x1+mod1$coef[3]              #EDUC=2 e MGT=0
y21<-mod1$coef[1]+mod1$coef[2]*x1+mod1$coef[3]+mod1$coef[5] #EDUC=2 e MGT=1
y31<-mod1$coef[1]+mod1$coef[2]*x1+mod1$coef[4]+mod1$coef[5] #EDUC=3 e MGT=1
x6<-min(EXP):5
y30<-mod1$coef[1]+mod1$coef[2]*x6+mod1$coef[4]              #EDUC=3 e MGT=0
x11<-min(EXP):8
y11<-mod1$coef[1]+mod1$coef[2]*x11+mod1$coef[5]             #EDUC=1 e MGT=1

par(mfrow=c(1,1))
plot(EXP[EDUC==1 & MGT==1],SAL[EDUC==1 & MGT==1],pch="1",col=1,ylim=range(SAL),
     xlim=range(EXP), xlab="Experiencia (anos)",ylab="Salario Anual")
points(EXP[EDUC==2 & MGT==1],SAL[EDUC==2 & MGT==1],pch="2",col=2)
points(EXP[EDUC==3 & MGT==1],SAL[EDUC==3 & MGT==1],pch="3",col=3)
points(EXP[EDUC==1 & MGT==0],SAL[EDUC==1 & MGT==0],pch="4",col=4)
points(EXP[EDUC==2 & MGT==0],SAL[EDUC==2 & MGT==0],pch="5",col=5)
points(EXP[EDUC==3 & MGT==0],SAL[EDUC==3 & MGT==0],pch="6",col=6)
lines(x1,y10)
lines(x1,y20)
lines(x6,y30)
lines(x11,y11)
lines(x1,y21)
lines(x1,y31)

# incluindo interação entre EDUC e MGT

mod2<-lm(SAL~EXP+factor(EDUC)+factor(MGT)+factor(EDUC)*factor(MGT))
summary(mod2)

par(mfrow=c(2,2))
plot(mod2, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod2$resid)
summary(influence.measures(mod2))

ex4a<-ex4[-33,]
mod3<-lm(SAL~EXP+factor(EDUC)+factor(MGT)+factor(EDUC)*factor(MGT),data=ex4a)
summary(mod3)

par(mfrow=c(2,2))
plot(mod3, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod3$resid)
summary(influence.measures(mod3))

x1<-min(EXP):max(EXP)
y10<-mod3$coef[1]+mod3$coef[2]*x1                                        #EDUC=1 e MGT=0
y20<-mod3$coef[1]+mod3$coef[2]*x1+mod3$coef[3]                           #EDUC=2 e MGT=0
y21<-mod3$coef[1]+mod3$coef[2]*x1+mod3$coef[3]+mod3$coef[5]+mod3$coef[6] #EDUC=2 e MGT=1
y31<-mod3$coef[1]+mod3$coef[2]*x1+mod3$coef[4]+mod3$coef[5]+mod3$coef[7] #EDUC=3 e MGT=1
x6<-min(EXP):5
y30<-mod3$coef[1]+mod3$coef[2]*x6+mod3$coef[4]                           #EDUC=3 e MGT=0
x11<-min(EXP):8
y11<-mod3$coef[1]+mod3$coef[2]*x11+mod3$coef[5]                          #EDUC=1 e MGT=1


par(mfrow=c(1,1))
plot(EXP[EDUC==1 & MGT==1],SAL[EDUC==1 & MGT==1],pch="1",col=1,ylim=range(SAL),
              xlim=range(EXP), xlab="Experiencia (anos)",ylab="Salario Anual")
points(EXP[EDUC==2 & MGT==1],SAL[EDUC==2 & MGT==1],pch="2",col=2)
points(EXP[EDUC==3 & MGT==1],SAL[EDUC==3 & MGT==1],pch="3",col=3)
points(EXP[EDUC==1 & MGT==0],SAL[EDUC==1 & MGT==0],pch="4",col=4)
points(EXP[EDUC==2 & MGT==0],SAL[EDUC==2 & MGT==0],pch="5",col=5)
points(EXP[EDUC==3 & MGT==0],SAL[EDUC==3 & MGT==0],pch="6",col=6)
lines(x1,y10)
lines(x1,y20)
lines(x6,y30)
lines(x11,y11)
lines(x1,y21)
lines(x1,y31)

EXERCÍCIO 5 - Tensão de bateria

Y = queda de tensão da bateria (em voltagem) de um motor de míssil guiado
X = tempo (em segundos)
rm(list=ls())
ex5<-read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio5m.txt", h=T)
attach(ex5)
 
plot(X,Y,pch=16, col=4)
X2<-X*X
X3<-X2*X
mod1<-lm(Y~X+X2+X3)
summary(mod1)
anova(mod1)

par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod1$resid)

source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod1)

Xb<-X-mean(X)
Xb2<-Xb*Xb
Xb3<-Xb2*Xb
mod2<-lm(Y~Xb+Xb2+Xb3)
vif(mod2)

summary(mod2)
anova(mod2)

par(mfrow=c(2,2))
plot(mod2, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod2$resid)
 
Xb4<-Xb3*Xb
mod3<-lm(Y~Xb+Xb2+Xb3+Xb4)
vif(mod3)
anova(mod3)
summary(mod3)
par(mfrow=c(2,2))
plot(mod3, which=c(1:4), add.smooth=FALSE, pch=20)
shapiro.test(mod2$resid)

inflm.mod3 <- influence.measures(mod3)
inflm.mod3
summary(inflm.mod3)

par(mfrow=c(1,1))
plot(Xb, Y, pch=20, col=4)
lines(Xb, mod3$fitted, type="l", col=2, lwd=2)

pred<-predict(mod3, interval="confidence", level=0.95)
pred1<- predict(mod3, interval="prediction", level=0.95)
matplot(Xb, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4), type = "l",
        ylab = "valores preditos de y", lwd=2)

EXERCÍCIO 6 - Demanda de supervisores

Y = no de supervisores em 27 estabelecimentos industriais
X = no de trabalhadores supervisionados
Arquivo: Exercicio6m.txt 
##  Modelo 1: Y em X

rm(list=ls())
ex6<- read.table("https://docs.ufpr.br/~giolo/CE071/Exemplos/Exercicio6m.txt", h=T)
attach(ex6)
plot(X,Y,pch=16, col=4)
 
mod1<-lm(Y~X)
summary(mod1)
summary(influence.measures(mod1))
 
par(mfrow=c(2,2))
plot(mod1, which=c(1:4), add.smooth=FALSE, pch=16)
shapiro.test(mod1$resid)
 
##  Modelo 2: transformação raiz quadrada em Y

require(MASS)
boxcox(Y~X,data=ex6,plotit=T)
boxcox(Y~X,data=ex6,lam=seq(-0.5,1.5,1/10))

mod2<-lm(sqrt(Y)~X)
summary(mod2)
anova(mod2)
summary(influence.measures(mod2))
 
par(mfrow=c(2,2))
plot(mod2, which=c(1:4), add.smooth=FALSE, pch=16)
shapiro.test(mod2$resid)

##  Modelo 3: MQP (Mínimos Quadrados Ponderados)

wi<-1/(X^2)                             # pesos
mod3<-lm(Y~X,weights=wi)
summary(mod3)
anova(mod3)
summary(influence.measures(mod3))
 
rqwi<-sqrt(wi)
we<-rqwi*mod3$resid
wx<-rqwi*X
wy<-rqwi*mod3$fitted.values
plot(wy,we, pch=16)

par(mfrow=c(1,3))                     
plot(mod1$fitted.values,mod1$resid, pch=16)
plot(mod2$fitted.values,mod2$resid, pch=16)
plot(wy,we, pch=16)

#  Modelo 4: log(Y) e inclusão do termo X2

X2<-X*X
mod4<-lm(log(Y)~X+X2)
source("https://docs.ufpr.br/~giolo/CE071/Exemplos/vif.R")
vif(mod4)

Xb<-X-mean(X)
Xb2<-Xb*Xb
mod5<-lm(log(Y)~Xb+Xb2)
vif(mod5)
summary(mod5)
anova(mod5)

par(mfrow=c(2,2))
plot(mod5, which=c(1:4), add.smooth=FALSE, pch=16)
shapiro.test(mod5$resid)

summary(influence.measures(mod5))

##  resíduos versus valores preditos para os 4 modelos ajustados!

par(mfrow=c(1,4))                     
plot(mod1$fitted.values,mod1$resid, pch=16)
plot(mod2$fitted.values,mod2$resid, pch=16)
plot(wy,we, pch=16)
plot(mod5$fitted.values,mod5$resid, pch=16)


lines(X,logyest, type="l", col=2, lwd=2)
plot(X,log(Y), pch=16) logyest<-4.58+1.439e-03*Xb-1.101e-06*Xb2 lines(X,logyest, type="l", col=2, lwd=2)
yest<-exp(4.58+1.439e-03*Xb-1.101e-06*Xb2)
plot(X,Y, pch=16) 
lines(X,yest, type="l", col=2, lwd=2)

logY<-log(Y)
mod5<-lm(logY~Xb+Xb2) 
pred<-predict(mod5, interval="confidence", level=0.95) 
pred1<- predict(mod5, interval="prediction", level=0.95) 
matplot(Xb, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l", 
        ylab = "valores preditos de log(y)", lwd=2)

pred<- exp(predict(mod5, interval="confidence", level=0.95)) 
pred1<- exp(predict(mod5, interval="prediction", level=0.95)) 
matplot(Xb, cbind(pred, pred1[,-1]), lty=c(1,2,2,3,3), col=c(1,2,2,4,4),type = "l", 
        ylab = "valores preditos de y", lwd=2)